]> git.mxchange.org Git - friendica.git/blob - src/Util/Network.php
Move post/curl/fetchUrl/fetchUrlFull to own class "Network\HTTPRequest"
[friendica.git] / src / Util / Network.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2020, Friendica
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Util;
23
24 use DOMDocument;
25 use DomXPath;
26 use Friendica\Core\Hook;
27 use Friendica\Core\Logger;
28 use Friendica\Core\System;
29 use Friendica\DI;
30
/**
 * Static helpers for validating, comparing and rewriting URLs and e-mail
 * domains, resolving redirects, and handling HTTP cache validation headers.
 */
class Network
{
    /**
     * Return raw post data from a post request
     *
     * @return string|false The request body, false if the input stream could not be read
     */
    public static function postdata()
    {
        return file_get_contents('php://input');
    }

    /**
     * Check URL to see if it's real
     *
     * Take a URL from the wild, prepend http:// if necessary
     * and check DNS to see if it's real (or check if is a valid IP address)
     *
     * @param string $url The URL to be validated
     * @return string|boolean The actual working URL, false else
     * @throws \Friendica\Network\HTTPException\InternalServerErrorException
     */
    public static function isUrlValid(string $url)
    {
        if (DI::config()->get('system', 'disable_url_validation')) {
            return $url;
        }

        // no naked subdomains (allow localhost for tests)
        if (strpos($url, '.') === false && strpos($url, '/localhost/') === false) {
            return false;
        }

        // Only a real "http://" or "https://" prefix counts as a scheme. Testing
        // just the first four characters would leave hosts such as "httpbin.org"
        // without a scheme, parse_url() would then find no host and the URL
        // would be rejected.
        if (!preg_match('#^https?://#i', $url)) {
            $url = 'http://' . $url;
        }

        /// @TODO Really suppress function outcomes? Why not find them + debug them?
        $h = @parse_url($url);
        if (empty($h['host'])) {
            return false;
        }

        if (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP)) {
            return $url;
        }

        return false;
    }

    /**
     * Checks that email is an actual resolvable internet address
     *
     * @param string $addr The email address
     * @return boolean True if it's a valid email address, false if it's not
     */
    public static function isEmailDomainValid(string $addr)
    {
        if (DI::config()->get('system', 'disable_email_validation')) {
            return true;
        }

        // Rejects a missing "@" (false) as well as an empty local part (position 0)
        $at = strpos($addr, '@');
        if (!$at) {
            return false;
        }

        $h = substr($addr, $at + 1);
        if (!$h) {
            return false;
        }

        // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
        if (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP)) {
            return true;
        }

        return (bool)@dns_get_record($h, DNS_CNAME + DNS_MX);
    }

    /**
     * Check if URL is allowed
     *
     * Check $url against our list of allowed sites,
     * wildcards allowed. If allowed_sites is unset return true;
     *
     * @param string $url URL which get tested
     * @return boolean True if url is allowed otherwise return false
     */
    public static function isUrlAllowed(string $url)
    {
        $h = @parse_url($url);
        if (!$h) {
            return false;
        }

        $str_allowed = DI::config()->get('system', 'allowed_sites');
        if (!$str_allowed) {
            return true;
        }

        // Relative URLs have no host component
        $host = strtolower($h['host'] ?? '');

        // always allow our own site (SERVER_NAME is not set when running from the command line)
        $own_host = strtolower((string)($_SERVER['SERVER_NAME'] ?? ''));
        if ($host !== '' && $host === $own_host) {
            return true;
        }

        $fnmatch = function_exists('fnmatch');

        foreach (explode(',', $str_allowed) as $entry) {
            $pat = strtolower(trim($entry));
            if (($fnmatch && fnmatch($pat, $host)) || ($pat === $host)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Checks if the provided url domain is on the domain blocklist.
     *
     * Returns true if the host matches a blocklist entry (wildcards allowed).
     * Returns false if it does not match, if no blocklist is configured, or if
     * no host can be extracted from the URL (relative or malformed URL).
     *
     * @param string $url The url to check the domain from
     *
     * @return boolean
     */
    public static function isUrlBlocked(string $url)
    {
        $host = @parse_url($url, PHP_URL_HOST);
        if (!$host) {
            return false;
        }

        $domain_blocklist = DI::config()->get('system', 'blocklist', []);
        if (!$domain_blocklist) {
            return false;
        }

        $host = strtolower($host);

        foreach ($domain_blocklist as $domain_block) {
            // An entry without a domain can never match a non-empty host
            $pattern = strtolower($domain_block['domain'] ?? '');
            if ($pattern !== '' && fnmatch($pattern, $host)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check if email address is allowed to register here.
     *
     * Compare against our list (wildcards allowed).
     *
     * @param  string $email email address
     * @return boolean False if not allowed (or if the address has no domain part),
     *                       true if allowed or if allowed list is not configured
     * @throws \Friendica\Network\HTTPException\InternalServerErrorException
     */
    public static function isEmailDomainAllowed(string $email)
    {
        // Without this check a missing "@" would make strpos() return false,
        // and "false + 1" would silently cut off the first character instead.
        $at = strpos($email, '@');
        if ($at === false) {
            return false;
        }

        $domain = strtolower(substr($email, $at + 1));
        if (!$domain) {
            return false;
        }

        $str_allowed = DI::config()->get('system', 'allowed_email', '');
        if (empty($str_allowed)) {
            return true;
        }

        return self::isDomainAllowed($domain, explode(',', $str_allowed));
    }

    /**
     * Checks for the existence of a domain in a domain list
     *
     * List entries are trimmed and lower-cased before they are compared;
     * shell wildcards (as understood by fnmatch()) are allowed in entries.
     *
     * @param string $domain
     * @param array  $domain_list
     * @return boolean
     */
    public static function isDomainAllowed(string $domain, array $domain_list)
    {
        foreach ($domain_list as $item) {
            $pat = strtolower(trim($item));
            if (fnmatch($pat, $domain) || ($pat == $domain)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Look up an avatar URL for an email address
     *
     * Calls the "avatar_lookup" hook with the lookup data. If the "success"
     * flag is still false afterwards, the default person image below the
     * base URL is used.
     *
     * @param string $email The email address
     * @return string The avatar URL
     */
    public static function lookupAvatarByEmail(string $email)
    {
        $avatar = [
            'size'    => 300,
            'email'   => $email,
            'url'     => '',
            'success' => false,
        ];

        Hook::callAll('avatar_lookup', $avatar);

        if (!$avatar['success']) {
            $avatar['url'] = DI::baseUrl() . '/images/person-300.jpg';
        }

        Logger::log('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], Logger::DEBUG);
        return $avatar['url'];
    }

    /**
     * Remove Google Analytics and other tracking platforms params from URL
     *
     * Only the query string is touched and the remaining parameters keep
     * their original encoding. A URL without tracking parameters is returned
     * unchanged.
     *
     * @param string $url Any user-submitted URL that may contain tracking params
     * @return string The same URL stripped of tracking parameters
     */
    public static function stripTrackingQueryParams(string $url)
    {
        $query = parse_url($url, PHP_URL_QUERY);
        if (empty($query)) {
            return $url;
        }

        $tracking_params = [
            "utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
            "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
            "fb_action_ids", "fb_action_types", "fb_ref",
            "awesm", "wtrid",
            "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term",
        ];

        $kept    = [];
        $removed = false;

        foreach (explode('&', $query) as $pair) {
            // Parameter name without value and without array suffix ("utm_source[]")
            $name = urldecode(explode('=', $pair, 2)[0]);
            $name = explode('[', $name, 2)[0];

            if (in_array($name, $tracking_params, true)) {
                $removed = true;
            } elseif ($pair !== '') {
                $kept[] = $pair;
            }
        }

        if (!$removed) {
            return $url;
        }

        // Rebuild the URL around the filtered query: everything before the "?"
        // and the fragment (if any) are copied verbatim.
        $query_start    = strpos($url, '?');
        $fragment_start = strpos($url, '#', $query_start);

        $head = substr($url, 0, $query_start);
        $tail = ($fragment_start === false) ? '' : substr($url, $fragment_start);

        return $head . ($kept ? '?' . implode('&', $kept) : '') . $tail;
    }

    /**
     * Add a missing base path (scheme, host and port) to a given url
     *
     * The URL is returned unchanged if it already has a scheme, if it is
     * empty or unparsable, or if the base path has no scheme.
     *
     * @param string $url
     * @param string $basepath
     * @return string url
     */
    public static function addBasePath(string $url, string $basepath)
    {
        if (empty($url) || !empty(parse_url($url, PHP_URL_SCHEME)) || empty(parse_url($url))) {
            return $url;
        }

        $base_parts = parse_url($basepath);
        if (empty($base_parts['scheme'])) {
            return $url;
        }

        // Keep the port as well, otherwise a base like "http://host:8080"
        // would produce URLs pointing at the default port.
        $base = [
            'scheme' => $base_parts['scheme'],
            'host'   => $base_parts['host'] ?? null,
            'port'   => $base_parts['port'] ?? null,
        ];

        // parse_url() returns false on seriously malformed input
        $relative = parse_url('/' . ltrim($url, '/')) ?: [];

        return self::unparseURL(array_merge($base, $relative));
    }

    /**
     * Returns the original URL of the provided URL
     *
     * This function strips tracking query params and follows redirections, either
     * through HTTP code or meta refresh tags. Stops after 10 redirections.
     *
     * @todo  Remove the $fetchbody parameter that generates an extraneous HEAD request
     *
     * @see   ParseUrl::getSiteinfo
     *
     * @param string $url       A user-submitted URL
     * @param int    $depth     The current redirection recursion level (internal)
     * @param bool   $fetchbody Whether to fetch the body or not after the HEAD requests
     * @return string A canonical URL
     * @throws \Friendica\Network\HTTPException\InternalServerErrorException
     */
    public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
    {
        $a = DI::app();

        $url = self::stripTrackingQueryParams($url);

        if ($depth > 10) {
            return $url;
        }

        $url = trim($url, "'");

        $stamp1 = microtime(true);

        // First pass: HEAD request to detect redirects in the HTTP headers
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_NOBODY, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());

        curl_exec($ch);
        $curl_info = @curl_getinfo($ch);
        if (!is_array($curl_info)) {
            $curl_info = [];
        }
        $http_code = (int)($curl_info['http_code'] ?? 0);
        curl_close($ch);

        DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());

        if ($http_code == 0) {
            return $url;
        }

        // 303, 307 and 308 are redirects too, not only 301 and 302
        if (in_array($http_code, [301, 302, 303, 307, 308], true)) {
            if (!empty($curl_info['redirect_url'])) {
                return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
            } elseif (!empty($curl_info['location'])) {
                return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
            }
        }

        // Check for redirects in the meta elements of the body if there are no redirects in the header.
        if (!$fetchbody) {
            return self::finalUrl($url, ++$depth, true);
        }

        // if the file is too large then exit
        if (($curl_info['download_content_length'] ?? 0) > 1000000) {
            return $url;
        }

        // if it isn't a HTML file then exit
        if (!empty($curl_info['content_type']) && !strstr(strtolower($curl_info['content_type']), "html")) {
            return $url;
        }

        $stamp1 = microtime(true);

        // Second pass: fetch the body to look for a meta refresh
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOBODY, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());

        $body = curl_exec($ch);
        curl_close($ch);

        DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());

        // curl_exec() returns false on failure
        if (trim((string)$body) == "") {
            return $url;
        }

        // Check for redirect in meta elements
        $doc = new DOMDocument();
        @$doc->loadHTML($body);

        $xpath = new DomXPath($doc);

        foreach ($xpath->query("//meta[@content]") as $node) {
            // The keyword is case-insensitive, "Refresh" is a common spelling
            if (strcasecmp($node->getAttribute('http-equiv'), 'refresh') !== 0) {
                continue;
            }

            foreach (explode(";", $node->getAttribute('content')) as $value) {
                // The usual form is "0; url=..." - drop the blank after the semicolon
                $value = trim($value);
                if (strtolower(substr($value, 0, 4)) !== "url=") {
                    continue;
                }

                $target = trim(substr($value, 4), " \t\n\r\"'");
                if ($target !== '') {
                    return self::finalUrl($target, ++$depth);
                }
            }
        }

        return $url;
    }

    /**
     * Find the matching part between two url
     *
     * Scheme, host and port have to be equal, otherwise an empty string is
     * returned. The path is compared segment by segment and the common
     * leading segments are appended to the result.
     *
     * @param string $url1
     * @param string $url2
     * @return string The matching part
     */
    public static function getUrlMatch(string $url1, string $url2)
    {
        if (($url1 == "") || ($url2 == "")) {
            return "";
        }

        $parts1 = parse_url(Strings::normaliseLink($url1));
        $parts2 = parse_url(Strings::normaliseLink($url2));

        if (!isset($parts1["host"]) || !isset($parts2["host"])) {
            return "";
        }

        // Missing or empty components are treated as an empty string
        $component = static function (array $parts, string $key) {
            return empty($parts[$key]) ? '' : $parts[$key];
        };

        foreach (['scheme', 'host', 'port'] as $key) {
            if ($component($parts1, $key) != $component($parts2, $key)) {
                return "";
            }
        }

        $match = $component($parts1, 'scheme') . "://" . $component($parts1, 'host');

        $port = $component($parts1, 'port');
        if ($port) {
            $match .= ":" . $port;
        }

        $pathparts1 = explode("/", $component($parts1, 'path'));
        $pathparts2 = explode("/", $component($parts2, 'path'));

        // Collect the leading path segments both URLs have in common. The loop
        // deliberately runs past the end of the first path (missing segments
        // count as ''); the surplus slashes are left to normaliseLink().
        $path  = "";
        $limit = count($pathparts1) + 1;
        for ($i = 0; $i <= $limit; $i++) {
            $segment1 = $pathparts1[$i] ?? '';
            $segment2 = $pathparts2[$i] ?? '';

            if ($segment1 != $segment2) {
                break;
            }

            $path .= $segment1 . "/";
        }

        return Strings::normaliseLink($match . $path);
    }

    /**
     * Glue url parts together
     *
     * Counterpart of parse_url(): missing components are simply left out.
     *
     * @param array $parsed URL parts
     *
     * @return string The glued URL
     */
    public static function unparseURL(array $parsed)
    {
        // Cast to string so that absent components never reach strlen() as null
        $scheme   = (string)($parsed['scheme'] ?? '');
        $host     = (string)($parsed['host'] ?? '');
        $path     = (string)($parsed['path'] ?? '');
        $query    = (string)($parsed['query'] ?? '');
        $fragment = (string)($parsed['fragment'] ?? '');

        $port = $parsed['port'] ?? null;
        $user = $parsed['user'] ?? null;
        $pass = $parsed['pass'] ?? null;

        $userinfo  = ($pass !== null) ? "$user:$pass" : $user;
        $authority = ($userinfo !== null ? $userinfo . "@" : '')
            . $host
            . ($port ? ":$port" : '');

        return ($scheme !== '' ? $scheme . ":" : '')
            . ($authority !== '' ? "//" . $authority : '')
            . $path
            . ($query !== '' ? "?" . $query : '')
            . ($fragment !== '' ? "#" . $fragment : '');
    }

    /**
     * Switch the scheme of an url between http and https
     *
     * Only the leading scheme is replaced, URLs embedded in the path or the
     * query string stay untouched. URLs with any other scheme, or without a
     * scheme, are returned unchanged.
     *
     * @param string $url URL
     *
     * @return string switched URL
     */
    public static function switchScheme(string $url)
    {
        $scheme = parse_url($url, PHP_URL_SCHEME);
        if (empty($scheme)) {
            return $url;
        }

        if (strncasecmp($url, 'http://', 7) === 0) {
            return 'https://' . substr($url, 7);
        }

        if (strncasecmp($url, 'https://', 8) === 0) {
            return 'http://' . substr($url, 8);
        }

        return $url;
    }

    /**
     * Adds query string parameters to the provided URI. Replace the value of existing keys.
     *
     * @param string $path
     * @param array  $additionalParams Associative array of parameters
     * @return string
     */
    public static function appendQueryParam(string $path, array $additionalParams)
    {
        // parse_url() returns false on seriously malformed input
        $parsed = parse_url($path) ?: [];

        $params = [];
        if (!empty($parsed['query'])) {
            parse_str($parsed['query'], $params);
        }

        $parsed['query'] = http_build_query(array_merge($params, $additionalParams));

        return self::unparseURL($parsed);
    }

    /**
     * Generates ETag and Last-Modified response headers and checks them against
     * If-None-Match and If-Modified-Since request headers if present.
     *
     * Blocking function, sends 304 headers and exits if check passes.
     *
     * @param string $etag          The page etag
     * @param string $last_modified The page last modification UTC date
     * @throws \Exception
     */
    public static function checkEtagModified(string $etag, string $last_modified)
    {
        $last_modified = DateTimeFormat::utc($last_modified, 'D, d M Y H:i:s') . ' GMT';

        /**
         * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
         */
        $if_none_match     = filter_input(INPUT_SERVER, 'HTTP_IF_NONE_MATCH');
        $if_modified_since = filter_input(INPUT_SERVER, 'HTTP_IF_MODIFIED_SINCE');
        $flag_not_modified = null;
        if ($if_none_match) {
            // Strip the weak prefix and the quotes: W/"abc", "abc" and abc all give abc.
            // Fall back to the raw value if the pattern doesn't match.
            $result = [];
            $etagTrimmed = preg_match('/^(?:W\/)?"?([^"]+)"?$/i', $etag, $result) ? $result[1] : $etag;

            // Lazy exact ETag match, could check weak/strong ETags.
            // An empty needle must never count as a match.
            $flag_not_modified = $if_none_match == '*'
                || ($etagTrimmed !== '' && strpos($if_none_match, $etagTrimmed) !== false);
        }

        if ($if_modified_since && (!$if_none_match || $flag_not_modified)) {
            // Lazy exact Last-Modified match, could check If-Modified-Since validity
            $flag_not_modified = $if_modified_since == $last_modified;
        }

        header('Etag: ' . $etag);
        header('Last-Modified: ' . $last_modified);

        if ($flag_not_modified) {
            header("HTTP/1.1 304 Not Modified");
            exit;
        }
    }
}