]> git.mxchange.org Git - friendica.git/blob - src/Util/Network.php
Replace deprecated Addon::callHooks with Hook::callAll
[friendica.git] / src / Util / Network.php
1 <?php
2 /**
3  * @file src/Util/Network.php
4  */
5 namespace Friendica\Util;
6
7 use Friendica\Core\Hook;
8 use Friendica\Core\Logger;
9 use Friendica\Core\System;
10 use Friendica\Core\Config;
11 use Friendica\Network\CurlResult;
12 use DOMDocument;
13 use DomXPath;
14
/**
 * Static helpers for outgoing HTTP requests (via cURL) and for validating,
 * filtering and normalising URLs, domains and e-mail addresses.
 */
class Network
{
    /**
     * Query parameter names that are removed by stripTrackingQueryParams().
     *
     * @var string[]
     */
    private static $trackingParams = [
        'utm_source', 'utm_medium', 'utm_term', 'utm_content', 'utm_campaign',
        'wt_mc', 'pk_campaign', 'pk_kwd', 'mc_cid', 'mc_eid',
        'fb_action_ids', 'fb_action_types', 'fb_ref',
        'awesm', 'wtrid',
        'woo_campaign', 'woo_source', 'woo_medium', 'woo_content', 'woo_term',
    ];

    /**
     * Curl wrapper
     *
     * Thin wrapper around fetchUrlFull() that only returns the body of the result.
     * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt")
     * to preserve cookies from one request to the next.
     *
     * @brief Curl wrapper
     * @param string  $url            URL to fetch
     * @param boolean $binary         default false
     *                                TRUE if asked to return binary results (file download)
     * @param integer $redirects      The recursion counter for internal use - default 0
     * @param integer $timeout        Timeout in seconds, default system config value or 60 seconds
     * @param string  $accept_content supply Accept: header with 'accept_content' as the value
     * @param string  $cookiejar      Path to cookie jar file
     *
     * @return string The fetched content
     */
    public static function fetchUrl($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '')
    {
        $ret = self::fetchUrlFull($url, $binary, $redirects, $timeout, $accept_content, $cookiejar);

        return $ret->getBody();
    }

    /**
     * Curl wrapper returning the complete result object.
     *
     * Takes the same parameters as fetchUrl() but returns the CurlResult produced by curl()
     * instead of only its body.
     *
     * @brief Curl wrapper returning the complete result object.
     * @param string  $url            URL to fetch
     * @param boolean $binary         default false
     *                                TRUE if asked to return binary results (file download)
     * @param integer $redirects      The recursion counter for internal use - default 0
     * @param integer $timeout        Timeout in seconds, default system config value or 60 seconds
     * @param string  $accept_content supply Accept: header with 'accept_content' as the value
     * @param string  $cookiejar      Path to cookie jar file
     *
     * @return CurlResult The result of the underlying curl() call
     */
    public static function fetchUrlFull($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '')
    {
        return self::curl(
            $url,
            $binary,
            $redirects,
            [
                'timeout'        => $timeout,
                'accept_content' => $accept_content,
                'cookiejar'      => $cookiejar,
            ]
        );
    }

    /**
     * @brief fetches an URL.
     *
     * Redirects are followed manually by calling this method again with the redirect
     * target; more than 8 redirects yield an error result.
     *
     * @param string  $url       URL to fetch
     * @param boolean $binary    default false
     *                           TRUE if asked to return binary results (file download)
     * @param int     $redirects The recursion counter for internal use - default 0
     * @param array   $opts      (optional parameters) associative array with:
     *                           'accept_content' => supply Accept: header with 'accept_content' as the value
     *                                               (skipped when 'header'/'headers' already carry an Accept line)
     *                           'timeout' => int Timeout in seconds, default system config value or 60 seconds
     *                           'nobody' => only return the header
     *                           'cookiejar' => path to cookie jar file
     *                           'header' => header array
     *                           'headers' => header array, sent in addition to 'header'
     *
     * @return CurlResult
     */
    public static function curl($url, $binary = false, &$redirects = 0, $opts = [])
    {
        $stamp1 = microtime(true);

        $a = \get_app();

        if (strlen($url) > 1000) {
            Logger::log('URL is longer than 1000 characters. Callstack: ' . System::callstack(20), Logger::DEBUG);
            return CurlResult::createErrorCurl(substr($url, 0, 200));
        }

        $parts = parse_url($url);
        if (!is_array($parts)) {
            // parse_url() returns false on seriously malformed URLs; there is nothing to fetch then.
            return CurlResult::createErrorCurl($url);
        }

        $parts['path'] = self::encodePath(isset($parts['path']) ? $parts['path'] : '');
        $url = self::unparseURL($parts);

        if (self::isUrlBlocked($url)) {
            Logger::log('domain of ' . $url . ' is blocked', Logger::DATA);
            return CurlResult::createErrorCurl($url);
        }

        // Check the redirect limit before a handle is created so that no handle is left open.
        if ($redirects > 8) {
            return CurlResult::createErrorCurl($url);
        }

        $ch = @curl_init($url);
        if (!$ch) {
            return CurlResult::createErrorCurl($url);
        }

        $timeout = isset($opts['timeout']) ? $opts['timeout'] : 0;
        self::applyCommonOptions($ch, $a->getUserAgent(), $timeout);

        if (!empty($opts['cookiejar'])) {
            curl_setopt($ch, CURLOPT_COOKIEJAR, $opts['cookiejar']);
            curl_setopt($ch, CURLOPT_COOKIEFILE, $opts['cookiejar']);
        }

        // CURLOPT_FOLLOWLOCATION/CURLOPT_MAXREDIRS are not set: redirects are followed below.

        // CURLOPT_HTTPHEADER replaces the whole header list on every call, so all
        // header sources are merged first and handed to curl exactly once.
        $headers = self::collectRequestHeaders($opts);
        if ($headers) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        $range = intval(Config::get('system', 'curl_range_bytes', 0));

        if ($range > 0) {
            @curl_setopt($ch, CURLOPT_RANGE, '0-' . $range);
        }

        // Without this setting it seems as if some webservers send compressed content
        // This seems to confuse curl so that it shows this uncompressed.
        /// @todo  We could possibly set this value to "gzip" or something similar
        curl_setopt($ch, CURLOPT_ENCODING, '');

        if (!empty($opts['nobody'])) {
            @curl_setopt($ch, CURLOPT_NOBODY, $opts['nobody']);
        }

        if ($binary) {
            @curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
        }

        // don't let curl abort the entire application
        // if it throws any errors.

        $s = @curl_exec($ch);
        $curl_info = @curl_getinfo($ch);

        // Special treatment for HTTP Code 416
        // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/416
        if (($curl_info['http_code'] == 416) && ($range > 0)) {
            @curl_setopt($ch, CURLOPT_RANGE, '');
            $s = @curl_exec($ch);
            $curl_info = @curl_getinfo($ch);
        }

        $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));

        if ($curlResponse->isRedirectUrl()) {
            $redirects++;
            Logger::log('curl: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl());
            @curl_close($ch);
            return self::curl($curlResponse->getRedirectUrl(), $binary, $redirects, $opts);
        }

        @curl_close($ch);

        $a->saveTimestamp($stamp1, 'network');

        return $curlResponse;
    }

    /**
     * @brief Send POST request to $url
     *
     * A redirect answer is followed by posting the same parameters to the redirect
     * target; more than 8 redirects yield an error result.
     *
     * @param string       $url       URL to post
     * @param mixed        $params    array of POST variables
     * @param array|string $headers   HTTP header lines (a single string is treated as one header line)
     * @param integer      $redirects Recursion counter for internal use - default = 0
     * @param integer      $timeout   The timeout in seconds, default system config value or 60 seconds
     *
     * @return CurlResult The content
     */
    public static function post($url, $params, $headers = null, &$redirects = 0, $timeout = 0)
    {
        $stamp1 = microtime(true);

        if (self::isUrlBlocked($url)) {
            Logger::log('post_url: domain of ' . $url . ' is blocked', Logger::DATA);
            return CurlResult::createErrorCurl($url);
        }

        // Check the redirect limit before a handle is created so that no handle is left open.
        if ($redirects > 8) {
            return CurlResult::createErrorCurl($url);
        }

        $a = \get_app();
        $ch = curl_init($url);

        if (!$ch) {
            return CurlResult::createErrorCurl($url);
        }

        Logger::log('post_url: start ' . $url, Logger::DATA);

        self::applyCommonOptions($ch, $a->getUserAgent(), $timeout);
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $params);

        // CURLOPT_HTTPHEADER needs an array, so normalise a single header line or an empty value.
        if (!is_array($headers)) {
            $headers = empty($headers) ? [] : [$headers];
        }

        if (defined('LIGHTTPD') && !in_array('Expect:', $headers)) {
            $headers[] = 'Expect:';
        }

        if ($headers) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        // don't let curl abort the entire application
        // if it throws any errors.

        $s = @curl_exec($ch);
        $curl_info = curl_getinfo($ch);

        $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));

        if ($curlResponse->isRedirectUrl()) {
            $redirects++;
            Logger::log('post_url: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl());
            curl_close($ch);
            return self::post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout);
        }

        curl_close($ch);

        $a->saveTimestamp($stamp1, 'network');

        Logger::log('post_url: end ' . $url, Logger::DATA);

        return $curlResponse;
    }

    /**
     * Applies the options shared by curl() and post() to a cURL handle:
     * header/body return mode, user agent, timeout, certificate checks, proxy and IPv4 resolving.
     *
     * @param resource $ch        cURL handle
     * @param string   $userAgent Value for the User-Agent header
     * @param mixed    $timeout   Timeout in seconds; an empty/zero value selects the
     *                            'system.curl_timeout' config value (default 60)
     *
     * @return void
     */
    private static function applyCommonOptions($ch, $userAgent, $timeout)
    {
        @curl_setopt($ch, CURLOPT_HEADER, true);
        @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        @curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);

        $seconds = intval($timeout);
        if (!$seconds) {
            $seconds = intval(Config::get('system', 'curl_timeout', 60));
        }
        @curl_setopt($ch, CURLOPT_TIMEOUT, $seconds);

        // Peer certificates are only verified when 'system.verifyssl' is enabled.
        $check_cert = Config::get('system', 'verifyssl');
        @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));

        if ($check_cert) {
            @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        }

        // The string casts keep strlen() away from null when the config values are unset.
        $proxy = (string) Config::get('system', 'proxy');

        if (strlen($proxy)) {
            @curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
            @curl_setopt($ch, CURLOPT_PROXY, $proxy);
            $proxyuser = (string) Config::get('system', 'proxyuser');

            if (strlen($proxyuser)) {
                @curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
            }
        }

        if (Config::get('system', 'ipv4_resolve', false)) {
            curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
        }
    }

    /**
     * Builds the request header list for curl() from the 'header', 'headers' and
     * 'accept_content' options.
     *
     * @param array $opts Options array as passed to curl()
     *
     * @return string[] Header lines, possibly empty
     */
    private static function collectRequestHeaders(array $opts)
    {
        $headers = [];

        foreach (['header', 'headers'] as $key) {
            if (!empty($opts[$key])) {
                $headers = array_merge($headers, array_values((array) $opts[$key]));
            }
        }

        if (!empty($opts['accept_content'])) {
            $hasAccept = false;
            foreach ($headers as $line) {
                if (is_string($line) && (stripos($line, 'Accept:') === 0)) {
                    $hasAccept = true;
                    break;
                }
            }

            // An explicitly supplied Accept line wins, so the header is never sent twice.
            if (!$hasAccept) {
                array_unshift($headers, 'Accept: ' . $opts['accept_content']);
            }
        }

        return $headers;
    }

    /**
     * Raw-url-encodes every path segment that contains multibyte characters and
     * leaves all other segments untouched.
     *
     * @param string $path URL path
     *
     * @return string The path with multibyte segments encoded
     */
    private static function encodePath($path)
    {
        $segments = [];

        foreach (explode('/', $path) as $segment) {
            // Byte length and character length only differ for multibyte content.
            $segments[] = (strlen($segment) <> mb_strlen($segment)) ? rawurlencode($segment) : $segment;
        }

        return implode('/', $segments);
    }

    /**
     * @brief Check URL to see if it's real
     *
     * Take a URL from the wild, prepend http:// if necessary
     * and check DNS to see if it's real (or check if is a valid IP address)
     *
     * @param string $url The URL to be validated
     * @return string|boolean The actual working URL, false else
     */
    public static function isUrlValid($url)
    {
        if (Config::get('system', 'disable_url_validation')) {
            return $url;
        }

        // no naked subdomains (allow localhost for tests)
        if (strpos($url, '.') === false && strpos($url, '/localhost/') === false) {
            return false;
        }

        // Test for the scheme itself: a bare "http" prefix would also match hosts like "httpbin.org".
        if (!preg_match('#^https?://#i', $url)) {
            $url = 'http://' . $url;
        }

        /// @TODO Really suppress function outcomes? Why not find them + debug them?
        $h = @parse_url($url);

        if (empty($h['host'])) {
            return false;
        }

        if (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP)) {
            return $url;
        }

        return false;
    }

    /**
     * @brief Checks that email is an actual resolvable internet address
     *
     * @param string $addr The email address
     * @return boolean True if the domain part resolves (or validation is disabled), false if it doesn't
     */
    public static function isEmailDomainValid($addr)
    {
        if (Config::get('system', 'disable_email_validation')) {
            return true;
        }

        // A missing "@" and an "@" at the very first position are both rejected.
        $at = strpos($addr, '@');
        if (!$at) {
            return false;
        }

        $h = substr($addr, $at + 1);
        if (!$h) {
            return false;
        }

        // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
        if (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP)) {
            return true;
        }

        if (@dns_get_record($h, DNS_CNAME + DNS_MX)) {
            return true;
        }

        return false;
    }

    /**
     * @brief Check if URL is allowed
     *
     * Check $url against our list of allowed sites,
     * wildcards allowed. If allowed_sites is unset return true;
     *
     * @param string $url URL which get tested
     * @return boolean True if url is allowed otherwise return false
     */
    public static function isUrlAllowed($url)
    {
        $h = @parse_url($url);

        if (! $h) {
            return false;
        }

        $str_allowed = Config::get('system', 'allowed_sites');
        if (! $str_allowed) {
            return true;
        }

        // URLs without a host part are matched as an empty host name.
        $host = isset($h['host']) ? strtolower($h['host']) : '';

        // always allow our own site (SERVER_NAME is not necessarily set, e.g. on the command line)
        if (isset($_SERVER['SERVER_NAME']) && ($host === strtolower($_SERVER['SERVER_NAME']))) {
            return true;
        }

        return self::isDomainAllowed($host, explode(',', $str_allowed));
    }

    /**
     * Checks if the provided url domain is on the domain blocklist.
     * Returns true if it is, false if it is not, if the URL has no parsable host
     * or if no blocklist is configured.
     *
     * @param string $url The url to check the domain from
     *
     * @return boolean
     */
    public static function isUrlBlocked($url)
    {
        $host = @parse_url($url, PHP_URL_HOST);
        if (!$host) {
            return false;
        }

        $domain_blocklist = Config::get('system', 'blocklist', []);
        if (!$domain_blocklist || !is_array($domain_blocklist)) {
            return false;
        }

        foreach ($domain_blocklist as $domain_block) {
            // Entries without a 'domain' key cannot match anything.
            if (isset($domain_block['domain']) && (strcasecmp($domain_block['domain'], $host) === 0)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @brief Check if email address is allowed to register here.
     *
     * Compare against our list (wildcards allowed).
     *
     * @param  string $email email address
     * @return boolean False if not allowed or if the address has no domain part,
     *    true if allowed or if allowed list is not configured
     */
    public static function isEmailDomainAllowed($email)
    {
        // Without an "@" there is no domain part at all.
        $at = strpos($email, '@');
        if ($at === false) {
            return false;
        }

        $domain = strtolower((string) substr($email, $at + 1));
        if (!$domain) {
            return false;
        }

        $str_allowed = Config::get('system', 'allowed_email', '');
        if (empty($str_allowed)) {
            return true;
        }

        $allowed = explode(',', $str_allowed);

        return self::isDomainAllowed($domain, $allowed);
    }

    /**
     * Checks for the existence of a domain in a domain list
     *
     * Every list entry is trimmed and lower-cased, then compared with $domain both as a
     * shell wildcard pattern (when fnmatch() is available) and literally. $domain itself
     * is used as passed in.
     *
     * @brief Checks for the existence of a domain in a domain list
     * @param string $domain
     * @param array  $domain_list
     * @return boolean
     */
    public static function isDomainAllowed($domain, array $domain_list)
    {
        $domain = (string) $domain;
        $wildcards = function_exists('fnmatch');

        foreach ($domain_list as $item) {
            $pat = strtolower(trim($item));
            if (($wildcards && fnmatch($pat, $domain)) || ($pat === $domain)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @brief Looks up an avatar URL for an email address
     *
     * Calls the 'avatar_lookup' hook with the email address and a size of 300. When the
     * hook data is not marked as successful afterwards, the default picture below the
     * base URL is used.
     *
     * @param string $email The email address
     * @return string The avatar URL
     */
    public static function lookupAvatarByEmail($email)
    {
        $avatar = [
            'size'    => 300,
            'email'   => $email,
            'url'     => '',
            'success' => false,
        ];

        Hook::callAll('avatar_lookup', $avatar);

        if (! $avatar['success']) {
            $avatar['url'] = System::baseUrl() . '/images/person-300.jpg';
        }

        Logger::log('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], Logger::DEBUG);
        return $avatar['url'];
    }

    /**
     * @brief Remove Google Analytics and other tracking platforms params from URL
     *
     * The query string is filtered pair by pair; all remaining pairs, the part before
     * the query and the fragment are kept byte for byte. URLs without any tracking
     * parameter are returned unchanged.
     *
     * @param string $url Any user-submitted URL that may contain tracking params
     * @return string The same URL stripped of tracking parameters
     */
    public static function stripTrackingQueryParams($url)
    {
        $urldata = parse_url($url);
        if (empty($urldata['query'])) {
            return $url;
        }

        $queryStart = strpos($url, '?');
        if ($queryStart === false) {
            return $url;
        }

        $fragmentStart = strpos($url, '#', $queryStart);
        $queryEnd = ($fragmentStart === false) ? strlen($url) : $fragmentStart;

        $base     = substr($url, 0, $queryStart);
        $query    = (string) substr($url, $queryStart + 1, $queryEnd - $queryStart - 1);
        $fragment = (string) substr($url, $queryEnd);

        $kept = [];
        $removed = false;

        foreach (explode('&', $query) as $pair) {
            // Parameter name: everything before the first "=", without an array suffix like "[]".
            $name = urldecode(strstr($pair . '=', '=', true));
            $bracket = strpos($name, '[');
            if ($bracket) {
                $name = substr($name, 0, $bracket);
            }

            if (in_array($name, self::$trackingParams, true)) {
                $removed = true;
                continue;
            }

            $kept[] = $pair;
        }

        if (!$removed) {
            return $url;
        }

        $query = implode('&', $kept);

        return $base . (($query !== '') ? '?' . $query : '') . $fragment;
    }

    /**
     * @brief Returns the original URL of the provided URL
     *
     * This function strips tracking query params and follows redirections, either
     * through HTTP code or meta refresh tags. Stops after 10 redirections.
     *
     * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
     *
     * @see ParseUrl::getSiteinfo
     *
     * @param string $url       A user-submitted URL
     * @param int    $depth     The current redirection recursion level (internal)
     * @param bool   $fetchbody Whether to fetch the body or not after the HEAD requests
     * @return string A canonical URL
     */
    public static function finalUrl($url, $depth = 1, $fetchbody = false)
    {
        $a = \get_app();

        $url = self::stripTrackingQueryParams($url);

        if ($depth > 10) {
            return $url;
        }

        $url = trim($url, "'");

        // First step: header-only request to detect HTTP redirects.
        $stamp1 = microtime(true);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_NOBODY, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());

        curl_exec($ch);
        $curl_info = @curl_getinfo($ch);
        $http_code = $curl_info['http_code'];
        curl_close($ch);

        $a->saveTimestamp($stamp1, "network");

        if ($http_code == 0) {
            return $url;
        }

        // 303, 307 and 308 are redirects with a target as well, not only 301 and 302.
        if (in_array(intval($http_code), [301, 302, 303, 307, 308], true)) {
            if (!empty($curl_info['redirect_url'])) {
                return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
            } elseif (!empty($curl_info['location'])) {
                return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
            }
        }

        // Check for redirects in the meta elements of the body if there are no redirects in the header.
        if (!$fetchbody) {
            return self::finalUrl($url, ++$depth, true);
        }

        // if the file is too large then exit
        if ($curl_info["download_content_length"] > 1000000) {
            return $url;
        }

        // if it isn't a HTML file then exit
        if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
            return $url;
        }

        // Second step: fetch the body to look for a meta refresh.
        $stamp1 = microtime(true);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOBODY, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());

        $body = curl_exec($ch);
        curl_close($ch);

        $a->saveTimestamp($stamp1, "network");

        if (trim($body) == "") {
            return $url;
        }

        // Check for redirect in meta elements
        $doc = new DOMDocument();
        @$doc->loadHTML($body);

        $xpath = new DomXPath($doc);

        $list = $xpath->query("//meta[@content]");
        foreach ($list as $node) {
            $attr = [];
            if ($node->attributes->length) {
                foreach ($node->attributes as $attribute) {
                    $attr[$attribute->name] = $attribute->value;
                }
            }

            // The http-equiv value is matched case-insensitively ("Refresh" is common).
            if (!isset($attr["http-equiv"]) || (strtolower($attr["http-equiv"]) != 'refresh')) {
                continue;
            }

            foreach (explode(";", $attr["content"]) as $value) {
                // The usual form is "0; url=...", so the part has to be trimmed before testing it.
                $value = trim($value);
                if (substr(strtolower($value), 0, 4) == "url=") {
                    $target = trim(substr($value, 4), " \t\n\r\0\x0B'\"");
                    return self::finalUrl($target, ++$depth);
                }
            }
        }

        return $url;
    }

    /**
     * @brief Find the matching part between two url
     *
     * Both URLs are passed through Strings::normaliseLink() first. Scheme, host and port
     * have to be identical, otherwise an empty string is returned. The result consists of
     * scheme, host, port and the leading path segments that both URLs share, again passed
     * through Strings::normaliseLink().
     *
     * @param string $url1
     * @param string $url2
     * @return string The matching part
     */
    public static function getUrlMatch($url1, $url2)
    {
        if (($url1 == "") || ($url2 == "")) {
            return "";
        }

        $url1 = Strings::normaliseLink($url1);
        $url2 = Strings::normaliseLink($url2);

        $parts1 = parse_url($url1);
        $parts2 = parse_url($url2);

        if (!isset($parts1["host"]) || !isset($parts2["host"])) {
            return "";
        }

        // Missing or empty components count as an empty string on both sides.
        $common = [];
        foreach (['scheme', 'host', 'port'] as $key) {
            $value1 = empty($parts1[$key]) ? '' : (string) $parts1[$key];
            $value2 = empty($parts2[$key]) ? '' : (string) $parts2[$key];

            if ($value1 !== $value2) {
                return "";
            }

            $common[$key] = $value1;
        }

        $match = $common['scheme'] . "://" . $common['host'];

        if ($common['port'] !== '') {
            $match .= ":" . $common['port'];
        }

        $pathparts1 = explode("/", empty($parts1["path"]) ? '' : $parts1["path"]);
        $pathparts2 = explode("/", empty($parts2["path"]) ? '' : $parts2["path"]);

        // Collect the leading segments both paths share, stopping at the first difference
        // or at the end of the shorter path.
        $limit = min(count($pathparts1), count($pathparts2));
        $path = "";
        for ($i = 0; ($i < $limit) && ($pathparts1[$i] === $pathparts2[$i]); $i++) {
            $path .= $pathparts1[$i] . "/";
        }

        $match .= $path;

        return Strings::normaliseLink($match);
    }

    /**
     * @brief Glue url parts together
     *
     * Counterpart of parse_url(): builds a URL from the keys 'scheme', 'user', 'pass',
     * 'host', 'port', 'path', 'query' and 'fragment'. Missing keys are left out.
     *
     * @param array $parsed URL parts
     *
     * @return string The glued URL
     */
    public static function unparseURL($parsed)
    {
        $get = function ($key) use ($parsed) {
            return isset($parsed[$key]) ? $parsed[$key] : null;
        };

        $pass      = $get('pass');
        $user      = $get('user');
        $userinfo  = ($pass !== null) ? "$user:$pass" : $user;
        $port      = $get('port');

        // Cast to string so that missing components never reach strlen() as null.
        $scheme    = (string) $get('scheme');
        $query     = (string) $get('query');
        $fragment  = (string) $get('fragment');

        $authority = (($userinfo !== null) ? $userinfo . "@" : '') .
            $get('host') .
            ($port ? ":$port" : '');

        return (strlen($scheme) ? $scheme . ":" : '') .
            (strlen($authority) ? "//" . $authority : '') .
            $get('path') .
            (strlen($query) ? "?" . $query : '') .
            (strlen($fragment) ? "#" . $fragment : '');
    }
}