src/Util/Network.php

   1 <?php
   2 /**
   3  * @file src/Util/Network.php
   4  */
   5 namespace Friendica\Util;
   6
   7 use Friendica\Core\Hook;
   8 use Friendica\Core\Logger;
   9 use Friendica\Core\System;
  10 use Friendica\Core\Config;
  11 use Friendica\Network\CurlResult;
  12 use DOMDocument;
  13 use DomXPath;
  14
  15 class Network
  16 {
  17         /**
  18          * Curl wrapper
  19          *
  20          * If binary flag is true, return binary results.
  21          * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt")
  22          * to preserve cookies from one request to the next.
  23          *
  24          * @brief Curl wrapper
  25          * @param string  $url            URL to fetch
  26          * @param boolean $binary         default false
  27          *                                TRUE if asked to return binary results (file download)
  28          * @param integer $redirects      The recursion counter for internal use - default 0
  29          * @param integer $timeout        Timeout in seconds, default system config value or 60 seconds
  30          * @param string  $accept_content supply Accept: header with 'accept_content' as the value
  31          * @param string  $cookiejar      Path to cookie jar file
  32          *
  33          * @return string The fetched content
  34          */
  35         public static function fetchUrl($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '')
  36         {
  37                 $ret = self::fetchUrlFull($url, $binary, $redirects, $timeout, $accept_content, $cookiejar);
  38
  39                 return $ret->getBody();
  40         }
  41
  42         /**
  43          * Curl wrapper with array of return values.
  44          *
  45          * Inner workings and parameters are the same as @ref fetchUrl but returns an array with
  46          * all the information collected during the fetch.
  47          *
  48          * @brief Curl wrapper with array of return values.
  49          * @param string  $url            URL to fetch
  50          * @param boolean $binary         default false
  51          *                                TRUE if asked to return binary results (file download)
  52          * @param integer $redirects      The recursion counter for internal use - default 0
  53          * @param integer $timeout        Timeout in seconds, default system config value or 60 seconds
  54          * @param string  $accept_content supply Accept: header with 'accept_content' as the value
  55          * @param string  $cookiejar      Path to cookie jar file
  56          *
  57          * @return CurlResult With all relevant information, 'body' contains the actual fetched content.
  58          */
  59         public static function fetchUrlFull($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '')
  60         {
  61                 return self::curl(
  62                         $url,
  63                         $binary,
  64                         $redirects,
  65                         ['timeout'=>$timeout,
  66                         'accept_content'=>$accept_content,
  67                         'cookiejar'=>$cookiejar
  68                         ]
  69                 );
  70         }
  71
  72         /**
  73          * @brief fetches an URL.
  74          *
  75          * @param string  $url       URL to fetch
  76          * @param boolean $binary    default false
  77          *                           TRUE if asked to return binary results (file download)
  78          * @param int     $redirects The recursion counter for internal use - default 0
  79          * @param array   $opts      (optional parameters) assoziative array with:
  80          *                           'accept_content' => supply Accept: header with 'accept_content' as the value
  81          *                           'timeout' => int Timeout in seconds, default system config value or 60 seconds
  82          *                           'http_auth' => username:password
  83          *                           'novalidate' => do not validate SSL certs, default is to validate using our CA list
  84          *                           'nobody' => only return the header
  85          *                           'cookiejar' => path to cookie jar file
  86          *                           'header' => header array
  87          *
  88          * @return CurlResult
  89          */
  90         public static function curl($url, $binary = false, &$redirects = 0, $opts = [])
  91         {
  92                 $ret = ['return_code' => 0, 'success' => false, 'header' => '', 'info' => '', 'body' => ''];
  93
  94                 $stamp1 = microtime(true);
  95
  96                 $a = \get_app();
  97
  98                 if (strlen($url) > 1000) {
  99                         Logger::log('URL is longer than 1000 characters. Callstack: ' . System::callstack(20), Logger::DEBUG);
 100                         return CurlResult::createErrorCurl(substr($url, 0, 200));
 101                 }
 102
 103                 $parts = parse_url($url);
 104                 $path_parts = explode('/', defaults($parts, 'path', ''));
 105                 foreach ($path_parts as $part) {
 106                         if (strlen($part) <> mb_strlen($part)) {
 107                                 $parts2[] = rawurlencode($part);
 108                         } else {
 109                                 $parts2[] = $part;
 110                         }
 111                 }
 112                 $parts['path'] = implode('/', $parts2);
 113                 $url = self::unparseURL($parts);
 114
 115                 if (self::isUrlBlocked($url)) {
 116                         Logger::log('domain of ' . $url . ' is blocked', Logger::DATA);
 117                         return CurlResult::createErrorCurl($url);
 118                 }
 119
 120                 $ch = @curl_init($url);
 121
 122                 if (($redirects > 8) || (!$ch)) {
 123                         return CurlResult::createErrorCurl($url);
 124                 }
 125
 126                 @curl_setopt($ch, CURLOPT_HEADER, true);
 127
 128                 if (!empty($opts['cookiejar'])) {
 129                         curl_setopt($ch, CURLOPT_COOKIEJAR, $opts["cookiejar"]);
 130                         curl_setopt($ch, CURLOPT_COOKIEFILE, $opts["cookiejar"]);
 131                 }
 132
 133                 // These settings aren't needed. We're following the location already.
 134                 //      @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
 135                 //      @curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
 136
 137                 if (!empty($opts['accept_content'])) {
 138                         curl_setopt(
 139                                 $ch,
 140                                 CURLOPT_HTTPHEADER,
 141                                 ['Accept: ' . $opts['accept_content']]
 142                         );
 143                 }
 144
 145                 if (!empty($opts['header'])) {
 146                         curl_setopt($ch, CURLOPT_HTTPHEADER, $opts['header']);
 147                 }
 148
 149                 @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 150                 @curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 151
 152                 $range = intval(Config::get('system', 'curl_range_bytes', 0));
 153
 154                 if ($range > 0) {
 155                         @curl_setopt($ch, CURLOPT_RANGE, '0-' . $range);
 156                 }
 157
 158                 // Without this setting it seems as if some webservers send compressed content
 159                 // This seems to confuse curl so that it shows this uncompressed.
 160                 /// @todo  We could possibly set this value to "gzip" or something similar
 161                 curl_setopt($ch, CURLOPT_ENCODING, '');
 162
 163                 if (!empty($opts['headers'])) {
 164                         @curl_setopt($ch, CURLOPT_HTTPHEADER, $opts['headers']);
 165                 }
 166
 167                 if (!empty($opts['nobody'])) {
 168                         @curl_setopt($ch, CURLOPT_NOBODY, $opts['nobody']);
 169                 }
 170
 171                 if (!empty($opts['timeout'])) {
 172                         @curl_setopt($ch, CURLOPT_TIMEOUT, $opts['timeout']);
 173                 } else {
 174                         $curl_time = Config::get('system', 'curl_timeout', 60);
 175                         @curl_setopt($ch, CURLOPT_TIMEOUT, intval($curl_time));
 176                 }
 177
 178                 // by default we will allow self-signed certs
 179                 // but you can override this
 180
 181                 $check_cert = Config::get('system', 'verifyssl');
 182                 @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
 183
 184                 if ($check_cert) {
 185                         @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
 186                 }
 187
 188                 $proxy = Config::get('system', 'proxy');
 189
 190                 if (strlen($proxy)) {
 191                         @curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
 192                         @curl_setopt($ch, CURLOPT_PROXY, $proxy);
 193                         $proxyuser = @Config::get('system', 'proxyuser');
 194
 195                         if (strlen($proxyuser)) {
 196                                 @curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
 197                         }
 198                 }
 199
 200                 if (Config::get('system', 'ipv4_resolve', false)) {
 201                         curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
 202                 }
 203
 204                 if ($binary) {
 205                         @curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
 206                 }
 207
 208                 // don't let curl abort the entire application
 209                 // if it throws any errors.
 210
 211                 $s = @curl_exec($ch);
 212                 $curl_info = @curl_getinfo($ch);
 213
 214                 // Special treatment for HTTP Code 416
 215                 // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/416
 216                 if (($curl_info['http_code'] == 416) && ($range > 0)) {
 217                         @curl_setopt($ch, CURLOPT_RANGE, '');
 218                         $s = @curl_exec($ch);
 219                         $curl_info = @curl_getinfo($ch);
 220                 }
 221
 222                 $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));
 223
 224                 if ($curlResponse->isRedirectUrl()) {
 225                         $redirects++;
 226                         Logger::log('curl: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl());
 227                         @curl_close($ch);
 228                         return self::curl($curlResponse->getRedirectUrl(), $binary, $redirects, $opts);
 229                 }
 230
 231                 @curl_close($ch);
 232
 233                 $a->saveTimestamp($stamp1, 'network');
 234
 235                 return $curlResponse;
 236         }
 237
 238         /**
 239          * @brief Send POST request to $url
 240          *
 241          * @param string  $url       URL to post
 242          * @param mixed   $params    array of POST variables
 243          * @param string  $headers   HTTP headers
 244          * @param integer $redirects Recursion counter for internal use - default = 0
 245          * @param integer $timeout   The timeout in seconds, default system config value or 60 seconds
 246          *
 247          * @return CurlResult The content
 248          */
 249         public static function post($url, $params, $headers = null, &$redirects = 0, $timeout = 0)
 250         {
 251                 $stamp1 = microtime(true);
 252
 253                 if (self::isUrlBlocked($url)) {
 254                         Logger::log('post_url: domain of ' . $url . ' is blocked', Logger::DATA);
 255                         return CurlResult::createErrorCurl($url);
 256                 }
 257
 258                 $a = \get_app();
 259                 $ch = curl_init($url);
 260
 261                 if (($redirects > 8) || (!$ch)) {
 262                         return CurlResult::createErrorCurl($url);
 263                 }
 264
 265                 Logger::log('post_url: start ' . $url, Logger::DATA);
 266
 267                 curl_setopt($ch, CURLOPT_HEADER, true);
 268                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 269                 curl_setopt($ch, CURLOPT_POST, 1);
 270                 curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
 271                 curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 272
 273                 if (Config::get('system', 'ipv4_resolve', false)) {
 274                         curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
 275                 }
 276
 277                 if (intval($timeout)) {
 278                         curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
 279                 } else {
 280                         $curl_time = Config::get('system', 'curl_timeout', 60);
 281                         curl_setopt($ch, CURLOPT_TIMEOUT, intval($curl_time));
 282                 }
 283
 284                 if (defined('LIGHTTPD')) {
 285                         if (!is_array($headers)) {
 286                                 $headers = ['Expect:'];
 287                         } else {
 288                                 if (!in_array('Expect:', $headers)) {
 289                                         array_push($headers, 'Expect:');
 290                                 }
 291                         }
 292                 }
 293
 294                 if ($headers) {
 295                         curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
 296                 }
 297
 298                 $check_cert = Config::get('system', 'verifyssl');
 299                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
 300
 301                 if ($check_cert) {
 302                         @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
 303                 }
 304
 305                 $proxy = Config::get('system', 'proxy');
 306
 307                 if (strlen($proxy)) {
 308                         curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
 309                         curl_setopt($ch, CURLOPT_PROXY, $proxy);
 310                         $proxyuser = Config::get('system', 'proxyuser');
 311                         if (strlen($proxyuser)) {
 312                                 curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
 313                         }
 314                 }
 315
 316                 // don't let curl abort the entire application
 317                 // if it throws any errors.
 318
 319                 $s = @curl_exec($ch);
 320
 321                 $base = $s;
 322                 $curl_info = curl_getinfo($ch);
 323
 324                 $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));
 325
 326                 if ($curlResponse->isRedirectUrl()) {
 327                         $redirects++;
 328                         Logger::log('post_url: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl());
 329                         curl_close($ch);
 330                         return self::post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout);
 331                 }
 332
 333                 curl_close($ch);
 334
 335                 $a->saveTimestamp($stamp1, 'network');
 336
 337                 Logger::log('post_url: end ' . $url, Logger::DATA);
 338
 339                 return $curlResponse;
 340         }
 341
 342         /**
 343          * @brief Check URL to see if it's real
 344          *
 345          * Take a URL from the wild, prepend http:// if necessary
 346          * and check DNS to see if it's real (or check if is a valid IP address)
 347          *
 348          * @param string $url The URL to be validated
 349          * @return string|boolean The actual working URL, false else
 350          */
 351         public static function isUrlValid($url)
 352         {
 353                 if (Config::get('system', 'disable_url_validation')) {
 354                         return $url;
 355                 }
 356
 357                 // no naked subdomains (allow localhost for tests)
 358                 if (strpos($url, '.') === false && strpos($url, '/localhost/') === false) {
 359                         return false;
 360                 }
 361
 362                 if (substr($url, 0, 4) != 'http') {
 363                         $url = 'http://' . $url;
 364                 }
 365
 366                 /// @TODO Really suppress function outcomes? Why not find them + debug them?
 367                 $h = @parse_url($url);
 368
 369                 if (!empty($h['host']) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP) )) {
 370                         return $url;
 371                 }
 372
 373                 return false;
 374         }
 375
 376         /**
 377          * @brief Checks that email is an actual resolvable internet address
 378          *
 379          * @param string $addr The email address
 380          * @return boolean True if it's a valid email address, false if it's not
 381          */
 382         public static function isEmailDomainValid($addr)
 383         {
 384                 if (Config::get('system', 'disable_email_validation')) {
 385                         return true;
 386                 }
 387
 388                 if (! strpos($addr, '@')) {
 389                         return false;
 390                 }
 391
 392                 $h = substr($addr, strpos($addr, '@') + 1);
 393
 394                 // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
 395                 if ($h && (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) {
 396                         return true;
 397                 }
 398                 if ($h && @dns_get_record($h, DNS_CNAME + DNS_MX)) {
 399                         return true;
 400                 }
 401                 return false;
 402         }
 403
 404         /**
 405          * @brief Check if URL is allowed
 406          *
 407          * Check $url against our list of allowed sites,
 408          * wildcards allowed. If allowed_sites is unset return true;
 409          *
 410          * @param string $url URL which get tested
 411          * @return boolean True if url is allowed otherwise return false
 412          */
 413         public static function isUrlAllowed($url)
 414         {
 415                 $h = @parse_url($url);
 416
 417                 if (! $h) {
 418                         return false;
 419                 }
 420
 421                 $str_allowed = Config::get('system', 'allowed_sites');
 422                 if (! $str_allowed) {
 423                         return true;
 424                 }
 425
 426                 $found = false;
 427
 428                 $host = strtolower($h['host']);
 429
 430                 // always allow our own site
 431                 if ($host == strtolower($_SERVER['SERVER_NAME'])) {
 432                         return true;
 433                 }
 434
 435                 $fnmatch = function_exists('fnmatch');
 436                 $allowed = explode(',', $str_allowed);
 437
 438                 if (count($allowed)) {
 439                         foreach ($allowed as $a) {
 440                                 $pat = strtolower(trim($a));
 441                                 if (($fnmatch && fnmatch($pat, $host)) || ($pat == $host)) {
 442                                         $found = true;
 443                                         break;
 444                                 }
 445                         }
 446                 }
 447                 return $found;
 448         }
 449
 450         /**
 451          * Checks if the provided url domain is on the domain blocklist.
 452          * Returns true if it is or malformed URL, false if not.
 453          *
 454          * @param string $url The url to check the domain from
 455          *
 456          * @return boolean
 457          */
 458         public static function isUrlBlocked($url)
 459         {
 460                 $host = @parse_url($url, PHP_URL_HOST);
 461                 if (!$host) {
 462                         return false;
 463                 }
 464
 465                 $domain_blocklist = Config::get('system', 'blocklist', []);
 466                 if (!$domain_blocklist) {
 467                         return false;
 468                 }
 469
 470                 foreach ($domain_blocklist as $domain_block) {
 471                         if (strcasecmp($domain_block['domain'], $host) === 0) {
 472                                 return true;
 473                         }
 474                 }
 475
 476                 return false;
 477         }
 478
 479         /**
 480          * @brief Check if email address is allowed to register here.
 481          *
 482          * Compare against our list (wildcards allowed).
 483          *
 484          * @param  string $email email address
 485          * @return boolean False if not allowed, true if allowed
 486          *    or if allowed list is not configured
 487          */
 488         public static function isEmailDomainAllowed($email)
 489         {
 490                 $domain = strtolower(substr($email, strpos($email, '@') + 1));
 491                 if (!$domain) {
 492                         return false;
 493                 }
 494
 495                 $str_allowed = Config::get('system', 'allowed_email', '');
 496                 if (empty($str_allowed)) {
 497                         return true;
 498                 }
 499
 500                 $allowed = explode(',', $str_allowed);
 501
 502                 return self::isDomainAllowed($domain, $allowed);
 503         }
 504
 505         /**
 506          * Checks for the existence of a domain in a domain list
 507          *
 508          * @brief Checks for the existence of a domain in a domain list
 509          * @param string $domain
 510          * @param array  $domain_list
 511          * @return boolean
 512          */
 513         public static function isDomainAllowed($domain, array $domain_list)
 514         {
 515                 $found = false;
 516
 517                 foreach ($domain_list as $item) {
 518                         $pat = strtolower(trim($item));
 519                         if (fnmatch($pat, $domain) || ($pat == $domain)) {
 520                                 $found = true;
 521                                 break;
 522                         }
 523                 }
 524
 525                 return $found;
 526         }
 527
 528         public static function lookupAvatarByEmail($email)
 529         {
 530                 $avatar['size'] = 300;
 531                 $avatar['email'] = $email;
 532                 $avatar['url'] = '';
 533                 $avatar['success'] = false;
 534
 535                 Hook::callAll('avatar_lookup', $avatar);
 536
 537                 if (! $avatar['success']) {
 538                         $avatar['url'] = System::baseUrl() . '/images/person-300.jpg';
 539                 }
 540
 541                 Logger::log('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], Logger::DEBUG);
 542                 return $avatar['url'];
 543         }
 544
 545         /**
 546          * @brief Remove Google Analytics and other tracking platforms params from URL
 547          *
 548          * @param string $url Any user-submitted URL that may contain tracking params
 549          * @return string The same URL stripped of tracking parameters
 550          */
 551         public static function stripTrackingQueryParams($url)
 552         {
 553                 $urldata = parse_url($url);
 554                 if (!empty($urldata["query"])) {
 555                         $query = $urldata["query"];
 556                         parse_str($query, $querydata);
 557
 558                         if (is_array($querydata)) {
 559                                 foreach ($querydata as $param => $value) {
 560                                         if (in_array(
 561                                                 $param,
 562                                                 [
 563                                                         "utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
 564                                                         "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
 565                                                         "fb_action_ids", "fb_action_types", "fb_ref",
 566                                                         "awesm", "wtrid",
 567                                                         "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"]
 568                                                 )
 569                                         ) {
 570                                                 $pair = $param . "=" . urlencode($value);
 571                                                 $url = str_replace($pair, "", $url);
 572
 573                                                 // Second try: if the url isn't encoded completely
 574                                                 $pair = $param . "=" . str_replace(" ", "+", $value);
 575                                                 $url = str_replace($pair, "", $url);
 576
 577                                                 // Third try: Maybey the url isn't encoded at all
 578                                                 $pair = $param . "=" . $value;
 579                                                 $url = str_replace($pair, "", $url);
 580
 581                                                 $url = str_replace(["?&", "&&"], ["?", ""], $url);
 582                                         }
 583                                 }
 584                         }
 585
 586                         if (substr($url, -1, 1) == "?") {
 587                                 $url = substr($url, 0, -1);
 588                         }
 589                 }
 590
 591                 return $url;
 592         }
 593
 594         /**
 595          * @brief Returns the original URL of the provided URL
 596          *
 597          * This function strips tracking query params and follows redirections, either
 598          * through HTTP code or meta refresh tags. Stops after 10 redirections.
 599          *
 600          * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
 601          *
 602          * @see ParseUrl::getSiteinfo
 603          *
 604          * @param string $url       A user-submitted URL
 605          * @param int    $depth     The current redirection recursion level (internal)
 606          * @param bool   $fetchbody Wether to fetch the body or not after the HEAD requests
 607          * @return string A canonical URL
 608          */
 609         public static function finalUrl($url, $depth = 1, $fetchbody = false)
 610         {
 611                 $a = \get_app();
 612
 613                 $url = self::stripTrackingQueryParams($url);
 614
 615                 if ($depth > 10) {
 616                         return $url;
 617                 }
 618
 619                 $url = trim($url, "'");
 620
 621                 $stamp1 = microtime(true);
 622
 623                 $ch = curl_init();
 624                 curl_setopt($ch, CURLOPT_URL, $url);
 625                 curl_setopt($ch, CURLOPT_HEADER, 1);
 626                 curl_setopt($ch, CURLOPT_NOBODY, 1);
 627                 curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 628                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 629                 curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 630
 631                 curl_exec($ch);
 632                 $curl_info = @curl_getinfo($ch);
 633                 $http_code = $curl_info['http_code'];
 634                 curl_close($ch);
 635
 636                 $a->saveTimestamp($stamp1, "network");
 637
 638                 if ($http_code == 0) {
 639                         return $url;
 640                 }
 641
 642                 if (in_array($http_code, ['301', '302'])) {
 643                         if (!empty($curl_info['redirect_url'])) {
 644                                 return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
 645                         } elseif (!empty($curl_info['location'])) {
 646                                 return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
 647                         }
 648                 }
 649
 650                 // Check for redirects in the meta elements of the body if there are no redirects in the header.
 651                 if (!$fetchbody) {
 652                         return(self::finalUrl($url, ++$depth, true));
 653                 }
 654
 655                 // if the file is too large then exit
 656                 if ($curl_info["download_content_length"] > 1000000) {
 657                         return $url;
 658                 }
 659
 660                 // if it isn't a HTML file then exit
 661                 if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
 662                         return $url;
 663                 }
 664
 665                 $stamp1 = microtime(true);
 666
 667                 $ch = curl_init();
 668                 curl_setopt($ch, CURLOPT_URL, $url);
 669                 curl_setopt($ch, CURLOPT_HEADER, 0);
 670                 curl_setopt($ch, CURLOPT_NOBODY, 0);
 671                 curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 672                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 673                 curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 674
 675                 $body = curl_exec($ch);
 676                 curl_close($ch);
 677
 678                 $a->saveTimestamp($stamp1, "network");
 679
 680                 if (trim($body) == "") {
 681                         return $url;
 682                 }
 683
 684                 // Check for redirect in meta elements
 685                 $doc = new DOMDocument();
 686                 @$doc->loadHTML($body);
 687
 688                 $xpath = new DomXPath($doc);
 689
 690                 $list = $xpath->query("//meta[@content]");
 691                 foreach ($list as $node) {
 692                         $attr = [];
 693                         if ($node->attributes->length) {
 694                                 foreach ($node->attributes as $attribute) {
 695                                         $attr[$attribute->name] = $attribute->value;
 696                                 }
 697                         }
 698
 699                         if (@$attr["http-equiv"] == 'refresh') {
 700                                 $path = $attr["content"];
 701                                 $pathinfo = explode(";", $path);
 702                                 foreach ($pathinfo as $value) {
 703                                         if (substr(strtolower($value), 0, 4) == "url=") {
 704                                                 return self::finalUrl(substr($value, 4), ++$depth);
 705                                         }
 706                                 }
 707                         }
 708                 }
 709
 710                 return $url;
 711         }
 712
 713         /**
 714          * @brief Find the matching part between two url
 715          *
 716          * @param string $url1
 717          * @param string $url2
 718          * @return string The matching part
 719          */
 720         public static function getUrlMatch($url1, $url2)
 721         {
 722                 if (($url1 == "") || ($url2 == "")) {
 723                         return "";
 724                 }
 725
 726                 $url1 = Strings::normaliseLink($url1);
 727                 $url2 = Strings::normaliseLink($url2);
 728
 729                 $parts1 = parse_url($url1);
 730                 $parts2 = parse_url($url2);
 731
 732                 if (!isset($parts1["host"]) || !isset($parts2["host"])) {
 733                         return "";
 734                 }
 735
 736                 if (empty($parts1["scheme"])) {
 737                         $parts1["scheme"] = '';
 738                 }
 739                 if (empty($parts2["scheme"])) {
 740                         $parts2["scheme"] = '';
 741                 }
 742
 743                 if ($parts1["scheme"] != $parts2["scheme"]) {
 744                         return "";
 745                 }
 746
 747                 if (empty($parts1["host"])) {
 748                         $parts1["host"] = '';
 749                 }
 750                 if (empty($parts2["host"])) {
 751                         $parts2["host"] = '';
 752                 }
 753
 754                 if ($parts1["host"] != $parts2["host"]) {
 755                         return "";
 756                 }
 757
 758                 if (empty($parts1["port"])) {
 759                         $parts1["port"] = '';
 760                 }
 761                 if (empty($parts2["port"])) {
 762                         $parts2["port"] = '';
 763                 }
 764
 765                 if ($parts1["port"] != $parts2["port"]) {
 766                         return "";
 767                 }
 768
 769                 $match = $parts1["scheme"]."://".$parts1["host"];
 770
 771                 if ($parts1["port"]) {
 772                         $match .= ":".$parts1["port"];
 773                 }
 774
 775                 if (empty($parts1["path"])) {
 776                         $parts1["path"] = '';
 777                 }
 778                 if (empty($parts2["path"])) {
 779                         $parts2["path"] = '';
 780                 }
 781
 782                 $pathparts1 = explode("/", $parts1["path"]);
 783                 $pathparts2 = explode("/", $parts2["path"]);
 784
 785                 $i = 0;
 786                 $path = "";
 787                 do {
 788                         $path1 = defaults($pathparts1, $i, '');
 789                         $path2 = defaults($pathparts2, $i, '');
 790
 791                         if ($path1 == $path2) {
 792                                 $path .= $path1."/";
 793                         }
 794                 } while (($path1 == $path2) && ($i++ <= count($pathparts1)));
 795
 796                 $match .= $path;
 797
 798                 return Strings::normaliseLink($match);
 799         }
 800
 801         /**
 802          * @brief Glue url parts together
 803          *
 804          * @param array $parsed URL parts
 805          *
 806          * @return string The glued URL
 807          */
 808         public static function unparseURL($parsed)
 809         {
 810                 $get = function ($key) use ($parsed) {
 811                         return isset($parsed[$key]) ? $parsed[$key] : null;
 812                 };
 813
 814                 $pass      = $get('pass');
 815                 $user      = $get('user');
 816                 $userinfo  = $pass !== null ? "$user:$pass" : $user;
 817                 $port      = $get('port');
 818                 $scheme    = $get('scheme');
 819                 $query     = $get('query');
 820                 $fragment  = $get('fragment');
 821                 $authority = ($userinfo !== null ? $userinfo."@" : '') .
 822                                                 $get('host') .
 823                                                 ($port ? ":$port" : '');
 824
 825                 return  (strlen($scheme) ? $scheme.":" : '') .
 826                         (strlen($authority) ? "//".$authority : '') .
 827                         $get('path') .
 828                         (strlen($query) ? "?".$query : '') .
 829                         (strlen($fragment) ? "#".$fragment : '');
 830         }
 831 }