src/Util/Network.php

   1 <?php
   2 /**
   3  * @file src/Util/Network.php
   4  */
   5 namespace Friendica\Util;
   6
   7 use Friendica\Core\Addon;
   8 use Friendica\Core\Logger;
   9 use Friendica\Core\System;
  10 use Friendica\Core\Config;
  11 use Friendica\Network\CurlResult;
  12 use Friendica\Util\Strings;
  13 use DOMDocument;
  14 use DomXPath;
  15
  16 class Network
  17 {
  18         /**
  19          * Curl wrapper
  20          *
  21          * If binary flag is true, return binary results.
  22          * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt")
  23          * to preserve cookies from one request to the next.
  24          *
  25          * @brief Curl wrapper
  26          * @param string  $url            URL to fetch
  27          * @param boolean $binary         default false
  28          *                                TRUE if asked to return binary results (file download)
  29          * @param integer $redirects      The recursion counter for internal use - default 0
  30          * @param integer $timeout        Timeout in seconds, default system config value or 60 seconds
  31          * @param string  $accept_content supply Accept: header with 'accept_content' as the value
  32          * @param string  $cookiejar      Path to cookie jar file
  33          *
  34          * @return string The fetched content
  35          */
  36         public static function fetchUrl($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '')
  37         {
  38                 $ret = self::fetchUrlFull($url, $binary, $redirects, $timeout, $accept_content, $cookiejar);
  39
  40                 return $ret->getBody();
  41         }
  42
  43         /**
  44          * Curl wrapper with array of return values.
  45          *
  46          * Inner workings and parameters are the same as @ref fetchUrl but returns an array with
  47          * all the information collected during the fetch.
  48          *
  49          * @brief Curl wrapper with array of return values.
  50          * @param string  $url            URL to fetch
  51          * @param boolean $binary         default false
  52          *                                TRUE if asked to return binary results (file download)
  53          * @param integer $redirects      The recursion counter for internal use - default 0
  54          * @param integer $timeout        Timeout in seconds, default system config value or 60 seconds
  55          * @param string  $accept_content supply Accept: header with 'accept_content' as the value
  56          * @param string  $cookiejar      Path to cookie jar file
  57          *
  58          * @return CurlResult With all relevant information, 'body' contains the actual fetched content.
  59          */
  60         public static function fetchUrlFull($url, $binary = false, &$redirects = 0, $timeout = 0, $accept_content = null, $cookiejar = '')
  61         {
  62                 return self::curl(
  63                         $url,
  64                         $binary,
  65                         $redirects,
  66                         ['timeout'=>$timeout,
  67                         'accept_content'=>$accept_content,
  68                         'cookiejar'=>$cookiejar
  69                         ]
  70                 );
  71         }
  72
  73         /**
  74          * @brief fetches an URL.
  75          *
  76          * @param string  $url       URL to fetch
  77          * @param boolean $binary    default false
  78          *                           TRUE if asked to return binary results (file download)
  79          * @param int     $redirects The recursion counter for internal use - default 0
  80          * @param array   $opts      (optional parameters) assoziative array with:
  81          *                           'accept_content' => supply Accept: header with 'accept_content' as the value
  82          *                           'timeout' => int Timeout in seconds, default system config value or 60 seconds
  83          *                           'http_auth' => username:password
  84          *                           'novalidate' => do not validate SSL certs, default is to validate using our CA list
  85          *                           'nobody' => only return the header
  86          *                           'cookiejar' => path to cookie jar file
  87          *                           'header' => header array
  88          *
  89          * @return CurlResult
  90          */
  91         public static function curl($url, $binary = false, &$redirects = 0, $opts = [])
  92         {
  93                 $ret = ['return_code' => 0, 'success' => false, 'header' => '', 'info' => '', 'body' => ''];
  94
  95                 $stamp1 = microtime(true);
  96
  97                 $a = \get_app();
  98
  99                 if (strlen($url) > 1000) {
 100                         Logger::log('URL is longer than 1000 characters. Callstack: ' . System::callstack(20), Logger::DEBUG);
 101                         return CurlResult::createErrorCurl(substr($url, 0, 200));
 102                 }
 103
 104                 $parts = parse_url($url);
 105                 $path_parts = explode('/', defaults($parts, 'path', ''));
 106                 foreach ($path_parts as $part) {
 107                         if (strlen($part) <> mb_strlen($part)) {
 108                                 $parts2[] = rawurlencode($part);
 109                         } else {
 110                                 $parts2[] = $part;
 111                         }
 112                 }
 113                 $parts['path'] = implode('/', $parts2);
 114                 $url = self::unparseURL($parts);
 115
 116                 if (self::isUrlBlocked($url)) {
 117                         Logger::log('domain of ' . $url . ' is blocked', Logger::DATA);
 118                         return CurlResult::createErrorCurl($url);
 119                 }
 120
 121                 $ch = @curl_init($url);
 122
 123                 if (($redirects > 8) || (!$ch)) {
 124                         return CurlResult::createErrorCurl($url);
 125                 }
 126
 127                 @curl_setopt($ch, CURLOPT_HEADER, true);
 128
 129                 if (!empty($opts['cookiejar'])) {
 130                         curl_setopt($ch, CURLOPT_COOKIEJAR, $opts["cookiejar"]);
 131                         curl_setopt($ch, CURLOPT_COOKIEFILE, $opts["cookiejar"]);
 132                 }
 133
 134                 // These settings aren't needed. We're following the location already.
 135                 //      @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
 136                 //      @curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
 137
 138                 if (!empty($opts['accept_content'])) {
 139                         curl_setopt(
 140                                 $ch,
 141                                 CURLOPT_HTTPHEADER,
 142                                 ['Accept: ' . $opts['accept_content']]
 143                         );
 144                 }
 145
 146                 if (!empty($opts['header'])) {
 147                         curl_setopt($ch, CURLOPT_HTTPHEADER, $opts['header']);
 148                 }
 149
 150                 @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 151                 @curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 152
 153                 $range = intval(Config::get('system', 'curl_range_bytes', 0));
 154
 155                 if ($range > 0) {
 156                         @curl_setopt($ch, CURLOPT_RANGE, '0-' . $range);
 157                 }
 158
 159                 // Without this setting it seems as if some webservers send compressed content
 160                 // This seems to confuse curl so that it shows this uncompressed.
 161                 /// @todo  We could possibly set this value to "gzip" or something similar
 162                 curl_setopt($ch, CURLOPT_ENCODING, '');
 163
 164                 if (!empty($opts['headers'])) {
 165                         @curl_setopt($ch, CURLOPT_HTTPHEADER, $opts['headers']);
 166                 }
 167
 168                 if (!empty($opts['nobody'])) {
 169                         @curl_setopt($ch, CURLOPT_NOBODY, $opts['nobody']);
 170                 }
 171
 172                 if (!empty($opts['timeout'])) {
 173                         @curl_setopt($ch, CURLOPT_TIMEOUT, $opts['timeout']);
 174                 } else {
 175                         $curl_time = Config::get('system', 'curl_timeout', 60);
 176                         @curl_setopt($ch, CURLOPT_TIMEOUT, intval($curl_time));
 177                 }
 178
 179                 // by default we will allow self-signed certs
 180                 // but you can override this
 181
 182                 $check_cert = Config::get('system', 'verifyssl');
 183                 @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
 184
 185                 if ($check_cert) {
 186                         @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
 187                 }
 188
 189                 $proxy = Config::get('system', 'proxy');
 190
 191                 if (strlen($proxy)) {
 192                         @curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
 193                         @curl_setopt($ch, CURLOPT_PROXY, $proxy);
 194                         $proxyuser = @Config::get('system', 'proxyuser');
 195
 196                         if (strlen($proxyuser)) {
 197                                 @curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
 198                         }
 199                 }
 200
 201                 if (Config::get('system', 'ipv4_resolve', false)) {
 202                         curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
 203                 }
 204
 205                 if ($binary) {
 206                         @curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
 207                 }
 208
 209                 // don't let curl abort the entire application
 210                 // if it throws any errors.
 211
 212                 $s = @curl_exec($ch);
 213                 $curl_info = @curl_getinfo($ch);
 214
 215                 // Special treatment for HTTP Code 416
 216                 // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/416
 217                 if (($curl_info['http_code'] == 416) && ($range > 0)) {
 218                         @curl_setopt($ch, CURLOPT_RANGE, '');
 219                         $s = @curl_exec($ch);
 220                         $curl_info = @curl_getinfo($ch);
 221                 }
 222
 223                 $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));
 224
 225                 if ($curlResponse->isRedirectUrl()) {
 226                         $redirects++;
 227                         Logger::log('curl: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl());
 228                         @curl_close($ch);
 229                         return self::curl($curlResponse->getRedirectUrl(), $binary, $redirects, $opts);
 230                 }
 231
 232                 @curl_close($ch);
 233
 234                 $a->saveTimestamp($stamp1, 'network');
 235
 236                 return $curlResponse;
 237         }
 238
 239         /**
 240          * @brief Send POST request to $url
 241          *
 242          * @param string  $url       URL to post
 243          * @param mixed   $params    array of POST variables
 244          * @param string  $headers   HTTP headers
 245          * @param integer $redirects Recursion counter for internal use - default = 0
 246          * @param integer $timeout   The timeout in seconds, default system config value or 60 seconds
 247          *
 248          * @return CurlResult The content
 249          */
 250         public static function post($url, $params, $headers = null, &$redirects = 0, $timeout = 0)
 251         {
 252                 $stamp1 = microtime(true);
 253
 254                 if (self::isUrlBlocked($url)) {
 255                         Logger::log('post_url: domain of ' . $url . ' is blocked', Logger::DATA);
 256                         return CurlResult::createErrorCurl($url);
 257                 }
 258
 259                 $a = \get_app();
 260                 $ch = curl_init($url);
 261
 262                 if (($redirects > 8) || (!$ch)) {
 263                         return CurlResult::createErrorCurl($url);
 264                 }
 265
 266                 Logger::log('post_url: start ' . $url, Logger::DATA);
 267
 268                 curl_setopt($ch, CURLOPT_HEADER, true);
 269                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 270                 curl_setopt($ch, CURLOPT_POST, 1);
 271                 curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
 272                 curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 273
 274                 if (Config::get('system', 'ipv4_resolve', false)) {
 275                         curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
 276                 }
 277
 278                 if (intval($timeout)) {
 279                         curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
 280                 } else {
 281                         $curl_time = Config::get('system', 'curl_timeout', 60);
 282                         curl_setopt($ch, CURLOPT_TIMEOUT, intval($curl_time));
 283                 }
 284
 285                 if (defined('LIGHTTPD')) {
 286                         if (!is_array($headers)) {
 287                                 $headers = ['Expect:'];
 288                         } else {
 289                                 if (!in_array('Expect:', $headers)) {
 290                                         array_push($headers, 'Expect:');
 291                                 }
 292                         }
 293                 }
 294
 295                 if ($headers) {
 296                         curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
 297                 }
 298
 299                 $check_cert = Config::get('system', 'verifyssl');
 300                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
 301
 302                 if ($check_cert) {
 303                         @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
 304                 }
 305
 306                 $proxy = Config::get('system', 'proxy');
 307
 308                 if (strlen($proxy)) {
 309                         curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
 310                         curl_setopt($ch, CURLOPT_PROXY, $proxy);
 311                         $proxyuser = Config::get('system', 'proxyuser');
 312                         if (strlen($proxyuser)) {
 313                                 curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
 314                         }
 315                 }
 316
 317                 // don't let curl abort the entire application
 318                 // if it throws any errors.
 319
 320                 $s = @curl_exec($ch);
 321
 322                 $base = $s;
 323                 $curl_info = curl_getinfo($ch);
 324
 325                 $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));
 326
 327                 if ($curlResponse->isRedirectUrl()) {
 328                         $redirects++;
 329                         Logger::log('post_url: redirect ' . $url . ' to ' . $curlResponse->getRedirectUrl());
 330                         curl_close($ch);
 331                         return self::post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout);
 332                 }
 333
 334                 curl_close($ch);
 335
 336                 $a->saveTimestamp($stamp1, 'network');
 337
 338                 Logger::log('post_url: end ' . $url, Logger::DATA);
 339
 340                 return $curlResponse;
 341         }
 342
 343         /**
 344          * @brief Check URL to see if it's real
 345          *
 346          * Take a URL from the wild, prepend http:// if necessary
 347          * and check DNS to see if it's real (or check if is a valid IP address)
 348          *
 349          * @param string $url The URL to be validated
 350          * @return string|boolean The actual working URL, false else
 351          */
 352         public static function isUrlValid($url)
 353         {
 354                 if (Config::get('system', 'disable_url_validation')) {
 355                         return $url;
 356                 }
 357
 358                 // no naked subdomains (allow localhost for tests)
 359                 if (strpos($url, '.') === false && strpos($url, '/localhost/') === false) {
 360                         return false;
 361                 }
 362
 363                 if (substr($url, 0, 4) != 'http') {
 364                         $url = 'http://' . $url;
 365                 }
 366
 367                 /// @TODO Really suppress function outcomes? Why not find them + debug them?
 368                 $h = @parse_url($url);
 369
 370                 if (!empty($h['host']) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP) )) {
 371                         return $url;
 372                 }
 373
 374                 return false;
 375         }
 376
 377         /**
 378          * @brief Checks that email is an actual resolvable internet address
 379          *
 380          * @param string $addr The email address
 381          * @return boolean True if it's a valid email address, false if it's not
 382          */
 383         public static function isEmailDomainValid($addr)
 384         {
 385                 if (Config::get('system', 'disable_email_validation')) {
 386                         return true;
 387                 }
 388
 389                 if (! strpos($addr, '@')) {
 390                         return false;
 391                 }
 392
 393                 $h = substr($addr, strpos($addr, '@') + 1);
 394
 395                 // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
 396                 if ($h && (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP) )) {
 397                         return true;
 398                 }
 399                 if ($h && @dns_get_record($h, DNS_CNAME + DNS_MX)) {
 400                         return true;
 401                 }
 402                 return false;
 403         }
 404
 405         /**
 406          * @brief Check if URL is allowed
 407          *
 408          * Check $url against our list of allowed sites,
 409          * wildcards allowed. If allowed_sites is unset return true;
 410          *
 411          * @param string $url URL which get tested
 412          * @return boolean True if url is allowed otherwise return false
 413          */
 414         public static function isUrlAllowed($url)
 415         {
 416                 $h = @parse_url($url);
 417
 418                 if (! $h) {
 419                         return false;
 420                 }
 421
 422                 $str_allowed = Config::get('system', 'allowed_sites');
 423                 if (! $str_allowed) {
 424                         return true;
 425                 }
 426
 427                 $found = false;
 428
 429                 $host = strtolower($h['host']);
 430
 431                 // always allow our own site
 432                 if ($host == strtolower($_SERVER['SERVER_NAME'])) {
 433                         return true;
 434                 }
 435
 436                 $fnmatch = function_exists('fnmatch');
 437                 $allowed = explode(',', $str_allowed);
 438
 439                 if (count($allowed)) {
 440                         foreach ($allowed as $a) {
 441                                 $pat = strtolower(trim($a));
 442                                 if (($fnmatch && fnmatch($pat, $host)) || ($pat == $host)) {
 443                                         $found = true;
 444                                         break;
 445                                 }
 446                         }
 447                 }
 448                 return $found;
 449         }
 450
 451         /**
 452          * Checks if the provided url domain is on the domain blocklist.
 453          * Returns true if it is or malformed URL, false if not.
 454          *
 455          * @param string $url The url to check the domain from
 456          *
 457          * @return boolean
 458          */
 459         public static function isUrlBlocked($url)
 460         {
 461                 $host = @parse_url($url, PHP_URL_HOST);
 462                 if (!$host) {
 463                         return false;
 464                 }
 465
 466                 $domain_blocklist = Config::get('system', 'blocklist', []);
 467                 if (!$domain_blocklist) {
 468                         return false;
 469                 }
 470
 471                 foreach ($domain_blocklist as $domain_block) {
 472                         if (strcasecmp($domain_block['domain'], $host) === 0) {
 473                                 return true;
 474                         }
 475                 }
 476
 477                 return false;
 478         }
 479
 480         /**
 481          * @brief Check if email address is allowed to register here.
 482          *
 483          * Compare against our list (wildcards allowed).
 484          *
 485          * @param  string $email email address
 486          * @return boolean False if not allowed, true if allowed
 487          *    or if allowed list is not configured
 488          */
 489         public static function isEmailDomainAllowed($email)
 490         {
 491                 $domain = strtolower(substr($email, strpos($email, '@') + 1));
 492                 if (!$domain) {
 493                         return false;
 494                 }
 495
 496                 $str_allowed = Config::get('system', 'allowed_email', '');
 497                 if (empty($str_allowed)) {
 498                         return true;
 499                 }
 500
 501                 $allowed = explode(',', $str_allowed);
 502
 503                 return self::isDomainAllowed($domain, $allowed);
 504         }
 505
 506         /**
 507          * Checks for the existence of a domain in a domain list
 508          *
 509          * @brief Checks for the existence of a domain in a domain list
 510          * @param string $domain
 511          * @param array  $domain_list
 512          * @return boolean
 513          */
 514         public static function isDomainAllowed($domain, array $domain_list)
 515         {
 516                 $found = false;
 517
 518                 foreach ($domain_list as $item) {
 519                         $pat = strtolower(trim($item));
 520                         if (fnmatch($pat, $domain) || ($pat == $domain)) {
 521                                 $found = true;
 522                                 break;
 523                         }
 524                 }
 525
 526                 return $found;
 527         }
 528
 529         public static function lookupAvatarByEmail($email)
 530         {
 531                 $avatar['size'] = 300;
 532                 $avatar['email'] = $email;
 533                 $avatar['url'] = '';
 534                 $avatar['success'] = false;
 535
 536                 Addon::callHooks('avatar_lookup', $avatar);
 537
 538                 if (! $avatar['success']) {
 539                         $avatar['url'] = System::baseUrl() . '/images/person-300.jpg';
 540                 }
 541
 542                 Logger::log('Avatar: ' . $avatar['email'] . ' ' . $avatar['url'], Logger::DEBUG);
 543                 return $avatar['url'];
 544         }
 545
 546         /**
 547          * @brief Remove Google Analytics and other tracking platforms params from URL
 548          *
 549          * @param string $url Any user-submitted URL that may contain tracking params
 550          * @return string The same URL stripped of tracking parameters
 551          */
 552         public static function stripTrackingQueryParams($url)
 553         {
 554                 $urldata = parse_url($url);
 555                 if (!empty($urldata["query"])) {
 556                         $query = $urldata["query"];
 557                         parse_str($query, $querydata);
 558
 559                         if (is_array($querydata)) {
 560                                 foreach ($querydata as $param => $value) {
 561                                         if (in_array(
 562                                                 $param,
 563                                                 [
 564                                                         "utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
 565                                                         "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
 566                                                         "fb_action_ids", "fb_action_types", "fb_ref",
 567                                                         "awesm", "wtrid",
 568                                                         "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"]
 569                                                 )
 570                                         ) {
 571                                                 $pair = $param . "=" . urlencode($value);
 572                                                 $url = str_replace($pair, "", $url);
 573
 574                                                 // Second try: if the url isn't encoded completely
 575                                                 $pair = $param . "=" . str_replace(" ", "+", $value);
 576                                                 $url = str_replace($pair, "", $url);
 577
 578                                                 // Third try: Maybey the url isn't encoded at all
 579                                                 $pair = $param . "=" . $value;
 580                                                 $url = str_replace($pair, "", $url);
 581
 582                                                 $url = str_replace(["?&", "&&"], ["?", ""], $url);
 583                                         }
 584                                 }
 585                         }
 586
 587                         if (substr($url, -1, 1) == "?") {
 588                                 $url = substr($url, 0, -1);
 589                         }
 590                 }
 591
 592                 return $url;
 593         }
 594
 595         /**
 596          * @brief Returns the original URL of the provided URL
 597          *
 598          * This function strips tracking query params and follows redirections, either
 599          * through HTTP code or meta refresh tags. Stops after 10 redirections.
 600          *
 601          * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
 602          *
 603          * @see ParseUrl::getSiteinfo
 604          *
 605          * @param string $url       A user-submitted URL
 606          * @param int    $depth     The current redirection recursion level (internal)
 607          * @param bool   $fetchbody Wether to fetch the body or not after the HEAD requests
 608          * @return string A canonical URL
 609          */
 610         public static function finalUrl($url, $depth = 1, $fetchbody = false)
 611         {
 612                 $a = \get_app();
 613
 614                 $url = self::stripTrackingQueryParams($url);
 615
 616                 if ($depth > 10) {
 617                         return $url;
 618                 }
 619
 620                 $url = trim($url, "'");
 621
 622                 $stamp1 = microtime(true);
 623
 624                 $ch = curl_init();
 625                 curl_setopt($ch, CURLOPT_URL, $url);
 626                 curl_setopt($ch, CURLOPT_HEADER, 1);
 627                 curl_setopt($ch, CURLOPT_NOBODY, 1);
 628                 curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 629                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 630                 curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 631
 632                 curl_exec($ch);
 633                 $curl_info = @curl_getinfo($ch);
 634                 $http_code = $curl_info['http_code'];
 635                 curl_close($ch);
 636
 637                 $a->saveTimestamp($stamp1, "network");
 638
 639                 if ($http_code == 0) {
 640                         return $url;
 641                 }
 642
 643                 if (in_array($http_code, ['301', '302'])) {
 644                         if (!empty($curl_info['redirect_url'])) {
 645                                 return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
 646                         } elseif (!empty($curl_info['location'])) {
 647                                 return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
 648                         }
 649                 }
 650
 651                 // Check for redirects in the meta elements of the body if there are no redirects in the header.
 652                 if (!$fetchbody) {
 653                         return(self::finalUrl($url, ++$depth, true));
 654                 }
 655
 656                 // if the file is too large then exit
 657                 if ($curl_info["download_content_length"] > 1000000) {
 658                         return $url;
 659                 }
 660
 661                 // if it isn't a HTML file then exit
 662                 if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
 663                         return $url;
 664                 }
 665
 666                 $stamp1 = microtime(true);
 667
 668                 $ch = curl_init();
 669                 curl_setopt($ch, CURLOPT_URL, $url);
 670                 curl_setopt($ch, CURLOPT_HEADER, 0);
 671                 curl_setopt($ch, CURLOPT_NOBODY, 0);
 672                 curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 673                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 674                 curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
 675
 676                 $body = curl_exec($ch);
 677                 curl_close($ch);
 678
 679                 $a->saveTimestamp($stamp1, "network");
 680
 681                 if (trim($body) == "") {
 682                         return $url;
 683                 }
 684
 685                 // Check for redirect in meta elements
 686                 $doc = new DOMDocument();
 687                 @$doc->loadHTML($body);
 688
 689                 $xpath = new DomXPath($doc);
 690
 691                 $list = $xpath->query("//meta[@content]");
 692                 foreach ($list as $node) {
 693                         $attr = [];
 694                         if ($node->attributes->length) {
 695                                 foreach ($node->attributes as $attribute) {
 696                                         $attr[$attribute->name] = $attribute->value;
 697                                 }
 698                         }
 699
 700                         if (@$attr["http-equiv"] == 'refresh') {
 701                                 $path = $attr["content"];
 702                                 $pathinfo = explode(";", $path);
 703                                 foreach ($pathinfo as $value) {
 704                                         if (substr(strtolower($value), 0, 4) == "url=") {
 705                                                 return self::finalUrl(substr($value, 4), ++$depth);
 706                                         }
 707                                 }
 708                         }
 709                 }
 710
 711                 return $url;
 712         }
 713
 714         /**
 715          * @brief Find the matching part between two url
 716          *
 717          * @param string $url1
 718          * @param string $url2
 719          * @return string The matching part
 720          */
 721         public static function getUrlMatch($url1, $url2)
 722         {
 723                 if (($url1 == "") || ($url2 == "")) {
 724                         return "";
 725                 }
 726
 727                 $url1 = Strings::normaliseLink($url1);
 728                 $url2 = Strings::normaliseLink($url2);
 729
 730                 $parts1 = parse_url($url1);
 731                 $parts2 = parse_url($url2);
 732
 733                 if (!isset($parts1["host"]) || !isset($parts2["host"])) {
 734                         return "";
 735                 }
 736
 737                 if (empty($parts1["scheme"])) {
 738                         $parts1["scheme"] = '';
 739                 }
 740                 if (empty($parts2["scheme"])) {
 741                         $parts2["scheme"] = '';
 742                 }
 743
 744                 if ($parts1["scheme"] != $parts2["scheme"]) {
 745                         return "";
 746                 }
 747
 748                 if (empty($parts1["host"])) {
 749                         $parts1["host"] = '';
 750                 }
 751                 if (empty($parts2["host"])) {
 752                         $parts2["host"] = '';
 753                 }
 754
 755                 if ($parts1["host"] != $parts2["host"]) {
 756                         return "";
 757                 }
 758
 759                 if (empty($parts1["port"])) {
 760                         $parts1["port"] = '';
 761                 }
 762                 if (empty($parts2["port"])) {
 763                         $parts2["port"] = '';
 764                 }
 765
 766                 if ($parts1["port"] != $parts2["port"]) {
 767                         return "";
 768                 }
 769
 770                 $match = $parts1["scheme"]."://".$parts1["host"];
 771
 772                 if ($parts1["port"]) {
 773                         $match .= ":".$parts1["port"];
 774                 }
 775
 776                 if (empty($parts1["path"])) {
 777                         $parts1["path"] = '';
 778                 }
 779                 if (empty($parts2["path"])) {
 780                         $parts2["path"] = '';
 781                 }
 782
 783                 $pathparts1 = explode("/", $parts1["path"]);
 784                 $pathparts2 = explode("/", $parts2["path"]);
 785
 786                 $i = 0;
 787                 $path = "";
 788                 do {
 789                         $path1 = defaults($pathparts1, $i, '');
 790                         $path2 = defaults($pathparts2, $i, '');
 791
 792                         if ($path1 == $path2) {
 793                                 $path .= $path1."/";
 794                         }
 795                 } while (($path1 == $path2) && ($i++ <= count($pathparts1)));
 796
 797                 $match .= $path;
 798
 799                 return Strings::normaliseLink($match);
 800         }
 801
 802         /**
 803          * @brief Glue url parts together
 804          *
 805          * @param array $parsed URL parts
 806          *
 807          * @return string The glued URL
 808          */
 809         public static function unparseURL($parsed)
 810         {
 811                 $get = function ($key) use ($parsed) {
 812                         return isset($parsed[$key]) ? $parsed[$key] : null;
 813                 };
 814
 815                 $pass      = $get('pass');
 816                 $user      = $get('user');
 817                 $userinfo  = $pass !== null ? "$user:$pass" : $user;
 818                 $port      = $get('port');
 819                 $scheme    = $get('scheme');
 820                 $query     = $get('query');
 821                 $fragment  = $get('fragment');
 822                 $authority = ($userinfo !== null ? $userinfo."@" : '') .
 823                                                 $get('host') .
 824                                                 ($port ? ":$port" : '');
 825
 826                 return  (strlen($scheme) ? $scheme.":" : '') .
 827                         (strlen($authority) ? "//".$authority : '') .
 828                         $get('path') .
 829                         (strlen($query) ? "?".$query : '') .
 830                         (strlen($fragment) ? "#".$fragment : '');
 831         }
 832 }