]> git.mxchange.org Git - friendica.git/blobdiff - src/Network/HTTPRequest.php
Rename "HTTPRequest::curl()" to HTTPRequest::get()
[friendica.git] / src / Network / HTTPRequest.php
index eaef6966d73035612e0c036f55ac697df4a9b523..f9279fa6028473711a543e4c12ead24cab0e484b 100644 (file)
 
 namespace Friendica\Network;
 
+use DOMDocument;
+use DomXPath;
 use Friendica\App;
 use Friendica\Core\Config\IConfig;
-use Friendica\Core\Logger;
 use Friendica\Core\System;
 use Friendica\Util\Network;
 use Friendica\Util\Profiler;
@@ -41,14 +42,14 @@ class HTTPRequest
        /** @var IConfig */
        private $config;
        /** @var string */
-       private $userAgent;
+       private $baseUrl;
 
+       /**
+        * Stores the injected services and caches the base URL string
+        *
+        * @param LoggerInterface $logger   Logger used for request diagnostics
+        * @param Profiler        $profiler Profiler that receives the request timestamps
+        * @param IConfig         $config   Configuration the request options are read from
+        * @param App\BaseURL     $baseUrl  Only the string returned by its get() method is kept
+        */
-       public function __construct(LoggerInterface $logger, Profiler $profiler, IConfig $config, App $a)
+       public function __construct(LoggerInterface $logger, Profiler $profiler, IConfig $config, App\BaseURL $baseUrl)
        {
-               $this->logger    = $logger;
-               $this->profiler  = $profiler;
-               $this->config    = $config;
-               $this->userAgent = $a->getUserAgent();
+               $this->logger   = $logger;
+               $this->profiler = $profiler;
+               $this->config   = $config;
+               $this->baseUrl  = $baseUrl->get();
        }
 
        /**
@@ -70,7 +71,7 @@ class HTTPRequest
         * @return CurlResult
         * @throws \Friendica\Network\HTTPException\InternalServerErrorException
         */
-       public function curl(string $url, bool $binary = false, array $opts = [], int &$redirects = 0)
+       public function get(string $url, bool $binary = false, array $opts = [], int &$redirects = 0)
        {
                $stamp1 = microtime(true);
 
@@ -127,7 +128,7 @@ class HTTPRequest
                }
 
                @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-               @curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
+               @curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
 
                $range = intval($this->config->get('system', 'curl_range_bytes', 0));
 
@@ -205,7 +206,7 @@ class HTTPRequest
                        $redirects++;
                        $this->logger->notice('Curl redirect.', ['url' => $url, 'to' => $curlResponse->getRedirectUrl()]);
                        @curl_close($ch);
-                       return self::curl($curlResponse->getRedirectUrl(), $binary, $opts, $redirects);
+                       return $this->get($curlResponse->getRedirectUrl(), $binary, $opts, $redirects);
                }
 
                @curl_close($ch);
@@ -232,7 +233,7 @@ class HTTPRequest
                $stamp1 = microtime(true);
 
                if (Network::isUrlBlocked($url)) {
-                       $this->logger->info('Domain is blocked.'. ['url' => $url]);
+                       $this->logger->info('Domain is blocked.' . ['url' => $url]);
                        return CurlResult::createErrorCurl($url);
                }
 
@@ -248,7 +249,7 @@ class HTTPRequest
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch, CURLOPT_POST, 1);
                curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
-               curl_setopt($ch, CURLOPT_USERAGENT, $a->getUserAgent());
+               curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
 
                if ($this->config->get('system', 'ipv4_resolve', false)) {
                        curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
@@ -296,7 +297,7 @@ class HTTPRequest
                        $redirects++;
                        $this->logger->info('Post redirect.', ['url' => $url, 'to' => $curlResponse->getRedirectUrl()]);
                        curl_close($ch);
-                       return self::post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout);
+                       return $this->post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout);
                }
 
                curl_close($ch);
@@ -314,15 +315,133 @@ class HTTPRequest
                                        array_push($headers, 'Expect:');
                                }
                        }
-                       Logger::info('Server responds with 417, applying workaround', ['url' => $url]);
-                       return self::post($url, $params, $headers, $redirects, $timeout);
+                       $this->logger->info('Server responds with 417, applying workaround', ['url' => $url]);
+                       return $this->post($url, $params, $headers, $redirects, $timeout);
                }
 
-               Logger::log('post_url: end ' . $url, Logger::DATA);
+               $this->logger->debug('Post_url: End.', ['url' => $url]);
 
                return $curlResponse;
        }
 
+       /**
+        * Returns the original URL of the provided URL
+        *
+        * This function strips tracking query params and follows redirections, either
+        * through HTTP code or meta refresh tags. Stops once the recursion level
+        * exceeds 10.
+        *
+        * Each URL is first probed with a HEAD request. If that yields no followable
+        * HTTP redirect, the method calls itself once more with $fetchbody enabled,
+        * which repeats the HEAD request and then downloads the body to look for a
+        * <meta http-equiv="refresh"> element. That second pass also consumes one
+        * recursion level.
+        *
+        * @todo  Remove the $fetchbody parameter that generates an extraneous HEAD request
+        *
+        * @see   ParseUrl::getSiteinfo
+        *
+        * @param string $url       A user-submitted URL
+        * @param int    $depth     The current redirection recursion level (internal)
+        * @param bool   $fetchbody Whether to fetch the body or not after the HEAD requests
+        * @return string A canonical URL
+        * @throws \Friendica\Network\HTTPException\InternalServerErrorException
+        */
+       public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
+       {
+               $url = Network::stripTrackingQueryParams($url);
+
+               if ($depth > 10) {
+                       return $url;
+               }
+
+               // Meta refresh targets may arrive wrapped in single quotes (url='...')
+               $url = trim($url, "'");
+
+               $stamp1 = microtime(true);
+
+               // HEAD request: the status line and headers are enough to detect HTTP redirects
+               $ch = curl_init();
+               curl_setopt($ch, CURLOPT_URL, $url);
+               curl_setopt($ch, CURLOPT_HEADER, 1);
+               curl_setopt($ch, CURLOPT_NOBODY, 1);
+               curl_setopt($ch, CURLOPT_TIMEOUT, 10);
+               curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+               curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
+
+               curl_exec($ch);
+               $curl_info = @curl_getinfo($ch);
+               curl_close($ch);
+
+               $this->profiler->saveTimestamp($stamp1, "network", System::callstack());
+
+               // curl_getinfo() may not return an array on failure; treat that like "no response"
+               $http_code = is_array($curl_info) ? (int)($curl_info['http_code'] ?? 0) : 0;
+
+               if ($http_code === 0) {
+                       return $url;
+               }
+
+               // 301/302 as before, plus the other redirect codes that carry a target URL
+               if (in_array($http_code, [301, 302, 303, 307, 308], true)) {
+                       if (!empty($curl_info['redirect_url'])) {
+                               return $this->finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
+                       } elseif (!empty($curl_info['location'])) {
+                               // NOTE(review): 'location' is not among the documented curl_getinfo() keys,
+                               // confirm whether this branch is reachable
+                               return $this->finalUrl($curl_info['location'], ++$depth, $fetchbody);
+                       }
+               }
+
+               // Check for redirects in the meta elements of the body if there are no redirects in the header.
+               if (!$fetchbody) {
+                       return $this->finalUrl($url, ++$depth, true);
+               }
+
+               // if the file is too large then exit
+               if (($curl_info['download_content_length'] ?? 0) > 1000000) {
+                       return $url;
+               }
+
+               // if it isn't a HTML file then exit
+               if (!empty($curl_info['content_type']) && stripos($curl_info['content_type'], 'html') === false) {
+                       return $url;
+               }
+
+               $stamp1 = microtime(true);
+
+               // GET request: download the body to inspect its meta elements
+               $ch = curl_init();
+               curl_setopt($ch, CURLOPT_URL, $url);
+               curl_setopt($ch, CURLOPT_HEADER, 0);
+               curl_setopt($ch, CURLOPT_NOBODY, 0);
+               curl_setopt($ch, CURLOPT_TIMEOUT, 10);
+               curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+               curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
+
+               $body = curl_exec($ch);
+               curl_close($ch);
+
+               $this->profiler->saveTimestamp($stamp1, "network", System::callstack());
+
+               // curl_exec() returns false on failure; an empty body has nothing to parse either
+               if (!is_string($body) || trim($body) === '') {
+                       return $url;
+               }
+
+               // Check for redirect in meta elements
+               $doc = new DOMDocument();
+               // Real-world HTML is rarely well-formed; parser warnings are deliberately silenced
+               @$doc->loadHTML($body);
+
+               $xpath = new DomXPath($doc);
+
+               $list = $xpath->query("//meta[@content]");
+               foreach ($list as $node) {
+                       // The pragma name is matched case-insensitively ("Refresh" is a common spelling)
+                       if (strtolower(trim($node->getAttribute('http-equiv'))) !== 'refresh') {
+                               continue;
+                       }
+
+                       // The content looks like "<seconds>; url=<target>", usually with a space after the ";"
+                       foreach (explode(';', $node->getAttribute('content')) as $value) {
+                               $value = trim($value);
+                               if (strtolower(substr($value, 0, 4)) === 'url=') {
+                                       // Drop surrounding whitespace and quotes from the target
+                                       return $this->finalUrl(trim(substr($value, 4), " \t\n\r\0\x0B\"'"), ++$depth);
+                               }
+                       }
+               }
+
+               return $url;
+       }
+
        /**
         * Curl wrapper
         *
@@ -367,7 +486,7 @@ class HTTPRequest
         */
        public function fetchUrlFull(string $url, bool $binary = false, int $timeout = 0, string $accept_content = '', string $cookiejar = '', int &$redirects = 0)
        {
-               return $this->curl(
+               return $this->get(
                        $url,
                        $binary,
                        [
@@ -378,4 +497,19 @@ class HTTPRequest
                        $redirects
                );
        }
+
+       /**
+        * Builds the User-Agent string used for the cURL requests of this class
+        *
+        * Layout: <platform> '<codename>' <version>-<db update version>; <base URL>
+        *
+        * @return string the UserAgent as a String
+        */
+       public function getUserAgent()
+       {
+               // Platform name followed by the quoted release codename
+               $product = FRIENDICA_PLATFORM . " '" . FRIENDICA_CODENAME . "'";
+
+               // Software version joined with the database schema version
+               $release = FRIENDICA_VERSION . '-' . DB_UPDATE_VERSION;
+
+               return $product . ' ' . $release . '; ' . $this->baseUrl;
+       }
 }