]> git.mxchange.org Git - friendica.git/blobdiff - src/Util/Network.php
Merge pull request #13161 from annando/bluesky-activities
[friendica.git] / src / Util / Network.php
index cdcb39a49605c2529e3de2f0d20b449deb9de44e..f7ceab5433b217bbc33c2ccb98770cbaf40020f1 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2010-2022, the Friendica project
+ * @copyright Copyright (C) 2010-2023, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
@@ -25,7 +25,11 @@ use Friendica\Core\Hook;
 use Friendica\Core\Logger;
 use Friendica\DI;
 use Friendica\Model\Contact;
+use Friendica\Network\HTTPClient\Client\HttpClientAccept;
+use Friendica\Network\HTTPClient\Client\HttpClientOptions;
 use Friendica\Network\HTTPException\NotModifiedException;
+use GuzzleHttp\Psr7\Uri;
+use Psr\Http\Message\UriInterface;
 
 class Network
 {
@@ -47,6 +51,7 @@ class Network
         * and check DNS to see if it's real (or check if is a valid IP address)
         *
         * @param string $url The URL to be validated
+        *
         * @return string|boolean The actual working URL, false else
         * @throws \Friendica\Network\HTTPException\InternalServerErrorException
         */
@@ -65,14 +70,39 @@ class Network
                        $url = 'http://' . $url;
                }
 
-               /// @TODO Really suppress function outcomes? Why not find them + debug them?
-               $h = @parse_url($url);
+               $xrd_timeout = DI::config()->get('system', 'xrd_timeout');
+               $host = parse_url($url, PHP_URL_HOST);
 
-               if (!empty($h['host']) && (@dns_get_record($h['host'], DNS_A + DNS_CNAME) || filter_var($h['host'], FILTER_VALIDATE_IP))) {
-                       return $url;
+               if (empty($host) || !(filter_var($host, FILTER_VALIDATE_IP) || @dns_get_record($host . '.', DNS_A + DNS_AAAA))) {
+                       return false;
                }
 
-               return false;
+               if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) {
+                       $options = [HttpClientOptions::VERIFY => true, HttpClientOptions::TIMEOUT => $xrd_timeout];
+                       try {
+                               $curlResult = DI::httpClient()->head($url, $options);
+                       } catch (\Exception $e) {
+                               return false;
+                       }
+
+                       // Workaround for systems that can't handle a HEAD request. Don't retry on timeouts.
+                       if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) {
+                               try {
+                                       $curlResult = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, $options);
+                               } catch (\Exception $e) {
+                                       return false;
+                               }
+                       }
+
+                       if (!$curlResult->isSuccess()) {
+                               Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]);
+                               return false;
+                       } elseif ($curlResult->isRedirectUrl()) {
+                               $url = $curlResult->getRedirectUrl();
+                       }
+               }
+
+               return $url;
        }
 
        /**
@@ -81,7 +111,7 @@ class Network
         * @param string $addr The email address
         * @return boolean True if it's a valid email address, false if it's not
         */
-       public static function isEmailDomainValid(string $addr)
+       public static function isEmailDomainValid(string $addr): bool
        {
                if (DI::config()->get('system', 'disable_email_validation')) {
                        return true;
@@ -94,7 +124,7 @@ class Network
                $h = substr($addr, strpos($addr, '@') + 1);
 
                // Concerning the @ see here: https://stackoverflow.com/questions/36280957/dns-get-record-a-temporary-server-error-occurred
-               if ($h && (@dns_get_record($h, DNS_A + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP))) {
+               if ($h && (@dns_get_record($h, DNS_A + DNS_AAAA + DNS_MX) || filter_var($h, FILTER_VALIDATE_IP))) {
                        return true;
                }
                if ($h && @dns_get_record($h, DNS_CNAME + DNS_MX)) {
@@ -112,7 +142,7 @@ class Network
         * @param string $url URL which get tested
         * @return boolean True if url is allowed otherwise return false
         */
-       public static function isUrlAllowed(string $url)
+       public static function isUrlAllowed(string $url): bool
        {
                $h = @parse_url($url);
 
@@ -156,11 +186,28 @@ class Network
         * @param string $url The url to check the domain from
         *
         * @return boolean
+        *
+        * @deprecated since 2023.03 Use isUriBlocked instead
         */
-       public static function isUrlBlocked(string $url)
+       public static function isUrlBlocked(string $url): bool
        {
-               $host = @parse_url($url, PHP_URL_HOST);
-               if (!$host) {
+               try {
+                       return self::isUriBlocked(new Uri($url));
+               } catch (\Throwable $e) {
+                       Logger::warning('Invalid URL', ['url' => $url]);
+                       return false;
+               }
+       }
+
+       /**
+        * Checks if the provided URI domain is on the domain blocklist.
+        *
+        * @param UriInterface $uri
+        * @return boolean
+        */
+       public static function isUriBlocked(UriInterface $uri): bool
+       {
+               if (!$uri->getHost()) {
                        return false;
                }
 
@@ -170,7 +217,7 @@ class Network
                }
 
                foreach ($domain_blocklist as $domain_block) {
-                       if (fnmatch(strtolower($domain_block['domain']), strtolower($host))) {
+                       if (fnmatch(strtolower($domain_block['domain']), strtolower($uri->getHost()))) {
                                return true;
                        }
                }
@@ -186,7 +233,7 @@ class Network
         *
         * @return boolean
         */
-       public static function isRedirectBlocked(string $url)
+       public static function isRedirectBlocked(string $url): bool
        {
                $host = @parse_url($url, PHP_URL_HOST);
                if (!$host) {
@@ -217,7 +264,7 @@ class Network
         *                       or if allowed list is not configured
         * @throws \Friendica\Network\HTTPException\InternalServerErrorException
         */
-       public static function isEmailDomainAllowed(string $email)
+       public static function isEmailDomainAllowed(string $email): bool
        {
                $domain = strtolower(substr($email, strpos($email, '@') + 1));
                if (!$domain) {
@@ -239,9 +286,10 @@ class Network
         *
         * @param string $domain
         * @param array  $domain_list
+        *
         * @return boolean
         */
-       public static function isDomainAllowed(string $domain, array $domain_list)
+       public static function isDomainAllowed(string $domain, array $domain_list): bool
        {
                $found = false;
 
@@ -256,7 +304,7 @@ class Network
                return $found;
        }
 
-       public static function lookupAvatarByEmail(string $email)
+       public static function lookupAvatarByEmail(string $email): string
        {
                $avatar['size'] = 300;
                $avatar['email'] = $email;
@@ -277,13 +325,15 @@ class Network
         * Remove Google Analytics and other tracking platforms params from URL
         *
         * @param string $url Any user-submitted URL that may contain tracking params
+        *
         * @return string The same URL stripped of tracking parameters
         */
-       public static function stripTrackingQueryParams(string $url)
+       public static function stripTrackingQueryParams(string $url): string
        {
                $urldata = parse_url($url);
-               if (!empty($urldata["query"])) {
-                       $query = $urldata["query"];
+
+               if (!empty($urldata['query'])) {
+                       $query = $urldata['query'];
                        parse_str($query, $querydata);
 
                        if (is_array($querydata)) {
@@ -291,30 +341,32 @@ class Network
                                        if (in_array(
                                                $param,
                                                [
-                                                       "utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
-                                                       "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
-                                                       "fb_action_ids", "fb_action_types", "fb_ref",
-                                                       "awesm", "wtrid",
-                                                       "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"]
+                                                       'utm_source', 'utm_medium', 'utm_term', 'utm_content', 'utm_campaign',
+                                                       // As seen from Purism
+                                                       'mtm_source', 'mtm_medium', 'mtm_term', 'mtm_content', 'mtm_campaign',
+                                                       'wt_mc', 'pk_campaign', 'pk_kwd', 'mc_cid', 'mc_eid',
+                                                       'fb_action_ids', 'fb_action_types', 'fb_ref',
+                                                       'awesm', 'wtrid',
+                                                       'woo_campaign', 'woo_source', 'woo_medium', 'woo_content', 'woo_term']
                                                )
                                        ) {
-                                               $pair = $param . "=" . urlencode($value);
-                                               $url = str_replace($pair, "", $url);
+                                               $pair = $param . '=' . urlencode($value);
+                                               $url = str_replace($pair, '', $url);
 
                                                // Second try: if the url isn't encoded completely
-                                               $pair = $param . "=" . str_replace(" ", "+", $value);
-                                               $url = str_replace($pair, "", $url);
+                                               $pair = $param . '=' . str_replace(' ', '+', $value);
+                                               $url = str_replace($pair, '', $url);
 
-                                               // Third try: Maybey the url isn't encoded at all
-                                               $pair = $param . "=" . $value;
-                                               $url = str_replace($pair, "", $url);
+                                               // Third try: Maybe the url isn't encoded at all
+                                               $pair = $param . '=' . $value;
+                                               $url = str_replace($pair, '', $url);
 
-                                               $url = str_replace(["?&", "&&"], ["?", ""], $url);
+                                               $url = str_replace(['?&', '&&'], ['?', ''], $url);
                                        }
                                }
                        }
 
-                       if (substr($url, -1, 1) == "?") {
+                       if (substr($url, -1, 1) == '?') {
                                $url = substr($url, 0, -1);
                        }
                }
@@ -327,16 +379,20 @@ class Network
         *
         * @param string $url
         * @param string $basepath
+        *
         * @return string url
         */
-       public static function addBasePath(string $url, string $basepath)
+       public static function addBasePath(string $url, string $basepath): string
        {
+               $url = trim($url);
                if (!empty(parse_url($url, PHP_URL_SCHEME)) || empty(parse_url($basepath, PHP_URL_SCHEME)) || empty($url) || empty(parse_url($url))) {
                        return $url;
                }
 
-               $base = ['scheme' => parse_url($basepath, PHP_URL_SCHEME),
-                       'host' => parse_url($basepath, PHP_URL_HOST)];
+               $base = [
+                       'scheme' => parse_url($basepath, PHP_URL_SCHEME),
+                       'host' => parse_url($basepath, PHP_URL_HOST),
+               ];
 
                $parts = array_merge($base, parse_url('/' . ltrim($url, '/')));
                return self::unparseURL($parts);
@@ -347,12 +403,13 @@ class Network
         *
         * @param string $url1
         * @param string $url2
-        * @return string The matching part
+        *
+        * @return string The matching part or empty string on error
         */
-       public static function getUrlMatch(string $url1, string $url2)
+       public static function getUrlMatch(string $url1, string $url2): string
        {
-               if (($url1 == "") || ($url2 == "")) {
-                       return "";
+               if (($url1 == '') || ($url2 == '')) {
+                       return '';
                }
 
                $url1 = Strings::normaliseLink($url1);
@@ -361,67 +418,67 @@ class Network
                $parts1 = parse_url($url1);
                $parts2 = parse_url($url2);
 
-               if (!isset($parts1["host"]) || !isset($parts2["host"])) {
-                       return "";
+               if (!isset($parts1['host']) || !isset($parts2['host'])) {
+                       return '';
                }
 
-               if (empty($parts1["scheme"])) {
-                       $parts1["scheme"] = '';
+               if (empty($parts1['scheme'])) {
+                       $parts1['scheme'] = '';
                }
-               if (empty($parts2["scheme"])) {
-                       $parts2["scheme"] = '';
+               if (empty($parts2['scheme'])) {
+                       $parts2['scheme'] = '';
                }
 
-               if ($parts1["scheme"] != $parts2["scheme"]) {
-                       return "";
+               if ($parts1['scheme'] != $parts2['scheme']) {
+                       return '';
                }
 
-               if (empty($parts1["host"])) {
-                       $parts1["host"] = '';
+               if (empty($parts1['host'])) {
+                       $parts1['host'] = '';
                }
-               if (empty($parts2["host"])) {
-                       $parts2["host"] = '';
+               if (empty($parts2['host'])) {
+                       $parts2['host'] = '';
                }
 
-               if ($parts1["host"] != $parts2["host"]) {
-                       return "";
+               if ($parts1['host'] != $parts2['host']) {
+                       return '';
                }
 
-               if (empty($parts1["port"])) {
-                       $parts1["port"] = '';
+               if (empty($parts1['port'])) {
+                       $parts1['port'] = '';
                }
-               if (empty($parts2["port"])) {
-                       $parts2["port"] = '';
+               if (empty($parts2['port'])) {
+                       $parts2['port'] = '';
                }
 
-               if ($parts1["port"] != $parts2["port"]) {
-                       return "";
+               if ($parts1['port'] != $parts2['port']) {
+                       return '';
                }
 
-               $match = $parts1["scheme"]."://".$parts1["host"];
+               $match = $parts1['scheme'] . '://' . $parts1['host'];
 
-               if ($parts1["port"]) {
-                       $match .= ":".$parts1["port"];
+               if ($parts1['port']) {
+                       $match .= ':' . $parts1['port'];
                }
 
-               if (empty($parts1["path"])) {
-                       $parts1["path"] = '';
+               if (empty($parts1['path'])) {
+                       $parts1['path'] = '';
                }
-               if (empty($parts2["path"])) {
-                       $parts2["path"] = '';
+               if (empty($parts2['path'])) {
+                       $parts2['path'] = '';
                }
 
-               $pathparts1 = explode("/", $parts1["path"]);
-               $pathparts2 = explode("/", $parts2["path"]);
+               $pathparts1 = explode('/', $parts1['path']);
+               $pathparts2 = explode('/', $parts2['path']);
 
                $i = 0;
-               $path = "";
+               $path = '';
                do {
                        $path1 = $pathparts1[$i] ?? '';
                        $path2 = $pathparts2[$i] ?? '';
 
                        if ($path1 == $path2) {
-                               $path .= $path1."/";
+                               $path .= $path1 . '/';
                        }
                } while (($path1 == $path2) && ($i++ <= count($pathparts1)));
 
@@ -435,10 +492,10 @@ class Network
         *
         * @param array $parsed URL parts
         *
-        * @return string The glued URL.
-        * @deprecated since version 2021.12, use a UriInterface object like GuzzleHttp\Psr7\Uri instead
+        * @return string|null The glued URL or null on error
+        * @deprecated since version 2021.12, use GuzzleHttp\Psr7\Uri::fromParts($parts) instead
         */
-       public static function unparseURL(array $parsed)
+       public static function unparseURL(array $parsed): string
        {
                $get = function ($key) use ($parsed) {
                        return isset($parsed[$key]) ? $parsed[$key] : null;
@@ -451,21 +508,22 @@ class Network
                $scheme    = $get('scheme');
                $query     = $get('query');
                $fragment  = $get('fragment');
-               $authority = ($userinfo !== null ? $userinfo."@" : '') .
+               $authority = ($userinfo !== null ? $userinfo . '@' : '') .
                                                $get('host') .
                                                ($port ? ":$port" : '');
 
-               return  (strlen($scheme) ? $scheme.":" : '') .
-                       (strlen($authority) ? "//".$authority : '') .
+               return  (!empty($scheme) ? $scheme . ':' : '') .
+                       (!empty($authority) ? '//' . $authority : '') .
                        $get('path') .
-                       (strlen($query) ? "?".$query : '') .
-                       (strlen($fragment) ? "#".$fragment : '');
+                       (!empty($query) ? '?' . $query : '') .
+                       (!empty($fragment) ? '#' . $fragment : '');
        }
 
        /**
         * Convert an URI to an IDN compatible URI
         *
         * @param string $uri
+        *
         * @return string
         */
        public static function convertToIdn(string $uri): string
@@ -473,11 +531,13 @@ class Network
                $parts = parse_url($uri);
                if (!empty($parts['scheme']) && !empty($parts['host'])) {
                        $parts['host'] = idn_to_ascii($parts['host']);
-                       $uri = self::unparseURL($parts);
-               } elseif (strstr($uri, '@')) {
+                       $uri = (string)Uri::fromParts($parts);
+               } else {
                        $parts = explode('@', $uri);
                        if (count($parts) == 2) {
                                $uri = $parts[0] . '@' . idn_to_ascii($parts[1]);
+                       } else {
+                               $uri = idn_to_ascii($uri);
                        }
                }
 
@@ -487,11 +547,11 @@ class Network
        /**
         * Switch the scheme of an url between http and https
         *
-        * @param string $url URL
+        * @param string $url
         *
-        * @return string switched URL
+        * @return string Switched URL
         */
-       public static function switchScheme(string $url)
+       public static function switchScheme(string $url): string
        {
                $scheme = parse_url($url, PHP_URL_SCHEME);
                if (empty($scheme)) {
@@ -512,9 +572,10 @@ class Network
         *
         * @param string $path
         * @param array  $additionalParams Associative array of parameters
+        *
         * @return string
         */
-       public static function appendQueryParam(string $path, array $additionalParams)
+       public static function appendQueryParam(string $path, array $additionalParams): string
        {
                $parsed = parse_url($path);
 
@@ -538,6 +599,8 @@ class Network
         *
         * @param string $etag          The page etag
         * @param string $last_modified The page last modification UTC date
+        *
+        * @return void
         * @throws \Exception
         */
        public static function checkEtagModified(string $etag, string $last_modified)
@@ -574,10 +637,11 @@ class Network
        /**
         * Check if the given URL is a local link
         *
-        * @param string $url 
-        * @return bool 
+        * @param string $url
+        *
+        * @return bool
         */
-       public static function isLocalLink(string $url)
+       public static function isLocalLink(string $url): bool
        {
                return (strpos(Strings::normaliseLink($url), Strings::normaliseLink(DI::baseUrl())) !== false);
        }
@@ -585,10 +649,10 @@ class Network
        /**
         * Check if the given URL is a valid HTTP/HTTPS URL
         *
-        * @param string $url 
-        * @return bool 
+        * @param string $url
+        * @return bool
         */
-       public static function isValidHttpUrl(string $url)
+       public static function isValidHttpUrl(string $url): bool
        {
                $scheme = parse_url($url, PHP_URL_SCHEME);
                return !empty($scheme) && in_array($scheme, ['http', 'https']) && parse_url($url, PHP_URL_HOST);