git.mxchange.org Git - friendica.git/blobdiff - src/Network/Probe.php
Merge pull request #11379 from annando/accept
[friendica.git] / src / Network / Probe.php
index d0aaaa7a0777f9cefda2af300b43344cd657ca55..810885323df40873630a12b839bfa934c82f0a87 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2010-2021, the Friendica project
+ * @copyright Copyright (C) 2010-2022, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
@@ -34,6 +34,8 @@ use Friendica\Model\Contact;
 use Friendica\Model\GServer;
 use Friendica\Model\Profile;
 use Friendica\Model\User;
+use Friendica\Network\HTTPClient\Client\HttpClient;
+use Friendica\Network\HTTPClient\Client\HttpClientOptions;
 use Friendica\Protocol\ActivityNamespace;
 use Friendica\Protocol\ActivityPub;
 use Friendica\Protocol\Email;
@@ -43,6 +45,7 @@ use Friendica\Util\DateTimeFormat;
 use Friendica\Util\Network;
 use Friendica\Util\Strings;
 use Friendica\Util\XML;
+use GuzzleHttp\Psr7\Uri;
 
 /**
  * This class contains functions for probing URLs
@@ -57,26 +60,23 @@ class Probe
        /**
         * Remove stuff from a URI that doesn't belong there
         *
-        * @param string $URI
+        * @param string $rawUri
         * @return string Cleaned URI
         */
-       public static function cleanURI(string $URI)
+       public static function cleanURI(string $rawUri): string
        {
                // At first remove leading and trailing junk
-               $URI = trim($URI, "@#?:/ \t\n\r\0\x0B");
+               $rawUri = trim($rawUri, "@#?:/ \t\n\r\0\x0B");
 
-               $parts = parse_url($URI);
-
-               if (empty($parts['scheme'])) {
-                       return $URI;
+               $uri = new Uri($rawUri);
+               if (!$uri->getScheme()) {
+                       return $uri->__toString();
                }
 
                // Remove the URL fragment, since these shouldn't be part of any profile URL
-               unset($parts['fragment']);
-
-               $URI = Network::unparseURL($parts);
+               $uri = $uri->withFragment('');
 
-               return $URI;
+               return $uri->__toString();
        }
 
        /**
@@ -89,8 +89,8 @@ class Probe
        private static function rearrangeData($data)
        {
                $fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "header",
-                               "account-type", "community", "keywords", "location", "about", "hide",
-                               "batch", "notify", "poll", "request", "confirm", "subscribe", "poco",
+                               "account-type", "community", "keywords", "location", "about", "xmpp", "matrix",
+                               "hide", "batch", "notify", "poll", "request", "confirm", "subscribe", "poco",
                                "following", "followers", "inbox", "outbox", "sharedinbox",
                                "priority", "network", "pubkey", "manually-approve", "baseurl", "gsid"];
 
@@ -101,7 +101,7 @@ class Probe
                        if (isset($data[$field])) {
                                if (in_array($field, $numeric_fields)) {
                                        $newdata[$field] = (int)$data[$field];
-                               } else {        
+                               } else {
                                        $newdata[$field] = $data[$field];
                                }
                        } elseif (!in_array($field, $numeric_fields)) {
@@ -170,7 +170,7 @@ class Probe
                Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]);
                $xrd = null;
 
-               $curlResult = DI::httpRequest()->get($ssl_url, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
+               $curlResult = DI::httpClient()->get($ssl_url, [HttpClientOptions::TIMEOUT => $xrd_timeout, HttpClientOptions::ACCEPT_CONTENT => ['application/xrd+xml']]);
                $ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
                if ($curlResult->isSuccess()) {
                        $xml = $curlResult->getBody();
@@ -187,7 +187,7 @@ class Probe
                }
 
                if (!is_object($xrd) && !empty($url)) {
-                       $curlResult = DI::httpRequest()->get($url, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
+                       $curlResult = DI::httpClient()->get($url, [HttpClientOptions::TIMEOUT => $xrd_timeout, HttpClientOptions::ACCEPT_CONTENT => ['application/xrd+xml']]);
                        $connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
                        if ($curlResult->isTimeout()) {
                                Logger::info('Probing timeout', ['url' => $url]);
@@ -231,6 +231,11 @@ class Probe
                        }
                }
 
+               if (Network::isUrlBlocked($host_url)) {
+                       Logger::info('Domain is blocked', ['url' => $host]);
+                       return [];
+               }
+
                self::$baseurl = $host_url;
 
                Logger::info('Probing successful', ['host' => $host]);
@@ -424,16 +429,11 @@ class Probe
         */
        private static function getHideStatus($url)
        {
-               $curlResult = DI::httpRequest()->get($url);
+               $curlResult = DI::httpClient()->get($url, [HttpClientOptions::CONTENT_LENGTH => 1000000]);
                if (!$curlResult->isSuccess()) {
                        return false;
                }
 
-               // If the file is too large then exit
-               if (($curlResult->getInfo()['download_content_length'] ?? 0) > 1000000) {
-                       return false;
-               }
-
                // If it isn't a HTML file then exit
                if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
                        return false;
@@ -507,16 +507,17 @@ class Probe
         * Get webfinger data from a given URI
         *
         * @param string $uri
-        * @return array Webfinger array
+        * @return array
+        * @throws HTTPException\InternalServerErrorException
         */
-       private static function getWebfingerArray(string $uri)
+       private static function getWebfingerArray(string $uri): array
        {
                $parts = parse_url($uri);
 
                if (!empty($parts['scheme']) && !empty($parts['host'])) {
                        $host = $parts['host'];
                        if (!empty($parts['port'])) {
-                               $host .= ':'.$parts['port'];
+                               $host .= ':' . $parts['port'];
                        }
 
                        $baseurl = $parts['scheme'] . '://' . $host;
@@ -524,32 +525,27 @@ class Probe
                        $nick = '';
                        $addr = '';
 
-                       $path_parts = explode("/", trim($parts['path'] ?? '', "/"));
+                       $path_parts = explode('/', trim($parts['path'] ?? '', '/'));
                        if (!empty($path_parts)) {
                                $nick = ltrim(end($path_parts), '@');
-                               // When the last part of the URI is numeric then it is most likely an ID and not a nick name
-                               if (!is_numeric($nick)) {
-                                       $addr = $nick."@".$host;
-                               } else {
-                                       $nick = '';
-                               }
+                               $addr = $nick . '@' . $host;
                        }
 
-                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, HttpClient::ACCEPT_JRD_JSON, $uri, $addr);
                        if (empty($webfinger)) {
                                $lrdd = self::hostMeta($host);
                        }
 
                        if (empty($webfinger) && empty($lrdd)) {
                                while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) {
-                                       $host .= "/".array_shift($path_parts);
+                                       $host    .= '/' . array_shift($path_parts);
                                        $baseurl = $parts['scheme'] . '://' . $host;
 
                                        if (!empty($nick)) {
-                                               $addr = $nick."@".$host;
+                                               $addr = $nick . '@' . $host;
                                        }
 
-                                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, HttpClient::ACCEPT_JRD_JSON, $uri, $addr);
                                        if (empty($webfinger)) {
                                                $lrdd = self::hostMeta($host);
                                        }
@@ -567,13 +563,13 @@ class Probe
                        $nick = substr($uri, 0, strpos($uri, '@'));
                        $addr = $uri;
 
-                       $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                       $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, HttpClient::ACCEPT_JRD_JSON, $uri, $addr);
                        if (self::$istimeout) {
                                return [];
                        }
 
                        if (empty($webfinger)) {
-                               $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                               $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, HttpClient::ACCEPT_JRD_JSON, $uri, $addr);
                                if (self::$istimeout) {
                                        return [];
                                }
@@ -630,6 +626,11 @@ class Probe
         */
        private static function getWebfinger(string $template, string $type, string $uri, string $addr)
        {
+               if (Network::isUrlBlocked($template)) {
+                       Logger::info('Domain is blocked', ['url' => $template]);
+                       return [];
+               }
+
                // First try the address because this is the primary purpose of webfinger
                if (!empty($addr)) {
                        $detected = $addr;
@@ -676,46 +677,31 @@ class Probe
                        'uri'     => $uri,
                        'network' => $network,
                        'uid'     => $uid,
-                       'result'  => [],
+                       'result'  => null,
                ];
 
                Hook::callAll('probe_detect', $hookData);
 
-               if ($hookData['result']) {
-                       if (!is_array($hookData['result'])) {
-                               return [];
-                       } else {
-                               return $hookData['result'];
-                       }
+               if (isset($hookData['result'])) {
+                       return is_array($hookData['result']) ? $hookData['result'] : [];
                }
 
                $parts = parse_url($uri);
-
-               if (!empty($parts['scheme']) && !empty($parts['host'])) {
-                       if (in_array($parts['host'], ['twitter.com', 'mobile.twitter.com'])) {
-                               return self::twitter($uri);
-                       }
-               } elseif (strstr($uri, '@')) {
-                       // If the URI starts with "mailto:" then jump directly to the mail detection
-                       if (strpos($uri, 'mailto:') !== false) {
-                               $uri = str_replace('mailto:', '', $uri);
-                               return self::mail($uri, $uid);
-                       }
-
-                       if ($network == Protocol::MAIL) {
-                               return self::mail($uri, $uid);
-                       }
-
-                       if (Strings::endsWith($uri, '@twitter.com')
-                               || Strings::endsWith($uri, '@mobile.twitter.com')
-                       ) {
-                               return self::twitter($uri);
-                       }
-               } else {
+               if (empty($parts['scheme']) && empty($parts['host']) && !strstr($parts['path'], '@')) {
                        Logger::info('URI was not detectable', ['uri' => $uri]);
                        return [];
                }
 
+               // If the URI starts with "mailto:" then jump directly to the mail detection
+               if (strpos($uri, 'mailto:') !== false) {
+                       $uri = str_replace('mailto:', '', $uri);
+                       return self::mail($uri, $uid);
+               }
+
+               if ($network == Protocol::MAIL) {
+                       return self::mail($uri, $uid);
+               }
+
                Logger::info('Probing start', ['uri' => $uri]);
 
                if (!empty($ap_profile['addr']) && ($ap_profile['addr'] != $uri)) {
@@ -848,7 +834,7 @@ class Probe
 
        public static function pollZot($url, $data)
        {
-               $curlResult = DI::httpRequest()->get($url);
+               $curlResult = DI::httpClient()->get($url);
                if ($curlResult->isTimeout()) {
                        return $data;
                }
@@ -945,7 +931,7 @@ class Probe
        {
                $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20);
 
-               $curlResult = DI::httpRequest()->get($url, ['timeout' => $xrd_timeout, 'accept_content' => $type]);
+               $curlResult = DI::httpClient()->get($url, [HttpClientOptions::TIMEOUT => $xrd_timeout, HttpClientOptions::ACCEPT_CONTENT => [$type]]);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1014,7 +1000,7 @@ class Probe
         */
        private static function pollNoscrape($noscrape_url, $data)
        {
-               $curlResult = DI::httpRequest()->get($noscrape_url);
+               $curlResult = DI::httpClient()->get($noscrape_url);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return $data;
@@ -1067,6 +1053,14 @@ class Probe
                        $data["about"] = $json["about"];
                }
 
+               if (!empty($json["xmpp"])) {
+                       $data["xmpp"] = $json["xmpp"];
+               }
+
+               if (!empty($json["matrix"])) {
+                       $data["matrix"] = $json["matrix"];
+               }
+
                if (!empty($json["key"])) {
                        $data["pubkey"] = $json["key"];
                }
@@ -1272,7 +1266,7 @@ class Probe
         */
        private static function pollHcard($hcard_url, $data, $dfrn = false)
        {
-               $curlResult = DI::httpRequest()->get($hcard_url);
+               $curlResult = DI::httpClient()->get($hcard_url);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1412,6 +1406,8 @@ class Probe
                                $data["guid"] = $link["href"];
                        } elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") && (($link["type"] ?? "") == "text/html") && !empty($link["href"])) {
                                $data["url"] = $link["href"];
+                       } elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") && empty($link["type"]) && !empty($link["href"])) {
+                               $profile_url = $link["href"];
                        } elseif (($link["rel"] == ActivityNamespace::FEED) && !empty($link["href"])) {
                                $data["poll"] = $link["href"];
                        } elseif (($link["rel"] == ActivityNamespace::POCO) && !empty($link["href"])) {
@@ -1428,6 +1424,10 @@ class Probe
                        }
                }
 
+               if (empty($data["url"]) && !empty($profile_url)) {
+                       $data["url"] = $profile_url;
+               }
+
                if (empty($data["url"]) || empty($hcard_url)) {
                        return [];
                }
@@ -1527,7 +1527,7 @@ class Probe
                                                        $pubkey = substr($pubkey, 5);
                                                }
                                        } elseif (Strings::normaliseLink($pubkey) == 'http://') {
-                                               $curlResult = DI::httpRequest()->get($pubkey);
+                                               $curlResult = DI::httpClient()->get($pubkey);
                                                if ($curlResult->isTimeout()) {
                                                        self::$istimeout = true;
                                                        return $short ? false : [];
@@ -1561,7 +1561,7 @@ class Probe
                }
 
                // Fetch all additional data from the feed
-               $curlResult = DI::httpRequest()->get($data["poll"]);
+               $curlResult = DI::httpClient()->get($data["poll"]);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1613,7 +1613,7 @@ class Probe
         */
        private static function pumpioProfileData($profile_link)
        {
-               $curlResult = DI::httpRequest()->get($profile_link);
+               $curlResult = DI::httpClient()->get($profile_link);
                if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
                        return [];
                }
@@ -1722,33 +1722,6 @@ class Probe
                return $data;
        }
 
-       /**
-        * Check for twitter contact
-        *
-        * @param string $uri
-        *
-        * @return array twitter data
-        */
-       private static function twitter($uri)
-       {
-               if (preg_match('=([^@]+)@(?:mobile\.)?twitter\.com$=i', $uri, $matches)) {
-                       $nick = $matches[1];
-               } elseif (preg_match('=^https?://(?:mobile\.)?twitter\.com/(.+)=i', $uri, $matches)) {
-                       $nick = $matches[1];
-               } else {
-                       return [];
-               }
-
-               $data = [];
-               $data['url'] = 'https://twitter.com/' . $nick;
-               $data['addr'] = $nick . '@twitter.com';
-               $data['nick'] = $data['name'] = $nick;
-               $data['network'] = Protocol::TWITTER;
-               $data['baseurl'] = 'https://twitter.com';
-
-               return $data;
-       }
-
        /**
         * Checks HTML page for RSS feed link
         *
@@ -1853,7 +1826,7 @@ class Probe
         */
        private static function feed($url, $probe = true)
        {
-               $curlResult = DI::httpRequest()->get($url);
+               $curlResult = DI::httpClient()->get($url);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1976,8 +1949,6 @@ class Probe
                                                        $data["name"] .= $perspart->text;
                                                }
                                        }
-
-                                       $data["name"] = Strings::escapeTags($data["name"]);
                                }
                        }
                }
@@ -2028,7 +1999,7 @@ class Probe
        /**
         * Fetch the last date that the contact had posted something (publicly)
         *
-        * @param string $data  probing result
+        * @param array $data  probing result
         * @return string last activity
         */
        public static function getLastUpdate(array $data)
@@ -2077,7 +2048,7 @@ class Probe
                        return '';
                }
 
-               $curlResult = DI::httpRequest()->get($gserver['noscrape'] . '/' . $data['nick']);
+               $curlResult = DI::httpClient()->get($gserver['noscrape'] . '/' . $data['nick']);
 
                if ($curlResult->isSuccess() && !empty($curlResult->getBody())) {
                        $noscrape = json_decode($curlResult->getBody(), true);
@@ -2152,8 +2123,8 @@ class Probe
        private static function updateFromFeed(array $data)
        {
                // Search for the newest entry in the feed
-               $curlResult = DI::httpRequest()->get($data['poll']);
-               if (!$curlResult->isSuccess()) {
+               $curlResult = DI::httpClient()->get($data['poll']);
+               if (!$curlResult->isSuccess() || !$curlResult->getBody()) {
                        return '';
                }
 
@@ -2210,28 +2181,29 @@ class Probe
                                throw new HTTPException\NotFoundException('User not found.');
                        }
 
-                       $profile   = User::getOwnerDataById($uid);
+                       $owner     = User::getOwnerDataById($uid);
                        $approfile = ActivityPub\Transmitter::getProfile($uid);
 
-                       if (empty($profile['gsid'])) {
-                               $profile['gsid'] = GServer::getID($approfile['generator']['url']);
+                       if (empty($owner['gsid'])) {
+                               $owner['gsid'] = GServer::getID($approfile['generator']['url']);
                        }
 
                        $data = [
-                               'name' => $profile['name'], 'nick' => $profile['nick'], 'guid' => $approfile['diaspora:guid'] ?? '',
-                               'url' => $profile['url'], 'addr' => $profile['addr'], 'alias' => $profile['alias'],
-                               'photo' => Contact::getAvatarUrlForId($profile['id'], $profile['updated']),
-                               'header' => $profile['header'] ? Contact::getHeaderUrlForId($profile['id'], $profile['updated']) : '',
-                               'account-type' => $profile['contact-type'], 'community' => ($profile['contact-type'] == User::ACCOUNT_TYPE_COMMUNITY),
-                               'keywords' => $profile['keywords'], 'location' => $profile['location'], 'about' => $profile['about'],
-                               'hide' => !$profile['net-publish'], 'batch' => '', 'notify' => $profile['notify'],
-                               'poll' => $profile['poll'], 'request' => $profile['request'], 'confirm' => $profile['confirm'],
-                               'subscribe' => $approfile['generator']['url'] . '/follow?url={uri}', 'poco' => $profile['poco'],
+                               'name' => $owner['name'], 'nick' => $owner['nick'], 'guid' => $approfile['diaspora:guid'] ?? '',
+                               'url' => $owner['url'], 'addr' => $owner['addr'], 'alias' => $owner['alias'],
+                               'photo' => User::getAvatarUrl($owner),
+                               'header' => $owner['header'] ? Contact::getHeaderUrlForId($owner['id'], $owner['updated']) : '',
+                               'account-type' => $owner['contact-type'], 'community' => ($owner['contact-type'] == User::ACCOUNT_TYPE_COMMUNITY),
+                               'keywords' => $owner['keywords'], 'location' => $owner['location'], 'about' => $owner['about'],
+                               'xmpp' => $owner['xmpp'], 'matrix' => $owner['matrix'],
+                               'hide' => !$owner['net-publish'], 'batch' => '', 'notify' => $owner['notify'],
+                               'poll' => $owner['poll'], 'request' => $owner['request'], 'confirm' => $owner['confirm'],
+                               'subscribe' => $approfile['generator']['url'] . '/follow?url={uri}', 'poco' => $owner['poco'],
                                'following' => $approfile['following'], 'followers' => $approfile['followers'],
                                'inbox' => $approfile['inbox'], 'outbox' => $approfile['outbox'],
                                'sharedinbox' => $approfile['endpoints']['sharedInbox'], 'network' => Protocol::DFRN,
-                               'pubkey' => $profile['upubkey'], 'baseurl' => $approfile['generator']['url'], 'gsid' => $profile['gsid'],
-                               'manually-approve' => in_array($profile['page-flags'], [User::PAGE_FLAGS_NORMAL, User::PAGE_FLAGS_PRVGROUP])
+                               'pubkey' => $owner['upubkey'], 'baseurl' => $approfile['generator']['url'], 'gsid' => $owner['gsid'],
+                               'manually-approve' => in_array($owner['page-flags'], [User::PAGE_FLAGS_NORMAL, User::PAGE_FLAGS_PRVGROUP])
                        ];
                } catch (Exception $e) {
                        // Default values for non existing targets