]> git.mxchange.org Git - friendica.git/blobdiff - src/Network/Probe.php
Merge pull request #11518 from annando/issue-11504
[friendica.git] / src / Network / Probe.php
index bdc12aadc1a967c6346480c3dd66470881ab21f3..c5ecf96ed3746d884072d3de29ab0fffaf00527f 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2010-2021, the Friendica project
+ * @copyright Copyright (C) 2010-2022, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
@@ -34,6 +34,8 @@ use Friendica\Model\Contact;
 use Friendica\Model\GServer;
 use Friendica\Model\Profile;
 use Friendica\Model\User;
+use Friendica\Network\HTTPClient\Client\HttpClientAccept;
+use Friendica\Network\HTTPClient\Client\HttpClientOptions;
 use Friendica\Protocol\ActivityNamespace;
 use Friendica\Protocol\ActivityPub;
 use Friendica\Protocol\Email;
@@ -43,6 +45,7 @@ use Friendica\Util\DateTimeFormat;
 use Friendica\Util\Network;
 use Friendica\Util\Strings;
 use Friendica\Util\XML;
+use GuzzleHttp\Psr7\Uri;
 
 /**
  * This class contain functions for probing URL
@@ -57,26 +60,25 @@ class Probe
        /**
         * Remove stuff from an URI that doesn't belong there
         *
-        * @param string $URI
+        * @param string $rawUri
         * @return string Cleaned URI
         */
-       public static function cleanURI(string $URI)
+       public static function cleanURI(string $rawUri): string
        {
                // At first remove leading and trailing junk
-               $URI = trim($URI, "@#?:/ \t\n\r\0\x0B");
+               $rawUri = trim($rawUri, "@#?:/ \t\n\r\0\x0B");
 
-               $parts = parse_url($URI);
+               $rawUri = Network::convertToIdn($rawUri);
 
-               if (empty($parts['scheme'])) {
-                       return $URI;
+               $uri = new Uri($rawUri);
+               if (!$uri->getScheme()) {
+                       return $uri->__toString();
                }
 
                // Remove the URL fragment, since these shouldn't be part of any profile URL
-               unset($parts['fragment']);
+               $uri = $uri->withFragment('');
 
-               $URI = Network::unparseURL($parts);
-
-               return $URI;
+               return $uri->__toString();
        }
 
        /**
@@ -101,7 +103,7 @@ class Probe
                        if (isset($data[$field])) {
                                if (in_array($field, $numeric_fields)) {
                                        $newdata[$field] = (int)$data[$field];
-                               } else {        
+                               } else {
                                        $newdata[$field] = $data[$field];
                                }
                        } elseif (!in_array($field, $numeric_fields)) {
@@ -170,7 +172,7 @@ class Probe
                Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]);
                $xrd = null;
 
-               $curlResult = DI::httpRequest()->get($ssl_url, ['timeout' => $xrd_timeout, 'accept_content' => ['application/xrd+xml']]);
+               $curlResult = DI::httpClient()->get($ssl_url, HttpClientAccept::XRD_XML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
                $ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
                if ($curlResult->isSuccess()) {
                        $xml = $curlResult->getBody();
@@ -187,7 +189,7 @@ class Probe
                }
 
                if (!is_object($xrd) && !empty($url)) {
-                       $curlResult = DI::httpRequest()->get($url, ['timeout' => $xrd_timeout, 'accept_content' => ['application/xrd+xml']]);
+                       $curlResult = DI::httpClient()->get($url, HttpClientAccept::XRD_XML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
                        $connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
                        if ($curlResult->isTimeout()) {
                                Logger::info('Probing timeout', ['url' => $url]);
@@ -231,6 +233,11 @@ class Probe
                        }
                }
 
+               if (Network::isUrlBlocked($host_url)) {
+                       Logger::info('Domain is blocked', ['url' => $host]);
+                       return [];
+               }
+
                self::$baseurl = $host_url;
 
                Logger::info('Probing successful', ['host' => $host]);
@@ -238,49 +245,6 @@ class Probe
                return $lrdd;
        }
 
-       /**
-        * Perform Webfinger lookup and return DFRN data
-        *
-        * Given an email style address, perform webfinger lookup and
-        * return the resulting DFRN profile URL, or if no DFRN profile URL
-        * is located, returns an OStatus subscription template (prefixed
-        * with the string 'stat:' to identify it as on OStatus template).
-        * If this isn't an email style address just return $webbie.
-        * Return an empty string if email-style addresses but webfinger fails,
-        * or if the resultant personal XRD doesn't contain a supported
-        * subscription/friend-request attribute.
-        *
-        * amended 7/9/2011 to return an hcard which could save potentially loading
-        * a lengthy content page to scrape dfrn attributes
-        *
-        * @param string $webbie    Address that should be probed
-        * @param string $hcard_url Link to the hcard - is returned by reference
-        *
-        * @return string profile link
-        * @throws HTTPException\InternalServerErrorException
-        */
-       public static function webfingerDfrn(string $webbie, string &$hcard_url)
-       {
-               $profile_link = '';
-
-               $links = self::lrdd($webbie);
-               Logger::debug('Result', ['url' => $webbie, 'links' => $links]);
-               if (!empty($links) && is_array($links)) {
-                       foreach ($links as $link) {
-                               if ($link['@attributes']['rel'] === ActivityNamespace::DFRN) {
-                                       $profile_link = $link['@attributes']['href'];
-                               }
-                               if (($link['@attributes']['rel'] === ActivityNamespace::OSTATUSSUB) && ($profile_link == "")) {
-                                       $profile_link = 'stat:'.$link['@attributes']['template'];
-                               }
-                               if ($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') {
-                                       $hcard_url = $link['@attributes']['href'];
-                               }
-                       }
-               }
-               return $profile_link;
-       }
-
        /**
         * Check an URI for LRDD data
         *
@@ -424,7 +388,7 @@ class Probe
         */
        private static function getHideStatus($url)
        {
-               $curlResult = DI::httpRequest()->get($url, ['content_length' => 1000000]);
+               $curlResult = DI::httpClient()->get($url, HttpClientAccept::HTML, [HttpClientOptions::CONTENT_LENGTH => 1000000]);
                if (!$curlResult->isSuccess()) {
                        return false;
                }
@@ -502,16 +466,17 @@ class Probe
         * Get webfinger data from a given URI
         *
         * @param string $uri
-        * @return array Webfinger array
+        * @return array
+        * @throws HTTPException\InternalServerErrorException
         */
-       private static function getWebfingerArray(string $uri)
+       private static function getWebfingerArray(string $uri): array
        {
                $parts = parse_url($uri);
 
                if (!empty($parts['scheme']) && !empty($parts['host'])) {
                        $host = $parts['host'];
                        if (!empty($parts['port'])) {
-                               $host .= ':'.$parts['port'];
+                               $host .= ':' . $parts['port'];
                        }
 
                        $baseurl = $parts['scheme'] . '://' . $host;
@@ -519,32 +484,27 @@ class Probe
                        $nick = '';
                        $addr = '';
 
-                       $path_parts = explode("/", trim($parts['path'] ?? '', "/"));
+                       $path_parts = explode('/', trim($parts['path'] ?? '', '/'));
                        if (!empty($path_parts)) {
                                $nick = ltrim(end($path_parts), '@');
-                               // When the last part of the URI is numeric then it is most likely an ID and not a nick name
-                               if (!is_numeric($nick)) {
-                                       $addr = $nick."@".$host;
-                               } else {
-                                       $nick = '';
-                               }
+                               $addr = $nick . '@' . $host;
                        }
 
-                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
                        if (empty($webfinger)) {
                                $lrdd = self::hostMeta($host);
                        }
 
                        if (empty($webfinger) && empty($lrdd)) {
                                while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) {
-                                       $host .= "/".array_shift($path_parts);
+                                       $host    .= '/' . array_shift($path_parts);
                                        $baseurl = $parts['scheme'] . '://' . $host;
 
                                        if (!empty($nick)) {
-                                               $addr = $nick."@".$host;
+                                               $addr = $nick . '@' . $host;
                                        }
 
-                                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
                                        if (empty($webfinger)) {
                                                $lrdd = self::hostMeta($host);
                                        }
@@ -562,13 +522,13 @@ class Probe
                        $nick = substr($uri, 0, strpos($uri, '@'));
                        $addr = $uri;
 
-                       $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                       $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
                        if (self::$istimeout) {
                                return [];
                        }
 
                        if (empty($webfinger)) {
-                               $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                               $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
                                if (self::$istimeout) {
                                        return [];
                                }
@@ -625,6 +585,11 @@ class Probe
         */
        private static function getWebfinger(string $template, string $type, string $uri, string $addr)
        {
+               if (Network::isUrlBlocked($template)) {
+                       Logger::info('Domain is blocked', ['url' => $template]);
+                       return [];
+               }
+
                // First try the address because this is the primary purpose of webfinger
                if (!empty($addr)) {
                        $detected = $addr;
@@ -671,46 +636,31 @@ class Probe
                        'uri'     => $uri,
                        'network' => $network,
                        'uid'     => $uid,
-                       'result'  => [],
+                       'result'  => null,
                ];
 
                Hook::callAll('probe_detect', $hookData);
 
-               if ($hookData['result']) {
-                       if (!is_array($hookData['result'])) {
-                               return [];
-                       } else {
-                               return $hookData['result'];
-                       }
+               if (isset($hookData['result'])) {
+                       return is_array($hookData['result']) ? $hookData['result'] : [];
                }
 
                $parts = parse_url($uri);
-
-               if (!empty($parts['scheme']) && !empty($parts['host'])) {
-                       if (in_array($parts['host'], ['twitter.com', 'mobile.twitter.com'])) {
-                               return self::twitter($uri);
-                       }
-               } elseif (strstr($uri, '@')) {
-                       // If the URI starts with "mailto:" then jump directly to the mail detection
-                       if (strpos($uri, 'mailto:') !== false) {
-                               $uri = str_replace('mailto:', '', $uri);
-                               return self::mail($uri, $uid);
-                       }
-
-                       if ($network == Protocol::MAIL) {
-                               return self::mail($uri, $uid);
-                       }
-
-                       if (Strings::endsWith($uri, '@twitter.com')
-                               || Strings::endsWith($uri, '@mobile.twitter.com')
-                       ) {
-                               return self::twitter($uri);
-                       }
-               } else {
+               if (empty($parts['scheme']) && empty($parts['host']) && !strstr($parts['path'], '@')) {
                        Logger::info('URI was not detectable', ['uri' => $uri]);
                        return [];
                }
 
+               // If the URI starts with "mailto:" then jump directly to the mail detection
+               if (strpos($uri, 'mailto:') !== false) {
+                       $uri = str_replace('mailto:', '', $uri);
+                       return self::mail($uri, $uid);
+               }
+
+               if ($network == Protocol::MAIL) {
+                       return self::mail($uri, $uid);
+               }
+
                Logger::info('Probing start', ['uri' => $uri]);
 
                if (!empty($ap_profile['addr']) && ($ap_profile['addr'] != $uri)) {
@@ -843,7 +793,7 @@ class Probe
 
        public static function pollZot($url, $data)
        {
-               $curlResult = DI::httpRequest()->get($url);
+               $curlResult = DI::httpClient()->get($url, HttpClientAccept::JSON);
                if ($curlResult->isTimeout()) {
                        return $data;
                }
@@ -940,7 +890,7 @@ class Probe
        {
                $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20);
 
-               $curlResult = DI::httpRequest()->get($url, ['timeout' => $xrd_timeout, 'accept_content' => [$type]]);
+               $curlResult = DI::httpClient()->get($url, $type, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1009,7 +959,7 @@ class Probe
         */
        private static function pollNoscrape($noscrape_url, $data)
        {
-               $curlResult = DI::httpRequest()->get($noscrape_url);
+               $curlResult = DI::httpClient()->get($noscrape_url, HttpClientAccept::JSON);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return $data;
@@ -1275,7 +1225,7 @@ class Probe
         */
        private static function pollHcard($hcard_url, $data, $dfrn = false)
        {
-               $curlResult = DI::httpRequest()->get($hcard_url);
+               $curlResult = DI::httpClient()->get($hcard_url, HttpClientAccept::HTML);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1536,11 +1486,12 @@ class Probe
                                                        $pubkey = substr($pubkey, 5);
                                                }
                                        } elseif (Strings::normaliseLink($pubkey) == 'http://') {
-                                               $curlResult = DI::httpRequest()->get($pubkey);
+                                               $curlResult = DI::httpClient()->get($pubkey, HttpClientAccept::MAGIC_KEY);
                                                if ($curlResult->isTimeout()) {
                                                        self::$istimeout = true;
                                                        return $short ? false : [];
                                                }
+                                               Logger::debug('Fetched public key', ['Content-Type' => $curlResult->getHeader('Content-Type'), 'url' => $pubkey]);
                                                $pubkey = $curlResult->getBody();
                                        }
 
@@ -1570,7 +1521,7 @@ class Probe
                }
 
                // Fetch all additional data from the feed
-               $curlResult = DI::httpRequest()->get($data["poll"]);
+               $curlResult = DI::httpClient()->get($data["poll"], HttpClientAccept::FEED_XML);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1622,7 +1573,7 @@ class Probe
         */
        private static function pumpioProfileData($profile_link)
        {
-               $curlResult = DI::httpRequest()->get($profile_link);
+               $curlResult = DI::httpClient()->get($profile_link, HttpClientAccept::HTML);
                if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
                        return [];
                }
@@ -1731,33 +1682,6 @@ class Probe
                return $data;
        }
 
-       /**
-        * Check for twitter contact
-        *
-        * @param string $uri
-        *
-        * @return array twitter data
-        */
-       private static function twitter($uri)
-       {
-               if (preg_match('=([^@]+)@(?:mobile\.)?twitter\.com$=i', $uri, $matches)) {
-                       $nick = $matches[1];
-               } elseif (preg_match('=^https?://(?:mobile\.)?twitter\.com/(.+)=i', $uri, $matches)) {
-                       $nick = $matches[1];
-               } else {
-                       return [];
-               }
-
-               $data = [];
-               $data['url'] = 'https://twitter.com/' . $nick;
-               $data['addr'] = $nick . '@twitter.com';
-               $data['nick'] = $data['name'] = $nick;
-               $data['network'] = Protocol::TWITTER;
-               $data['baseurl'] = 'https://twitter.com';
-
-               return $data;
-       }
-
        /**
         * Checks HTML page for RSS feed link
         *
@@ -1862,7 +1786,7 @@ class Probe
         */
        private static function feed($url, $probe = true)
        {
-               $curlResult = DI::httpRequest()->get($url);
+               $curlResult = DI::httpClient()->get($url, HttpClientAccept::FEED_XML);
                if ($curlResult->isTimeout()) {
                        self::$istimeout = true;
                        return [];
@@ -1985,8 +1909,6 @@ class Probe
                                                        $data["name"] .= $perspart->text;
                                                }
                                        }
-
-                                       $data["name"] = Strings::escapeTags($data["name"]);
                                }
                        }
                }
@@ -2037,7 +1959,7 @@ class Probe
        /**
         * Fetch the last date that the contact had posted something (publically)
         *
-        * @param string $data  probing result
+        * @param array $data  probing result
         * @return string last activity
         */
        public static function getLastUpdate(array $data)
@@ -2086,7 +2008,7 @@ class Probe
                        return '';
                }
 
-               $curlResult = DI::httpRequest()->get($gserver['noscrape'] . '/' . $data['nick']);
+               $curlResult = DI::httpClient()->get($gserver['noscrape'] . '/' . $data['nick'], HttpClientAccept::JSON);
 
                if ($curlResult->isSuccess() && !empty($curlResult->getBody())) {
                        $noscrape = json_decode($curlResult->getBody(), true);
@@ -2161,7 +2083,7 @@ class Probe
        private static function updateFromFeed(array $data)
        {
                // Search for the newest entry in the feed
-               $curlResult = DI::httpRequest()->get($data['poll']);
+               $curlResult = DI::httpClient()->get($data['poll'], HttpClientAccept::ATOM_XML);
                if (!$curlResult->isSuccess() || !$curlResult->getBody()) {
                        return '';
                }
@@ -2219,29 +2141,29 @@ class Probe
                                throw new HTTPException\NotFoundException('User not found.');
                        }
 
-                       $profile   = User::getOwnerDataById($uid);
+                       $owner     = User::getOwnerDataById($uid);
                        $approfile = ActivityPub\Transmitter::getProfile($uid);
 
-                       if (empty($profile['gsid'])) {
-                               $profile['gsid'] = GServer::getID($approfile['generator']['url']);
+                       if (empty($owner['gsid'])) {
+                               $owner['gsid'] = GServer::getID($approfile['generator']['url']);
                        }
 
                        $data = [
-                               'name' => $profile['name'], 'nick' => $profile['nick'], 'guid' => $approfile['diaspora:guid'] ?? '',
-                               'url' => $profile['url'], 'addr' => $profile['addr'], 'alias' => $profile['alias'],
-                               'photo' => Contact::getAvatarUrlForId($profile['id'], '', $profile['updated']),
-                               'header' => $profile['header'] ? Contact::getHeaderUrlForId($profile['id'], $profile['updated']) : '',
-                               'account-type' => $profile['contact-type'], 'community' => ($profile['contact-type'] == User::ACCOUNT_TYPE_COMMUNITY),
-                               'keywords' => $profile['keywords'], 'location' => $profile['location'], 'about' => $profile['about'],
-                               'xmpp' => $profile['xmpp'], 'matrix' => $profile['matrix'], 
-                               'hide' => !$profile['net-publish'], 'batch' => '', 'notify' => $profile['notify'],
-                               'poll' => $profile['poll'], 'request' => $profile['request'], 'confirm' => $profile['confirm'],
-                               'subscribe' => $approfile['generator']['url'] . '/follow?url={uri}', 'poco' => $profile['poco'],
+                               'name' => $owner['name'], 'nick' => $owner['nick'], 'guid' => $approfile['diaspora:guid'] ?? '',
+                               'url' => $owner['url'], 'addr' => $owner['addr'], 'alias' => $owner['alias'],
+                               'photo' => User::getAvatarUrl($owner),
+                               'header' => $owner['header'] ? Contact::getHeaderUrlForId($owner['id'], $owner['updated']) : '',
+                               'account-type' => $owner['contact-type'], 'community' => ($owner['contact-type'] == User::ACCOUNT_TYPE_COMMUNITY),
+                               'keywords' => $owner['keywords'], 'location' => $owner['location'], 'about' => $owner['about'],
+                               'xmpp' => $owner['xmpp'], 'matrix' => $owner['matrix'],
+                               'hide' => !$owner['net-publish'], 'batch' => '', 'notify' => $owner['notify'],
+                               'poll' => $owner['poll'], 'request' => $owner['request'], 'confirm' => $owner['confirm'],
+                               'subscribe' => $approfile['generator']['url'] . '/follow?url={uri}', 'poco' => $owner['poco'],
                                'following' => $approfile['following'], 'followers' => $approfile['followers'],
                                'inbox' => $approfile['inbox'], 'outbox' => $approfile['outbox'],
                                'sharedinbox' => $approfile['endpoints']['sharedInbox'], 'network' => Protocol::DFRN,
-                               'pubkey' => $profile['upubkey'], 'baseurl' => $approfile['generator']['url'], 'gsid' => $profile['gsid'],
-                               'manually-approve' => in_array($profile['page-flags'], [User::PAGE_FLAGS_NORMAL, User::PAGE_FLAGS_PRVGROUP])
+                               'pubkey' => $owner['upubkey'], 'baseurl' => $approfile['generator']['url'], 'gsid' => $owner['gsid'],
+                               'manually-approve' => in_array($owner['page-flags'], [User::PAGE_FLAGS_NORMAL, User::PAGE_FLAGS_PRVGROUP])
                        ];
                } catch (Exception $e) {
                        // Default values for non existing targets