git.mxchange.org Git - friendica.git/blobdiff - src/Network/Probe.php
Issue 8807: Improved feed detecting
[friendica.git] / src / Network / Probe.php
index ff364d12a17935de455c1253c35251008ee6b19a..60f085fb09e7a9bf67621394481e3ad9dcaae651 100644 (file)
@@ -47,6 +47,8 @@ use Friendica\Util\XML;
  */
 class Probe
 {
+       const WEBFINGER = '/.well-known/webfinger?resource={uri}';
+
        private static $baseurl;
        private static $istimeout;
 
@@ -207,7 +209,7 @@ class Probe
                        return [];
                }
 
-               $lrdd = ['application/jrd+json' => $host_url . '/.well-known/webfinger?resource={uri}'];
+               $lrdd = [];
 
                foreach ($links["xrd"]["link"] as $value => $link) {
                        if (!empty($link["@attributes"])) {
@@ -285,65 +287,11 @@ class Probe
         */
        public static function lrdd(string $uri)
        {
-               $lrdd = self::hostMeta($uri);
-               $webfinger = null;
-
-               if (is_bool($lrdd)) {
+               $data = self::getWebfingerArray($uri);
+               if (empty($data)) {
                        return [];
                }
-
-               if (!$lrdd) {
-                       $parts = @parse_url($uri);
-                       if (!$parts || empty($parts["host"]) || empty($parts["path"])) {
-                               return [];
-                       }
-
-                       $host = $parts['scheme'] . '://' . $parts["host"];
-                       if (!empty($parts["port"])) {
-                               $host .= ':'.$parts["port"];
-                       }
-
-                       $path_parts = explode("/", trim($parts["path"], "/"));
-
-                       $nick = array_pop($path_parts);
-
-                       do {
-                               $lrdd = self::hostMeta($host);
-                               $host .= "/".array_shift($path_parts);
-                       } while (!$lrdd && (sizeof($path_parts) > 0));
-               }
-
-               if (!$lrdd) {
-                       Logger::log("No lrdd data found for ".$uri, Logger::DEBUG);
-                       return [];
-               }
-
-               foreach ($lrdd as $type => $template) {
-                       if ($webfinger) {
-                               continue;
-                       }
-
-                       $path = str_replace('{uri}', urlencode($uri), $template);
-                       $webfinger = self::webfinger($path, $type);
-
-                       if (!$webfinger && (strstr($uri, "@"))) {
-                               $path = str_replace('{uri}', urlencode("acct:".$uri), $template);
-                               $webfinger = self::webfinger($path, $type);
-                       }
-
-                       // Special treatment for Mastodon
-                       // Problem is that Mastodon uses an URL format like http://domain.tld/@nick
-                       // But the webfinger for this format fails.
-                       if (!$webfinger && !empty($nick)) {
-                               // Mastodon uses a "@" as prefix for usernames in their url format
-                               $nick = ltrim($nick, '@');
-
-                               $addr = $nick."@".$host;
-
-                               $path = str_replace('{uri}', urlencode("acct:".$addr), $template);
-                               $webfinger = self::webfinger($path, $type);
-                       }
-               }
+               $webfinger = $data['webfinger'];
 
                if (empty($webfinger["links"])) {
                        Logger::log("No webfinger links found for ".$uri, Logger::DEBUG);
@@ -381,8 +329,9 @@ class Probe
         */
        public static function uri($uri, $network = '', $uid = -1, $cache = true)
        {
+               $cachekey = 'Probe::uri:' . $network . ':' . $uri;
                if ($cache) {
-                       $result = DI::cache()->get('Probe::uri:' . $network . ':' . $uri);
+                       $result = DI::cache()->get($cachekey);
                        if (!is_null($result)) {
                                return $result;
                        }
@@ -392,19 +341,19 @@ class Probe
                        $uid = local_user();
                }
 
-               self::$istimeout = false;
-
-               if ($network != Protocol::ACTIVITYPUB) {
-                       $data = self::detect($uri, $network, $uid);
+               if (empty($network) || ($network == Protocol::ACTIVITYPUB)) {
+                       $ap_profile = ActivityPub::probeProfile($uri, !$cache);
                } else {
-                       $data = null;
+                       $ap_profile = [];
                }
 
-               // When the previous detection process had got a time out
-               // we could falsely detect a Friendica profile as AP profile.
-               if (!self::$istimeout) {
-                       $ap_profile = ActivityPub::probeProfile($uri, !$cache);
+               self::$istimeout = false;
 
+               if ($network != Protocol::ACTIVITYPUB) {
+                       $data = self::detect($uri, $network, $uid, $ap_profile);
+                       if (!is_array($data)) {
+                               $data = [];
+                       }
                        if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) {
                                $data = $ap_profile;
                        } elseif (!empty($ap_profile)) {
@@ -412,7 +361,7 @@ class Probe
                                $data = array_merge($ap_profile, $data);
                        }
                } else {
-                       Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]);
+                       $data = $ap_profile;
                }
 
                if (!isset($data['url'])) {
@@ -441,10 +390,6 @@ class Probe
                        }
                }
 
-               if (!empty(self::$baseurl)) {
-                       $data['baseurl'] = self::$baseurl;
-               }
-
                if (!empty($data['baseurl']) && empty($data['gsid'])) {
                        $data['gsid'] = GServer::getID($data['baseurl']);
                }
@@ -466,7 +411,7 @@ class Probe
 
                // Only store into the cache if the value seems to be valid
                if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) {
-                       DI::cache()->set('Probe::uri:' . $network . ':' . $uri, $data, Duration::DAY);
+                       DI::cache()->set($cachekey, $data, Duration::DAY);
                }
 
                return $data;
@@ -537,69 +482,179 @@ class Probe
        }
 
        /**
-        * Checks if a profile url should be OStatus but only provides partial information
-        *
-        * @param array  $webfinger Webfinger data
-        * @param string $lrdd      Path template for webfinger request
-        * @param string $type      type
+        * Fetch the "subscribe" and add it to the result
         *
-        * @return array fixed webfinger data
-        * @throws HTTPException\InternalServerErrorException
+        * @param array $result
+        * @param array $webfinger
+        * @return array result
         */
-       private static function fixOStatus($webfinger, $lrdd, $type)
+       private static function getSubscribeLink(array $result, array $webfinger)
        {
-               if (empty($webfinger['links']) || empty($webfinger['subject'])) {
-                       return $webfinger;
+               if (empty($webfinger['links'])) {
+                       return $result;
                }
 
-               $is_ostatus = false;
-               $has_key = false;
-
                foreach ($webfinger['links'] as $link) {
-                       if ($link['rel'] == ActivityNamespace::OSTATUSSUB) {
-                               $is_ostatus = true;
+                       if (!empty($link['template']) && ($link['rel'] === ActivityNamespace::OSTATUSSUB)) {
+                               $result['subscribe'] = $link['template'];
+                       }
+               }
+
+               return $result;
+       }
+
+       /**
+        * Get webfinger data from a given URI
+        *
+        * @param string $uri
+        * @return array Webfinger array
+        */
+       private static function getWebfingerArray(string $uri)
+       {
+               $parts = parse_url($uri);
+
+               if (!empty($parts['scheme']) && !empty($parts['host'])) {
+                       $host = $parts['host'];
+                       if (!empty($parts['port'])) {
+                               $host .= ':'.$parts['port'];
+                       }
+
+                       $baseurl = $parts['scheme'] . '://' . $host;
+
+                       $nick = '';
+                       $addr = '';
+
+                       $path_parts = explode("/", trim($parts['path'] ?? '', "/"));
+                       if (!empty($path_parts)) {
+                               $nick = ltrim(end($path_parts), '@');
+                               // When the last part of the URI is numeric then it is most likely an ID and not a nick name
+                               if (!is_numeric($nick)) {
+                                       $addr = $nick."@".$host;
+                               } else {
+                                       $nick = '';
+                               }
+                       }
+
+                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                       if (empty($webfinger)) {
+                               $lrdd = self::hostMeta($host);
+                       }
+
+                       if (empty($webfinger) && empty($lrdd)) {
+                               while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) {
+                                       $host .= "/".array_shift($path_parts);
+                                       $baseurl = $parts['scheme'] . '://' . $host;
+
+                                       if (!empty($nick)) {
+                                               $addr = $nick."@".$host;
+                                       }
+
+                                       $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                                       if (empty($webfinger)) {
+                                               $lrdd = self::hostMeta($host);
+                                       }
+                               }
+
+                               if (empty($lrdd) && empty($webfinger)) {
+                                       return [];
+                               }
+                       }
+               } elseif (strstr($uri, '@')) {
+                       // Remove "acct:" from the URI
+                       $uri = str_replace('acct:', '', $uri);
+
+                       $host = substr($uri, strpos($uri, '@') + 1);
+                       $nick = substr($uri, 0, strpos($uri, '@'));
+                       $addr = $uri;
+
+                       $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                       if (self::$istimeout) {
+                               return [];
+                       }
+
+                       if (empty($webfinger)) {
+                               $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+                               if (self::$istimeout) {
+                                       return [];
+                               }
+                       } else {
+                               $baseurl = 'https://' . $host;
                        }
-                       if ($link['rel'] == 'magic-public-key') {
-                               $has_key = true;
+
+                       if (empty($webfinger)) {
+                               $lrdd = self::hostMeta($host);
+                               if (self::$istimeout) {
+                                       return [];
+                               }
+                               $baseurl = self::$baseurl;
+                       } else {
+                               $baseurl = 'http://' . $host;
                        }
+               } else {
+                       Logger::info('URI was not detectable', ['uri' => $uri]);
+                       return [];
                }
 
-               if (!$is_ostatus || $has_key) {
-                       return $webfinger;
+               if (empty($webfinger)) {
+                       foreach ($lrdd as $type => $template) {
+                               if ($webfinger) {
+                                       continue;
+                               }
+
+                               $webfinger = self::getWebfinger($template, $type, $uri, $addr);
+                       }
                }
 
-               $url = Network::switchScheme($webfinger['subject']);
-               $path = str_replace('{uri}', urlencode($url), $lrdd);
-               $webfinger2 = self::webfinger($path, $type);
+               if (empty($webfinger)) {
+                       return [];
+               }
 
-               // Is the new webfinger detectable as OStatus?
-               if (self::ostatus($webfinger2, true)) {
-                       $webfinger = $webfinger2;
+               if ($webfinger['detected'] == $addr) {
+                       $webfinger['nick'] = $nick;
+                       $webfinger['addr'] = $addr;
                }
 
+               $webfinger['baseurl'] = $baseurl;
+
                return $webfinger;
        }
 
        /**
-        * Fetch the "subscribe" and add it to the result
+        * Perform network request for webfinger data
         *
-        * @param array $result
-        * @param array $webfinger
-        * @return array result
+        * @param string $template
+        * @param string $type
+        * @param string $uri
+        * @param string $addr
+        * @return array webfinger results
         */
-       private static function getSubscribeLink(array $result, array $webfinger)
+       private static function getWebfinger(string $template, string $type, string $uri, string $addr)
        {
-               if (empty($webfinger['links'])) {
-                       return $result;
+               // First try the address because this is the primary purpose of webfinger
+               if (!empty($addr)) {
+                       $detected = $addr;
+                       $path = str_replace('{uri}', urlencode("acct:" . $addr), $template);
+                       $webfinger = self::webfinger($path, $type);
+                       if (self::$istimeout) {
+                               return [];
+                       }
                }
 
-               foreach ($webfinger['links'] as $link) {
-                       if ($link['rel'] === ActivityNamespace::OSTATUSSUB) {
-                               $result['subscribe'] = $link['template'];
+               // Then try the URI
+               if (empty($webfinger) && $uri != $addr) {
+                       $detected = $uri;
+                       $path = str_replace('{uri}', urlencode($uri), $template);
+                       $webfinger = self::webfinger($path, $type);
+                       if (self::$istimeout) {
+                               return [];
                        }
                }
 
-               return $result;
+               if (empty($webfinger)) {
+                       return [];
+               }
+
+               return ['webfinger' => $webfinger, 'detected' => $detected];
        }
 
        /**
@@ -607,17 +662,16 @@ class Probe
         *
         * This function is only called by the "uri" function that adds caching and rearranging of data.
         *
-        * @param string  $uri     Address that should be probed
-        * @param string  $network Test for this specific network
-        * @param integer $uid     User ID for the probe (only used for mails)
+        * @param string  $uri        Address that should be probed
+        * @param string  $network    Test for this specific network
+        * @param integer $uid        User ID for the probe (only used for mails)
+        * @param array   $ap_profile Previously probed AP profile
         *
         * @return array uri data
         * @throws HTTPException\InternalServerErrorException
         */
-       private static function detect($uri, $network, $uid)
+       private static function detect(string $uri, string $network, int $uid, array $ap_profile)
        {
-               $parts = parse_url($uri);
-
                $hookData = [
                        'uri'     => $uri,
                        'network' => $network,
@@ -628,40 +682,19 @@ class Probe
                Hook::callAll('probe_detect', $hookData);
 
                if ($hookData['result']) {
-                       return $hookData['result'];
-               }
-
-               if (!empty($parts["scheme"]) && !empty($parts["host"])) {
-                       $host = $parts["host"];
-                       if (!empty($parts["port"])) {
-                               $host .= ':'.$parts["port"];
-                       }
-
-                       if ($host == 'twitter.com') {
-                               return self::twitter($uri);
-                       }
-                       $lrdd = self::hostMeta($host);
-
-                       if (is_bool($lrdd)) {
+                       if (!is_array($hookData['result'])) {
                                return [];
+                       } else {
+                               return $hookData['result'];
                        }
+               }
 
-                       $path_parts = explode("/", trim($parts['path'] ?? '', "/"));
+               $parts = parse_url($uri);
 
-                       while (!$lrdd && (sizeof($path_parts) > 1)) {
-                               $host .= "/".array_shift($path_parts);
-                               $lrdd = self::hostMeta($host);
-                       }
-                       if (!$lrdd) {
-                               Logger::log('No XRD data was found for '.$uri, Logger::DEBUG);
-                               return self::feed($uri);
+               if (!empty($parts['scheme']) && !empty($parts['host'])) {
+                       if ($parts['host'] == 'twitter.com') {
+                               return self::twitter($uri);
                        }
-                       $nick = array_pop($path_parts);
-
-                       // Mastodon uses a "@" as prefix for usernames in their url format
-                       $nick = ltrim($nick, '@');
-
-                       $addr = $nick."@".$host;
                } elseif (strstr($uri, '@')) {
                        // If the URI starts with "mailto:" then jump directly to the mail detection
                        if (strpos($uri, 'mailto:') !== false) {
@@ -672,74 +705,35 @@ class Probe
                        if ($network == Protocol::MAIL) {
                                return self::mail($uri, $uid);
                        }
-                       // Remove "acct:" from the URI
-                       $uri = str_replace('acct:', '', $uri);
-
-                       $host = substr($uri, strpos($uri, '@') + 1);
-                       $nick = substr($uri, 0, strpos($uri, '@'));
 
                        if (strpos($uri, '@twitter.com')) {
                                return self::twitter($uri);
                        }
-                       $lrdd = self::hostMeta($host);
-
-                       if (is_bool($lrdd)) {
-                               return [];
-                       }
-
-                       if (!$lrdd) {
-                               Logger::log('No XRD data was found for '.$uri, Logger::DEBUG);
-                               return self::mail($uri, $uid);
-                       }
-                       $addr = $uri;
                } else {
-                       Logger::log("Uri ".$uri." was not detectable", Logger::DEBUG);
+                       Logger::info('URI was not detectable', ['uri' => $uri]);
                        return [];
                }
 
-               $webfinger = false;
-
-               /// @todo Do we need the prefix "acct:" or "acct://"?
-
-               foreach ($lrdd as $type => $template) {
-                       if ($webfinger) {
-                               continue;
-                       }
-
-                       // At first try it with the given uri
-                       $path = str_replace('{uri}', urlencode($uri), $template);
-                       $webfinger = self::webfinger($path, $type);
-
-                       // Fix possible problems with GNU Social probing to wrong scheme
-                       $webfinger = self::fixOStatus($webfinger, $template, $type);
-
-                       // We cannot be sure that the detected address was correct, so we don't use the values
-                       if ($webfinger && ($uri != $addr)) {
-                               $nick = "";
-                               $addr = "";
-                       }
-
-                       // Try webfinger with the address (user@domain.tld)
-                       if (!$webfinger) {
-                               $path = str_replace('{uri}', urlencode($addr), $template);
-                               $webfinger = self::webfinger($path, $type);
-                       }
+               Logger::info('Probing start', ['uri' => $uri]);
 
-                       // Mastodon needs to have it with "acct:"
-                       if (!$webfinger) {
-                               $path = str_replace('{uri}', urlencode("acct:".$addr), $template);
-                               $webfinger = self::webfinger($path, $type);
+               $data = self::getWebfingerArray($uri);
+               if (empty($data)) {
+                       if (!empty($parts['scheme'])) {
+                               return self::feed($uri);
+                       } elseif (!empty($uid)) {
+                               return self::mail($uri, $uid);
+                       } else {
+                               return [];
                        }
                }
 
-               if (!$webfinger) {
-                       return self::feed($uri);
-               }
+               $webfinger = $data['webfinger'];
+               $nick = $data['nick'] ?? '';
+               $addr = $data['addr'] ?? '';
+               $baseurl = $data['baseurl'] ?? '';
 
                $result = [];
 
-               Logger::info("Probing", ['uri' => $uri]);
-
                if (in_array($network, ["", Protocol::DFRN])) {
                        $result = self::dfrn($webfinger);
                }
@@ -750,12 +744,13 @@ class Probe
                        $result = self::ostatus($webfinger);
                }
                if (in_array($network, ['', Protocol::ZOT])) {
-                       $result = self::zot($webfinger, $result);
+                       $result = self::zot($webfinger, $result, $baseurl);
                }
                if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) {
                        $result = self::pumpio($webfinger, $addr);
                }
-               if ((!$result && ($network == "")) || ($network == Protocol::FEED)) {
+
+               if (empty($result['network']) && empty($ap_profile['network']) || ($network == Protocol::FEED)) {
                        $result = self::feed($uri);
                } else {
                        // We overwrite the detected nick with our try if the previous routines hadn't detected it.
@@ -775,18 +770,15 @@ class Probe
                        $result["network"] = Protocol::PHANTOM;
                }
 
+               if (empty($result['baseurl']) && !empty($baseurl)) {
+                       $result['baseurl'] = $baseurl;
+               }
+
                if (empty($result["url"])) {
                        $result["url"] = $uri;
                }
 
-               Logger::log($uri." is ".$result["network"], Logger::DEBUG);
-
-               if (empty($result["baseurl"]) && ($result["network"] != Protocol::PHANTOM)) {
-                       $pos = strpos($result["url"], $host);
-                       if ($pos) {
-                               $result["baseurl"] = substr($result["url"], 0, $pos).$host;
-                       }
-               }
+               Logger::info('Probing done', ['uri' => $uri, 'network' => $result["network"]]);
 
                return $result;
        }
@@ -800,7 +792,7 @@ class Probe
         * @return array Zot data
         * @throws HTTPException\InternalServerErrorException
         */
-       private static function zot($webfinger, $data)
+       private static function zot($webfinger, $data, $baseurl)
        {
                if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) {
                        foreach ($webfinger["aliases"] as $alias) {
@@ -821,12 +813,12 @@ class Probe
                        }
                }
 
-               if (empty($zot_url) && !empty($data['addr']) && !empty(self::$baseurl)) {
-                       $condition = ['nurl' => Strings::normaliseLink(self::$baseurl), 'platform' => ['hubzilla']];
+               if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) {
+                       $condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']];
                        if (!DBA::exists('gserver', $condition)) {
                                return $data;
                        }
-                       $zot_url = self::$baseurl . '/.well-known/zot-info?address=' . $data['addr'];
+                       $zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr'];
                }
 
                if (empty($zot_url)) {
@@ -1511,7 +1503,7 @@ class Probe
                                        && (($link["type"] ?? "") == "text/html")
                                        && ($link["href"] != "")
                                ) {
-                                       $data["url"] = $link["href"];
+                                       $data["url"] = $data["alias"] = $link["href"];
                                } elseif (($link["rel"] == "salmon") && !empty($link["href"])) {
                                        $data["notify"] = $link["href"];
                                } elseif (($link["rel"] == ActivityNamespace::FEED) && !empty($link["href"])) {
@@ -1594,8 +1586,7 @@ class Probe
                        $data["url"] = $feed_data["header"]["author-link"];
                }
 
-               if (($data['poll'] == $data['url']) && ($data["alias"] != '')) {
-                       $data['url'] = $data["alias"];
+               if ($data["url"] == $data["alias"]) {
                        $data["alias"] = '';
                }
 
@@ -1884,12 +1875,6 @@ class Probe
                $data["url"] = $url;
                $data["poll"] = $url;
 
-               if (!empty($feed_data["header"]["author-link"])) {
-                       $data["baseurl"] = $feed_data["header"]["author-link"];
-               } else {
-                       $data["baseurl"] = $data["url"];
-               }
-
                $data["network"] = Protocol::FEED;
 
                return $data;