X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FNetwork%2FProbe.php;h=920dac47e6771911002a8fefc4f9546855cdc388;hb=a98957eeb29cfb142dfc4cd3e03c5bdfbc373466;hp=ec857836cc1162268e06d9e90f6fd6ef186d934e;hpb=8293d5ed0af78ae6cc5629bb1f982a7b29794d32;p=friendica.git diff --git a/src/Network/Probe.php b/src/Network/Probe.php index ec857836cc..920dac47e6 100644 --- a/src/Network/Probe.php +++ b/src/Network/Probe.php @@ -24,12 +24,15 @@ namespace Friendica\Network; use DOMDocument; use DomXPath; use Friendica\Core\Cache\Duration; +use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Protocol; +use Friendica\Core\System; use Friendica\Database\DBA; use Friendica\DI; -use Friendica\Model\Contact; +use Friendica\Model\GServer; use Friendica\Model\Profile; +use Friendica\Model\User; use Friendica\Protocol\ActivityNamespace; use Friendica\Protocol\ActivityPub; use Friendica\Protocol\Email; @@ -44,9 +47,36 @@ use Friendica\Util\XML; */ class Probe { + const WEBFINGER = '/.well-known/webfinger?resource={uri}'; + private static $baseurl; private static $istimeout; + /** + * Remove stuff from an URI that doesn't belong there + * + * @param string $URI + * @return string Cleaned URI + */ + public static function cleanURI(string $URI) + { + // At first remove leading and trailing junk + $URI = trim($URI, "@#?:/ \t\n\r\0\x0B"); + + $parts = parse_url($URI); + + if (empty($parts['scheme'])) { + return $URI; + } + + // Remove the URL fragment, since these shouldn't be part of any profile URL + unset($parts['fragment']); + + $URI = Network::unparseURL($parts); + + return $URI; + } + /** * Rearrange the array so that it always has the same order * @@ -57,17 +87,23 @@ class Probe private static function rearrangeData($data) { $fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "account-type", - "community", "keywords", "location", "about", "gender", "hide", - "batch", "notify", "poll", "request", "confirm", "poco", + "community", "keywords", "location", "about", "hide", + "batch", "notify", "poll", "request", "confirm", "subscribe", "poco", "following", "followers", "inbox", "outbox", "sharedinbox", - "priority", "network", "pubkey", "baseurl"]; + "priority", "network", "pubkey", "baseurl", "gsid"]; $newdata = []; foreach ($fields as $field) { if (isset($data[$field])) { - $newdata[$field] = $data[$field]; - } else { + if (in_array($field, ["gsid", "hide", "account-type"])) { + $newdata[$field] = (int)$data[$field]; + } else { + $newdata[$field] = $data[$field]; + } + } elseif ($field != "gsid") { $newdata[$field] = ""; + } else { + $newdata[$field] = null; } } @@ -116,54 +152,64 @@ class Probe // Reset the static variable self::$baseurl = ''; - $ssl_url = "https://".$host."/.well-known/host-meta"; - $url = "http://".$host."/.well-known/host-meta"; + // Handles the case when the hostname contains the scheme + if (!parse_url($host, PHP_URL_SCHEME)) { + $ssl_url = "https://" . $host . "/.well-known/host-meta"; + $url = "http://" . $host . "/.well-known/host-meta"; + } else { + $ssl_url = $host . "/.well-known/host-meta"; + $url = ''; + } $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20); - Logger::log("Probing for ".$host, Logger::DEBUG); + Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]); $xrd = null; $curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); $ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0); if ($curlResult->isSuccess()) { $xml = $curlResult->getBody(); - $xrd = XML::parseString($xml, false); - $host_url = 'https://'.$host; + $xrd = XML::parseString($xml, true); + if (!empty($url)) { + $host_url = 'https://' . $host; + } else { + $host_url = $host; + } } elseif ($curlResult->isTimeout()) { - Logger::info('Probing timeout', ['url' => $ssl_url], Logger::DEBUG); + Logger::info('Probing timeout', ['url' => $ssl_url]); self::$istimeout = true; - return false; + return []; } - if (!is_object($xrd)) { + if (!is_object($xrd) && !empty($url)) { $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); $connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0); if ($curlResult->isTimeout()) { - Logger::info('Probing timeout', ['url' => $url], Logger::DEBUG); + Logger::info('Probing timeout', ['url' => $url]); self::$istimeout = true; - return false; + return []; } elseif ($connection_error && $ssl_connection_error) { self::$istimeout = true; - return false; + return []; } $xml = $curlResult->getBody(); - $xrd = XML::parseString($xml, false); + $xrd = XML::parseString($xml, true); $host_url = 'http://'.$host; } if (!is_object($xrd)) { - Logger::log("No xrd object found for ".$host, Logger::DEBUG); + Logger::info('No xrd object found', ['host' => $host]); return []; } $links = XML::elementToArray($xrd); if (!isset($links["xrd"]["link"])) { - Logger::log("No xrd data found for ".$host, Logger::DEBUG); + Logger::info('No xrd data found', ['host' => $host]); return []; } - $lrdd = ['application/jrd+json' => $host_url . '/.well-known/webfinger?resource={uri}']; + $lrdd = []; foreach ($links["xrd"]["link"] as $value => $link) { if (!empty($link["@attributes"])) { @@ -183,7 +229,7 @@ class Probe self::$baseurl = $host_url; - Logger::log("Probing successful for ".$host, Logger::DEBUG); + Logger::info('Probing successful', ['host' => $host]); return $lrdd; } @@ -214,7 +260,7 @@ class Probe $profile_link = ''; $links = self::lrdd($webbie); - Logger::log('webfingerDfrn: '.$webbie.':'.print_r($links, true), Logger::DATA); + Logger::debug('Result', ['url' => $webbie, 'links' => $links]); if (!empty($links) && is_array($links)) { foreach ($links as $link) { if ($link['@attributes']['rel'] === ActivityNamespace::DFRN) { @@ -231,114 +277,34 @@ class Probe return $profile_link; } - /** - * Get the link for the remote follow page for a given profile link - * - * @param sting $profile - * @return string Remote follow page link - */ - public static function getRemoteFollowLink(string $profile) - { - $follow_link = ''; - - $links = self::lrdd($profile); - - if (!empty($links) && is_array($links)) { - foreach ($links as $link) { - if ($link['@attributes']['rel'] === ActivityNamespace::OSTATUSSUB) { - $follow_link = $link['@attributes']['template']; - } - } - } - return $follow_link; - } - /** * Check an URI for LRDD data * - * this is a replacement for the "lrdd" function. - * It isn't used in this class and has some redundancies in the code. - * When time comes we can check the existing calls for "lrdd" if we can rework them. - * - * @param string $uri Address that should be probed + * @param string $uri Address that should be probed * * @return array uri data * @throws HTTPException\InternalServerErrorException */ - public static function lrdd($uri) + public static function lrdd(string $uri) { - $lrdd = self::hostMeta($uri); - $webfinger = null; - - if (is_bool($lrdd)) { + $data = self::getWebfingerArray($uri); + if (empty($data)) { return []; } + $webfinger = $data['webfinger']; - if (!$lrdd) { - $parts = @parse_url($uri); - if (!$parts || empty($parts["host"]) || empty($parts["path"])) { - return []; - } - - $host = $parts["host"]; - if (!empty($parts["port"])) { - $host .= ':'.$parts["port"]; - } - - $path_parts = explode("/", trim($parts["path"], "/")); - - $nick = array_pop($path_parts); - - do { - $lrdd = self::hostMeta($host); - $host .= "/".array_shift($path_parts); - } while (!$lrdd && (sizeof($path_parts) > 0)); - } - - if (!$lrdd) { - Logger::log("No lrdd data found for ".$uri, Logger::DEBUG); + if (empty($webfinger["links"])) { + Logger::info('No webfinger links found', ['uri' => $uri]); return []; } - foreach ($lrdd as $type => $template) { - if ($webfinger) { - continue; - } - - $path = str_replace('{uri}', urlencode($uri), $template); - $webfinger = self::webfinger($path, $type); - - if (!$webfinger && (strstr($uri, "@"))) { - $path = str_replace('{uri}', urlencode("acct:".$uri), $template); - $webfinger = self::webfinger($path, $type); - } - - // Special treatment for Mastodon - // Problem is that Mastodon uses an URL format like http://domain.tld/@nick - // But the webfinger for this format fails. - if (!$webfinger && !empty($nick)) { - // Mastodon uses a "@" as prefix for usernames in their url format - $nick = ltrim($nick, '@'); - - $addr = $nick."@".$host; - - $path = str_replace('{uri}', urlencode("acct:".$addr), $template); - $webfinger = self::webfinger($path, $type); - } - } - - if (!is_array($webfinger["links"])) { - Logger::log("No webfinger links found for ".$uri, Logger::DEBUG); - return false; - } - $data = []; foreach ($webfinger["links"] as $link) { $data[] = ["@attributes" => $link]; } - if (is_array($webfinger["aliases"])) { + if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { foreach ($webfinger["aliases"] as $alias) { $data[] = ["@attributes" => ["rel" => "alias", @@ -363,8 +329,9 @@ class Probe */ public static function uri($uri, $network = '', $uid = -1, $cache = true) { + $cachekey = 'Probe::uri:' . $network . ':' . $uri; if ($cache) { - $result = DI::cache()->get('Probe::uri:' . $network . ':' . $uri); + $result = DI::cache()->get($cachekey); if (!is_null($result)) { return $result; } @@ -374,19 +341,19 @@ class Probe $uid = local_user(); } - self::$istimeout = false; - - if ($network != Protocol::ACTIVITYPUB) { - $data = self::detect($uri, $network, $uid); + if (empty($network) || ($network == Protocol::ACTIVITYPUB)) { + $ap_profile = ActivityPub::probeProfile($uri); } else { - $data = null; + $ap_profile = []; } - // When the previous detection process had got a time out - // we could falsely detect a Friendica profile as AP profile. - if (!self::$istimeout) { - $ap_profile = ActivityPub::probeProfile($uri); + self::$istimeout = false; + if ($network != Protocol::ACTIVITYPUB) { + $data = self::detect($uri, $network, $uid, $ap_profile); + if (!is_array($data)) { + $data = []; + } if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) { $data = $ap_profile; } elseif (!empty($ap_profile)) { @@ -394,16 +361,14 @@ class Probe $data = array_merge($ap_profile, $data); } } else { - Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]); + $data = $ap_profile; } if (!isset($data['url'])) { $data['url'] = $uri; } - if (!empty($data['photo']) && !empty($data['baseurl'])) { - $data['baseurl'] = Network::getUrlMatch(Strings::normaliseLink($data['baseurl']), Strings::normaliseLink($data['photo'])); - } elseif (empty($data['photo'])) { + if (empty($data['photo'])) { $data['photo'] = DI::baseUrl() . '/images/person-300.jpg'; } @@ -425,8 +390,8 @@ class Probe } } - if (!empty(self::$baseurl)) { - $data['baseurl'] = self::$baseurl; + if (!empty($data['baseurl']) && empty($data['gsid'])) { + $data['gsid'] = GServer::getID($data['baseurl']); } if (empty($data['network'])) { @@ -446,7 +411,7 @@ class Probe // Only store into the cache if the value seems to be valid if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) { - DI::cache()->set('Probe::uri:' . $network . ':' . $uri, $data, Duration::DAY); + DI::cache()->set($cachekey, $data, Duration::DAY); } return $data; @@ -517,173 +482,259 @@ class Probe } /** - * Checks if a profile url should be OStatus but only provides partial information + * Fetch the "subscribe" and add it to the result * - * @param array $webfinger Webfinger data - * @param string $lrdd Path template for webfinger request - * @param string $type type - * - * @return array fixed webfinger data - * @throws HTTPException\InternalServerErrorException + * @param array $result + * @param array $webfinger + * @return array result */ - private static function fixOStatus($webfinger, $lrdd, $type) + private static function getSubscribeLink(array $result, array $webfinger) { - if (empty($webfinger['links']) || empty($webfinger['subject'])) { - return $webfinger; + if (empty($webfinger['links'])) { + return $result; } - $is_ostatus = false; - $has_key = false; - foreach ($webfinger['links'] as $link) { - if ($link['rel'] == ActivityNamespace::OSTATUSSUB) { - $is_ostatus = true; - } - if ($link['rel'] == 'magic-public-key') { - $has_key = true; + if (!empty($link['template']) && ($link['rel'] === ActivityNamespace::OSTATUSSUB)) { + $result['subscribe'] = $link['template']; } } - if (!$is_ostatus || $has_key) { - return $webfinger; - } - - $url = Network::switchScheme($webfinger['subject']); - $path = str_replace('{uri}', urlencode($url), $lrdd); - $webfinger2 = self::webfinger($path, $type); - - // Is the new webfinger detectable as OStatus? - if (self::ostatus($webfinger2, true)) { - $webfinger = $webfinger2; - } - - return $webfinger; + return $result; } /** - * Fetch information (protocol endpoints and user information) about a given uri - * - * This function is only called by the "uri" function that adds caching and rearranging of data. - * - * @param string $uri Address that should be probed - * @param string $network Test for this specific network - * @param integer $uid User ID for the probe (only used for mails) + * Get webfinger data from a given URI * - * @return array uri data - * @throws HTTPException\InternalServerErrorException + * @param string $uri + * @return array Webfinger array */ - private static function detect($uri, $network, $uid) + private static function getWebfingerArray(string $uri) { $parts = parse_url($uri); - if (!empty($parts["scheme"]) && !empty($parts["host"])) { - $host = $parts["host"]; - if (!empty($parts["port"])) { - $host .= ':'.$parts["port"]; + if (!empty($parts['scheme']) && !empty($parts['host'])) { + $host = $parts['host']; + if (!empty($parts['port'])) { + $host .= ':'.$parts['port']; } - if ($host == 'twitter.com') { - return self::twitter($uri); - } - $lrdd = self::hostMeta($host); + $baseurl = $parts['scheme'] . '://' . $host; - if (is_bool($lrdd)) { - return []; - } + $nick = ''; + $addr = ''; $path_parts = explode("/", trim($parts['path'] ?? '', "/")); + if (!empty($path_parts)) { + $nick = ltrim(end($path_parts), '@'); + // When the last part of the URI is numeric then it is most likely an ID and not a nick name + if (!is_numeric($nick)) { + $addr = $nick."@".$host; + } else { + $nick = ''; + } + } - while (!$lrdd && (sizeof($path_parts) > 1)) { - $host .= "/".array_shift($path_parts); + $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (empty($webfinger)) { $lrdd = self::hostMeta($host); } - if (!$lrdd) { - Logger::log('No XRD data was found for '.$uri, Logger::DEBUG); - return self::feed($uri); - } - $nick = array_pop($path_parts); - // Mastodon uses a "@" as prefix for usernames in their url format - $nick = ltrim($nick, '@'); + if (empty($webfinger) && empty($lrdd)) { + while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) { + $host .= "/".array_shift($path_parts); + $baseurl = $parts['scheme'] . '://' . $host; - $addr = $nick."@".$host; - } elseif (strstr($uri, '@')) { - // If the URI starts with "mailto:" then jump directly to the mail detection - if (strpos($uri, 'mailto:') !== false) { - $uri = str_replace('mailto:', '', $uri); - return self::mail($uri, $uid); - } + if (!empty($nick)) { + $addr = $nick."@".$host; + } - if ($network == Protocol::MAIL) { - return self::mail($uri, $uid); + $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (empty($webfinger)) { + $lrdd = self::hostMeta($host); + } + } + + if (empty($lrdd) && empty($webfinger)) { + return []; + } } + } elseif (strstr($uri, '@')) { // Remove "acct:" from the URI $uri = str_replace('acct:', '', $uri); $host = substr($uri, strpos($uri, '@') + 1); $nick = substr($uri, 0, strpos($uri, '@')); + $addr = $uri; - if (strpos($uri, '@twitter.com')) { - return self::twitter($uri); + $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (self::$istimeout) { + return []; } - $lrdd = self::hostMeta($host); - if (is_bool($lrdd)) { - return []; + if (empty($webfinger)) { + $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (self::$istimeout) { + return []; + } + } else { + $baseurl = 'https://' . $host; } - if (!$lrdd) { - Logger::log('No XRD data was found for '.$uri, Logger::DEBUG); - return self::mail($uri, $uid); + if (empty($webfinger)) { + $lrdd = self::hostMeta($host); + if (self::$istimeout) { + return []; + } + $baseurl = self::$baseurl; + } else { + $baseurl = 'http://' . $host; } - $addr = $uri; } else { - Logger::log("Uri ".$uri." was not detectable", Logger::DEBUG); - return false; + Logger::info('URI was not detectable', ['uri' => $uri]); + return []; } - $webfinger = false; + if (empty($webfinger)) { + foreach ($lrdd as $type => $template) { + if ($webfinger) { + continue; + } - /// @todo Do we need the prefix "acct:" or "acct://"? + $webfinger = self::getWebfinger($template, $type, $uri, $addr); + } + } - foreach ($lrdd as $type => $template) { - if ($webfinger) { - continue; + if (empty($webfinger)) { + return []; + } + + if ($webfinger['detected'] == $addr) { + $webfinger['nick'] = $nick; + $webfinger['addr'] = $addr; + } + + $webfinger['baseurl'] = $baseurl; + + return $webfinger; + } + + /** + * Perform network request for webfinger data + * + * @param string $template + * @param string $type + * @param string $uri + * @param string $addr + * @return array webfinger results + */ + private static function getWebfinger(string $template, string $type, string $uri, string $addr) + { + // First try the address because this is the primary purpose of webfinger + if (!empty($addr)) { + $detected = $addr; + $path = str_replace('{uri}', urlencode("acct:" . $addr), $template); + $webfinger = self::webfinger($path, $type); + if (self::$istimeout) { + return []; } + } - // At first try it with the given uri + // Then try the URI + if (empty($webfinger) && $uri != $addr) { + $detected = $uri; $path = str_replace('{uri}', urlencode($uri), $template); $webfinger = self::webfinger($path, $type); + if (self::$istimeout) { + return []; + } + } + + if (empty($webfinger)) { + return []; + } + + return ['webfinger' => $webfinger, 'detected' => $detected]; + } + + /** + * Fetch information (protocol endpoints and user information) about a given uri + * + * This function is only called by the "uri" function that adds caching and rearranging of data. + * + * @param string $uri Address that should be probed + * @param string $network Test for this specific network + * @param integer $uid User ID for the probe (only used for mails) + * @param array $ap_profile Previously probed AP profile + * + * @return array uri data + * @throws HTTPException\InternalServerErrorException + */ + private static function detect(string $uri, string $network, int $uid, array $ap_profile) + { + $hookData = [ + 'uri' => $uri, + 'network' => $network, + 'uid' => $uid, + 'result' => [], + ]; + + Hook::callAll('probe_detect', $hookData); + + if ($hookData['result']) { + if (!is_array($hookData['result'])) { + return []; + } else { + return $hookData['result']; + } + } - // Fix possible problems with GNU Social probing to wrong scheme - $webfinger = self::fixOStatus($webfinger, $template, $type); + $parts = parse_url($uri); - // We cannot be sure that the detected address was correct, so we don't use the values - if ($webfinger && ($uri != $addr)) { - $nick = ""; - $addr = ""; + if (!empty($parts['scheme']) && !empty($parts['host'])) { + if (in_array($parts['host'], ['twitter.com', 'mobile.twitter.com'])) { + return self::twitter($uri); + } + } elseif (strstr($uri, '@')) { + // If the URI starts with "mailto:" then jump directly to the mail detection + if (strpos($uri, 'mailto:') !== false) { + $uri = str_replace('mailto:', '', $uri); + return self::mail($uri, $uid); } - // Try webfinger with the address (user@domain.tld) - if (!$webfinger) { - $path = str_replace('{uri}', urlencode($addr), $template); - $webfinger = self::webfinger($path, $type); + if ($network == Protocol::MAIL) { + return self::mail($uri, $uid); } - // Mastodon needs to have it with "acct:" - if (!$webfinger) { - $path = str_replace('{uri}', urlencode("acct:".$addr), $template); - $webfinger = self::webfinger($path, $type); + if (Strings::endsWith($uri, '@twitter.com') + || Strings::endsWith($uri, '@mobile.twitter.com') + ) { + return self::twitter($uri); } + } else { + Logger::info('URI was not detectable', ['uri' => $uri]); + return []; } - if (!$webfinger) { - return self::feed($uri); + Logger::info('Probing start', ['uri' => $uri]); + + $data = self::getWebfingerArray($uri); + if (empty($data)) { + if (!empty($parts['scheme'])) { + return self::feed($uri); + } elseif (!empty($uid)) { + return self::mail($uri, $uid); + } else { + return []; + } } - $result = false; + $webfinger = $data['webfinger']; + $nick = $data['nick'] ?? ''; + $addr = $data['addr'] ?? ''; + $baseurl = $data['baseurl'] ?? ''; - Logger::log("Probing ".$uri, Logger::DEBUG); + $result = []; if (in_array($network, ["", Protocol::DFRN])) { $result = self::dfrn($webfinger); @@ -695,12 +746,12 @@ class Probe $result = self::ostatus($webfinger); } if (in_array($network, ['', Protocol::ZOT])) { - $result = self::zot($webfinger, $result); + $result = self::zot($webfinger, $result, $baseurl); } if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) { $result = self::pumpio($webfinger, $addr); } - if ((!$result && ($network == "")) || ($network == Protocol::FEED)) { + if (empty($result['network']) && empty($ap_profile['network']) || ($network == Protocol::FEED)) { $result = self::feed($uri); } else { // We overwrite the detected nick with our try if the previois routines hadn't detected it. @@ -714,22 +765,22 @@ class Probe } } + $result = self::getSubscribeLink($result, $webfinger); + if (empty($result["network"])) { $result["network"] = Protocol::PHANTOM; } + if (empty($result['baseurl']) && !empty($baseurl)) { + $result['baseurl'] = $baseurl; + } + if (empty($result["url"])) { $result["url"] = $uri; } - Logger::log($uri." is ".$result["network"], Logger::DEBUG); + Logger::info('Probing done', ['uri' => $uri, 'network' => $result["network"]]); - if (empty($result["baseurl"]) && ($result["network"] != Protocol::PHANTOM)) { - $pos = strpos($result["url"], $host); - if ($pos) { - $result["baseurl"] = substr($result["url"], 0, $pos).$host; - } - } return $result; } @@ -742,7 +793,7 @@ class Probe * @return array Zot data * @throws HTTPException\InternalServerErrorException */ - private static function zot($webfinger, $data) + private static function zot($webfinger, $data, $baseurl) { if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { foreach ($webfinger["aliases"] as $alias) { @@ -763,12 +814,12 @@ class Probe } } - if (empty($zot_url) && !empty($data['addr']) && !empty(self::$baseurl)) { - $condition = ['nurl' => Strings::normaliseLink(self::$baseurl), 'platform' => ['hubzilla']]; + if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) { + $condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']]; if (!DBA::exists('gserver', $condition)) { return $data; } - $zot_url = self::$baseurl . '/.well-known/zot-info?address=' . $data['addr']; + $zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr']; } if (empty($zot_url)) { @@ -841,7 +892,7 @@ class Probe } if (!empty($json['public_forum'])) { $data['community'] = $json['public_forum']; - $data['account-type'] = Contact::PAGE_COMMUNITY; + $data['account-type'] = User::PAGE_FLAGS_COMMUNITY; } if (!empty($json['profile'])) { @@ -849,9 +900,6 @@ class Probe if (!empty($profile['description'])) { $data['about'] = $profile['description']; } - if (!empty($profile['gender'])) { - $data['gender'] = $profile['gender']; - } if (!empty($profile['keywords'])) { $keywords = implode(', ', $profile['keywords']); if (!empty($keywords)) { @@ -886,37 +934,37 @@ class Probe * @return array webfinger data * @throws HTTPException\InternalServerErrorException */ - private static function webfinger($url, $type) + public static function webfinger($url, $type) { $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20); $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]); if ($curlResult->isTimeout()) { self::$istimeout = true; - return false; + return []; } $data = $curlResult->getBody(); $webfinger = json_decode($data, true); - if (is_array($webfinger)) { + if (!empty($webfinger)) { if (!isset($webfinger["links"])) { - Logger::log("No json webfinger links for ".$url, Logger::DEBUG); - return false; + Logger::info('No json webfinger links', ['url' => $url]); + return []; } return $webfinger; } // If it is not JSON, maybe it is XML - $xrd = XML::parseString($data, false); + $xrd = XML::parseString($data, true); if (!is_object($xrd)) { - Logger::log("No webfinger data retrievable for ".$url, Logger::DEBUG); - return false; + Logger::info('No webfinger data retrievable', ['url' => $url]); + return []; } $xrd_arr = XML::elementToArray($xrd); if (!isset($xrd_arr["xrd"]["link"])) { - Logger::log("No XML webfinger links for ".$url, Logger::DEBUG); - return false; + Logger::info('No XML webfinger links', ['url' => $url]); + return []; } $webfinger = []; @@ -962,18 +1010,18 @@ class Probe $curlResult = Network::curl($noscrape_url); if ($curlResult->isTimeout()) { self::$istimeout = true; - return false; + return []; } $content = $curlResult->getBody(); if (!$content) { - Logger::log("Empty body for ".$noscrape_url, Logger::DEBUG); - return false; + Logger::info('Empty body', ['url' => $noscrape_url]); + return []; } $json = json_decode($content, true); if (!is_array($json)) { - Logger::log("No json data for ".$noscrape_url, Logger::DEBUG); - return false; + Logger::info('No json data', ['url' => $noscrape_url]); + return []; } if (!empty($json["fn"])) { @@ -1012,10 +1060,6 @@ class Probe $data["about"] = $json["about"]; } - if (!empty($json["gender"])) { - $data["gender"] = $json["gender"]; - } - if (!empty($json["key"])) { $data["pubkey"] = $json["key"]; } @@ -1090,7 +1134,7 @@ class Probe { $data = []; - Logger::log("Check profile ".$profile_link, Logger::DEBUG); + Logger::info('Check profile', ['link' => $profile_link]); // Fetch data via noscrape - this is faster $noscrape_url = str_replace(["/hcard/", "/profile/"], "/noscrape/", $profile_link); @@ -1124,7 +1168,7 @@ class Probe $prof_data["fn"] = $data['name'] ?? null; $prof_data["key"] = $data['pubkey'] ?? null; - Logger::log("Result for profile ".$profile_link.": ".print_r($prof_data, true), Logger::DEBUG); + Logger::debug('Result', ['link' => $profile_link, 'data' => $prof_data]); return $prof_data; } @@ -1187,7 +1231,7 @@ class Probe } if (!isset($data["network"]) || ($hcard_url == "")) { - return false; + return []; } // Fetch data via noscrape - this is faster @@ -1224,23 +1268,23 @@ class Probe $curlResult = Network::curl($hcard_url); if ($curlResult->isTimeout()) { self::$istimeout = true; - return false; + return []; } $content = $curlResult->getBody(); if (!$content) { - return false; + return []; } $doc = new DOMDocument(); if (!@$doc->loadHTML($content)) { - return false; + return []; } $xpath = new DomXPath($doc); $vcards = $xpath->query("//div[contains(concat(' ', @class, ' '), ' vcard ')]"); if (!is_object($vcards)) { - return false; + return []; } if (!isset($data["baseurl"])) { @@ -1378,7 +1422,7 @@ class Probe } if (empty($data["url"]) || empty($hcard_url)) { - return false; + return []; } if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { @@ -1399,7 +1443,7 @@ class Probe $data = self::pollHcard($hcard_url, $data); if (!$data) { - return false; + return []; } if (!empty($data["url"]) @@ -1419,7 +1463,7 @@ class Probe $data["notify"] = $data["baseurl"] . "/receive/users/" . $data["guid"]; $data["batch"] = $data["baseurl"] . "/receive/public"; } else { - return false; + return []; } return $data; @@ -1452,7 +1496,7 @@ class Probe $data["addr"] = str_replace('acct:', '', $webfinger["subject"]); } - if (is_array($webfinger["links"])) { + if (!empty($webfinger["links"])) { // The array is reversed to take into account the order of preference for same-rel links // See: https://tools.ietf.org/html/rfc7033#section-4.4.4 foreach (array_reverse($webfinger["links"]) as $link) { @@ -1460,7 +1504,7 @@ class Probe && (($link["type"] ?? "") == "text/html") && ($link["href"] != "") ) { - $data["url"] = $link["href"]; + $data["url"] = $data["alias"] = $link["href"]; } elseif (($link["rel"] == "salmon") && !empty($link["href"])) { $data["notify"] = $link["href"]; } elseif (($link["rel"] == ActivityNamespace::FEED) && !empty($link["href"])) { @@ -1478,7 +1522,7 @@ class Probe $curlResult = Network::curl($pubkey); if ($curlResult->isTimeout()) { self::$istimeout = true; - return false; + return $short ? false : []; } $pubkey = $curlResult->getBody(); } @@ -1500,7 +1544,7 @@ class Probe ) { $data["network"] = Protocol::OSTATUS; } else { - return false; + return $short ? false : []; } if ($short) { @@ -1511,12 +1555,12 @@ class Probe $curlResult = Network::curl($data["poll"]); if ($curlResult->isTimeout()) { self::$istimeout = true; - return false; + return []; } $feed = $curlResult->getBody(); $feed_data = Feed::import($feed); if (!$feed_data) { - return false; + return []; } if (!empty($feed_data["header"]["author-name"])) { @@ -1543,8 +1587,7 @@ class Probe $data["url"] = $feed_data["header"]["author-link"]; } - if (($data['poll'] == $data['url']) && ($data["alias"] != '')) { - $data['url'] = $data["alias"]; + if ($data["url"] == $data["alias"]) { $data["alias"] = ''; } @@ -1563,12 +1606,12 @@ class Probe { $curlResult = Network::curl($profile_link); if (!$curlResult->isSuccess()) { - return false; + return []; } $doc = new DOMDocument(); if (!@$doc->loadHTML($curlResult->getBody())) { - return false; + return []; } $xpath = new DomXPath($doc); @@ -1649,13 +1692,13 @@ class Probe $data["network"] = Protocol::PUMPIO; } else { - return false; + return []; } $profile_data = self::pumpioProfileData($data["url"]); if (!$profile_data) { - return false; + return []; } $data = array_merge($data, $profile_data); @@ -1679,9 +1722,9 @@ class Probe */ private static function twitter($uri) { - if (preg_match('=(.*)@twitter.com=i', $uri, $matches)) { + if (preg_match('=([^@]+)@(?:mobile\.)?twitter\.com$=i', $uri, $matches)) { $nick = $matches[1]; - } elseif (preg_match('=https?://twitter.com/(.*)=i', $uri, $matches)) { + } elseif (preg_match('=^https?://(?:mobile\.)?twitter\.com/(.+)=i', $uri, $matches)) { $nick = $matches[1]; } else { return []; @@ -1694,87 +1737,91 @@ class Probe $data['network'] = Protocol::TWITTER; $data['baseurl'] = 'https://twitter.com'; - $curlResult = Network::curl($data['url'], false); - if (!$curlResult->isSuccess()) { - return []; - } + return $data; + } - $body = $curlResult->getBody(); + /** + * Checks HTML page for RSS feed link + * + * @param string $url Page link + * @param string $body Page body string + * @return string|false Feed link or false if body was invalid HTML document + */ + public static function getFeedLink(string $url, string $body) + { $doc = new DOMDocument(); - @$doc->loadHTML($body); - $xpath = new DOMXPath($doc); + if (!@$doc->loadHTML($body)) { + return false; + } - $list = $xpath->query('//img[@class]'); - foreach ($list as $node) { - $img_attr = []; - if ($node->attributes->length) { - foreach ($node->attributes as $attribute) { - $img_attr[$attribute->name] = $attribute->value; - } - } + $xpath = new DOMXPath($doc); - if (empty($img_attr['class'])) { - continue; - } + $feedUrl = $xpath->evaluate('string(/html/head/link[@type="application/rss+xml" and @rel="alternate"]/@href)'); - if (strpos($img_attr['class'], 'ProfileAvatar-image') !== false) { - if (!empty($img_attr['src'])) { - $data['photo'] = $img_attr['src']; - } - if (!empty($img_attr['alt'])) { - $data['name'] = $img_attr['alt']; - } - } - } + $feedUrl = $feedUrl ? self::ensureAbsoluteLinkFromHTMLDoc($feedUrl, $url, $xpath) : ''; - return $data; + return $feedUrl; } /** - * Check page for feed link + * Return an absolute URL in the context of a HTML document retrieved from the provided URL. + * + * Loosely based on RFC 1808 * - * @param string $url Page link + * @see https://tools.ietf.org/html/rfc1808 * - * @return string feed link + * @param string $href The potential relative href found in the HTML document + * @param string $base The HTML document URL + * @param DOMXPath $xpath The HTML document XPath + * @return string */ - private static function getFeedLink($url) + private static function ensureAbsoluteLinkFromHTMLDoc(string $href, string $base, DOMXPath $xpath) { - $curlResult = Network::curl($url); - if (!$curlResult->isSuccess()) { - return false; + if (filter_var($href, FILTER_VALIDATE_URL)) { + return $href; } - $doc = new DOMDocument(); - if (!@$doc->loadHTML($curlResult->getBody())) { - return false; - } + $base = $xpath->evaluate('string(/html/head/base/@href)') ?: $base; - $xpath = new DomXPath($doc); + $baseParts = parse_url($base); - //$feeds = $xpath->query("/html/head/link[@type='application/rss+xml']"); - $feeds = $xpath->query("/html/head/link[@type='application/rss+xml' and @rel='alternate']"); - if (!is_object($feeds)) { - return false; - } + // Naked domain case (scheme://basehost) + $path = $baseParts['path'] ?? '/'; - if ($feeds->length == 0) { - return false; - } + // Remove the filename part of the path if it exists (/base/path/file) + $path = implode('/', array_slice(explode('/', $path), 0, -1)); - $feed_url = ""; + $hrefParts = parse_url($href); - foreach ($feeds as $feed) { - $attr = []; - foreach ($feed->attributes as $attribute) { - $attr[$attribute->name] = trim($attribute->value); + // Root path case (/path) including relative scheme case (//host/path) + if ($hrefParts['path'] && $hrefParts['path'][0] == '/') { + $path = $hrefParts['path']; + } else { + $path = $path . '/' . $hrefParts['path']; + + // Resolve arbitrary relative path + // Lifted from https://www.php.net/manual/en/function.realpath.php#84012 + $parts = array_filter(explode('/', $path), 'strlen'); + $absolutes = array(); + foreach ($parts as $part) { + if ('.' == $part) continue; + if ('..' == $part) { + array_pop($absolutes); + } else { + $absolutes[] = $part; + } } - if (empty($feed_url) && !empty($attr['href'])) { - $feed_url = $attr["href"]; - } + $path = '/' . implode('/', $absolutes); } - return $feed_url; + // Relative scheme case (//host/path) + $baseParts['host'] = $hrefParts['host'] ?? $baseParts['host']; + $baseParts['path'] = $path; + unset($baseParts['query']); + unset($baseParts['fragment']); + + return Network::unparseURL($baseParts); } /** @@ -1791,20 +1838,20 @@ class Probe $curlResult = Network::curl($url); if ($curlResult->isTimeout()) { self::$istimeout = true; - return false; + return []; } $feed = $curlResult->getBody(); $feed_data = Feed::import($feed); if (!$feed_data) { if (!$probe) { - return false; + return []; } - $feed_url = self::getFeedLink($url); + $feed_url = self::getFeedLink($url, $feed); if (!$feed_url) { - return false; + return []; } return self::feed($feed_url, false); @@ -1829,12 +1876,6 @@ class Probe $data["url"] = $url; $data["poll"] = $url; - if (!empty($feed_data["header"]["author-link"])) { - $data["baseurl"] = $feed_data["header"]["author-link"]; - } else { - $data["baseurl"] = $data["url"]; - } - $data["network"] = Protocol::FEED; return $data; @@ -1852,11 +1893,11 @@ class Probe private static function mail($uri, $uid) { if (!Network::isEmailDomainValid($uri)) { - return false; + return []; } if ($uid == 0) { - return false; + return []; } $user = DBA::selectFirst('user', ['prvkey'], ['uid' => $uid]); @@ -1866,7 +1907,7 @@ class Probe $mailacct = DBA::selectFirst('mailacct', $fields, $condition); if (!DBA::isResult($user) || !DBA::isResult($mailacct)) { - return false; + return []; } $mailbox = Email::constructMailboxName($mailacct); @@ -1874,14 +1915,14 @@ class Probe openssl_private_decrypt(hex2bin($mailacct['pass']), $password, $user['prvkey']); $mbox = Email::connect($mailbox, $mailacct['user'], $password); if (!$mbox) { - return false; + return []; } $msgs = Email::poll($mbox, $uri); - Logger::log('searching '.$uri.', '.count($msgs).' messages found.', Logger::DEBUG); + Logger::info('Messages found', ['uri' => $uri, 'count' => count($msgs)]); if (!count($msgs)) { - return false; + return []; } $phost = substr($uri, strpos($uri, '@') + 1); @@ -1961,7 +2002,7 @@ class Probe $fixed = $scheme.$host.$port.$path.$query.$fragment; - Logger::log('Base: '.$base.' - Avatar: '.$avatar.' - Fixed: '.$fixed, Logger::DATA); + Logger::debug('Avatar fixed', ['base' => $base, 'avatar' => $avatar, 'fixed' => $fixed]); return $fixed; }