X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FNetwork%2FProbe.php;h=f4ca0398a85f2f972c231ca419137dced9b325c7;hb=122ad0af14f046c2462a03fe33967dc41abfc8b5;hp=dcb0bf192f68b84a0574e3f9e5a79129fe18ab80;hpb=aedbb0d627606ff7eb7302e2881c31059b6cc1d9;p=friendica.git diff --git a/src/Network/Probe.php b/src/Network/Probe.php index dcb0bf192f..f4ca0398a8 100644 --- a/src/Network/Probe.php +++ b/src/Network/Probe.php @@ -23,7 +23,6 @@ namespace Friendica\Network; use DOMDocument; use DomXPath; -use Friendica\Core\Cache\Duration; use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Protocol; @@ -33,11 +32,13 @@ use Friendica\DI; use Friendica\Model\Contact; use Friendica\Model\GServer; use Friendica\Model\Profile; +use Friendica\Model\User; use Friendica\Protocol\ActivityNamespace; use Friendica\Protocol\ActivityPub; use Friendica\Protocol\Email; use Friendica\Protocol\Feed; use Friendica\Util\Crypto; +use Friendica\Util\DateTimeFormat; use Friendica\Util\Network; use Friendica\Util\Strings; use Friendica\Util\XML; @@ -47,6 +48,8 @@ use Friendica\Util\XML; */ class Probe { + const WEBFINGER = '/.well-known/webfinger?resource={uri}'; + private static $baseurl; private static $istimeout; @@ -88,17 +91,19 @@ class Probe "community", "keywords", "location", "about", "hide", "batch", "notify", "poll", "request", "confirm", "subscribe", "poco", "following", "followers", "inbox", "outbox", "sharedinbox", - "priority", "network", "pubkey", "baseurl", "gsid"]; + "priority", "network", "pubkey", "manually-approve", "baseurl", "gsid"]; + + $numeric_fields = ["gsid", "hide", "account-type", "manually-approve"]; $newdata = []; foreach ($fields as $field) { if (isset($data[$field])) { - if (in_array($field, ["gsid", "hide", "account-type"])) { + if (in_array($field, $numeric_fields)) { $newdata[$field] = (int)$data[$field]; } else { $newdata[$field] = $data[$field]; } - } elseif ($field != "gsid") { + } elseif (!in_array($field, $numeric_fields)) { $newdata[$field] = ""; } else { $newdata[$field] = null; @@ -164,7 +169,7 @@ class Probe Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]); $xrd = null; - $curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); + $curlResult = DI::httpRequest()->get($ssl_url, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); $ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0); if ($curlResult->isSuccess()) { $xml = $curlResult->getBody(); @@ -175,16 +180,16 @@ class Probe $host_url = $host; } } elseif ($curlResult->isTimeout()) { - Logger::info('Probing timeout', ['url' => $ssl_url], Logger::DEBUG); + Logger::info('Probing timeout', ['url' => $ssl_url]); self::$istimeout = true; return []; } if (!is_object($xrd) && !empty($url)) { - $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); + $curlResult = DI::httpRequest()->get($url, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); $connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0); if ($curlResult->isTimeout()) { - Logger::info('Probing timeout', ['url' => $url], Logger::DEBUG); + Logger::info('Probing timeout', ['url' => $url]); self::$istimeout = true; return []; } elseif ($connection_error && $ssl_connection_error) { @@ -197,17 +202,17 @@ class Probe $host_url = 'http://'.$host; } if (!is_object($xrd)) { - Logger::log("No xrd object found for ".$host, Logger::DEBUG); + Logger::info('No xrd object found', ['host' => $host]); return []; } $links = XML::elementToArray($xrd); if (!isset($links["xrd"]["link"])) { - Logger::log("No xrd data found for ".$host, Logger::DEBUG); + Logger::info('No xrd data found', ['host' => $host]); return []; } - $lrdd = ['application/jrd+json' => $host_url . '/.well-known/webfinger?resource={uri}']; + $lrdd = []; foreach ($links["xrd"]["link"] as $value => $link) { if (!empty($link["@attributes"])) { @@ -227,7 +232,7 @@ class Probe self::$baseurl = $host_url; - Logger::log("Probing successful for ".$host, Logger::DEBUG); + Logger::info('Probing successful', ['host' => $host]); return $lrdd; } @@ -253,12 +258,12 @@ class Probe * @return string profile link * @throws HTTPException\InternalServerErrorException */ - public static function webfingerDfrn($webbie, &$hcard_url) + public static function webfingerDfrn(string $webbie, string &$hcard_url) { $profile_link = ''; $links = self::lrdd($webbie); - Logger::log('webfingerDfrn: '.$webbie.':'.print_r($links, true), Logger::DATA); + Logger::debug('Result', ['url' => $webbie, 'links' => $links]); if (!empty($links) && is_array($links)) { foreach ($links as $link) { if ($link['@attributes']['rel'] === ActivityNamespace::DFRN) { @@ -285,68 +290,14 @@ class Probe */ public static function lrdd(string $uri) { - $lrdd = self::hostMeta($uri); - $webfinger = null; - - if (is_bool($lrdd)) { - return []; - } - - if (!$lrdd) { - $parts = @parse_url($uri); - if (!$parts || empty($parts["host"]) || empty($parts["path"])) { - return []; - } - - $host = $parts['scheme'] . '://' . $parts["host"]; - if (!empty($parts["port"])) { - $host .= ':'.$parts["port"]; - } - - $path_parts = explode("/", trim($parts["path"], "/")); - - $nick = array_pop($path_parts); - - do { - $lrdd = self::hostMeta($host); - $host .= "/".array_shift($path_parts); - } while (!$lrdd && (sizeof($path_parts) > 0)); - } - - if (!$lrdd) { - Logger::log("No lrdd data found for ".$uri, Logger::DEBUG); + $data = self::getWebfingerArray($uri); + if (empty($data)) { return []; } - - foreach ($lrdd as $type => $template) { - if ($webfinger) { - continue; - } - - $path = str_replace('{uri}', urlencode($uri), $template); - $webfinger = self::webfinger($path, $type); - - if (!$webfinger && (strstr($uri, "@"))) { - $path = str_replace('{uri}', urlencode("acct:".$uri), $template); - $webfinger = self::webfinger($path, $type); - } - - // Special treatment for Mastodon - // Problem is that Mastodon uses an URL format like http://domain.tld/@nick - // But the webfinger for this format fails. - if (!$webfinger && !empty($nick)) { - // Mastodon uses a "@" as prefix for usernames in their url format - $nick = ltrim($nick, '@'); - - $addr = $nick."@".$host; - - $path = str_replace('{uri}', urlencode("acct:".$addr), $template); - $webfinger = self::webfinger($path, $type); - } - } + $webfinger = $data['webfinger']; if (empty($webfinger["links"])) { - Logger::log("No webfinger links found for ".$uri, Logger::DEBUG); + Logger::info('No webfinger links found', ['uri' => $uri]); return []; } @@ -379,12 +330,13 @@ class Probe * @throws HTTPException\InternalServerErrorException * @throws \ImagickException */ - public static function uri($uri, $network = '', $uid = -1, $cache = true) + public static function uri($uri, $network = '', $uid = -1) { - if ($cache) { - $result = DI::cache()->get('Probe::uri:' . $network . ':' . $uri); - if (!is_null($result)) { - return $result; + // Local profiles aren't probed via network + if (empty($network) && strpos($uri, DI::baseUrl()->getHostname())) { + $data = self::localProbe($uri); + if (!empty($data)) { + return $data; } } @@ -392,19 +344,19 @@ class Probe $uid = local_user(); } - self::$istimeout = false; - - if ($network != Protocol::ACTIVITYPUB) { - $data = self::detect($uri, $network, $uid); + if (empty($network) || ($network == Protocol::ACTIVITYPUB)) { + $ap_profile = ActivityPub::probeProfile($uri); } else { - $data = null; + $ap_profile = []; } - // When the previous detection process had got a time out - // we could falsely detect a Friendica profile as AP profile. - if (!self::$istimeout) { - $ap_profile = ActivityPub::probeProfile($uri, !$cache); + self::$istimeout = false; + if ($network != Protocol::ACTIVITYPUB) { + $data = self::detect($uri, $network, $uid, $ap_profile); + if (!is_array($data)) { + $data = []; + } if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) { $data = $ap_profile; } elseif (!empty($ap_profile)) { @@ -412,7 +364,7 @@ class Probe $data = array_merge($ap_profile, $data); } } else { - Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]); + $data = $ap_profile; } if (!isset($data['url'])) { @@ -420,7 +372,7 @@ class Probe } if (empty($data['photo'])) { - $data['photo'] = DI::baseUrl() . '/images/person-300.jpg'; + $data['photo'] = DI::baseUrl() . Contact::DEFAULT_AVATAR_PHOTO; } if (empty($data['name'])) { @@ -441,10 +393,6 @@ class Probe } } - if (empty($data['baseurl']) && !empty(self::$baseurl)) { - $data['baseurl'] = self::$baseurl; - } - if (!empty($data['baseurl']) && empty($data['gsid'])) { $data['gsid'] = GServer::getID($data['baseurl']); } @@ -462,14 +410,7 @@ class Probe $data['hide'] = self::getHideStatus($data['url']); } - $data = self::rearrangeData($data); - - // Only store into the cache if the value seems to be valid - if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) { - DI::cache()->set('Probe::uri:' . $network . ':' . $uri, $data, Duration::DAY); - } - - return $data; + return self::rearrangeData($data); } @@ -482,7 +423,7 @@ class Probe */ private static function getHideStatus($url) { - $curlResult = Network::curl($url); + $curlResult = DI::httpRequest()->get($url); if (!$curlResult->isSuccess()) { return false; } @@ -558,22 +499,175 @@ class Probe return $result; } + /** + * Get webfinger data from a given URI + * + * @param string $uri + * @return array Webfinger array + */ + private static function getWebfingerArray(string $uri) + { + $parts = parse_url($uri); + + if (!empty($parts['scheme']) && !empty($parts['host'])) { + $host = $parts['host']; + if (!empty($parts['port'])) { + $host .= ':'.$parts['port']; + } + + $baseurl = $parts['scheme'] . '://' . $host; + + $nick = ''; + $addr = ''; + + $path_parts = explode("/", trim($parts['path'] ?? '', "/")); + if (!empty($path_parts)) { + $nick = ltrim(end($path_parts), '@'); + // When the last part of the URI is numeric then it is most likely an ID and not a nick name + if (!is_numeric($nick)) { + $addr = $nick."@".$host; + } else { + $nick = ''; + } + } + + $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (empty($webfinger)) { + $lrdd = self::hostMeta($host); + } + + if (empty($webfinger) && empty($lrdd)) { + while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) { + $host .= "/".array_shift($path_parts); + $baseurl = $parts['scheme'] . '://' . $host; + + if (!empty($nick)) { + $addr = $nick."@".$host; + } + + $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (empty($webfinger)) { + $lrdd = self::hostMeta($host); + } + } + + if (empty($lrdd) && empty($webfinger)) { + return []; + } + } + } elseif (strstr($uri, '@')) { + // Remove "acct:" from the URI + $uri = str_replace('acct:', '', $uri); + + $host = substr($uri, strpos($uri, '@') + 1); + $nick = substr($uri, 0, strpos($uri, '@')); + $addr = $uri; + + $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (self::$istimeout) { + return []; + } + + if (empty($webfinger)) { + $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr); + if (self::$istimeout) { + return []; + } + } else { + $baseurl = 'https://' . $host; + } + + if (empty($webfinger)) { + $lrdd = self::hostMeta($host); + if (self::$istimeout) { + return []; + } + $baseurl = self::$baseurl; + } else { + $baseurl = 'http://' . $host; + } + } else { + Logger::info('URI was not detectable', ['uri' => $uri]); + return []; + } + + if (empty($webfinger)) { + foreach ($lrdd as $type => $template) { + if ($webfinger) { + continue; + } + + $webfinger = self::getWebfinger($template, $type, $uri, $addr); + } + } + + if (empty($webfinger)) { + return []; + } + + if ($webfinger['detected'] == $addr) { + $webfinger['nick'] = $nick; + $webfinger['addr'] = $addr; + } + + $webfinger['baseurl'] = $baseurl; + + return $webfinger; + } + + /** + * Perform network request for webfinger data + * + * @param string $template + * @param string $type + * @param string $uri + * @param string $addr + * @return array webfinger results + */ + private static function getWebfinger(string $template, string $type, string $uri, string $addr) + { + // First try the address because this is the primary purpose of webfinger + if (!empty($addr)) { + $detected = $addr; + $path = str_replace('{uri}', urlencode("acct:" . $addr), $template); + $webfinger = self::webfinger($path, $type); + if (self::$istimeout) { + return []; + } + } + + // Then try the URI + if (empty($webfinger) && $uri != $addr) { + $detected = $uri; + $path = str_replace('{uri}', urlencode($uri), $template); + $webfinger = self::webfinger($path, $type); + if (self::$istimeout) { + return []; + } + } + + if (empty($webfinger)) { + return []; + } + + return ['webfinger' => $webfinger, 'detected' => $detected]; + } + /** * Fetch information (protocol endpoints and user information) about a given uri * * This function is only called by the "uri" function that adds caching and rearranging of data. * - * @param string $uri Address that should be probed - * @param string $network Test for this specific network - * @param integer $uid User ID for the probe (only used for mails) + * @param string $uri Address that should be probed + * @param string $network Test for this specific network + * @param integer $uid User ID for the probe (only used for mails) + * @param array $ap_profile Previously probed AP profile * * @return array uri data * @throws HTTPException\InternalServerErrorException */ - private static function detect($uri, $network, $uid) + private static function detect(string $uri, string $network, int $uid, array $ap_profile) { - $parts = parse_url($uri); - $hookData = [ 'uri' => $uri, 'network' => $network, @@ -584,40 +678,19 @@ class Probe Hook::callAll('probe_detect', $hookData); if ($hookData['result']) { - return $hookData['result']; - } - - if (!empty($parts["scheme"]) && !empty($parts["host"])) { - $host = $parts["host"]; - if (!empty($parts["port"])) { - $host .= ':'.$parts["port"]; - } - - if ($host == 'twitter.com') { - return self::twitter($uri); - } - $lrdd = self::hostMeta($host); - - if (is_bool($lrdd)) { + if (!is_array($hookData['result'])) { return []; + } else { + return $hookData['result']; } + } - $path_parts = explode("/", trim($parts['path'] ?? '', "/")); + $parts = parse_url($uri); - while (!$lrdd && (sizeof($path_parts) > 1)) { - $host .= "/".array_shift($path_parts); - $lrdd = self::hostMeta($host); - } - if (!$lrdd) { - Logger::log('No XRD data was found for '.$uri, Logger::DEBUG); - return self::feed($uri); + if (!empty($parts['scheme']) && !empty($parts['host'])) { + if (in_array($parts['host'], ['twitter.com', 'mobile.twitter.com'])) { + return self::twitter($uri); } - $nick = array_pop($path_parts); - - // Mastodon uses a "@" as prefix for usernames in their url format - $nick = ltrim($nick, '@'); - - $addr = $nick."@".$host; } elseif (strstr($uri, '@')) { // If the URI starts with "mailto:" then jump directly to the mail detection if (strpos($uri, 'mailto:') !== false) { @@ -628,73 +701,44 @@ class Probe if ($network == Protocol::MAIL) { return self::mail($uri, $uid); } - // Remove "acct:" from the URI - $uri = str_replace('acct:', '', $uri); - - $host = substr($uri, strpos($uri, '@') + 1); - $nick = substr($uri, 0, strpos($uri, '@')); - if (strpos($uri, '@twitter.com')) { + if (Strings::endsWith($uri, '@twitter.com') + || Strings::endsWith($uri, '@mobile.twitter.com') + ) { return self::twitter($uri); } - $lrdd = self::hostMeta($host); - - if (is_bool($lrdd)) { - return []; - } - - if (!$lrdd) { - Logger::log('No XRD data was found for '.$uri, Logger::DEBUG); - return self::mail($uri, $uid); - } - $addr = $uri; } else { - Logger::log("Uri ".$uri." was not detectable", Logger::DEBUG); + Logger::info('URI was not detectable', ['uri' => $uri]); return []; } - $webfinger = false; - - /// @todo Do we need the prefix "acct:" or "acct://"? - - foreach ($lrdd as $type => $template) { - if ($webfinger) { - continue; - } - - // Try the URI first - if ($uri != $addr) { - $path = str_replace('{uri}', urlencode($uri), $template); - $webfinger = self::webfinger($path, $type); - } + Logger::info('Probing start', ['uri' => $uri]); - // Then try the address - if (!$webfinger) { - $path = str_replace('{uri}', urlencode("acct:" . $addr), $template); - $webfinger = self::webfinger($path, $type); - } + if (!empty($ap_profile['addr']) && ($ap_profile['addr'] != $uri)) { + $data = self::getWebfingerArray($ap_profile['addr']); + } - // Finally try without the "acct" - if (!$webfinger) { - $path = str_replace('{uri}', urlencode($addr), $template); - $webfinger = self::webfinger($path, $type); - } + if (empty($data)) { + $data = self::getWebfingerArray($uri); + } - // We cannot be sure that the detected address was correct, so we don't use the values - if ($webfinger && ($uri != $addr)) { - $nick = ""; - $addr = ""; + if (empty($data)) { + if (!empty($parts['scheme'])) { + return self::feed($uri); + } elseif (!empty($uid)) { + return self::mail($uri, $uid); + } else { + return []; } } - if (!$webfinger) { - return self::feed($uri); - } + $webfinger = $data['webfinger']; + $nick = $data['nick'] ?? ''; + $addr = $data['addr'] ?? ''; + $baseurl = $data['baseurl'] ?? ''; $result = []; - Logger::info("Probing", ['uri' => $uri]); - if (in_array($network, ["", Protocol::DFRN])) { $result = self::dfrn($webfinger); } @@ -705,12 +749,12 @@ class Probe $result = self::ostatus($webfinger); } if (in_array($network, ['', Protocol::ZOT])) { - $result = self::zot($webfinger, $result); + $result = self::zot($webfinger, $result, $baseurl); } if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) { $result = self::pumpio($webfinger, $addr); } - if ((!$result && ($network == "")) || ($network == Protocol::FEED)) { + if (empty($result['network']) && empty($ap_profile['network']) || ($network == Protocol::FEED)) { $result = self::feed($uri); } else { // We overwrite the detected nick with our try if the previois routines hadn't detected it. @@ -730,11 +774,15 @@ class Probe $result["network"] = Protocol::PHANTOM; } + if (empty($result['baseurl']) && !empty($baseurl)) { + $result['baseurl'] = $baseurl; + } + if (empty($result["url"])) { $result["url"] = $uri; } - Logger::log($uri." is ".$result["network"], Logger::DEBUG); + Logger::info('Probing done', ['uri' => $uri, 'network' => $result["network"]]); return $result; } @@ -748,7 +796,7 @@ class Probe * @return array Zot data * @throws HTTPException\InternalServerErrorException */ - private static function zot($webfinger, $data) + private static function zot($webfinger, $data, $baseurl) { if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { foreach ($webfinger["aliases"] as $alias) { @@ -769,12 +817,12 @@ class Probe } } - if (empty($zot_url) && !empty($data['addr']) && !empty(self::$baseurl)) { - $condition = ['nurl' => Strings::normaliseLink(self::$baseurl), 'platform' => ['hubzilla']]; + if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) { + $condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']]; if (!DBA::exists('gserver', $condition)) { return $data; } - $zot_url = self::$baseurl . '/.well-known/zot-info?address=' . $data['addr']; + $zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr']; } if (empty($zot_url)) { @@ -796,7 +844,7 @@ class Probe public static function pollZot($url, $data) { - $curlResult = Network::curl($url); + $curlResult = DI::httpRequest()->get($url); if ($curlResult->isTimeout()) { return $data; } @@ -847,7 +895,7 @@ class Probe } if (!empty($json['public_forum'])) { $data['community'] = $json['public_forum']; - $data['account-type'] = Contact::PAGE_COMMUNITY; + $data['account-type'] = User::PAGE_FLAGS_COMMUNITY; } if (!empty($json['profile'])) { @@ -893,7 +941,7 @@ class Probe { $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20); - $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]); + $curlResult = DI::httpRequest()->get($url, ['timeout' => $xrd_timeout, 'accept_content' => $type]); if ($curlResult->isTimeout()) { self::$istimeout = true; return []; @@ -903,7 +951,7 @@ class Probe $webfinger = json_decode($data, true); if (!empty($webfinger)) { if (!isset($webfinger["links"])) { - Logger::log("No json webfinger links for ".$url, Logger::DEBUG); + Logger::info('No json webfinger links', ['url' => $url]); return []; } return $webfinger; @@ -912,13 +960,13 @@ class Probe // If it is not JSON, maybe it is XML $xrd = XML::parseString($data, true); if (!is_object($xrd)) { - Logger::log("No webfinger data retrievable for ".$url, Logger::DEBUG); + Logger::info('No webfinger data retrievable', ['url' => $url]); return []; } $xrd_arr = XML::elementToArray($xrd); if (!isset($xrd_arr["xrd"]["link"])) { - Logger::log("No XML webfinger links for ".$url, Logger::DEBUG); + Logger::info('No XML webfinger links', ['url' => $url]); return []; } @@ -962,20 +1010,20 @@ class Probe */ private static function pollNoscrape($noscrape_url, $data) { - $curlResult = Network::curl($noscrape_url); + $curlResult = DI::httpRequest()->get($noscrape_url); if ($curlResult->isTimeout()) { self::$istimeout = true; return []; } $content = $curlResult->getBody(); if (!$content) { - Logger::log("Empty body for ".$noscrape_url, Logger::DEBUG); + Logger::info('Empty body', ['url' => $noscrape_url]); return []; } $json = json_decode($content, true); if (!is_array($json)) { - Logger::log("No json data for ".$noscrape_url, Logger::DEBUG); + Logger::info('No json data', ['url' => $noscrape_url]); return []; } @@ -1089,7 +1137,7 @@ class Probe { $data = []; - Logger::log("Check profile ".$profile_link, Logger::DEBUG); + Logger::info('Check profile', ['link' => $profile_link]); // Fetch data via noscrape - this is faster $noscrape_url = str_replace(["/hcard/", "/profile/"], "/noscrape/", $profile_link); @@ -1123,7 +1171,7 @@ class Probe $prof_data["fn"] = $data['name'] ?? null; $prof_data["key"] = $data['pubkey'] ?? null; - Logger::log("Result for profile ".$profile_link.": ".print_r($prof_data, true), Logger::DEBUG); + Logger::debug('Result', ['link' => $profile_link, 'data' => $prof_data]); return $prof_data; } @@ -1220,7 +1268,7 @@ class Probe */ private static function pollHcard($hcard_url, $data, $dfrn = false) { - $curlResult = Network::curl($hcard_url); + $curlResult = DI::httpRequest()->get($hcard_url); if ($curlResult->isTimeout()) { self::$istimeout = true; return []; @@ -1408,6 +1456,7 @@ class Probe && !empty($hcard_url) ) { $data["network"] = Protocol::DIASPORA; + $data["manually-approve"] = false; // The Diaspora handle must always be lowercase if (!empty($data["addr"])) { @@ -1474,7 +1523,7 @@ class Probe $pubkey = substr($pubkey, 5); } } elseif (Strings::normaliseLink($pubkey) == 'http://') { - $curlResult = Network::curl($pubkey); + $curlResult = DI::httpRequest()->get($pubkey); if ($curlResult->isTimeout()) { self::$istimeout = true; return $short ? false : []; @@ -1498,6 +1547,7 @@ class Probe && isset($data["url"]) ) { $data["network"] = Protocol::OSTATUS; + $data["manually-approve"] = false; } else { return $short ? false : []; } @@ -1507,7 +1557,7 @@ class Probe } // Fetch all additional data from the feed - $curlResult = Network::curl($data["poll"]); + $curlResult = DI::httpRequest()->get($data["poll"]); if ($curlResult->isTimeout()) { self::$istimeout = true; return []; @@ -1559,7 +1609,7 @@ class Probe */ private static function pumpioProfileData($profile_link) { - $curlResult = Network::curl($profile_link); + $curlResult = DI::httpRequest()->get($profile_link); if (!$curlResult->isSuccess()) { return []; } @@ -1677,9 +1727,9 @@ class Probe */ private static function twitter($uri) { - if (preg_match('=(.*)@twitter.com=i', $uri, $matches)) { + if (preg_match('=([^@]+)@(?:mobile\.)?twitter\.com$=i', $uri, $matches)) { $nick = $matches[1]; - } elseif (preg_match('=https?://twitter.com/(.*)=i', $uri, $matches)) { + } elseif (preg_match('=^https?://(?:mobile\.)?twitter\.com/(.+)=i', $uri, $matches)) { $nick = $matches[1]; } else { return []; @@ -1739,6 +1789,9 @@ class Probe $base = $xpath->evaluate('string(/html/head/base/@href)') ?: $base; $baseParts = parse_url($base); + if (empty($baseParts['host'])) { + return $href; + } // Naked domain case (scheme://basehost) $path = $baseParts['path'] ?? '/'; @@ -1748,26 +1801,28 @@ class Probe $hrefParts = parse_url($href); - // Root path case (/path) including relative scheme case (//host/path) - if ($hrefParts['path'] && $hrefParts['path'][0] == '/') { - $path = $hrefParts['path']; - } else { - $path = $path . '/' . $hrefParts['path']; - - // Resolve arbitrary relative path - // Lifted from https://www.php.net/manual/en/function.realpath.php#84012 - $parts = array_filter(explode('/', $path), 'strlen'); - $absolutes = array(); - foreach ($parts as $part) { - if ('.' == $part) continue; - if ('..' == $part) { - array_pop($absolutes); - } else { - $absolutes[] = $part; + if (!empty($hrefParts['path'])) { + // Root path case (/path) including relative scheme case (//host/path) + if ($hrefParts['path'] && $hrefParts['path'][0] == '/') { + $path = $hrefParts['path']; + } else { + $path = $path . '/' . $hrefParts['path']; + + // Resolve arbitrary relative path + // Lifted from https://www.php.net/manual/en/function.realpath.php#84012 + $parts = array_filter(explode('/', $path), 'strlen'); + $absolutes = array(); + foreach ($parts as $part) { + if ('.' == $part) continue; + if ('..' == $part) { + array_pop($absolutes); + } else { + $absolutes[] = $part; + } } - } - $path = '/' . implode('/', $absolutes); + $path = '/' . implode('/', $absolutes); + } } // Relative scheme case (//host/path) @@ -1790,7 +1845,7 @@ class Probe */ private static function feed($url, $probe = true) { - $curlResult = Network::curl($url); + $curlResult = DI::httpRequest()->get($url); if ($curlResult->isTimeout()) { self::$istimeout = true; return []; @@ -1831,12 +1886,6 @@ class Probe $data["url"] = $url; $data["poll"] = $url; - if (!empty($feed_data["header"]["author-link"])) { - $data["baseurl"] = $feed_data["header"]["author-link"]; - } else { - $data["baseurl"] = $data["url"]; - } - $data["network"] = Protocol::FEED; return $data; @@ -1880,7 +1929,7 @@ class Probe } $msgs = Email::poll($mbox, $uri); - Logger::log('searching '.$uri.', '.count($msgs).' messages found.', Logger::DEBUG); + Logger::info('Messages found', ['uri' => $uri, 'count' => count($msgs)]); if (!count($msgs)) { return []; @@ -1963,8 +2012,220 @@ class Probe $fixed = $scheme.$host.$port.$path.$query.$fragment; - Logger::log('Base: '.$base.' - Avatar: '.$avatar.' - Fixed: '.$fixed, Logger::DATA); + Logger::debug('Avatar fixed', ['base' => $base, 'avatar' => $avatar, 'fixed' => $fixed]); return $fixed; } + + /** + * Fetch the last date that the contact had posted something (publically) + * + * @param string $data probing result + * @return string last activity + */ + public static function getLastUpdate(array $data) + { + $uid = User::getIdForURL($data['url']); + if (!empty($uid)) { + $contact = Contact::selectFirst(['url', 'last-item'], ['self' => true, 'uid' => $uid]); + if (!empty($contact['last-item'])) { + return $contact['last-item']; + } + } + + if ($lastUpdate = self::updateFromNoScrape($data)) { + return $lastUpdate; + } + + if (!empty($data['outbox'])) { + return self::updateFromOutbox($data['outbox'], $data); + } elseif (!empty($data['poll']) && ($data['network'] == Protocol::ACTIVITYPUB)) { + return self::updateFromOutbox($data['poll'], $data); + } elseif (!empty($data['poll'])) { + return self::updateFromFeed($data); + } + + return ''; + } + + /** + * Fetch the last activity date from the "noscrape" endpoint + * + * @param array $data Probing result + * @return string last activity + * + * @return bool 'true' if update was successful or the server was unreachable + */ + private static function updateFromNoScrape(array $data) + { + if (empty($data['baseurl'])) { + return ''; + } + + // Check the 'noscrape' endpoint when it is a Friendica server + $gserver = DBA::selectFirst('gserver', ['noscrape'], ["`nurl` = ? AND `noscrape` != ''", + Strings::normaliseLink($data['baseurl'])]); + if (!DBA::isResult($gserver)) { + return ''; + } + + $curlResult = DI::httpRequest()->get($gserver['noscrape'] . '/' . $data['nick']); + + if ($curlResult->isSuccess() && !empty($curlResult->getBody())) { + $noscrape = json_decode($curlResult->getBody(), true); + if (!empty($noscrape) && !empty($noscrape['updated'])) { + return DateTimeFormat::utc($noscrape['updated'], DateTimeFormat::MYSQL); + } + } + + return ''; + } + + /** + * Fetch the last activity date from an ActivityPub Outbox + * + * @param string $feed + * @param array $data Probing result + * @return string last activity + * @throws \Friendica\Network\HTTPException\InternalServerErrorException + */ + private static function updateFromOutbox(string $feed, array $data) + { + $outbox = ActivityPub::fetchContent($feed); + if (empty($outbox)) { + return ''; + } + + if (!empty($outbox['orderedItems'])) { + $items = $outbox['orderedItems']; + } elseif (!empty($outbox['first']['orderedItems'])) { + $items = $outbox['first']['orderedItems']; + } elseif (!empty($outbox['first']['href']) && ($outbox['first']['href'] != $feed)) { + return self::updateFromOutbox($outbox['first']['href'], $data); + } elseif (!empty($outbox['first'])) { + if (is_string($outbox['first']) && ($outbox['first'] != $feed)) { + return self::updateFromOutbox($outbox['first'], $data); + } else { + Logger::warning('Unexpected data', ['outbox' => $outbox]); + } + return ''; + } else { + $items = []; + } + + $last_updated = ''; + foreach ($items as $activity) { + if (!empty($activity['published'])) { + $published = DateTimeFormat::utc($activity['published']); + } elseif (!empty($activity['object']['published'])) { + $published = DateTimeFormat::utc($activity['object']['published']); + } else { + continue; + } + + if ($last_updated < $published) { + $last_updated = $published; + } + } + + if (!empty($last_updated)) { + return $last_updated; + } + + return ''; + } + + /** + * Fetch the last activity date from an XML feed + * + * @param array $data Probing result + * @return string last activity + */ + private static function updateFromFeed(array $data) + { + // Search for the newest entry in the feed + $curlResult = DI::httpRequest()->get($data['poll']); + if (!$curlResult->isSuccess()) { + return ''; + } + + $doc = new DOMDocument(); + @$doc->loadXML($curlResult->getBody()); + + $xpath = new DOMXPath($doc); + $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + + $entries = $xpath->query('/atom:feed/atom:entry'); + + $last_updated = ''; + + foreach ($entries as $entry) { + $published_item = $xpath->query('atom:published/text()', $entry)->item(0); + $updated_item = $xpath->query('atom:updated/text()' , $entry)->item(0); + $published = !empty($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : null; + $updated = !empty($updated_item->nodeValue) ? DateTimeFormat::utc($updated_item->nodeValue) : null; + + if (empty($published) || empty($updated)) { + Logger::notice('Invalid entry for XPath.', ['entry' => $entry, 'url' => $data['url']]); + continue; + } + + if ($last_updated < $published) { + $last_updated = $published; + } + + if ($last_updated < $updated) { + $last_updated = $updated; + } + } + + if (!empty($last_updated)) { + return $last_updated; + } + + return ''; + } + + /** + * Probe data from local profiles without network traffic + * + * @param string $url + * @return array probed data + */ + private static function localProbe(string $url) + { + $uid = User::getIdForURL($url); + if (empty($uid)) { + return []; + } + + $profile = User::getOwnerDataById($uid); + if (empty($profile)) { + return []; + } + + $approfile = ActivityPub\Transmitter::getProfile($uid); + if (empty($approfile)) { + return []; + } + + if (empty($profile['gsid'])) { + $profile['gsid'] = GServer::getID($approfile['generator']['url']); + } + + $data = ['name' => $profile['name'], 'nick' => $profile['nick'], 'guid' => $approfile['diaspora:guid'] ?? '', + 'url' => $profile['url'], 'addr' => $profile['addr'], 'alias' => $profile['alias'], + 'photo' => $profile['photo'], 'account-type' => $profile['contact-type'], + 'community' => ($profile['contact-type'] == User::ACCOUNT_TYPE_COMMUNITY), + 'keywords' => $profile['keywords'], 'location' => $profile['location'], 'about' => $profile['about'], + 'hide' => !$profile['net-publish'], 'batch' => '', 'notify' => $profile['notify'], + 'poll' => $profile['poll'], 'request' => $profile['request'], 'confirm' => $profile['confirm'], + 'subscribe' => $approfile['generator']['url'] . '/follow?url={uri}', 'poco' => $profile['poco'], + 'following' => $approfile['following'], 'followers' => $approfile['followers'], + 'inbox' => $approfile['inbox'], 'outbox' => $approfile['outbox'], + 'sharedinbox' => $approfile['endpoints']['sharedInbox'], 'network' => Protocol::DFRN, + 'pubkey' => $profile['upubkey'], 'baseurl' => $approfile['generator']['url'], 'gsid' => $profile['gsid'], + 'manually-approve' => in_array($profile['page-flags'], [User::PAGE_FLAGS_NORMAL, User::PAGE_FLAGS_PRVGROUP])]; + return self::rearrangeData($data); + } }