X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;ds=sidebyside;f=src%2FNetwork%2FProbe.php;h=5872ae587b53fe84efd4248699a98a2d9cf5c064;hb=e27915a819397f391b5c50b5bf15e2475eb084bf;hp=629453f1823cc50035050a543df66ffd455d9e5f;hpb=4653d7d3b0972de78130ebeca8c3217a1cd65fe4;p=friendica.git diff --git a/src/Network/Probe.php b/src/Network/Probe.php index 629453f182..5872ae587b 100644 --- a/src/Network/Probe.php +++ b/src/Network/Probe.php @@ -1,23 +1,36 @@ . + * */ -namespace Friendica\Network; -/** - * @file src/Network/Probe.php - * @brief Functions for probing URL - */ +namespace Friendica\Network; use DOMDocument; use DomXPath; -use Friendica\Core\Cache; -use Friendica\Core\Config; +use Friendica\Core\Cache\Duration; use Friendica\Core\Logger; use Friendica\Core\Protocol; use Friendica\Core\System; use Friendica\Database\DBA; +use Friendica\DI; use Friendica\Model\Contact; +use Friendica\Model\GServer; use Friendica\Model\Profile; use Friendica\Protocol\ActivityNamespace; use Friendica\Protocol\ActivityPub; @@ -29,8 +42,7 @@ use Friendica\Util\Strings; use Friendica\Util\XML; /** - * @brief This class contain functions for probing URL - * + * This class contain functions for probing URL */ class Probe { @@ -38,7 +50,32 @@ class Probe private static $istimeout; /** - * @brief Rearrange the array so that it always has the same order + * Remove stuff from an URI that doesn't belong there + * + * @param string $URI + * @return string Cleaned URI + */ + public static function cleanURI(string $URI) + { + // At first remove leading and trailing junk + $URI = trim($URI, "@#?:/ \t\n\r\0\x0B"); + + $parts = parse_url($URI); + + if (empty($parts['scheme'])) { + return $URI; + } + + // Remove the URL fragment, since these shouldn't be part of any profile URL + unset($parts['fragment']); + + $URI = Network::unparseURL($parts); + + return $URI; + } + + /** + * Rearrange the array so that it always has the same order * * @param array $data Unordered data * @@ -47,17 +84,19 @@ class Probe private static function rearrangeData($data) { $fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "account-type", - "community", "keywords", "location", "about", "gender", "hide", + "community", "keywords", "location", "about", "hide", "batch", "notify", "poll", "request", "confirm", "poco", "following", "followers", "inbox", "outbox", "sharedinbox", - "priority", "network", "pubkey", "baseurl"]; + "priority", "network", "pubkey", "baseurl", "gsid"]; $newdata = []; foreach ($fields as $field) { if (isset($data[$field])) { $newdata[$field] = $data[$field]; - } else { + } elseif ($field != "gsid") { $newdata[$field] = ""; + } else { + $newdata[$field] = null; } } @@ -68,7 +107,7 @@ class Probe } /** - * @brief Check if the hostname belongs to the own server + * Check if the hostname belongs to the own server * * @param string $host The hostname that is to be checked * @@ -76,7 +115,7 @@ class Probe */ private static function ownHost($host) { - $own_host = \get_app()->getHostName(); + $own_host = DI::baseUrl()->getHostname(); $parts = parse_url($host); @@ -91,7 +130,7 @@ class Probe } /** - * @brief Probes for webfinger path via "host-meta" + * Probes for webfinger path via "host-meta" * * We have to check if the servers in the future still will offer this. * It seems as if it was dropped from the standard. @@ -106,30 +145,50 @@ class Probe // Reset the static variable self::$baseurl = ''; - $ssl_url = "https://".$host."/.well-known/host-meta"; - $url = "http://".$host."/.well-known/host-meta"; + // Handles the case when the hostname contains the scheme + if (!parse_url($host, PHP_URL_SCHEME)) { + $ssl_url = "https://" . $host . "/.well-known/host-meta"; + $url = "http://" . $host . "/.well-known/host-meta"; + } else { + $ssl_url = $host . "/.well-known/host-meta"; + $url = ''; + } - $xrd_timeout = Config::get('system', 'xrd_timeout', 20); + $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20); - Logger::log("Probing for ".$host, Logger::DEBUG); + Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]); $xrd = null; $curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); + $ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0); if ($curlResult->isSuccess()) { $xml = $curlResult->getBody(); - $xrd = XML::parseString($xml, false); - $host_url = 'https://'.$host; + $xrd = XML::parseString($xml, true); + if (!empty($url)) { + $host_url = 'https://' . $host; + } else { + $host_url = $host; + } + } elseif ($curlResult->isTimeout()) { + Logger::info('Probing timeout', ['url' => $ssl_url], Logger::DEBUG); + self::$istimeout = true; + return false; } - if (!is_object($xrd)) { + if (!is_object($xrd) && !empty($url)) { $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); + $connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0); if ($curlResult->isTimeout()) { - Logger::log("Probing timeout for " . $url, Logger::DEBUG); + Logger::info('Probing timeout', ['url' => $url], Logger::DEBUG); + self::$istimeout = true; + return false; + } elseif ($connection_error && $ssl_connection_error) { self::$istimeout = true; return false; } + $xml = $curlResult->getBody(); - $xrd = XML::parseString($xml, false); + $xrd = XML::parseString($xml, true); $host_url = 'http://'.$host; } if (!is_object($xrd)) { @@ -169,7 +228,7 @@ class Probe } /** - * @brief Perform Webfinger lookup and return DFRN data + * Perform Webfinger lookup and return DFRN data * * Given an email style address, perform webfinger lookup and * return the resulting DFRN profile URL, or if no DFRN profile URL @@ -212,11 +271,29 @@ class Probe } /** - * @brief Check an URI for LRDD data + * Get the link for the remote follow page for a given profile link * - * this is a replacement for the "lrdd" function. - * It isn't used in this class and has some redundancies in the code. - * When time comes we can check the existing calls for "lrdd" if we can rework them. + * @param sting $profile + * @return string Remote follow page link + */ + public static function getRemoteFollowLink(string $profile) + { + $follow_link = ''; + + $links = self::lrdd($profile); + + if (!empty($links) && is_array($links)) { + foreach ($links as $link) { + if ($link['@attributes']['rel'] === ActivityNamespace::OSTATUSSUB) { + $follow_link = $link['@attributes']['template']; + } + } + } + return $follow_link; + } + + /** + * Check an URI for LRDD data * * @param string $uri Address that should be probed * @@ -238,7 +315,7 @@ class Probe return []; } - $host = $parts["host"]; + $host = $parts['scheme'] . '://' . $parts["host"]; if (!empty($parts["port"])) { $host .= ':'.$parts["port"]; } @@ -296,7 +373,7 @@ class Probe $data[] = ["@attributes" => $link]; } - if (is_array($webfinger["aliases"])) { + if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { foreach ($webfinger["aliases"] as $alias) { $data[] = ["@attributes" => ["rel" => "alias", @@ -308,7 +385,7 @@ class Probe } /** - * @brief Fetch information (protocol endpoints and user information) about a given uri + * Fetch information (protocol endpoints and user information) about a given uri * * @param string $uri Address that should be probed * @param string $network Test for this specific network @@ -322,7 +399,7 @@ class Probe public static function uri($uri, $network = '', $uid = -1, $cache = true) { if ($cache) { - $result = Cache::get('Probe::uri:' . $network . ':' . $uri); + $result = DI::cache()->get('Probe::uri:' . $network . ':' . $uri); if (!is_null($result)) { return $result; } @@ -359,10 +436,10 @@ class Probe $data['url'] = $uri; } - if (!empty($data['photo']) && !empty($data["baseurl"])) { + if (!empty($data['photo']) && !empty($data['baseurl'])) { $data['baseurl'] = Network::getUrlMatch(Strings::normaliseLink($data['baseurl']), Strings::normaliseLink($data['photo'])); } elseif (empty($data['photo'])) { - $data['photo'] = System::baseUrl() . '/images/person-300.jpg'; + $data['photo'] = DI::baseUrl() . '/images/person-300.jpg'; } if (empty($data['name'])) { @@ -387,10 +464,19 @@ class Probe $data['baseurl'] = self::$baseurl; } + if (!empty($data['baseurl']) && empty($data['gsid'])) { + $data['gsid'] = GServer::getID($data['baseurl']); + } + if (empty($data['network'])) { $data['network'] = Protocol::PHANTOM; } + // Ensure that local connections always are DFRN + if (($network == '') && ($data['network'] != Protocol::PHANTOM) && (self::ownHost($data['baseurl'] ?? '') || self::ownHost($data['url']))) { + $data['network'] = Protocol::DFRN; + } + if (!isset($data['hide']) && in_array($data['network'], Protocol::FEDERATED)) { $data['hide'] = self::getHideStatus($data['url']); } @@ -399,7 +485,7 @@ class Probe // Only store into the cache if the value seems to be valid if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) { - Cache::set('Probe::uri:' . $network . ':' . $uri, $data, Cache::DAY); + DI::cache()->set('Probe::uri:' . $network . ':' . $uri, $data, Duration::DAY); } return $data; @@ -470,7 +556,7 @@ class Probe } /** - * @brief Checks if a profile url should be OStatus but only provides partial information + * Checks if a profile url should be OStatus but only provides partial information * * @param array $webfinger Webfinger data * @param string $lrdd Path template for webfinger request @@ -514,7 +600,7 @@ class Probe } /** - * @brief Fetch information (protocol endpoints and user information) about a given uri + * Fetch information (protocol endpoints and user information) about a given uri * * This function is only called by the "uri" function that adds caching and rearranging of data. * @@ -641,20 +727,20 @@ class Probe if (in_array($network, ["", Protocol::DFRN])) { $result = self::dfrn($webfinger); } - if ((empty($result['network']) && ($network == "")) || ($network == Protocol::DIASPORA)) { - $result = self::diaspora($webfinger, $result); - } - if ((empty($result['network']) && ($network == "")) || ($network == Protocol::OSTATUS)) { - $result = self::ostatus($webfinger, false, $result); + if ((!$result && ($network == "")) || ($network == Protocol::DIASPORA)) { + $result = self::diaspora($webfinger); } - if ((empty($result['network']) && ($network == "")) || ($network == Protocol::PUMPIO)) { - $result = self::pumpio($webfinger, $addr, $result); + if ((!$result && ($network == "")) || ($network == Protocol::OSTATUS)) { + $result = self::ostatus($webfinger); } - if ((empty($result['network']) && ($network == "")) || ($network == Protocol::ZOT)) { + if (in_array($network, ['', Protocol::ZOT])) { $result = self::zot($webfinger, $result); } - if ((empty($result['network']) && ($network == "")) || ($network == Protocol::FEED)) { - $result = self::feed($uri, true, $result); + if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) { + $result = self::pumpio($webfinger, $addr); + } + if ((!$result && ($network == "")) || ($network == Protocol::FEED)) { + $result = self::feed($uri); } else { // We overwrite the detected nick with our try if the previois routines hadn't detected it. // Additionally it is overwritten when the nickname doesn't make sense (contains spaces). @@ -697,6 +783,18 @@ class Probe */ private static function zot($webfinger, $data) { + if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { + foreach ($webfinger["aliases"] as $alias) { + if (substr($alias, 0, 5) == 'acct:') { + $data["addr"] = substr($alias, 5); + } + } + } + + if (!empty($webfinger["subject"]) && (substr($webfinger["subject"], 0, 5) == "acct:")) { + $data["addr"] = substr($webfinger["subject"], 5); + } + $zot_url = ''; foreach ($webfinger['links'] as $link) { if (($link['rel'] == 'http://purl.org/zot/protocol') && !empty($link['href'])) { @@ -705,11 +803,25 @@ class Probe } if (empty($zot_url) && !empty($data['addr']) && !empty(self::$baseurl)) { + $condition = ['nurl' => Strings::normaliseLink(self::$baseurl), 'platform' => ['hubzilla']]; + if (!DBA::exists('gserver', $condition)) { + return $data; + } $zot_url = self::$baseurl . '/.well-known/zot-info?address=' . $data['addr']; } - if (!empty($zot_url)) { - $data = self::pollZot($zot_url, $data); + if (empty($zot_url)) { + return $data; + } + + $data = self::pollZot($zot_url, $data); + + if (!empty($data['url']) && !empty($webfinger['aliases']) && is_array($webfinger['aliases'])) { + foreach ($webfinger['aliases'] as $alias) { + if (!strstr($alias, '@') && Strings::normaliseLink($alias) != Strings::normaliseLink($data['url'])) { + $data['alias'] = $alias; + } + } } return $data; @@ -731,23 +843,28 @@ class Probe return $data; } - if (!empty($json['protocols']) && in_array('zot', $json['protocols'])) { - $data['network'] = Protocol::ZOT; - } elseif (!isset($json['protocols'])) { - $data['network'] = Protocol::ZOT; + if (empty($data['network'])) { + if (!empty($json['protocols']) && in_array('zot', $json['protocols'])) { + $data['network'] = Protocol::ZOT; + } elseif (!isset($json['protocols'])) { + $data['network'] = Protocol::ZOT; + } } - if (!empty($json['guid'])) { + if (!empty($json['guid']) && empty($data['guid'])) { $data['guid'] = $json['guid']; } - if (!empty($json['key'])) { + if (!empty($json['key']) && empty($data['pubkey'])) { $data['pubkey'] = $json['key']; } if (!empty($json['name'])) { $data['name'] = $json['name']; } - if (!empty($json['photo']) && empty($data['photo'])) { + if (!empty($json['photo'])) { $data['photo'] = $json['photo']; + if (!empty($json['photo_updated'])) { + $data['photo'] .= '?rev=' . urlencode($json['photo_updated']); + } } if (!empty($json['address'])) { $data['addr'] = $json['address']; @@ -771,9 +888,6 @@ class Probe if (!empty($profile['description'])) { $data['about'] = $profile['description']; } - if (!empty($profile['gender'])) { - $data['gender'] = $profile['gender']; - } if (!empty($profile['keywords'])) { $keywords = implode(', ', $profile['keywords']); if (!empty($keywords)) { @@ -788,9 +902,6 @@ class Probe if (!empty($profile['country'])) { $loc['country-name'] = $profile['country']; } - if (!empty($profile['hometown'])) { - $loc['locality'] = $profile['hometown']; - } $location = Profile::formatLocation($loc); if (!empty($location)) { $data['location'] = $location; @@ -801,7 +912,7 @@ class Probe } /** - * @brief Perform a webfinger request. + * Perform a webfinger request. * * For details see RFC 7033: * @@ -813,7 +924,7 @@ class Probe */ private static function webfinger($url, $type) { - $xrd_timeout = Config::get('system', 'xrd_timeout', 20); + $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20); $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]); if ($curlResult->isTimeout()) { @@ -832,7 +943,7 @@ class Probe } // If it is not JSON, maybe it is XML - $xrd = XML::parseString($data, false); + $xrd = XML::parseString($data, true); if (!is_object($xrd)) { Logger::log("No webfinger data retrievable for ".$url, Logger::DEBUG); return false; @@ -871,7 +982,7 @@ class Probe } /** - * @brief Poll the Friendica specific noscrape page. + * Poll the Friendica specific noscrape page. * * "noscrape" is a faster alternative to fetch the data from the hcard. * This functionality was originally created for the directory. @@ -937,10 +1048,6 @@ class Probe $data["about"] = $json["about"]; } - if (!empty($json["gender"])) { - $data["gender"] = $json["gender"]; - } - if (!empty($json["key"])) { $data["pubkey"] = $json["key"]; } @@ -975,7 +1082,7 @@ class Probe } /** - * @brief Check for valid DFRN data + * Check for valid DFRN data * * @param array $data DFRN data * @@ -1003,7 +1110,7 @@ class Probe } /** - * @brief Fetch data from a DFRN profile page and via "noscrape" + * Fetch data from a DFRN profile page and via "noscrape" * * @param string $profile_link Link to the profile page * @@ -1055,7 +1162,7 @@ class Probe } /** - * @brief Check for DFRN contact + * Check for DFRN contact * * @param array $webfinger Webfinger data * @@ -1112,7 +1219,7 @@ class Probe } if (!isset($data["network"]) || ($hcard_url == "")) { - return $data; + return false; } // Fetch data via noscrape - this is faster @@ -1135,7 +1242,7 @@ class Probe } /** - * @brief Poll the hcard page (Diaspora and Friendica specific) + * Poll the hcard page (Diaspora and Friendica specific) * * @param string $hcard_url Link to the hcard page * @param array $data The already fetched data @@ -1263,20 +1370,17 @@ class Probe } /** - * @brief Check for Diaspora contact + * Check for Diaspora contact * * @param array $webfinger Webfinger data - * @param array $data previously probed data * * @return array Diaspora data * @throws HTTPException\InternalServerErrorException */ - private static function diaspora($webfinger, $data) + private static function diaspora($webfinger) { $hcard_url = ""; - - unset($data["guid"]); - unset($data["pubkey"]); + $data = []; // The array is reversed to take into account the order of preference for same-rel links // See: https://tools.ietf.org/html/rfc7033#section-4.4.4 @@ -1306,7 +1410,7 @@ class Probe } if (empty($data["url"]) || empty($hcard_url)) { - return $data; + return false; } if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { @@ -1326,6 +1430,10 @@ class Probe // Fetch further information from the hcard $data = self::pollHcard($hcard_url, $data); + if (!$data) { + return false; + } + if (!empty($data["url"]) && !empty($data["guid"]) && !empty($data["baseurl"]) @@ -1342,23 +1450,26 @@ class Probe // We have to overwrite the detected value for "notify" since Hubzilla doesn't send it $data["notify"] = $data["baseurl"] . "/receive/users/" . $data["guid"]; $data["batch"] = $data["baseurl"] . "/receive/public"; + } else { + return false; } return $data; } /** - * @brief Check for OStatus contact + * Check for OStatus contact * * @param array $webfinger Webfinger data * @param bool $short Short detection mode - * @param array $data previously probed data * * @return array|bool OStatus data or "false" on error or "true" on short mode * @throws HTTPException\InternalServerErrorException */ - private static function ostatus($webfinger, $short = false, $data = []) + private static function ostatus($webfinger, $short = false) { + $data = []; + if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { foreach ($webfinger["aliases"] as $alias) { if (strstr($alias, "@") && !strstr(Strings::normaliseLink($alias), "http://")) { @@ -1399,11 +1510,7 @@ class Probe $curlResult = Network::curl($pubkey); if ($curlResult->isTimeout()) { self::$istimeout = true; - if ($short) { - return false; - } else { - return $data; - } + return false; } $pubkey = $curlResult->getBody(); } @@ -1424,10 +1531,8 @@ class Probe && isset($data["url"]) ) { $data["network"] = Protocol::OSTATUS; - } elseif ($short) { - return false; } else { - return $data; + return false; } if ($short) { @@ -1438,15 +1543,12 @@ class Probe $curlResult = Network::curl($data["poll"]); if ($curlResult->isTimeout()) { self::$istimeout = true; - return $data; + return false; } $feed = $curlResult->getBody(); - $dummy1 = null; - $dummy2 = null; - $dummy2 = null; - $feed_data = Feed::import($feed, $dummy1, $dummy2, $dummy3, true); + $feed_data = Feed::import($feed); if (!$feed_data) { - return $data; + return false; } if (!empty($feed_data["header"]["author-name"])) { @@ -1483,7 +1585,7 @@ class Probe } /** - * @brief Fetch data from a pump.io profile page + * Fetch data from a pump.io profile page * * @param string $profile_link Link to the profile page * @@ -1491,8 +1593,13 @@ class Probe */ private static function pumpioProfileData($profile_link) { + $curlResult = Network::curl($profile_link); + if (!$curlResult->isSuccess()) { + return false; + } + $doc = new DOMDocument(); - if (!@$doc->loadHTMLFile($profile_link)) { + if (!@$doc->loadHTML($curlResult->getBody())) { return false; } @@ -1539,15 +1646,15 @@ class Probe } /** - * @brief Check for pump.io contact + * Check for pump.io contact * * @param array $webfinger Webfinger data * @param string $addr - * @param array $data previously probed data * @return array pump.io data */ - private static function pumpio($webfinger, $addr, $data) + private static function pumpio($webfinger, $addr) { + $data = []; // The array is reversed to take into account the order of preference for same-rel links // See: https://tools.ietf.org/html/rfc7033#section-4.4.4 foreach (array_reverse($webfinger["links"]) as $link) { @@ -1574,13 +1681,13 @@ class Probe $data["network"] = Protocol::PUMPIO; } else { - return $data; + return false; } $profile_data = self::pumpioProfileData($data["url"]); if (!$profile_data) { - return $data; + return false; } $data = array_merge($data, $profile_data); @@ -1596,7 +1703,7 @@ class Probe } /** - * @brief Check for twitter contact + * Check for twitter contact * * @param string $uri * @@ -1656,7 +1763,7 @@ class Probe } /** - * @brief Check page for feed link + * Check page for feed link * * @param string $url Page link * @@ -1664,9 +1771,13 @@ class Probe */ private static function getFeedLink($url) { - $doc = new DOMDocument(); + $curlResult = Network::curl($url); + if (!$curlResult->isSuccess()) { + return false; + } - if (!@$doc->loadHTMLFile($url)) { + $doc = new DOMDocument(); + if (!@$doc->loadHTML($curlResult->getBody())) { return false; } @@ -1699,35 +1810,33 @@ class Probe } /** - * @brief Check for feed contact + * Check for feed contact * * @param string $url Profile link * @param boolean $probe Do a probe if the page contains a feed link - * @param array $data previously probed data * * @return array feed data * @throws HTTPException\InternalServerErrorException */ - private static function feed($url, $probe = true, $data = []) + private static function feed($url, $probe = true) { $curlResult = Network::curl($url); if ($curlResult->isTimeout()) { self::$istimeout = true; - return $data; + return false; } $feed = $curlResult->getBody(); - $dummy1 = $dummy2 = $dummy3 = null; - $feed_data = Feed::import($feed, $dummy1, $dummy2, $dummy3, true); + $feed_data = Feed::import($feed); if (!$feed_data) { if (!$probe) { - return $data; + return false; } $feed_url = self::getFeedLink($url); if (!$feed_url) { - return $data; + return false; } return self::feed($feed_url, false); @@ -1764,7 +1873,7 @@ class Probe } /** - * @brief Check for mail contact + * Check for mail contact * * @param string $uri Profile link * @param integer $uid User ID @@ -1852,7 +1961,7 @@ class Probe } /** - * @brief Mix two paths together to possibly fix missing parts + * Mix two paths together to possibly fix missing parts * * @param string $avatar Path to the avatar * @param string $base Another path that is hopefully complete