X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FNetwork%2FProbe.php;h=3f10895c3c5c8874054066735ef4b259061da1ab;hb=6a376c29d85cb3e3b5ccf99bc604da472a7191c7;hp=f61eed4192231b29cdd3e2c153bfc8308fbc0a46;hpb=7efde8e334d6ca52fd1608fb9a78babcea4bdc9f;p=friendica.git diff --git a/src/Network/Probe.php b/src/Network/Probe.php index f61eed4192..3f10895c3c 100644 --- a/src/Network/Probe.php +++ b/src/Network/Probe.php @@ -10,23 +10,22 @@ namespace Friendica\Network; */ use DOMDocument; +use DomXPath; use Friendica\Core\Cache; use Friendica\Core\Config; use Friendica\Core\Logger; use Friendica\Core\Protocol; use Friendica\Core\System; use Friendica\Database\DBA; -use Friendica\Model\Contact; use Friendica\Model\Profile; +use Friendica\Protocol\ActivityPub; use Friendica\Protocol\Email; use Friendica\Protocol\Feed; -use Friendica\Protocol\ActivityPub; use Friendica\Util\Crypto; use Friendica\Util\DateTimeFormat; use Friendica\Util\Network; use Friendica\Util\Strings; use Friendica\Util\XML; -use DomXPath; /** * @brief This class contain functions for probing URL @@ -35,6 +34,7 @@ use DomXPath; class Probe { private static $baseurl; + private static $istimeout; /** * @brief Rearrange the array so that it always has the same order @@ -45,9 +45,10 @@ class Probe */ private static function rearrangeData($data) { - $fields = ["name", "nick", "guid", "url", "addr", "alias", - "photo", "community", "keywords", "location", "about", + $fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "account-type", + "community", "keywords", "location", "about", "gender", "hide", "batch", "notify", "poll", "request", "confirm", "poco", + "following", "followers", "inbox", "outbox", "sharedinbox", "priority", "network", "pubkey", "baseurl"]; $newdata = []; @@ -74,7 +75,7 @@ class Probe */ private static function ownHost($host) { - $own_host = get_app()->getHostName(); + $own_host = \get_app()->getHostName(); $parts = parse_url($host); @@ -97,6 +98,7 @@ class Probe * @param string $host The host part of an url * * @return array with template and type of the webfinger template for JSON or XML + * @throws HTTPException\InternalServerErrorException */ private static function hostMeta($host) { @@ -107,12 +109,11 @@ class Probe $url = "http://".$host."/.well-known/host-meta"; $xrd_timeout = Config::get('system', 'xrd_timeout', 20); - $redirects = 0; Logger::log("Probing for ".$host, Logger::DEBUG); $xrd = null; - $curlResult = Network::curl($ssl_url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); + $curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); if ($curlResult->isSuccess()) { $xml = $curlResult->getBody(); $xrd = XML::parseString($xml, false); @@ -120,9 +121,10 @@ class Probe } if (!is_object($xrd)) { - $curlResult = Network::curl($url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); + $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']); if ($curlResult->isTimeout()) { Logger::log("Probing timeout for " . $url, Logger::DEBUG); + self::$istimeout = true; return false; } $xml = $curlResult->getBody(); @@ -155,7 +157,7 @@ class Probe continue; } - if (($attributes["rel"] == "lrdd") && !empty($attributes["template"])) { + if (!empty($attributes["rel"]) && $attributes["rel"] == "lrdd" && !empty($attributes["template"])) { $type = (empty($attributes["type"]) ? '' : $attributes["type"]); $lrdd[$type] = $attributes["template"]; @@ -188,6 +190,7 @@ class Probe * @param string $hcard_url Link to the hcard - is returned by reference * * @return string profile link + * @throws HTTPException\InternalServerErrorException */ public static function webfingerDfrn($webbie, &$hcard_url) { @@ -195,7 +198,7 @@ class Probe $links = self::lrdd($webbie); Logger::log('webfingerDfrn: '.$webbie.':'.print_r($links, true), Logger::DATA); - if (count($links)) { + if (!empty($links) && is_array($links)) { foreach ($links as $link) { if ($link['@attributes']['rel'] === NAMESPACE_DFRN) { $profile_link = $link['@attributes']['href']; @@ -221,6 +224,7 @@ class Probe * @param string $uri Address that should be probed * * @return array uri data + * @throws HTTPException\InternalServerErrorException */ public static function lrdd($uri) { @@ -315,6 +319,8 @@ class Probe * @param boolean $cache Use cached values? * * @return array uri data + * @throws HTTPException\InternalServerErrorException + * @throws \ImagickException */ public static function uri($uri, $network = '', $uid = -1, $cache = true) { @@ -329,16 +335,27 @@ class Probe $uid = local_user(); } + self::$istimeout = false; + if ($network != Protocol::ACTIVITYPUB) { $data = self::detect($uri, $network, $uid); } else { $data = null; } - $ap_profile = ActivityPub::probeProfile($uri); + // When the previous detection process had got a time out + // we could falsely detect a Friendica profile as AP profile. + if (!self::$istimeout) { + $ap_profile = ActivityPub::probeProfile($uri); - if (!empty($ap_profile) && (defaults($data, 'network', '') != Protocol::DFRN)) { - $data = $ap_profile; + if (empty($data) || (!empty($ap_profile) && empty($network) && (defaults($data, 'network', '') != Protocol::DFRN))) { + $data = $ap_profile; + } elseif (!empty($ap_profile)) { + $ap_profile['batch'] = ''; + $data = array_merge($ap_profile, $data); + } + } else { + Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]); } if (!isset($data['url'])) { @@ -377,146 +394,82 @@ class Probe $data['network'] = Protocol::PHANTOM; } + if (!isset($data['hide']) && in_array($data['network'], Protocol::FEDERATED)) { + $data['hide'] = self::getHideStatus($data['url']); + } + $data = self::rearrangeData($data); // Only store into the cache if the value seems to be valid if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) { Cache::set('Probe::uri:' . $network . ':' . $uri, $data, Cache::DAY); + } - /// @todo temporary fix - we need a real contact update function that updates only changing fields - /// The biggest problem is the avatar picture that could have a reduced image size. - /// It should only be updated if the existing picture isn't existing anymore. - /// We only update the contact when it is no probing for a specific network. - if (($data['network'] != Protocol::FEED) - && ($network == '') - && $data['name'] - && $data['nick'] - && $data['url'] - && $data['addr'] - && $data['poll'] - ) { - $fields = [ - 'name' => $data['name'], - 'nick' => $data['nick'], - 'url' => $data['url'], - 'addr' => $data['addr'], - 'photo' => $data['photo'], - 'keywords' => $data['keywords'], - 'location' => $data['location'], - 'about' => $data['about'], - 'notify' => $data['notify'], - 'network' => $data['network'], - 'server_url' => $data['baseurl'] - ]; - - // This doesn't cover the case when a community isn't a community anymore - if (!empty($data['community']) && $data['community']) { - $fields['community'] = $data['community']; - $fields['contact-type'] = Contact::ACCOUNT_TYPE_COMMUNITY; - } + return $data; + } - $fieldnames = []; - foreach ($fields as $key => $val) { - if (empty($val)) { - unset($fields[$key]); - } else { - $fieldnames[] = $key; - } - } + /** + * Fetches the "hide" status from the profile + * + * @param string $url URL of the profile + * + * @return boolean "hide" status + */ + private static function getHideStatus($url) + { + $curlResult = Network::curl($url); + if (!$curlResult->isSuccess()) { + return false; + } - $fields['updated'] = DateTimeFormat::utcNow(); + // If the file is too large then exit + if (defaults($curlResult->getInfo(), 'download_content_length', 0) > 1000000) { + return false; + } - $condition = ['nurl' => Strings::normaliseLink($data['url'])]; + // If it isn't a HTML file then exit + if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) { + return false; + } - $old_fields = DBA::selectFirst('gcontact', $fieldnames, $condition); + $body = $curlResult->getBody(); - // When the gcontact doesn't exist, the value "true" will trigger an insert. - // In difference to the public contacts we want to have every contact - // in the world in our global contacts. - if (!$old_fields) { - $old_fields = true; + $doc = new DOMDocument(); + @$doc->loadHTML($body); - // These values have to be set only on insert - $fields['photo'] = $data['photo']; - $fields['created'] = DateTimeFormat::utcNow(); - } + $xpath = new DOMXPath($doc); - DBA::update('gcontact', $fields, $condition, $old_fields); - - $fields = [ - 'name' => $data['name'], - 'nick' => $data['nick'], - 'url' => $data['url'], - 'addr' => $data['addr'], - 'alias' => $data['alias'], - 'keywords' => $data['keywords'], - 'location' => $data['location'], - 'about' => $data['about'], - 'batch' => $data['batch'], - 'notify' => $data['notify'], - 'poll' => $data['poll'], - 'request' => $data['request'], - 'confirm' => $data['confirm'], - 'poco' => $data['poco'], - 'network' => $data['network'], - 'pubkey' => $data['pubkey'], - 'priority' => $data['priority'], - 'writable' => true, - 'rel' => Contact::SHARING - ]; - - $fieldnames = []; - - foreach ($fields as $key => $val) { - if (empty($val)) { - unset($fields[$key]); - } else { - $fieldnames[] = $key; - } + $list = $xpath->query('//meta[@name]'); + foreach ($list as $node) { + $meta_tag = []; + if ($node->attributes->length) { + foreach ($node->attributes as $attribute) { + $meta_tag[$attribute->name] = $attribute->value; } - - $condition = ['nurl' => Strings::normaliseLink($data['url']), 'self' => false, 'uid' => 0]; - - // "$old_fields" will return a "false" when the contact doesn't exist. - // This won't trigger an insert. This is intended, since we only need - // public contacts for everyone we store items from. - // We don't need to store every contact on the planet. - $old_fields = DBA::selectFirst('contact', $fieldnames, $condition); - - $fields['name-date'] = DateTimeFormat::utcNow(); - $fields['uri-date'] = DateTimeFormat::utcNow(); - $fields['success_update'] = DateTimeFormat::utcNow(); - - DBA::update('contact', $fields, $condition, $old_fields); } - } - - return $data; - } - /** - * @brief Switch the scheme of an url between http and https - * - * @param string $url URL - * - * @return string switched URL - */ - private static function switchScheme($url) - { - $parts = parse_url($url); + if (empty($meta_tag['content'])) { + continue; + } - if (!isset($parts['scheme'])) { - return $url; - } + $content = strtolower(trim($meta_tag['content'])); - if ($parts['scheme'] == 'http') { - $url = str_replace('http://', 'https://', $url); - } elseif ($parts['scheme'] == 'https') { - $url = str_replace('https://', 'http://', $url); + switch (strtolower(trim($meta_tag['name']))) { + case 'dfrn-global-visibility': + if ($content == 'false') { + return true; + } + break; + case 'robots': + if (strpos($content, 'noindex') !== false) { + return true; + } + break; + } } - return $url; + return false; } /** @@ -527,6 +480,7 @@ class Probe * @param string $type type * * @return array fixed webfinger data + * @throws HTTPException\InternalServerErrorException */ private static function fixOStatus($webfinger, $lrdd, $type) { @@ -550,7 +504,7 @@ class Probe return $webfinger; } - $url = self::switchScheme($webfinger['subject']); + $url = Network::switchScheme($webfinger['subject']); $path = str_replace('{uri}', urlencode($url), $lrdd); $webfinger2 = self::webfinger($path, $type); @@ -572,6 +526,7 @@ class Probe * @param integer $uid User ID for the probe (only used for mails) * * @return array uri data + * @throws HTTPException\InternalServerErrorException */ private static function detect($uri, $network, $uid) { @@ -584,7 +539,7 @@ class Probe } if ($host == 'twitter.com') { - return ["network" => Protocol::TWITTER]; + return self::twitter($uri); } $lrdd = self::hostMeta($host); @@ -625,7 +580,7 @@ class Probe $nick = substr($uri, 0, strpos($uri, '@')); if (strpos($uri, '@twitter.com')) { - return ["network" => Protocol::TWITTER]; + return self::twitter($uri); } $lrdd = self::hostMeta($host); @@ -740,14 +695,15 @@ class Probe * @param string $type type * * @return array webfinger data + * @throws HTTPException\InternalServerErrorException */ private static function webfinger($url, $type) { $xrd_timeout = Config::get('system', 'xrd_timeout', 20); - $redirects = 0; - $curlResult = Network::curl($url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => $type]); + $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]); if ($curlResult->isTimeout()) { + self::$istimeout = true; return false; } $data = $curlResult->getBody(); @@ -810,11 +766,13 @@ class Probe * @param array $data The already fetched data * * @return array noscrape data + * @throws HTTPException\InternalServerErrorException */ private static function pollNoscrape($noscrape_url, $data) { $curlResult = Network::curl($noscrape_url); if ($curlResult->isTimeout()) { + self::$istimeout = true; return false; } $content = $curlResult->getBody(); @@ -850,7 +808,7 @@ class Probe } if (!empty($json["tags"])) { - $keywords = implode(" ", $json["tags"]); + $keywords = implode(", ", $json["tags"]); if ($keywords != "") { $data["keywords"] = $keywords; } @@ -865,6 +823,10 @@ class Probe $data["about"] = $json["about"]; } + if (!empty($json["gender"])) { + $data["gender"] = $json["gender"]; + } + if (!empty($json["key"])) { $data["pubkey"] = $json["key"]; } @@ -889,6 +851,12 @@ class Probe $data["poll"] = $json["dfrn-poll"]; } + if (isset($json["hide"])) { + $data["hide"] = (bool)$json["hide"]; + } else { + $data["hide"] = false; + } + return $data; } @@ -926,6 +894,8 @@ class Probe * @param string $profile_link Link to the profile page * * @return array profile data + * @throws HTTPException\InternalServerErrorException + * @throws \ImagickException */ public static function profile($profile_link) { @@ -976,6 +946,7 @@ class Probe * @param array $webfinger Webfinger data * * @return array DFRN data + * @throws HTTPException\InternalServerErrorException */ private static function dfrn($webfinger) { @@ -1057,11 +1028,13 @@ class Probe * @param boolean $dfrn Poll DFRN specific data * * @return array hcard data + * @throws HTTPException\InternalServerErrorException */ private static function pollHcard($hcard_url, $data, $dfrn = false) { $curlResult = Network::curl($hcard_url); if ($curlResult->isTimeout()) { + self::$istimeout = true; return false; } $content = $curlResult->getBody(); @@ -1181,6 +1154,7 @@ class Probe * @param array $webfinger Webfinger data * * @return array Diaspora data + * @throws HTTPException\InternalServerErrorException */ private static function diaspora($webfinger) { @@ -1268,6 +1242,7 @@ class Probe * @param bool $short Short detection mode * * @return array|bool OStatus data or "false" on error or "true" on short mode + * @throws HTTPException\InternalServerErrorException */ private static function ostatus($webfinger, $short = false) { @@ -1287,7 +1262,6 @@ class Probe $data["addr"] = str_replace('acct:', '', $webfinger["subject"]); } - $pubkey = ""; if (is_array($webfinger["links"])) { // The array is reversed to take into account the order of preference for same-rel links // See: https://tools.ietf.org/html/rfc7033#section-4.4.4 @@ -1313,6 +1287,7 @@ class Probe } elseif (Strings::normaliseLink($pubkey) == 'http://') { $curlResult = Network::curl($pubkey); if ($curlResult->isTimeout()) { + self::$istimeout = true; return false; } $pubkey = $curlResult->getBody(); @@ -1345,6 +1320,7 @@ class Probe // Fetch all additional data from the feed $curlResult = Network::curl($data["poll"]); if ($curlResult->isTimeout()) { + self::$istimeout = true; return false; } $feed = $curlResult->getBody(); @@ -1450,6 +1426,7 @@ class Probe * * @param array $webfinger Webfinger data * + * @param $addr * @return array pump.io data */ private static function pumpio($webfinger, $addr) @@ -1502,6 +1479,66 @@ class Probe return $data; } + /** + * @brief Check for twitter contact + * + * @param string $uri + * + * @return array twitter data + */ + private static function twitter($uri) + { + if (preg_match('=(.*)@twitter.com=i', $uri, $matches)) { + $nick = $matches[1]; + } elseif (preg_match('=https?://twitter.com/(.*)=i', $uri, $matches)) { + $nick = $matches[1]; + } else { + return []; + } + + $data = []; + $data['url'] = 'https://twitter.com/' . $nick; + $data['addr'] = $nick . '@twitter.com'; + $data['nick'] = $data['name'] = $nick; + $data['network'] = Protocol::TWITTER; + $data['baseurl'] = 'https://twitter.com'; + + $curlResult = Network::curl($data['url'], false); + if (!$curlResult->isSuccess()) { + return []; + } + + $body = $curlResult->getBody(); + $doc = new DOMDocument(); + @$doc->loadHTML($body); + $xpath = new DOMXPath($doc); + + $list = $xpath->query('//img[@class]'); + foreach ($list as $node) { + $img_attr = []; + if ($node->attributes->length) { + foreach ($node->attributes as $attribute) { + $img_attr[$attribute->name] = $attribute->value; + } + } + + if (empty($img_attr['class'])) { + continue; + } + + if (strpos($img_attr['class'], 'ProfileAvatar-image') !== false) { + if (!empty($img_attr['src'])) { + $data['photo'] = $img_attr['src']; + } + if (!empty($img_attr['alt'])) { + $data['name'] = $img_attr['alt']; + } + } + } + + return $data; + } + /** * @brief Check page for feed link * @@ -1552,11 +1589,13 @@ class Probe * @param boolean $probe Do a probe if the page contains a feed link * * @return array feed data + * @throws HTTPException\InternalServerErrorException */ private static function feed($url, $probe = true) { $curlResult = Network::curl($url); if ($curlResult->isTimeout()) { + self::$istimeout = true; return false; } $feed = $curlResult->getBody(); @@ -1614,6 +1653,7 @@ class Probe * @param integer $uid User ID * * @return array mail data + * @throws \Exception */ private static function mail($uri, $uid) { @@ -1701,6 +1741,7 @@ class Probe * @param string $base Another path that is hopefully complete * * @return string fixed avatar path + * @throws \Exception */ public static function fixAvatar($avatar, $base) {