]> git.mxchange.org Git - friendica.git/blobdiff - src/Network/Probe.php
Fetch more contact data from probing, remove duplicated contacts
[friendica.git] / src / Network / Probe.php
index b4e297afa2096b9aac07dd5ddcf51bfbcede087c..c38c43c0902261bfaff771787765454e954a5621 100644 (file)
@@ -10,25 +10,22 @@ namespace Friendica\Network;
  */
 
 use DOMDocument;
+use DomXPath;
 use Friendica\Core\Cache;
 use Friendica\Core\Config;
 use Friendica\Core\Logger;
 use Friendica\Core\Protocol;
 use Friendica\Core\System;
 use Friendica\Database\DBA;
-use Friendica\Model\Contact;
 use Friendica\Model\Profile;
+use Friendica\Protocol\ActivityPub;
 use Friendica\Protocol\Email;
 use Friendica\Protocol\Feed;
-use Friendica\Protocol\ActivityPub;
 use Friendica\Util\Crypto;
 use Friendica\Util\DateTimeFormat;
 use Friendica\Util\Network;
 use Friendica\Util\Strings;
 use Friendica\Util\XML;
-use DomXPath;
-
-require_once 'include/dba.php';
 
 /**
  * @brief This class contain functions for probing URL
@@ -37,6 +34,7 @@ require_once 'include/dba.php';
 class Probe
 {
        private static $baseurl;
+       private static $istimeout;
 
        /**
         * @brief Rearrange the array so that it always has the same order
@@ -47,9 +45,10 @@ class Probe
         */
        private static function rearrangeData($data)
        {
-               $fields = ["name", "nick", "guid", "url", "addr", "alias",
-                               "photo", "community", "keywords", "location", "about",
+               $fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "account-type",
+                               "community", "keywords", "location", "about", "gender", "hide",
                                "batch", "notify", "poll", "request", "confirm", "poco",
+                               "following", "followers", "inbox", "outbox", "sharedinbox",
                                "priority", "network", "pubkey", "baseurl"];
 
                $newdata = [];
@@ -76,7 +75,7 @@ class Probe
         */
        private static function ownHost($host)
        {
-               $own_host = get_app()->getHostName();
+               $own_host = \get_app()->getHostName();
 
                $parts = parse_url($host);
 
@@ -99,6 +98,7 @@ class Probe
         * @param string $host The host part of an url
         *
         * @return array with template and type of the webfinger template for JSON or XML
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function hostMeta($host)
        {
@@ -109,12 +109,11 @@ class Probe
                $url = "http://".$host."/.well-known/host-meta";
 
                $xrd_timeout = Config::get('system', 'xrd_timeout', 20);
-               $redirects = 0;
 
                Logger::log("Probing for ".$host, Logger::DEBUG);
                $xrd = null;
 
-               $curlResult = Network::curl($ssl_url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
+               $curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
                if ($curlResult->isSuccess()) {
                        $xml = $curlResult->getBody();
                        $xrd = XML::parseString($xml, false);
@@ -122,9 +121,10 @@ class Probe
                }
 
                if (!is_object($xrd)) {
-                       $curlResult = Network::curl($url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
+                       $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
                        if ($curlResult->isTimeout()) {
                                Logger::log("Probing timeout for " . $url, Logger::DEBUG);
+                               self::$istimeout = true;
                                return false;
                        }
                        $xml = $curlResult->getBody();
@@ -157,7 +157,7 @@ class Probe
                                continue;
                        }
 
-                       if (($attributes["rel"] == "lrdd") && !empty($attributes["template"])) {
+                       if (!empty($attributes["rel"]) && $attributes["rel"] == "lrdd" && !empty($attributes["template"])) {
                                $type = (empty($attributes["type"]) ? '' : $attributes["type"]);
 
                                $lrdd[$type] = $attributes["template"];
@@ -190,6 +190,7 @@ class Probe
         * @param string $hcard_url Link to the hcard - is returned by reference
         *
         * @return string profile link
+        * @throws HTTPException\InternalServerErrorException
         */
        public static function webfingerDfrn($webbie, &$hcard_url)
        {
@@ -197,7 +198,7 @@ class Probe
 
                $links = self::lrdd($webbie);
                Logger::log('webfingerDfrn: '.$webbie.':'.print_r($links, true), Logger::DATA);
-               if (count($links)) {
+               if (!empty($links) && is_array($links)) {
                        foreach ($links as $link) {
                                if ($link['@attributes']['rel'] === NAMESPACE_DFRN) {
                                        $profile_link = $link['@attributes']['href'];
@@ -223,6 +224,7 @@ class Probe
         * @param string $uri Address that should be probed
         *
         * @return array uri data
+        * @throws HTTPException\InternalServerErrorException
         */
        public static function lrdd($uri)
        {
@@ -317,6 +319,8 @@ class Probe
         * @param boolean $cache   Use cached values?
         *
         * @return array uri data
+        * @throws HTTPException\InternalServerErrorException
+        * @throws \ImagickException
         */
        public static function uri($uri, $network = '', $uid = -1, $cache = true)
        {
@@ -331,16 +335,27 @@ class Probe
                        $uid = local_user();
                }
 
+               self::$istimeout = false;
+
                if ($network != Protocol::ACTIVITYPUB) {
                        $data = self::detect($uri, $network, $uid);
                } else {
                        $data = null;
                }
 
-               $ap_profile = ActivityPub::probeProfile($uri);
+               // When the previous detection process timed out,
+               // we could falsely detect a Friendica profile as an AP profile.
+               if (!self::$istimeout) {
+                       $ap_profile = ActivityPub::probeProfile($uri);
 
-               if (!empty($ap_profile) && (defaults($data, 'network', '') != Protocol::DFRN)) {
-                       $data = $ap_profile;
+                       if (!empty($ap_profile) && empty($network) && (defaults($data, 'network', '') != Protocol::DFRN)) {
+                               $data = $ap_profile;
+                       } elseif (!empty($ap_profile)) {
+                               $ap_profile['batch'] = '';
+                               $data = array_merge($ap_profile, $data);
+                       }
+               } else {
+                       Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]);
                }
 
                if (!isset($data['url'])) {
@@ -384,143 +399,11 @@ class Probe
                // Only store into the cache if the value seems to be valid
                if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) {
                        Cache::set('Probe::uri:' . $network . ':' . $uri, $data, Cache::DAY);
-
-                       /// @todo temporary fix - we need a real contact update function that updates only changing fields
-                       /// The biggest problem is the avatar picture that could have a reduced image size.
-                       /// It should only be updated if the existing picture isn't existing anymore.
-                       /// We only update the contact when it is no probing for a specific network.
-                       if (($data['network'] != Protocol::FEED)
-                               && ($network == '')
-                               && $data['name']
-                               && $data['nick']
-                               && $data['url']
-                               && $data['addr']
-                               && $data['poll']
-                       ) {
-                               $fields = [
-                                       'name' => $data['name'],
-                                       'nick' => $data['nick'],
-                                       'url' => $data['url'],
-                                       'addr' => $data['addr'],
-                                       'photo' => $data['photo'],
-                                       'keywords' => $data['keywords'],
-                                       'location' => $data['location'],
-                                       'about' => $data['about'],
-                                       'notify' => $data['notify'],
-                                       'network' => $data['network'],
-                                       'server_url' => $data['baseurl']
-                               ];
-
-                               // This doesn't cover the case when a community isn't a community anymore
-                               if (!empty($data['community']) && $data['community']) {
-                                       $fields['community'] = $data['community'];
-                                       $fields['contact-type'] = Contact::ACCOUNT_TYPE_COMMUNITY;
-                               }
-
-                               $fieldnames = [];
-
-                               foreach ($fields as $key => $val) {
-                                       if (empty($val)) {
-                                               unset($fields[$key]);
-                                       } else {
-                                               $fieldnames[] = $key;
-                                       }
-                               }
-
-                               $fields['updated'] = DateTimeFormat::utcNow();
-
-                               $condition = ['nurl' => Strings::normaliseLink($data['url'])];
-
-                               $old_fields = DBA::selectFirst('gcontact', $fieldnames, $condition);
-
-                               // When the gcontact doesn't exist, the value "true" will trigger an insert.
-                               // In difference to the public contacts we want to have every contact
-                               // in the world in our global contacts.
-                               if (!$old_fields) {
-                                       $old_fields = true;
-
-                                       // These values have to be set only on insert
-                                       $fields['photo'] = $data['photo'];
-                                       $fields['created'] = DateTimeFormat::utcNow();
-                               }
-
-                               DBA::update('gcontact', $fields, $condition, $old_fields);
-
-                               $fields = [
-                                       'name' => $data['name'],
-                                       'nick' => $data['nick'],
-                                       'url' => $data['url'],
-                                       'addr' => $data['addr'],
-                                       'alias' => $data['alias'],
-                                       'keywords' => $data['keywords'],
-                                       'location' => $data['location'],
-                                       'about' => $data['about'],
-                                       'batch' => $data['batch'],
-                                       'notify' => $data['notify'],
-                                       'poll' => $data['poll'],
-                                       'request' => $data['request'],
-                                       'confirm' => $data['confirm'],
-                                       'poco' => $data['poco'],
-                                       'network' => $data['network'],
-                                       'pubkey' => $data['pubkey'],
-                                       'priority' => $data['priority'],
-                                       'writable' => true,
-                                       'rel' => Contact::SHARING
-                               ];
-
-                               $fieldnames = [];
-
-                               foreach ($fields as $key => $val) {
-                                       if (empty($val)) {
-                                               unset($fields[$key]);
-                                       } else {
-                                               $fieldnames[] = $key;
-                                       }
-                               }
-
-                               $condition = ['nurl' => Strings::normaliseLink($data['url']), 'self' => false, 'uid' => 0];
-
-                               // "$old_fields" will return a "false" when the contact doesn't exist.
-                               // This won't trigger an insert. This is intended, since we only need
-                               // public contacts for everyone we store items from.
-                               // We don't need to store every contact on the planet.
-                               $old_fields = DBA::selectFirst('contact', $fieldnames, $condition);
-
-                               $fields['name-date'] = DateTimeFormat::utcNow();
-                               $fields['uri-date'] = DateTimeFormat::utcNow();
-                               $fields['success_update'] = DateTimeFormat::utcNow();
-
-                               DBA::update('contact', $fields, $condition, $old_fields);
-                       }
                }
 
                return $data;
        }
 
-       /**
-        * @brief Switch the scheme of an url between http and https
-        *
-        * @param string $url URL
-        *
-        * @return string switched URL
-        */
-       private static function switchScheme($url)
-       {
-               $parts = parse_url($url);
-
-               if (!isset($parts['scheme'])) {
-                       return $url;
-               }
-
-               if ($parts['scheme'] == 'http') {
-                       $url = str_replace('http://', 'https://', $url);
-               } elseif ($parts['scheme'] == 'https') {
-                       $url = str_replace('https://', 'http://', $url);
-               }
-
-               return $url;
-       }
-
        /**
         * @brief Checks if a profile url should be OStatus but only provides partial information
         *
@@ -529,6 +412,7 @@ class Probe
         * @param string $type      type
         *
         * @return array fixed webfinger data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function fixOStatus($webfinger, $lrdd, $type)
        {
@@ -552,7 +436,7 @@ class Probe
                        return $webfinger;
                }
 
-               $url = self::switchScheme($webfinger['subject']);
+               $url = Network::switchScheme($webfinger['subject']);
                $path = str_replace('{uri}', urlencode($url), $lrdd);
                $webfinger2 = self::webfinger($path, $type);
 
@@ -574,6 +458,7 @@ class Probe
         * @param integer $uid     User ID for the probe (only used for mails)
         *
         * @return array uri data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function detect($uri, $network, $uid)
        {
@@ -742,14 +627,15 @@ class Probe
         * @param string $type type
         *
         * @return array webfinger data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function webfinger($url, $type)
        {
                $xrd_timeout = Config::get('system', 'xrd_timeout', 20);
-               $redirects = 0;
 
-               $curlResult = Network::curl($url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => $type]);
+               $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]);
                if ($curlResult->isTimeout()) {
+                       self::$istimeout = true;
                        return false;
                }
                $data = $curlResult->getBody();
@@ -812,11 +698,13 @@ class Probe
         * @param array  $data         The already fetched data
         *
         * @return array noscrape data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function pollNoscrape($noscrape_url, $data)
        {
                $curlResult = Network::curl($noscrape_url);
                if ($curlResult->isTimeout()) {
+                       self::$istimeout = true;
                        return false;
                }
                $content = $curlResult->getBody();
@@ -852,7 +740,7 @@ class Probe
                }
 
                if (!empty($json["tags"])) {
-                       $keywords = implode(" ", $json["tags"]);
+                       $keywords = implode(", ", $json["tags"]);
                        if ($keywords != "") {
                                $data["keywords"] = $keywords;
                        }
@@ -867,6 +755,10 @@ class Probe
                        $data["about"] = $json["about"];
                }
 
+               if (!empty($json["gender"])) {
+                       $data["gender"] = $json["gender"];
+               }
+
                if (!empty($json["key"])) {
                        $data["pubkey"] = $json["key"];
                }
@@ -891,6 +783,12 @@ class Probe
                        $data["poll"] = $json["dfrn-poll"];
                }
 
+               if (isset($json["hide"])) {
+                       $data["hide"] = (bool)$json["hide"];
+               } else {
+                       $data["hide"] = false;
+               }
+
                return $data;
        }
 
@@ -928,6 +826,8 @@ class Probe
         * @param string $profile_link Link to the profile page
         *
         * @return array profile data
+        * @throws HTTPException\InternalServerErrorException
+        * @throws \ImagickException
         */
        public static function profile($profile_link)
        {
@@ -978,6 +878,7 @@ class Probe
         * @param array $webfinger Webfinger data
         *
         * @return array DFRN data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function dfrn($webfinger)
        {
@@ -1059,11 +960,13 @@ class Probe
         * @param boolean $dfrn      Poll DFRN specific data
         *
         * @return array hcard data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function pollHcard($hcard_url, $data, $dfrn = false)
        {
                $curlResult = Network::curl($hcard_url);
                if ($curlResult->isTimeout()) {
+                       self::$istimeout = true;
                        return false;
                }
                $content = $curlResult->getBody();
@@ -1183,6 +1086,7 @@ class Probe
         * @param array $webfinger Webfinger data
         *
         * @return array Diaspora data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function diaspora($webfinger)
        {
@@ -1270,6 +1174,7 @@ class Probe
         * @param bool  $short     Short detection mode
         *
         * @return array|bool OStatus data or "false" on error or "true" on short mode
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function ostatus($webfinger, $short = false)
        {
@@ -1289,7 +1194,6 @@ class Probe
                        $data["addr"] = str_replace('acct:', '', $webfinger["subject"]);
                }
 
-               $pubkey = "";
                if (is_array($webfinger["links"])) {
                        // The array is reversed to take into account the order of preference for same-rel links
                        // See: https://tools.ietf.org/html/rfc7033#section-4.4.4
@@ -1315,6 +1219,7 @@ class Probe
                                        } elseif (Strings::normaliseLink($pubkey) == 'http://') {
                                                $curlResult = Network::curl($pubkey);
                                                if ($curlResult->isTimeout()) {
+                                                       self::$istimeout = true;
                                                        return false;
                                                }
                                                $pubkey = $curlResult->getBody();
@@ -1347,6 +1252,7 @@ class Probe
                // Fetch all additional data from the feed
                $curlResult = Network::curl($data["poll"]);
                if ($curlResult->isTimeout()) {
+                       self::$istimeout = true;
                        return false;
                }
                $feed = $curlResult->getBody();
@@ -1452,6 +1358,7 @@ class Probe
         *
         * @param array $webfinger Webfinger data
         *
+        * @param       $addr
         * @return array pump.io data
         */
        private static function pumpio($webfinger, $addr)
@@ -1554,11 +1461,13 @@ class Probe
         * @param boolean $probe Do a probe if the page contains a feed link
         *
         * @return array feed data
+        * @throws HTTPException\InternalServerErrorException
         */
        private static function feed($url, $probe = true)
        {
                $curlResult = Network::curl($url);
                if ($curlResult->isTimeout()) {
+                       self::$istimeout = true;
                        return false;
                }
                $feed = $curlResult->getBody();
@@ -1616,6 +1525,7 @@ class Probe
         * @param integer $uid User ID
         *
         * @return array mail data
+        * @throws \Exception
         */
        private static function mail($uri, $uid)
        {
@@ -1703,6 +1613,7 @@ class Probe
         * @param string $base   Another path that is hopefully complete
         *
         * @return string fixed avatar path
+        * @throws \Exception
         */
        public static function fixAvatar($avatar, $base)
        {