*/
use DOMDocument;
+use DomXPath;
use Friendica\Core\Cache;
use Friendica\Core\Config;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\System;
use Friendica\Database\DBA;
-use Friendica\Model\Contact;
use Friendica\Model\Profile;
+use Friendica\Protocol\Activity\ANamespace;
+use Friendica\Protocol\ActivityPub;
use Friendica\Protocol\Email;
use Friendica\Protocol\Feed;
-use Friendica\Protocol\ActivityPub;
use Friendica\Util\Crypto;
-use Friendica\Util\DateTimeFormat;
use Friendica\Util\Network;
use Friendica\Util\Strings;
use Friendica\Util\XML;
-use DomXPath;
/**
* @brief This class contain functions for probing URL
class Probe
{
private static $baseurl;
+ private static $istimeout;
/**
* @brief Rearrange the array so that it always has the same order
*/
private static function rearrangeData($data)
{
- $fields = ["name", "nick", "guid", "url", "addr", "alias",
- "photo", "community", "keywords", "location", "about",
+ $fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "account-type",
+ "community", "keywords", "location", "about", "gender", "hide",
"batch", "notify", "poll", "request", "confirm", "poco",
+ "following", "followers", "inbox", "outbox", "sharedinbox",
"priority", "network", "pubkey", "baseurl"];
$newdata = [];
$url = "http://".$host."/.well-known/host-meta";
$xrd_timeout = Config::get('system', 'xrd_timeout', 20);
- $redirects = 0;
Logger::log("Probing for ".$host, Logger::DEBUG);
$xrd = null;
- $curlResult = Network::curl($ssl_url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
+ $curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
if ($curlResult->isSuccess()) {
$xml = $curlResult->getBody();
$xrd = XML::parseString($xml, false);
}
if (!is_object($xrd)) {
- $curlResult = Network::curl($url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
+ $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
if ($curlResult->isTimeout()) {
Logger::log("Probing timeout for " . $url, Logger::DEBUG);
+ self::$istimeout = true;
return false;
}
$xml = $curlResult->getBody();
continue;
}
- if (($attributes["rel"] == "lrdd") && !empty($attributes["template"])) {
+ if (!empty($attributes["rel"]) && $attributes["rel"] == "lrdd" && !empty($attributes["template"])) {
$type = (empty($attributes["type"]) ? '' : $attributes["type"]);
$lrdd[$type] = $attributes["template"];
$links = self::lrdd($webbie);
Logger::log('webfingerDfrn: '.$webbie.':'.print_r($links, true), Logger::DATA);
- if (count($links)) {
+ if (!empty($links) && is_array($links)) {
foreach ($links as $link) {
- if ($link['@attributes']['rel'] === NAMESPACE_DFRN) {
+ if ($link['@attributes']['rel'] === ANamespace::DFRN) {
$profile_link = $link['@attributes']['href'];
}
- if (($link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) && ($profile_link == "")) {
+ if (($link['@attributes']['rel'] === ANamespace::OSTATUSSUB) && ($profile_link == "")) {
$profile_link = 'stat:'.$link['@attributes']['template'];
}
if ($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') {
$uid = local_user();
}
+ self::$istimeout = false;
+
if ($network != Protocol::ACTIVITYPUB) {
$data = self::detect($uri, $network, $uid);
} else {
$data = null;
}
- $ap_profile = ActivityPub::probeProfile($uri);
+ // When the previous detection process timed out,
+ // a Friendica profile could be falsely detected as an AP profile.
+ if (!self::$istimeout) {
+ $ap_profile = ActivityPub::probeProfile($uri);
- if (!empty($ap_profile) && empty($network) && (defaults($data, 'network', '') != Protocol::DFRN)) {
- $data = $ap_profile;
+ if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) {
+ $data = $ap_profile;
+ } elseif (!empty($ap_profile)) {
+ $ap_profile['batch'] = '';
+ $data = array_merge($ap_profile, $data);
+ }
+ } else {
+ Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]);
}
if (!isset($data['url'])) {
}
if (!empty($data['photo'])) {
- $data['baseurl'] = Network::getUrlMatch(Strings::normaliseLink(defaults($data, 'baseurl', '')), Strings::normaliseLink($data['photo']));
+ $data['baseurl'] = Network::getUrlMatch(Strings::normaliseLink($data['baseurl'] ?? ''), Strings::normaliseLink($data['photo']));
} else {
$data['photo'] = System::baseUrl() . '/images/person-300.jpg';
}
$data['network'] = Protocol::PHANTOM;
}
+ if (!isset($data['hide']) && in_array($data['network'], Protocol::FEDERATED)) {
+ $data['hide'] = self::getHideStatus($data['url']);
+ }
+
$data = self::rearrangeData($data);
// Only store into the cache if the value seems to be valid
if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) {
Cache::set('Probe::uri:' . $network . ':' . $uri, $data, Cache::DAY);
+ }
- /// @todo temporary fix - we need a real contact update function that updates only changing fields
- /// The biggest problem is the avatar picture that could have a reduced image size.
- /// It should only be updated if the existing picture isn't existing anymore.
- /// We only update the contact when it is no probing for a specific network.
- if (($data['network'] != Protocol::FEED)
- && ($network == '')
- && $data['name']
- && $data['nick']
- && $data['url']
- && $data['addr']
- && $data['poll']
- ) {
- $fields = [
- 'name' => $data['name'],
- 'nick' => $data['nick'],
- 'url' => $data['url'],
- 'addr' => $data['addr'],
- 'photo' => $data['photo'],
- 'keywords' => $data['keywords'],
- 'location' => $data['location'],
- 'about' => $data['about'],
- 'notify' => $data['notify'],
- 'network' => $data['network'],
- 'server_url' => $data['baseurl']
- ];
-
- // This doesn't cover the case when a community isn't a community anymore
- if (!empty($data['community']) && $data['community']) {
- $fields['community'] = $data['community'];
- $fields['contact-type'] = Contact::TYPE_COMMUNITY;
- }
+ return $data;
+ }
- $fieldnames = [];
- foreach ($fields as $key => $val) {
- if (empty($val)) {
- unset($fields[$key]);
- } else {
- $fieldnames[] = $key;
- }
- }
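+ /**
+ * Fetches the "hide" status from the profile page
+ *
+ * The page behind the URL is downloaded and its named meta tags are inspected.
+ * The profile counts as hidden when "dfrn-global-visibility" is "false" or
+ * when "robots" contains "noindex".
+ *
+ * @param string $url URL of the profile
+ *
+ * @return boolean "hide" status; false when the page can't be fetched, is too large or isn't HTML
+ */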
+ private static function getHideStatus($url)
+ {
+ $curlResult = Network::curl($url);
+ if (!$curlResult->isSuccess()) {
+ return false;
+ }
- $fields['updated'] = DateTimeFormat::utcNow();
+ // If the file is too large then exit
+ if (($curlResult->getInfo()['download_content_length'] ?? 0) > 1000000) {
+ return false;
+ }
- $condition = ['nurl' => Strings::normaliseLink($data['url'])];
+ // If it isn't a HTML file then exit
+ if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
+ return false;
+ }
- $old_fields = DBA::selectFirst('gcontact', $fieldnames, $condition);
+ $body = $curlResult->getBody();
- // When the gcontact doesn't exist, the value "true" will trigger an insert.
- // In difference to the public contacts we want to have every contact
- // in the world in our global contacts.
- if (!$old_fields) {
- $old_fields = true;
+ $doc = new DOMDocument();
+ @$doc->loadHTML($body);
- // These values have to be set only on insert
- $fields['photo'] = $data['photo'];
- $fields['created'] = DateTimeFormat::utcNow();
- }
+ $xpath = new DOMXPath($doc);
- DBA::update('gcontact', $fields, $condition, $old_fields);
-
- $fields = [
- 'name' => $data['name'],
- 'nick' => $data['nick'],
- 'url' => $data['url'],
- 'addr' => $data['addr'],
- 'alias' => $data['alias'],
- 'keywords' => $data['keywords'],
- 'location' => $data['location'],
- 'about' => $data['about'],
- 'batch' => $data['batch'],
- 'notify' => $data['notify'],
- 'poll' => $data['poll'],
- 'request' => $data['request'],
- 'confirm' => $data['confirm'],
- 'poco' => $data['poco'],
- 'network' => $data['network'],
- 'pubkey' => $data['pubkey'],
- 'priority' => $data['priority'],
- 'writable' => true,
- 'rel' => Contact::SHARING
- ];
-
- $fieldnames = [];
-
- foreach ($fields as $key => $val) {
- if (empty($val)) {
- unset($fields[$key]);
- } else {
- $fieldnames[] = $key;
- }
+ $list = $xpath->query('//meta[@name]');
+ foreach ($list as $node) {
+ $meta_tag = [];
+ if ($node->attributes->length) {
+ foreach ($node->attributes as $attribute) {
+ $meta_tag[$attribute->name] = $attribute->value;
}
-
- $condition = ['nurl' => Strings::normaliseLink($data['url']), 'self' => false, 'uid' => 0];
-
- // "$old_fields" will return a "false" when the contact doesn't exist.
- // This won't trigger an insert. This is intended, since we only need
- // public contacts for everyone we store items from.
- // We don't need to store every contact on the planet.
- $old_fields = DBA::selectFirst('contact', $fieldnames, $condition);
-
- $fields['name-date'] = DateTimeFormat::utcNow();
- $fields['uri-date'] = DateTimeFormat::utcNow();
- $fields['success_update'] = DateTimeFormat::utcNow();
-
- DBA::update('contact', $fields, $condition, $old_fields);
}
- }
-
- return $data;
- }
- /**
- * @brief Switch the scheme of an url between http and https
- *
- * @param string $url URL
- *
- * @return string switched URL
- */
- private static function switchScheme($url)
- {
- $parts = parse_url($url);
+ if (empty($meta_tag['content'])) {
+ continue;
+ }
- if (!isset($parts['scheme'])) {
- return $url;
- }
+ $content = strtolower(trim($meta_tag['content']));
- if ($parts['scheme'] == 'http') {
- $url = str_replace('http://', 'https://', $url);
- } elseif ($parts['scheme'] == 'https') {
- $url = str_replace('https://', 'http://', $url);
+ switch (strtolower(trim($meta_tag['name']))) {
+ case 'dfrn-global-visibility':
+ if ($content == 'false') {
+ return true;
+ }
+ break;
+ case 'robots':
+ if (strpos($content, 'noindex') !== false) {
+ return true;
+ }
+ break;
+ }
}
- return $url;
+ return false;
}
/**
$has_key = false;
foreach ($webfinger['links'] as $link) {
- if ($link['rel'] == NAMESPACE_OSTATUSSUB) {
+ if ($link['rel'] == ANamespace::OSTATUSSUB) {
$is_ostatus = true;
}
if ($link['rel'] == 'magic-public-key') {
return $webfinger;
}
- $url = self::switchScheme($webfinger['subject']);
+ $url = Network::switchScheme($webfinger['subject']);
$path = str_replace('{uri}', urlencode($url), $lrdd);
$webfinger2 = self::webfinger($path, $type);
}
if ($host == 'twitter.com') {
- return ["network" => Protocol::TWITTER];
+ return self::twitter($uri);
}
$lrdd = self::hostMeta($host);
return [];
}
- $path_parts = explode("/", trim(defaults($parts, 'path', ''), "/"));
+ $path_parts = explode("/", trim($parts['path'] ?? '', "/"));
while (!$lrdd && (sizeof($path_parts) > 1)) {
$host .= "/".array_shift($path_parts);
$nick = substr($uri, 0, strpos($uri, '@'));
if (strpos($uri, '@twitter.com')) {
- return ["network" => Protocol::TWITTER];
+ return self::twitter($uri);
}
$lrdd = self::hostMeta($host);
private static function webfinger($url, $type)
{
$xrd_timeout = Config::get('system', 'xrd_timeout', 20);
- $redirects = 0;
- $curlResult = Network::curl($url, false, $redirects, ['timeout' => $xrd_timeout, 'accept_content' => $type]);
+ $curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]);
if ($curlResult->isTimeout()) {
+ self::$istimeout = true;
return false;
}
$data = $curlResult->getBody();
{
$curlResult = Network::curl($noscrape_url);
if ($curlResult->isTimeout()) {
+ self::$istimeout = true;
return false;
}
$content = $curlResult->getBody();
}
if (!empty($json["tags"])) {
- $keywords = implode(" ", $json["tags"]);
+ $keywords = implode(", ", $json["tags"]);
if ($keywords != "") {
$data["keywords"] = $keywords;
}
$data["about"] = $json["about"];
}
+ if (!empty($json["gender"])) {
+ $data["gender"] = $json["gender"];
+ }
+
if (!empty($json["key"])) {
$data["pubkey"] = $json["key"];
}
$data["poll"] = $json["dfrn-poll"];
}
+ if (isset($json["hide"])) {
+ $data["hide"] = (bool)$json["hide"];
+ } else {
+ $data["hide"] = false;
+ }
+
return $data;
}
if (empty($data["addr"]) || empty($data["nick"])) {
$probe_data = self::uri($profile_link);
- $data["addr"] = defaults($data, "addr", $probe_data["addr"]);
- $data["nick"] = defaults($data, "nick", $probe_data["nick"]);
+ $data["addr"] = ($data["addr"] ?? '') ?: $probe_data["addr"];
+ $data["nick"] = ($data["nick"] ?? '') ?: $probe_data["nick"];
}
$prof_data["addr"] = $data["addr"];
$prof_data["nick"] = $data["nick"];
- $prof_data["dfrn-request"] = defaults($data, 'request', null);
- $prof_data["dfrn-confirm"] = defaults($data, 'confirm', null);
- $prof_data["dfrn-notify"] = defaults($data, 'notify' , null);
- $prof_data["dfrn-poll"] = defaults($data, 'poll' , null);
- $prof_data["photo"] = defaults($data, 'photo' , null);
- $prof_data["fn"] = defaults($data, 'name' , null);
- $prof_data["key"] = defaults($data, 'pubkey' , null);
+ $prof_data["dfrn-request"] = $data['request'] ?? null;
+ $prof_data["dfrn-confirm"] = $data['confirm'] ?? null;
+ $prof_data["dfrn-notify"] = $data['notify'] ?? null;
+ $prof_data["dfrn-poll"] = $data['poll'] ?? null;
+ $prof_data["photo"] = $data['photo'] ?? null;
+ $prof_data["fn"] = $data['name'] ?? null;
+ $prof_data["key"] = $data['pubkey'] ?? null;
Logger::log("Result for profile ".$profile_link.": ".print_r($prof_data, true), Logger::DEBUG);
// The array is reversed to take into account the order of preference for same-rel links
// See: https://tools.ietf.org/html/rfc7033#section-4.4.4
foreach (array_reverse($webfinger["links"]) as $link) {
- if (($link["rel"] == NAMESPACE_DFRN) && !empty($link["href"])) {
+ if (($link["rel"] == ANamespace::DFRN) && !empty($link["href"])) {
$data["network"] = Protocol::DFRN;
- } elseif (($link["rel"] == NAMESPACE_FEED) && !empty($link["href"])) {
+ } elseif (($link["rel"] == ANamespace::FEED) && !empty($link["href"])) {
$data["poll"] = $link["href"];
- } elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") && (defaults($link, "type", "") == "text/html") && !empty($link["href"])) {
+ } elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") && (($link["type"] ?? "") == "text/html") && !empty($link["href"])) {
$data["url"] = $link["href"];
} elseif (($link["rel"] == "http://microformats.org/profile/hcard") && !empty($link["href"])) {
$hcard_url = $link["href"];
- } elseif (($link["rel"] == NAMESPACE_POCO) && !empty($link["href"])) {
+ } elseif (($link["rel"] == ANamespace::POCO) && !empty($link["href"])) {
$data["poco"] = $link["href"];
} elseif (($link["rel"] == "http://webfinger.net/rel/avatar") && !empty($link["href"])) {
$data["photo"] = $link["href"];
{
$curlResult = Network::curl($hcard_url);
if ($curlResult->isTimeout()) {
+ self::$istimeout = true;
return false;
}
$content = $curlResult->getBody();
$data["baseurl"] = trim($link["href"], '/');
} elseif (($link["rel"] == "http://joindiaspora.com/guid") && !empty($link["href"])) {
$data["guid"] = $link["href"];
- } elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") && (defaults($link, "type", "") == "text/html") && !empty($link["href"])) {
+ } elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") && (($link["type"] ?? "") == "text/html") && !empty($link["href"])) {
$data["url"] = $link["href"];
- } elseif (($link["rel"] == NAMESPACE_FEED) && !empty($link["href"])) {
+ } elseif (($link["rel"] == ANamespace::FEED) && !empty($link["href"])) {
$data["poll"] = $link["href"];
- } elseif (($link["rel"] == NAMESPACE_POCO) && !empty($link["href"])) {
+ } elseif (($link["rel"] == ANamespace::POCO) && !empty($link["href"])) {
$data["poco"] = $link["href"];
} elseif (($link["rel"] == "salmon") && !empty($link["href"])) {
$data["notify"] = $link["href"];
// See: https://tools.ietf.org/html/rfc7033#section-4.4.4
foreach (array_reverse($webfinger["links"]) as $link) {
if (($link["rel"] == "http://webfinger.net/rel/profile-page")
- && (defaults($link, "type", "") == "text/html")
+ && (($link["type"] ?? "") == "text/html")
&& ($link["href"] != "")
) {
$data["url"] = $link["href"];
} elseif (($link["rel"] == "salmon") && !empty($link["href"])) {
$data["notify"] = $link["href"];
- } elseif (($link["rel"] == NAMESPACE_FEED) && !empty($link["href"])) {
+ } elseif (($link["rel"] == ANamespace::FEED) && !empty($link["href"])) {
$data["poll"] = $link["href"];
} elseif (($link["rel"] == "magic-public-key") && !empty($link["href"])) {
$pubkey = $link["href"];
} elseif (Strings::normaliseLink($pubkey) == 'http://') {
$curlResult = Network::curl($pubkey);
if ($curlResult->isTimeout()) {
+ self::$istimeout = true;
return false;
}
$pubkey = $curlResult->getBody();
// Fetch all additional data from the feed
$curlResult = Network::curl($data["poll"]);
if ($curlResult->isTimeout()) {
+ self::$istimeout = true;
return false;
}
$feed = $curlResult->getBody();
// See: https://tools.ietf.org/html/rfc7033#section-4.4.4
foreach (array_reverse($webfinger["links"]) as $link) {
if (($link["rel"] == "http://webfinger.net/rel/profile-page")
- && (defaults($link, "type", "") == "text/html")
+ && (($link["type"] ?? "") == "text/html")
&& ($link["href"] != "")
) {
$data["url"] = $link["href"];
return $data;
}
+	/**
+	 * @brief Check for twitter contact
+	 *
+	 * The nick is taken either from an address ("nick@twitter.com") or from a
+	 * profile URL ("https://twitter.com/nick"). The profile page is then
+	 * fetched and the avatar image tag, when present, supplies the photo and
+	 * the display name.
+	 *
+	 * @param string $uri Twitter address or profile URL
+	 *
+	 * @return array twitter data; empty when no nick can be extracted from
+	 *               $uri or when the profile page can't be fetched
+	 */
+	private static function twitter($uri)
+	{
+		// The dots are escaped so that only the literal host "twitter.com" matches,
+		// and at least one character is required for the nick.
+		if (preg_match('=(.+)@twitter\.com=i', $uri, $matches)) {
+			$nick = $matches[1];
+		} elseif (preg_match('=https?://twitter\.com/([^/?#]+)=i', $uri, $matches)) {
+			// Only the first path segment is the nick: a trailing slash, a sub path,
+			// a query string or a fragment must not leak into it.
+			$nick = $matches[1];
+		} else {
+			return [];
+		}
+
+		$data = [];
+		$data['url'] = 'https://twitter.com/' . $nick;
+		$data['addr'] = $nick . '@twitter.com';
+		$data['nick'] = $data['name'] = $nick;
+		$data['network'] = Protocol::TWITTER;
+		$data['baseurl'] = 'https://twitter.com';
+
+		$curlResult = Network::curl($data['url'], false);
+		if (!$curlResult->isSuccess()) {
+			return [];
+		}
+
+		$body = $curlResult->getBody();
+		if (!is_string($body) || ($body === '')) {
+			// DOMDocument::loadHTML() rejects an empty document (ValueError as of
+			// PHP 8, which "@" doesn't silence). Nothing can be scraped from it
+			// anyway, so the data derived from the nick is returned as is.
+			return $data;
+		}
+
+		$doc = new DOMDocument();
+		// Real world HTML is rarely valid, the parser warnings are of no interest here
+		@$doc->loadHTML($body);
+		$xpath = new DOMXPath($doc);
+
+		foreach ($xpath->query('//img[@class]') as $img) {
+			if (strpos($img->getAttribute('class'), 'ProfileAvatar-image') === false) {
+				continue;
+			}
+
+			// The avatar image carries the photo URL and the display name
+			$src = $img->getAttribute('src');
+			if (!empty($src)) {
+				$data['photo'] = $src;
+			}
+
+			$alt = $img->getAttribute('alt');
+			if (!empty($alt)) {
+				$data['name'] = $alt;
+			}
+		}
+
+		return $data;
+	}
+
+
/**
* @brief Check page for feed link
*
{
$curlResult = Network::curl($url);
if ($curlResult->isTimeout()) {
+ self::$istimeout = true;
return false;
}
$feed = $curlResult->getBody();