<?php
/**
- * @file src/Network/Probe.php
+ * @copyright Copyright (C) 2020, Friendica
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
*/
-namespace Friendica\Network;
-/**
- * @file src/Network/Probe.php
- * @brief Functions for probing URL
- */
+namespace Friendica\Network;
use DOMDocument;
use DomXPath;
-use Friendica\Core\Cache\Cache;
-use Friendica\Core\Config;
+use Friendica\Core\Cache\Duration;
+use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
+use Friendica\Core\System;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Contact;
+use Friendica\Model\GServer;
use Friendica\Model\Profile;
use Friendica\Protocol\ActivityNamespace;
use Friendica\Protocol\ActivityPub;
use Friendica\Util\XML;
/**
- * @brief This class contain functions for probing URL
- *
+ * This class contains functions for probing URLs
*/
class Probe
{
+ const WEBFINGER = '/.well-known/webfinger?resource={uri}';
+
private static $baseurl;
private static $istimeout;
/**
- * @brief Rearrange the array so that it always has the same order
+ * Remove stuff from a URI that doesn't belong there
+ *
+ * @param string $URI
+ * @return string Cleaned URI
+ */
+ public static function cleanURI(string $URI)
+ {
+ // Strip "@", "#", "?", ":", "/" and whitespace/control bytes from both ends
+ $trimmed = trim($URI, "@#?:/ \t\n\r\0\x0B");
+
+ $components = parse_url($trimmed);
+
+ // A value with a scheme is rebuilt without its fragment,
+ // since fragments shouldn't be part of any profile URL
+ if (!empty($components['scheme'])) {
+ unset($components['fragment']);
+
+ return Network::unparseURL($components);
+ }
+
+ // Anything without a scheme is handed back as trimmed
+ return $trimmed;
+ }
+
+ /**
+ * Rearrange the array so that it always has the same order
*
* @param array $data Unordered data
*
private static function rearrangeData($data)
{
$fields = ["name", "nick", "guid", "url", "addr", "alias", "photo", "account-type",
- "community", "keywords", "location", "about", "gender", "hide",
- "batch", "notify", "poll", "request", "confirm", "poco",
+ "community", "keywords", "location", "about", "hide",
+ "batch", "notify", "poll", "request", "confirm", "subscribe", "poco",
"following", "followers", "inbox", "outbox", "sharedinbox",
- "priority", "network", "pubkey", "baseurl"];
+ "priority", "network", "pubkey", "baseurl", "gsid"];
$newdata = [];
foreach ($fields as $field) {
if (isset($data[$field])) {
- $newdata[$field] = $data[$field];
- } else {
+ if (in_array($field, ["gsid", "hide", "account-type"])) {
+ $newdata[$field] = (int)$data[$field];
+ } else {
+ $newdata[$field] = $data[$field];
+ }
+ } elseif ($field != "gsid") {
$newdata[$field] = "";
+ } else {
+ $newdata[$field] = null;
}
}
}
/**
- * @brief Check if the hostname belongs to the own server
+ * Check if the hostname belongs to the own server
*
* @param string $host The hostname that is to be checked
*
}
/**
- * @brief Probes for webfinger path via "host-meta"
+ * Probes for webfinger path via "host-meta"
*
* We have to check if the servers in the future still will offer this.
* It seems as if it was dropped from the standard.
// Reset the static variable
self::$baseurl = '';
- $ssl_url = "https://".$host."/.well-known/host-meta";
- $url = "http://".$host."/.well-known/host-meta";
+ // Handles the case when the hostname contains the scheme
+ if (!parse_url($host, PHP_URL_SCHEME)) {
+ $ssl_url = "https://" . $host . "/.well-known/host-meta";
+ $url = "http://" . $host . "/.well-known/host-meta";
+ } else {
+ $ssl_url = $host . "/.well-known/host-meta";
+ $url = '';
+ }
- $xrd_timeout = Config::get('system', 'xrd_timeout', 20);
+ $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20);
- Logger::log("Probing for ".$host, Logger::DEBUG);
+ Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]);
$xrd = null;
$curlResult = Network::curl($ssl_url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
$ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
if ($curlResult->isSuccess()) {
$xml = $curlResult->getBody();
- $xrd = XML::parseString($xml, false);
- $host_url = 'https://'.$host;
+ $xrd = XML::parseString($xml, true);
+ if (!empty($url)) {
+ $host_url = 'https://' . $host;
+ } else {
+ $host_url = $host;
+ }
} elseif ($curlResult->isTimeout()) {
Logger::info('Probing timeout', ['url' => $ssl_url], Logger::DEBUG);
self::$istimeout = true;
- return false;
+ return [];
}
- if (!is_object($xrd)) {
+ if (!is_object($xrd) && !empty($url)) {
$curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => 'application/xrd+xml']);
$connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
if ($curlResult->isTimeout()) {
Logger::info('Probing timeout', ['url' => $url], Logger::DEBUG);
self::$istimeout = true;
- return false;
+ return [];
} elseif ($connection_error && $ssl_connection_error) {
self::$istimeout = true;
- return false;
+ return [];
}
$xml = $curlResult->getBody();
- $xrd = XML::parseString($xml, false);
+ $xrd = XML::parseString($xml, true);
$host_url = 'http://'.$host;
}
if (!is_object($xrd)) {
return [];
}
- $lrdd = ['application/jrd+json' => $host_url . '/.well-known/webfinger?resource={uri}'];
+ $lrdd = [];
foreach ($links["xrd"]["link"] as $value => $link) {
if (!empty($link["@attributes"])) {
}
/**
- * @brief Perform Webfinger lookup and return DFRN data
+ * Perform Webfinger lookup and return DFRN data
*
* Given an email style address, perform webfinger lookup and
* return the resulting DFRN profile URL, or if no DFRN profile URL
}
/**
- * @brief Check an URI for LRDD data
+ * Check an URI for LRDD data
*
- * this is a replacement for the "lrdd" function.
- * It isn't used in this class and has some redundancies in the code.
- * When time comes we can check the existing calls for "lrdd" if we can rework them.
- *
- * @param string $uri Address that should be probed
+ * @param string $uri Address that should be probed
*
* @return array uri data
* @throws HTTPException\InternalServerErrorException
*/
- public static function lrdd($uri)
+ public static function lrdd(string $uri)
{
- $lrdd = self::hostMeta($uri);
- $webfinger = null;
-
- if (is_bool($lrdd)) {
- return [];
- }
-
- if (!$lrdd) {
- $parts = @parse_url($uri);
- if (!$parts || empty($parts["host"]) || empty($parts["path"])) {
- return [];
- }
-
- $host = $parts["host"];
- if (!empty($parts["port"])) {
- $host .= ':'.$parts["port"];
- }
-
- $path_parts = explode("/", trim($parts["path"], "/"));
-
- $nick = array_pop($path_parts);
-
- do {
- $lrdd = self::hostMeta($host);
- $host .= "/".array_shift($path_parts);
- } while (!$lrdd && (sizeof($path_parts) > 0));
- }
-
- if (!$lrdd) {
- Logger::log("No lrdd data found for ".$uri, Logger::DEBUG);
+ $data = self::getWebfingerArray($uri);
+ if (empty($data)) {
return [];
}
+ $webfinger = $data['webfinger'];
- foreach ($lrdd as $type => $template) {
- if ($webfinger) {
- continue;
- }
-
- $path = str_replace('{uri}', urlencode($uri), $template);
- $webfinger = self::webfinger($path, $type);
-
- if (!$webfinger && (strstr($uri, "@"))) {
- $path = str_replace('{uri}', urlencode("acct:".$uri), $template);
- $webfinger = self::webfinger($path, $type);
- }
-
- // Special treatment for Mastodon
- // Problem is that Mastodon uses an URL format like http://domain.tld/@nick
- // But the webfinger for this format fails.
- if (!$webfinger && !empty($nick)) {
- // Mastodon uses a "@" as prefix for usernames in their url format
- $nick = ltrim($nick, '@');
-
- $addr = $nick."@".$host;
-
- $path = str_replace('{uri}', urlencode("acct:".$addr), $template);
- $webfinger = self::webfinger($path, $type);
- }
- }
-
- if (!is_array($webfinger["links"])) {
+ if (empty($webfinger["links"])) {
Logger::log("No webfinger links found for ".$uri, Logger::DEBUG);
- return false;
+ return [];
}
$data = [];
$data[] = ["@attributes" => $link];
}
- if (is_array($webfinger["aliases"])) {
+ if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) {
foreach ($webfinger["aliases"] as $alias) {
$data[] = ["@attributes" =>
["rel" => "alias",
}
/**
- * @brief Fetch information (protocol endpoints and user information) about a given uri
+ * Fetch information (protocol endpoints and user information) about a given uri
*
* @param string $uri Address that should be probed
* @param string $network Test for this specific network
*/
public static function uri($uri, $network = '', $uid = -1, $cache = true)
{
+ $cachekey = 'Probe::uri:' . $network . ':' . $uri;
if ($cache) {
- $result = DI::cache()->get('Probe::uri:' . $network . ':' . $uri);
+ $result = DI::cache()->get($cachekey);
if (!is_null($result)) {
return $result;
}
if ($network != Protocol::ACTIVITYPUB) {
$data = self::detect($uri, $network, $uid);
+ if (!is_array($data)) {
+ $data = [];
+ }
} else {
- $data = null;
+ $data = [];
}
// When the previous detection process had got a time out
// we could falsely detect a Friendica profile as AP profile.
- if (!self::$istimeout) {
- $ap_profile = ActivityPub::probeProfile($uri);
+ if (!self::$istimeout && (empty($network) || $network == Protocol::ACTIVITYPUB)) {
+ $ap_profile = ActivityPub::probeProfile($uri, !$cache);
if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) {
$data = $ap_profile;
$ap_profile['batch'] = '';
$data = array_merge($ap_profile, $data);
}
- } else {
- Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]);
}
if (!isset($data['url'])) {
$data['url'] = $uri;
}
- if (!empty($data['photo']) && !empty($data['baseurl'])) {
- $data['baseurl'] = Network::getUrlMatch(Strings::normaliseLink($data['baseurl']), Strings::normaliseLink($data['photo']));
- } elseif (empty($data['photo'])) {
+ if (empty($data['photo'])) {
$data['photo'] = DI::baseUrl() . '/images/person-300.jpg';
}
}
}
- if (!empty(self::$baseurl)) {
- $data['baseurl'] = self::$baseurl;
+ if (!empty($data['baseurl']) && empty($data['gsid'])) {
+ $data['gsid'] = GServer::getID($data['baseurl']);
}
if (empty($data['network'])) {
$data['network'] = Protocol::PHANTOM;
}
+ // Ensure that local connections always are DFRN
+ if (($network == '') && ($data['network'] != Protocol::PHANTOM) && (self::ownHost($data['baseurl'] ?? '') || self::ownHost($data['url']))) {
+ $data['network'] = Protocol::DFRN;
+ }
+
if (!isset($data['hide']) && in_array($data['network'], Protocol::FEDERATED)) {
$data['hide'] = self::getHideStatus($data['url']);
}
// Only store into the cache if the value seems to be valid
if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) {
- DI::cache()->set('Probe::uri:' . $network . ':' . $uri, $data, Cache::DAY);
+ DI::cache()->set($cachekey, $data, Duration::DAY);
}
return $data;
}
/**
- * @brief Checks if a profile url should be OStatus but only provides partial information
- *
- * @param array $webfinger Webfinger data
- * @param string $lrdd Path template for webfinger request
- * @param string $type type
+ * Fetch the "subscribe" and add it to the result
*
- * @return array fixed webfinger data
- * @throws HTTPException\InternalServerErrorException
+ * @param array $result
+ * @param array $webfinger
+ * @return array result
*/
- private static function fixOStatus($webfinger, $lrdd, $type)
+ private static function getSubscribeLink(array $result, array $webfinger)
{
- if (empty($webfinger['links']) || empty($webfinger['subject'])) {
- return $webfinger;
+ // Copies the template of the OStatus "subscribe" link (if any) into $result['subscribe'].
+ // The webfinger data comes from a remote server, so "links" is not guaranteed to be a list.
+ if (empty($webfinger['links']) || !is_array($webfinger['links'])) {
+ return $result;
}
- $is_ostatus = false;
- $has_key = false;
-
foreach ($webfinger['links'] as $link) {
- if ($link['rel'] == ActivityNamespace::OSTATUSSUB) {
- $is_ostatus = true;
- }
- if ($link['rel'] == 'magic-public-key') {
- $has_key = true;
+ // A link may lack "rel"; fall back to '' instead of raising an undefined index warning.
+ // When several links match, the last one wins.
+ if (!empty($link['template']) && (($link['rel'] ?? '') === ActivityNamespace::OSTATUSSUB)) {
+ $result['subscribe'] = $link['template'];
}
}
- if (!$is_ostatus || $has_key) {
- return $webfinger;
- }
-
- $url = Network::switchScheme($webfinger['subject']);
- $path = str_replace('{uri}', urlencode($url), $lrdd);
- $webfinger2 = self::webfinger($path, $type);
-
- // Is the new webfinger detectable as OStatus?
- if (self::ostatus($webfinger2, true)) {
- $webfinger = $webfinger2;
- }
-
- return $webfinger;
+ return $result;
}
/**
- * @brief Fetch information (protocol endpoints and user information) about a given uri
- *
- * This function is only called by the "uri" function that adds caching and rearranging of data.
- *
- * @param string $uri Address that should be probed
- * @param string $network Test for this specific network
- * @param integer $uid User ID for the probe (only used for mails)
+ * Get webfinger data from a given URI
*
- * @return array uri data
- * @throws HTTPException\InternalServerErrorException
+ * @param string $uri
+ * @return array Webfinger array
*/
- private static function detect($uri, $network, $uid)
+ private static function getWebfingerArray(string $uri)
{
+ // Resolves $uri (either "scheme://host/path" or "nick@host", optionally prefixed with "acct:")
+ // to webfinger data. Returns [] when nothing could be fetched, otherwise the array from
+ // self::getWebfinger() ('webfinger', 'detected') extended with 'baseurl' and, when the
+ // address lookup was the one that succeeded, 'nick' and 'addr'.
$parts = parse_url($uri);
- if (!empty($parts["scheme"]) && !empty($parts["host"])) {
- $host = $parts["host"];
- if (!empty($parts["port"])) {
- $host .= ':'.$parts["port"];
+ if (!empty($parts['scheme']) && !empty($parts['host'])) {
+ $host = $parts['host'];
+ if (!empty($parts['port'])) {
+ $host .= ':'.$parts['port'];
}
- if ($host == 'twitter.com') {
- return self::twitter($uri);
- }
- $lrdd = self::hostMeta($host);
+ $baseurl = $parts['scheme'] . '://' . $host;
- if (is_bool($lrdd)) {
- return [];
- }
+ $nick = '';
+ $addr = '';
$path_parts = explode("/", trim($parts['path'] ?? '', "/"));
+ if (!empty($path_parts)) {
+ $nick = ltrim(end($path_parts), '@');
+ // When the last part of the URI is numeric then it is most likely an ID and not a nick name.
+ // An empty last part (e.g. "https://host/") is no nick either: don't build the address "@host".
+ if (($nick !== '') && !is_numeric($nick)) {
+ $addr = $nick."@".$host;
+ } else {
+ $nick = '';
+ }
+ }
- while (!$lrdd && (sizeof($path_parts) > 1)) {
- $host .= "/".array_shift($path_parts);
+ $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+ if (empty($webfinger)) {
$lrdd = self::hostMeta($host);
}
- if (!$lrdd) {
- Logger::log('No XRD data was found for '.$uri, Logger::DEBUG);
- return self::feed($uri);
- }
- $nick = array_pop($path_parts);
- // Mastodon uses a "@" as prefix for usernames in their url format
- $nick = ltrim($nick, '@');
+ if (empty($webfinger) && empty($lrdd)) {
+ // Nothing found on the bare host: move path segments into the host one by one
+ // and retry, to cover installations living in a sub directory
+ while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) {
+ $host .= "/".array_shift($path_parts);
+ $baseurl = $parts['scheme'] . '://' . $host;
- $addr = $nick."@".$host;
- } elseif (strstr($uri, '@')) {
- // If the URI starts with "mailto:" then jump directly to the mail detection
- if (strpos($uri, 'mailto:') !== false) {
- $uri = str_replace('mailto:', '', $uri);
- return self::mail($uri, $uid);
- }
+ if (!empty($nick)) {
+ $addr = $nick."@".$host;
+ }
- if ($network == Protocol::MAIL) {
- return self::mail($uri, $uid);
+ $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+ if (empty($webfinger)) {
+ $lrdd = self::hostMeta($host);
+ }
+ }
+
+ if (empty($lrdd) && empty($webfinger)) {
+ return [];
+ }
}
+ } elseif (strstr($uri, '@')) {
// Remove "acct:" from the URI
$uri = str_replace('acct:', '', $uri);
$host = substr($uri, strpos($uri, '@') + 1);
$nick = substr($uri, 0, strpos($uri, '@'));
+ $addr = $uri;
- if (strpos($uri, '@twitter.com')) {
- return self::twitter($uri);
+ // Try HTTPS first, then plain HTTP, and only then the host-meta lookup
+ $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+ if (self::$istimeout) {
+ return [];
}
- $lrdd = self::hostMeta($host);
- if (is_bool($lrdd)) {
- return [];
+ if (empty($webfinger)) {
+ $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
+ if (self::$istimeout) {
+ return [];
+ }
+ } else {
+ $baseurl = 'https://' . $host;
}
- if (!$lrdd) {
- Logger::log('No XRD data was found for '.$uri, Logger::DEBUG);
- return self::mail($uri, $uid);
+ if (empty($webfinger)) {
+ $lrdd = self::hostMeta($host);
+ if (self::$istimeout) {
+ return [];
+ }
+ $baseurl = self::$baseurl;
+ // Only fall back to "http://" when the HTTPS request above did not already set the
+ // base URL; an unconditional "else" would downgrade every successful HTTPS lookup.
+ } elseif (empty($baseurl)) {
+ $baseurl = 'http://' . $host;
}
- $addr = $uri;
} else {
- Logger::log("Uri ".$uri." was not detectable", Logger::DEBUG);
- return false;
+ Logger::info('URI was not detectable', ['uri' => $uri]);
+ return [];
}
- $webfinger = false;
+ // No direct webfinger answer: try every template announced via host-meta until one works
+ if (empty($webfinger)) {
+ foreach ($lrdd as $type => $template) {
+ if ($webfinger) {
+ continue;
+ }
- /// @todo Do we need the prefix "acct:" or "acct://"?
+ $webfinger = self::getWebfinger($template, $type, $uri, $addr);
+ }
+ }
- foreach ($lrdd as $type => $template) {
- if ($webfinger) {
- continue;
+ if (empty($webfinger)) {
+ return [];
+ }
+
+ // Nick and address are only trustworthy when the address lookup was the successful one
+ if ($webfinger['detected'] == $addr) {
+ $webfinger['nick'] = $nick;
+ $webfinger['addr'] = $addr;
+ }
+
+ $webfinger['baseurl'] = $baseurl;
+
+ return $webfinger;
+ }
+
+ /**
+ * Perform network request for webfinger data
+ *
+ * The address is queried first (as "acct:<addr>"). The plain URI is only queried
+ * when that yields nothing and the URI differs from the address.
+ *
+ * @param string $template Webfinger URL containing a "{uri}" placeholder
+ * @param string $type     Content type that is handed on to self::webfinger()
+ * @param string $uri      URI that is looked up when the address lookup yields nothing
+ * @param string $addr     Address (nick@host) that is looked up first; may be empty
+ * @return array Empty when nothing was found or a request timed out, otherwise
+ *               ['webfinger' => fetched data, 'detected' => the value ($addr or $uri) whose lookup succeeded]
+ */
+ private static function getWebfinger(string $template, string $type, string $uri, string $addr)
+ {
+ // First try the address because this is the primary purpose of webfinger
+ if (!empty($addr)) {
+ $detected = $addr;
+ $path = str_replace('{uri}', urlencode("acct:" . $addr), $template);
+ $webfinger = self::webfinger($path, $type);
+ // self::webfinger() flags a timeout in self::$istimeout; give up right away in that case
+ if (self::$istimeout) {
+ return [];
}
+ }
- // At first try it with the given uri
+ // Then try the URI (skipped when it is identical to the address that was just tried)
+ if (empty($webfinger) && $uri != $addr) {
+ $detected = $uri;
$path = str_replace('{uri}', urlencode($uri), $template);
$webfinger = self::webfinger($path, $type);
+ if (self::$istimeout) {
+ return [];
+ }
+ }
- // Fix possible problems with GNU Social probing to wrong scheme
- $webfinger = self::fixOStatus($webfinger, $template, $type);
+ // Neither lookup returned data (or none was attempted)
+ if (empty($webfinger)) {
+ return [];
+ }
+
+ return ['webfinger' => $webfinger, 'detected' => $detected];
+ }
+
+ /**
+ * Fetch information (protocol endpoints and user information) about a given uri
+ *
+ * This function is only called by the "uri" function that adds caching and rearranging of data.
+ *
+ * @param string $uri Address that should be probed
+ * @param string $network Test for this specific network
+ * @param integer $uid User ID for the probe (only used for mails)
+ *
+ * @return array uri data
+ * @throws HTTPException\InternalServerErrorException
+ */
+ private static function detect($uri, $network, $uid)
+ {
+ $hookData = [
+ 'uri' => $uri,
+ 'network' => $network,
+ 'uid' => $uid,
+ 'result' => [],
+ ];
- // We cannot be sure that the detected address was correct, so we don't use the values
- if ($webfinger && ($uri != $addr)) {
- $nick = "";
- $addr = "";
+ Hook::callAll('probe_detect', $hookData);
+
+ if ($hookData['result']) {
+ if (!is_array($hookData['result'])) {
+ return [];
+ } else {
+ return $hookData['result'];
}
+ }
- // Try webfinger with the address (user@domain.tld)
- if (!$webfinger) {
- $path = str_replace('{uri}', urlencode($addr), $template);
- $webfinger = self::webfinger($path, $type);
+ $parts = parse_url($uri);
+
+ if (!empty($parts['scheme']) && !empty($parts['host'])) {
+ if ($parts['host'] == 'twitter.com') {
+ return self::twitter($uri);
+ }
+ } elseif (strstr($uri, '@')) {
+ // If the URI starts with "mailto:" then jump directly to the mail detection
+ if (strpos($uri, 'mailto:') !== false) {
+ $uri = str_replace('mailto:', '', $uri);
+ return self::mail($uri, $uid);
}
- // Mastodon needs to have it with "acct:"
- if (!$webfinger) {
- $path = str_replace('{uri}', urlencode("acct:".$addr), $template);
- $webfinger = self::webfinger($path, $type);
+ if ($network == Protocol::MAIL) {
+ return self::mail($uri, $uid);
}
+
+ if (strpos($uri, '@twitter.com')) {
+ return self::twitter($uri);
+ }
+ } else {
+ Logger::info('URI was not detectable', ['uri' => $uri]);
+ return [];
}
- if (!$webfinger) {
- return self::feed($uri);
+ Logger::info('Probing start', ['uri' => $uri]);
+
+ $data = self::getWebfingerArray($uri);
+ if (empty($data)) {
+ if (!empty($parts['scheme'])) {
+ return self::feed($uri);
+ } elseif (!empty($uid)) {
+ return self::mail($uri, $uid);
+ } else {
+ return [];
+ }
}
- $result = false;
+ $webfinger = $data['webfinger'];
+ $nick = $data['nick'] ?? '';
+ $addr = $data['addr'] ?? '';
+ $baseurl = $data['baseurl'] ?? '';
- Logger::log("Probing ".$uri, Logger::DEBUG);
+ $result = [];
if (in_array($network, ["", Protocol::DFRN])) {
$result = self::dfrn($webfinger);
$result = self::ostatus($webfinger);
}
if (in_array($network, ['', Protocol::ZOT])) {
- $result = self::zot($webfinger, $result);
+ $result = self::zot($webfinger, $result, $baseurl);
}
if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) {
$result = self::pumpio($webfinger, $addr);
}
}
+ $result = self::getSubscribeLink($result, $webfinger);
+
if (empty($result["network"])) {
$result["network"] = Protocol::PHANTOM;
}
+ if (empty($result['baseurl']) && !empty($baseurl)) {
+ $result['baseurl'] = $baseurl;
+ }
+
if (empty($result["url"])) {
$result["url"] = $uri;
}
- Logger::log($uri." is ".$result["network"], Logger::DEBUG);
+ Logger::info('Probing done', ['uri' => $uri, 'network' => $result["network"]]);
- if (empty($result["baseurl"]) && ($result["network"] != Protocol::PHANTOM)) {
- $pos = strpos($result["url"], $host);
- if ($pos) {
- $result["baseurl"] = substr($result["url"], 0, $pos).$host;
- }
- }
return $result;
}
* @return array Zot data
* @throws HTTPException\InternalServerErrorException
*/
- private static function zot($webfinger, $data)
+ private static function zot($webfinger, $data, $baseurl)
{
if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) {
foreach ($webfinger["aliases"] as $alias) {
}
}
- if (empty($zot_url) && !empty($data['addr']) && !empty(self::$baseurl)) {
- $condition = ['nurl' => Strings::normaliseLink(self::$baseurl), 'platform' => ['hubzilla']];
+ if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) {
+ $condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']];
if (!DBA::exists('gserver', $condition)) {
return $data;
}
- $zot_url = self::$baseurl . '/.well-known/zot-info?address=' . $data['addr'];
+ $zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr'];
}
if (empty($zot_url)) {
if (!empty($profile['description'])) {
$data['about'] = $profile['description'];
}
- if (!empty($profile['gender'])) {
- $data['gender'] = $profile['gender'];
- }
if (!empty($profile['keywords'])) {
$keywords = implode(', ', $profile['keywords']);
if (!empty($keywords)) {
if (!empty($profile['country'])) {
$loc['country-name'] = $profile['country'];
}
- if (!empty($profile['hometown'])) {
- $loc['locality'] = $profile['hometown'];
- }
$location = Profile::formatLocation($loc);
if (!empty($location)) {
$data['location'] = $location;
}
/**
- * @brief Perform a webfinger request.
+ * Perform a webfinger request.
*
* For details see RFC 7033: <https://tools.ietf.org/html/rfc7033>
*
* @return array webfinger data
* @throws HTTPException\InternalServerErrorException
*/
- private static function webfinger($url, $type)
+ public static function webfinger($url, $type)
{
- $xrd_timeout = Config::get('system', 'xrd_timeout', 20);
+ $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20);
$curlResult = Network::curl($url, false, ['timeout' => $xrd_timeout, 'accept_content' => $type]);
if ($curlResult->isTimeout()) {
self::$istimeout = true;
- return false;
+ return [];
}
$data = $curlResult->getBody();
$webfinger = json_decode($data, true);
- if (is_array($webfinger)) {
+ if (!empty($webfinger)) {
if (!isset($webfinger["links"])) {
Logger::log("No json webfinger links for ".$url, Logger::DEBUG);
- return false;
+ return [];
}
return $webfinger;
}
// If it is not JSON, maybe it is XML
- $xrd = XML::parseString($data, false);
+ $xrd = XML::parseString($data, true);
if (!is_object($xrd)) {
Logger::log("No webfinger data retrievable for ".$url, Logger::DEBUG);
- return false;
+ return [];
}
$xrd_arr = XML::elementToArray($xrd);
if (!isset($xrd_arr["xrd"]["link"])) {
Logger::log("No XML webfinger links for ".$url, Logger::DEBUG);
- return false;
+ return [];
}
$webfinger = [];
}
/**
- * @brief Poll the Friendica specific noscrape page.
+ * Poll the Friendica specific noscrape page.
*
* "noscrape" is a faster alternative to fetch the data from the hcard.
* This functionality was originally created for the directory.
$curlResult = Network::curl($noscrape_url);
if ($curlResult->isTimeout()) {
self::$istimeout = true;
- return false;
+ return [];
}
$content = $curlResult->getBody();
if (!$content) {
Logger::log("Empty body for ".$noscrape_url, Logger::DEBUG);
- return false;
+ return [];
}
$json = json_decode($content, true);
if (!is_array($json)) {
Logger::log("No json data for ".$noscrape_url, Logger::DEBUG);
- return false;
+ return [];
}
if (!empty($json["fn"])) {
$data["about"] = $json["about"];
}
- if (!empty($json["gender"])) {
- $data["gender"] = $json["gender"];
- }
-
if (!empty($json["key"])) {
$data["pubkey"] = $json["key"];
}
}
/**
- * @brief Check for valid DFRN data
+ * Check for valid DFRN data
*
* @param array $data DFRN data
*
}
/**
- * @brief Fetch data from a DFRN profile page and via "noscrape"
+ * Fetch data from a DFRN profile page and via "noscrape"
*
* @param string $profile_link Link to the profile page
*
}
/**
- * @brief Check for DFRN contact
+ * Check for DFRN contact
*
* @param array $webfinger Webfinger data
*
}
if (!isset($data["network"]) || ($hcard_url == "")) {
- return false;
+ return [];
}
// Fetch data via noscrape - this is faster
}
/**
- * @brief Poll the hcard page (Diaspora and Friendica specific)
+ * Poll the hcard page (Diaspora and Friendica specific)
*
* @param string $hcard_url Link to the hcard page
* @param array $data The already fetched data
$curlResult = Network::curl($hcard_url);
if ($curlResult->isTimeout()) {
self::$istimeout = true;
- return false;
+ return [];
}
$content = $curlResult->getBody();
if (!$content) {
- return false;
+ return [];
}
$doc = new DOMDocument();
if (!@$doc->loadHTML($content)) {
- return false;
+ return [];
}
$xpath = new DomXPath($doc);
$vcards = $xpath->query("//div[contains(concat(' ', @class, ' '), ' vcard ')]");
if (!is_object($vcards)) {
- return false;
+ return [];
}
if (!isset($data["baseurl"])) {
}
/**
- * @brief Check for Diaspora contact
+ * Check for Diaspora contact
*
* @param array $webfinger Webfinger data
*
}
if (empty($data["url"]) || empty($hcard_url)) {
- return false;
+ return [];
}
if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) {
$data = self::pollHcard($hcard_url, $data);
if (!$data) {
- return false;
+ return [];
}
if (!empty($data["url"])
$data["notify"] = $data["baseurl"] . "/receive/users/" . $data["guid"];
$data["batch"] = $data["baseurl"] . "/receive/public";
} else {
- return false;
+ return [];
}
return $data;
}
/**
- * @brief Check for OStatus contact
+ * Check for OStatus contact
*
* @param array $webfinger Webfinger data
* @param bool $short Short detection mode
$data["addr"] = str_replace('acct:', '', $webfinger["subject"]);
}
- if (is_array($webfinger["links"])) {
+ if (!empty($webfinger["links"])) {
// The array is reversed to take into account the order of preference for same-rel links
// See: https://tools.ietf.org/html/rfc7033#section-4.4.4
foreach (array_reverse($webfinger["links"]) as $link) {
&& (($link["type"] ?? "") == "text/html")
&& ($link["href"] != "")
) {
- $data["url"] = $link["href"];
+ $data["url"] = $data["alias"] = $link["href"];
} elseif (($link["rel"] == "salmon") && !empty($link["href"])) {
$data["notify"] = $link["href"];
} elseif (($link["rel"] == ActivityNamespace::FEED) && !empty($link["href"])) {
$curlResult = Network::curl($pubkey);
if ($curlResult->isTimeout()) {
self::$istimeout = true;
- return false;
+ return $short ? false : [];
}
$pubkey = $curlResult->getBody();
}
) {
$data["network"] = Protocol::OSTATUS;
} else {
- return false;
+ return $short ? false : [];
}
if ($short) {
$curlResult = Network::curl($data["poll"]);
if ($curlResult->isTimeout()) {
self::$istimeout = true;
- return false;
+ return [];
}
$feed = $curlResult->getBody();
$feed_data = Feed::import($feed);
if (!$feed_data) {
- return false;
+ return [];
}
if (!empty($feed_data["header"]["author-name"])) {
$data["url"] = $feed_data["header"]["author-link"];
}
- if (($data['poll'] == $data['url']) && ($data["alias"] != '')) {
- $data['url'] = $data["alias"];
+ if ($data["url"] == $data["alias"]) {
$data["alias"] = '';
}
}
/**
- * @brief Fetch data from a pump.io profile page
+ * Fetch data from a pump.io profile page
*
* @param string $profile_link Link to the profile page
*
{
$curlResult = Network::curl($profile_link);
if (!$curlResult->isSuccess()) {
- return false;
+ return [];
}
$doc = new DOMDocument();
if (!@$doc->loadHTML($curlResult->getBody())) {
- return false;
+ return [];
}
$xpath = new DomXPath($doc);
}
/**
- * @brief Check for pump.io contact
+ * Check for pump.io contact
*
* @param array $webfinger Webfinger data
* @param string $addr
$data["network"] = Protocol::PUMPIO;
} else {
- return false;
+ return [];
}
$profile_data = self::pumpioProfileData($data["url"]);
if (!$profile_data) {
- return false;
+ return [];
}
$data = array_merge($data, $profile_data);
}
/**
- * @brief Check for twitter contact
+ * Check for twitter contact
*
* @param string $uri
*
$data['network'] = Protocol::TWITTER;
$data['baseurl'] = 'https://twitter.com';
- $curlResult = Network::curl($data['url'], false);
- if (!$curlResult->isSuccess()) {
- return [];
- }
+ return $data;
+ }
- $body = $curlResult->getBody();
+ /**
+ * Checks HTML page for RSS feed link
+ *
+ * Only the first <link type="application/rss+xml" rel="alternate"> in the document head is
+ * considered. A relative link is made absolute in the context of the page URL.
+ *
+ * @param string $url Page link
+ * @param string $body Page body string
+ * @return string|false Feed link, empty string if the page has no feed link,
+ *                      or false if body was empty or an invalid HTML document
+ */
+ public static function getFeedLink(string $url, string $body)
+ {
+ // DOMDocument::loadHTML() throws a ValueError for an empty string on PHP 8, which the
+ // "@" operator does not suppress. An empty body has no feed link, so bail out early.
+ if ($body === '') {
+ return false;
+ }
+
$doc = new DOMDocument();
- @$doc->loadHTML($body);
+ if (!@$doc->loadHTML($body)) {
+ return false;
+ }
+
$xpath = new DOMXPath($doc);
- $list = $xpath->query('//img[@class]');
- foreach ($list as $node) {
- $img_attr = [];
- if ($node->attributes->length) {
- foreach ($node->attributes as $attribute) {
- $img_attr[$attribute->name] = $attribute->value;
- }
- }
+ // string() yields the href of the first matching link, or '' when there is none
+ $feedUrl = $xpath->evaluate('string(/html/head/link[@type="application/rss+xml" and @rel="alternate"]/@href)');
- if (empty($img_attr['class'])) {
- continue;
- }
-
- if (strpos($img_attr['class'], 'ProfileAvatar-image') !== false) {
- if (!empty($img_attr['src'])) {
- $data['photo'] = $img_attr['src'];
- }
- if (!empty($img_attr['alt'])) {
- $data['name'] = $img_attr['alt'];
- }
- }
- }
+ $feedUrl = $feedUrl ? self::ensureAbsoluteLinkFromHTMLDoc($feedUrl, $url, $xpath) : '';
- return $data;
+ return $feedUrl;
}
/**
- * @brief Check page for feed link
+ * Return an absolute URL in the context of a HTML document retrieved from the provided URL.
+ *
+ * Loosely based on RFC 1808
*
- * @param string $url Page link
+ * @see https://tools.ietf.org/html/rfc1808
*
- * @return string feed link
+ * @param string $href The potential relative href found in the HTML document
+ * @param string $base The HTML document URL
+ * @param DOMXPath $xpath The HTML document XPath
+ * @return string
*/
- private static function getFeedLink($url)
+ private static function ensureAbsoluteLinkFromHTMLDoc(string $href, string $base, DOMXPath $xpath)
{
- $curlResult = Network::curl($url);
- if (!$curlResult->isSuccess()) {
- return false;
+ // An href that already is a complete URL needs no resolving
+ if (filter_var($href, FILTER_VALIDATE_URL)) {
+ return $href;
}
- $doc = new DOMDocument();
- if (!@$doc->loadHTML($curlResult->getBody())) {
- return false;
- }
+ // A <base href="..."> in the document head takes precedence over the document URL
+ $base = $xpath->evaluate('string(/html/head/base/@href)') ?: $base;
- $xpath = new DomXPath($doc);
+ // parse_url() returns false on seriously malformed input; treat that like "no components"
+ $baseParts = parse_url($base) ?: [];
- //$feeds = $xpath->query("/html/head/link[@type='application/rss+xml']");
- $feeds = $xpath->query("/html/head/link[@type='application/rss+xml' and @rel='alternate']");
- if (!is_object($feeds)) {
- return false;
- }
+ // Naked domain case (scheme://basehost)
+ $path = $baseParts['path'] ?? '/';
- if ($feeds->length == 0) {
- return false;
- }
+ // Remove the filename part of the path if it exists (/base/path/file)
+ $path = implode('/', array_slice(explode('/', $path), 0, -1));
- $feed_url = "";
+ $hrefParts = parse_url($href) ?: [];
+ // The href may consist of a query or host only, in which case there is no "path" key
+ $hrefPath = $hrefParts['path'] ?? '';
- foreach ($feeds as $feed) {
- $attr = [];
- foreach ($feed->attributes as $attribute) {
- $attr[$attribute->name] = trim($attribute->value);
+ // Root path case (/path) including relative scheme case (//host/path)
+ if ($hrefPath !== '' && $hrefPath[0] == '/') {
+ $path = $hrefPath;
+ } else {
+ $path = $path . '/' . $hrefPath;
+
+ // Resolve arbitrary relative path
+ // Lifted from https://www.php.net/manual/en/function.realpath.php#84012
+ $parts = array_filter(explode('/', $path), 'strlen');
+ $absolutes = array();
+ foreach ($parts as $part) {
+ if ('.' == $part) continue;
+ if ('..' == $part) {
+ array_pop($absolutes);
+ } else {
+ $absolutes[] = $part;
+ }
}
- if (empty($feed_url) && !empty($attr['href'])) {
- $feed_url = $attr["href"];
- }
+ $path = '/' . implode('/', $absolutes);
}
- return $feed_url;
+ // Relative scheme case (//host/path): only then the href brings its own host
+ if (isset($hrefParts['host'])) {
+ $baseParts['host'] = $hrefParts['host'];
+ }
+ $baseParts['path'] = $path;
+ // Query and fragment of the base document never apply to the linked resource ...
+ unset($baseParts['query']);
+ unset($baseParts['fragment']);
+ // ... but the query of the href itself does (e.g. "/?feed=rss2") and must not be lost
+ if (isset($hrefParts['query'])) {
+ $baseParts['query'] = $hrefParts['query'];
+ }
+
+ return Network::unparseURL($baseParts);
}
/**
- * @brief Check for feed contact
+ * Check for feed contact
*
* @param string $url Profile link
* @param boolean $probe Do a probe if the page contains a feed link
$curlResult = Network::curl($url);
if ($curlResult->isTimeout()) {
self::$istimeout = true;
- return false;
+ return [];
}
$feed = $curlResult->getBody();
$feed_data = Feed::import($feed);
if (!$feed_data) {
if (!$probe) {
- return false;
+ return [];
}
- $feed_url = self::getFeedLink($url);
+ $feed_url = self::getFeedLink($url, $feed);
if (!$feed_url) {
- return false;
+ return [];
}
return self::feed($feed_url, false);
}
/**
- * @brief Check for mail contact
+ * Check for mail contact
*
* @param string $uri Profile link
* @param integer $uid User ID
private static function mail($uri, $uid)
{
if (!Network::isEmailDomainValid($uri)) {
- return false;
+ return [];
}
if ($uid == 0) {
- return false;
+ return [];
}
$user = DBA::selectFirst('user', ['prvkey'], ['uid' => $uid]);
$mailacct = DBA::selectFirst('mailacct', $fields, $condition);
if (!DBA::isResult($user) || !DBA::isResult($mailacct)) {
- return false;
+ return [];
}
$mailbox = Email::constructMailboxName($mailacct);
openssl_private_decrypt(hex2bin($mailacct['pass']), $password, $user['prvkey']);
$mbox = Email::connect($mailbox, $mailacct['user'], $password);
if (!$mbox) {
- return false;
+ return [];
}
$msgs = Email::poll($mbox, $uri);
Logger::log('searching '.$uri.', '.count($msgs).' messages found.', Logger::DEBUG);
if (!count($msgs)) {
- return false;
+ return [];
}
$phost = substr($uri, strpos($uri, '@') + 1);
}
/**
- * @brief Mix two paths together to possibly fix missing parts
+ * Mix two paths together to possibly fix missing parts
*
* @param string $avatar Path to the avatar
* @param string $base Another path that is hopefully complete