<?php
/**
- * @copyright Copyright (C) 2010-2021, the Friendica project
+ * @copyright Copyright (C) 2010-2022, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Module\Register;
-use Friendica\Network\HTTPClientOptions;
-use Friendica\Network\IHTTPResult;
+use Friendica\Network\HTTPClient\Client\HttpClientOptions;
+use Friendica\Network\HTTPClient\Capability\ICanHandleHttpResponses;
use Friendica\Protocol\Relay;
use Friendica\Util\DateTimeFormat;
use Friendica\Util\Network;
const DETECT_MASTODON_API = 16;
const DETECT_STATUS_PHP = 17; // Nextcloud
const DETECT_V1_CONFIG = 18;
+ const DETECT_PUMPIO = 19;
// Standardized endpoints
const DETECT_STATISTICS_JSON = 100;
return self::getID($url, true);
}
+	/**
+	 * Retrieves all non-failed servers whose base domain matches the provided domain pattern,
+	 * together with the number of contacts linked to each of them.
+	 *
+	 * The pattern is a simple fnmatch()-style pattern: "?" matches exactly one character and
+	 * "*" matches any number of characters. Characters that are special in a SQL LIKE
+	 * expression ("_", "%" and the escape character "\") only match themselves.
+	 *
+	 * @param string $pattern Domain pattern without scheme
+	 * @return array One row per matching server: all `gserver` fields plus a `contacts` count
+	 * @throws Exception
+	 */
+	public static function listByDomainPattern(string $pattern): array
+	{
+		// Escape the LIKE special characters and translate the fnmatch() wildcards in one pass.
+		// strtr() never re-scans text it has already replaced, so both steps can share one map.
+		$likePattern = 'http://' . strtr($pattern, [
+			'\\' => '\\\\',
+			'_'  => '\_',
+			'%'  => '\%',
+			'?'  => '_',
+			'*'  => '%',
+		]);
+
+		// The SUBSTRING_INDEX returns everything before the eventual third /, which effectively trims an
+		// eventual server path and keeps only the server domain which we're matching against the pattern.
+		//
+		// The contacts are counted via the joined column instead of COUNT(*): with a LEFT JOIN a server
+		// without any contact still yields one (all NULL) row, which COUNT(*) would report as 1 contact.
+		$sql = "SELECT `gserver`.*, COUNT(`contact`.`gsid`) AS `contacts`
+			FROM `gserver`
+			LEFT JOIN `contact` ON `gserver`.`id` = `contact`.`gsid`
+			WHERE SUBSTRING_INDEX(`gserver`.`nurl`, '/', 3) LIKE ?
+			AND NOT `gserver`.`failed`
+			GROUP BY `gserver`.`id`";
+
+		$stmt = DI::dba()->p($sql, $likePattern);
+
+		return DI::dba()->toArray($stmt);
+	}
+
/**
* Checks if the given server is reachable
*
// When a nodeinfo is present, we don't need to dig further
$xrd_timeout = DI::config()->get('system', 'xrd_timeout');
- $curlResult = DI::httpClient()->get($url . '/.well-known/nodeinfo', [HTTPClientOptions::TIMEOUT => $xrd_timeout]);
+ $curlResult = DI::httpClient()->get($url . '/.well-known/nodeinfo', [HttpClientOptions::TIMEOUT => $xrd_timeout]);
if ($curlResult->isTimeout()) {
self::setFailure($url);
return false;
}
// On a redirect follow the new host but mark the old one as failure
- if ($curlResult->isSuccess() && (parse_url($url, PHP_URL_HOST) != parse_url($curlResult->getRedirectUrl(), PHP_URL_HOST))) {
- $curlResult = DI::httpClient()->get($url, [HTTPClientOptions::TIMEOUT => $xrd_timeout]);
- if (parse_url($url, PHP_URL_HOST) != parse_url($curlResult->getRedirectUrl(), PHP_URL_HOST)) {
+ if ($curlResult->isSuccess() && !empty($curlResult->getRedirectUrl()) && (parse_url($url, PHP_URL_HOST) != parse_url($curlResult->getRedirectUrl(), PHP_URL_HOST))) {
+ $curlResult = DI::httpClient()->get($url, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
+ if (!empty($curlResult->getRedirectUrl()) && parse_url($url, PHP_URL_HOST) != parse_url($curlResult->getRedirectUrl(), PHP_URL_HOST)) {
Logger::info('Found redirect. Mark old entry as failure', ['old' => $url, 'new' => $curlResult->getRedirectUrl()]);
self::setFailure($url);
self::detect($curlResult->getRedirectUrl(), $network, $only_nodeinfo);
if (empty($nodeinfo['network']) || in_array($nodeinfo['network'], [Protocol::DFRN, Protocol::ZOT])) {
if (!empty($nodeinfo['detection-method'])) {
$serverdata['detection-method'] = $nodeinfo['detection-method'];
+
+ foreach (['registered-users', 'active_users_monthly', 'active-halfyear-users', 'local-posts'] as $field) {
+ if (!empty($nodeinfo[$field])) {
+ $serverdata[$field] = $nodeinfo[$field];
+ }
+ }
}
// Fetch the landing page, possibly it reveals some data
$basedata = ['detection-method' => self::DETECT_MANUAL];
}
- $curlResult = DI::httpClient()->get($baseurl, [HTTPClientOptions::TIMEOUT => $xrd_timeout]);
+ $curlResult = DI::httpClient()->get($baseurl, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
if ($curlResult->isSuccess()) {
- if ((parse_url($baseurl, PHP_URL_HOST) != parse_url($curlResult->getRedirectUrl(), PHP_URL_HOST))) {
+ if (!empty($curlResult->getRedirectUrl()) && (parse_url($baseurl, PHP_URL_HOST) != parse_url($curlResult->getRedirectUrl(), PHP_URL_HOST))) {
Logger::info('Found redirect. Mark old entry as failure', ['old' => $url, 'new' => $curlResult->getRedirectUrl()]);
self::setFailure($url);
self::detect($curlResult->getRedirectUrl(), $network, $only_nodeinfo);
// When the base path doesn't seem to contain a social network we try the complete path.
// Most detectable system have to be installed in the root directory.
// We checked the base to avoid false positives.
- $curlResult = DI::httpClient()->get($url, [HTTPClientOptions::TIMEOUT => $xrd_timeout]);
+ $curlResult = DI::httpClient()->get($url, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
if ($curlResult->isSuccess()) {
$urldata = self::analyseRootHeader($curlResult, $serverdata);
$urldata = self::analyseRootBody($curlResult, $urldata, $url);
$serverdata = self::detectGNUSocial($url, $serverdata);
}
+ if (empty($serverdata['network'])) {
+ $serverdata = self::detectPumpIO($url, $serverdata);
+ }
+
$serverdata = array_merge($nodeinfo, $serverdata);
} else {
$serverdata = $nodeinfo;
$serverdata['url'] = $url;
$serverdata['nurl'] = Strings::normaliseLink($url);
- // We take the highest number that we do find
- $registeredUsers = $serverdata['registered-users'] ?? 0;
+ if (in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED])) {
+ $serverdata = self::detectNetworkViaContacts($url, $serverdata);
+ }
- // On an active server there has to be at least a single user
- if (($serverdata['network'] != Protocol::PHANTOM) && ($registeredUsers == 0)) {
- $registeredUsers = 1;
+ if ($serverdata['network'] == Protocol::ACTIVITYPUB) {
+ $serverdata = self::fetchWeeklyUsage($url, $serverdata);
}
- if ($serverdata['network'] == Protocol::PHANTOM) {
- $serverdata['registered-users'] = max($registeredUsers, 1);
- $serverdata = self::detectNetworkViaContacts($url, $serverdata);
+ $serverdata['registered-users'] = $serverdata['registered-users'] ?? 0;
+
+ // On an active server there has to be at least a single user
+ if (!in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED]) && ($serverdata['registered-users'] == 0)) {
+ $serverdata['registered-users'] = 1;
+ } elseif (in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED])) {
+ $serverdata['registered-users'] = 0;
}
$serverdata['next_contact'] = self::getNextUpdateDate(true);
$ret = DBA::insert('gserver', $serverdata);
$id = DBA::lastInsertId();
} else {
- // Don't override the network with 'unknown' when there had been a valid entry before
- if (($serverdata['network'] == Protocol::PHANTOM) && !empty($gserver['network'])) {
- unset($serverdata['network']);
- }
-
$ret = DBA::update('gserver', $serverdata, ['nurl' => $serverdata['nurl']]);
$gserver = DBA::selectFirst('gserver', ['id'], ['nurl' => $serverdata['nurl']]);
if (DBA::isResult($gserver)) {
}
}
- if (!empty($serverdata['network']) && !empty($id) && ($serverdata['network'] != Protocol::PHANTOM)) {
+ // Count the number of known contacts from this server
+ if (!empty($id) && !in_array($serverdata['network'], [Protocol::PHANTOM, Protocol::FEED])) {
$apcontacts = DBA::count('apcontact', ['gsid' => $id]);
- $contacts = DBA::count('contact', ['uid' => 0, 'gsid' => $id]);
- $max_users = max($apcontacts, $contacts, $registeredUsers, 1);
- if ($max_users > $registeredUsers) {
+ $contacts = DBA::count('contact', ['uid' => 0, 'gsid' => $id, 'failed' => false]);
+ $max_users = max($apcontacts, $contacts);
+ if ($max_users > $serverdata['registered-users']) {
Logger::info('Update registered users', ['id' => $id, 'url' => $serverdata['nurl'], 'registered-users' => $max_users]);
DBA::update('gserver', ['registered-users' => $max_users], ['id' => $id]);
}
}
}
+ if (!empty($data['total_users'])) {
+ $serverdata['registered-users'] = max($data['total_users'], 1);
+ }
+
+ if (!empty($data['active_users_monthly'])) {
+ $serverdata['active-month-users'] = max($data['active_users_monthly'], 0);
+ }
+
+ if (!empty($data['active_users_halfyear'])) {
+ $serverdata['active-halfyear-users'] = max($data['active_users_halfyear'], 0);
+ }
+
+ if (!empty($data['local_posts'])) {
+ $serverdata['local-posts'] = max($data['local_posts'], 0);
+ }
if (!empty($data['registrations_open'])) {
$serverdata['register_policy'] = Register::OPEN;
/**
* Detect server type by using the nodeinfo data
*
- * @param string $url address of the server
- * @param IHTTPResult $httpResult
+ * @param string $url address of the server
+ * @param ICanHandleHttpResponses $httpResult
*
* @return array Server data
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
- private static function fetchNodeinfo(string $url, IHTTPResult $httpResult)
+ private static function fetchNodeinfo(string $url, ICanHandleHttpResponses $httpResult)
{
if (!$httpResult->isSuccess()) {
return [];
$server['registered-users'] = max($nodeinfo['usage']['users']['total'], 1);
}
+ if (!empty($nodeinfo['usage']['users']['activeMonth'])) {
+ $server['active-month-users'] = max($nodeinfo['usage']['users']['activeMonth'], 0);
+ }
+
+ if (!empty($nodeinfo['usage']['users']['activeHalfyear'])) {
+ $server['active-halfyear-users'] = max($nodeinfo['usage']['users']['activeHalfyear'], 0);
+ }
+
+ if (!empty($nodeinfo['usage']['localPosts'])) {
+ $server['local-posts'] = max($nodeinfo['usage']['localPosts'], 0);
+ }
+
+ if (!empty($nodeinfo['usage']['localComments'])) {
+ $server['local-comments'] = max($nodeinfo['usage']['localComments'], 0);
+ }
+
if (!empty($nodeinfo['protocols']['inbound']) && is_array($nodeinfo['protocols']['inbound'])) {
$protocols = [];
foreach ($nodeinfo['protocols']['inbound'] as $protocol) {
$server['registered-users'] = max($nodeinfo['usage']['users']['total'], 1);
}
+ if (!empty($nodeinfo['usage']['users']['activeMonth'])) {
+ $server['active-month-users'] = max($nodeinfo['usage']['users']['activeMonth'], 0);
+ }
+
+ if (!empty($nodeinfo['usage']['users']['activeHalfyear'])) {
+ $server['active-halfyear-users'] = max($nodeinfo['usage']['users']['activeHalfyear'], 0);
+ }
+
+ if (!empty($nodeinfo['usage']['localPosts'])) {
+ $server['local-posts'] = max($nodeinfo['usage']['localPosts'], 0);
+ }
+
+ if (!empty($nodeinfo['usage']['localComments'])) {
+ $server['local-comments'] = max($nodeinfo['usage']['localComments'], 0);
+ }
+
if (!empty($nodeinfo['protocols'])) {
$protocols = [];
foreach ($nodeinfo['protocols'] as $protocol) {
$serverdata['registered-users'] = max($data['channels_total'], 1);
}
+ if (!empty($data['channels_active_monthly'])) {
+ $serverdata['active-month-users'] = max($data['channels_active_monthly'], 0);
+ }
+
+ if (!empty($data['channels_active_halfyear'])) {
+ $serverdata['active-halfyear-users'] = max($data['channels_active_halfyear'], 0);
+ }
+
+ if (!empty($data['local_posts'])) {
+ $serverdata['local-posts'] = max($data['local_posts'], 0);
+ }
+
+ if (!empty($data['local_comments'])) {
+ $serverdata['local-comments'] = max($data['local_comments'], 0);
+ }
+
if (!empty($data['register_policy'])) {
switch ($data['register_policy']) {
case 'REGISTER_OPEN':
private static function validHostMeta(string $url)
{
$xrd_timeout = DI::config()->get('system', 'xrd_timeout');
- $curlResult = DI::httpClient()->get($url . '/.well-known/host-meta', [HTTPClientOptions::TIMEOUT => $xrd_timeout]);
+ $curlResult = DI::httpClient()->get($url . '/.well-known/host-meta', [HttpClientOptions::TIMEOUT => $xrd_timeout]);
if (!$curlResult->isSuccess()) {
return false;
}
return $serverdata;
}
+ $time = time();
foreach ($contacts as $contact) {
- $probed = Contact::getByURL($contact);
- if (!empty($probed) && in_array($probed['network'], Protocol::FEDERATED)) {
+ $probed = Contact::getByURL($contact, true);
+ if (!empty($probed) && !$probed['failed'] && in_array($probed['network'], Protocol::FEDERATED)) {
$serverdata['network'] = $probed['network'];
break;
+ } elseif ((time() - $time) > 10) {
+ // To reduce the stress on remote systems we probe a maximum of 10 seconds
+ break;
}
}
- $serverdata['registered-users'] = max($serverdata['registered-users'], count($contacts), 1);
-
return $serverdata;
}
return $serverdata;
}
+	/**
+	 * Fetches the weekly activity of a server and stores the number of logins of the most
+	 * recent complete week as "active-week-users".
+	 *
+	 * The data is requested from the "/api/v1/instance/activity" endpoint of the server.
+	 * When the request fails or the response cannot be used, the server data is returned
+	 * unchanged.
+	 *
+	 * @param string $url        URL of the given server
+	 * @param array  $serverdata array with server data
+	 *
+	 * @return array server data
+	 */
+	private static function fetchWeeklyUsage(string $url, array $serverdata)
+	{
+		$curlResult = DI::httpClient()->get($url . '/api/v1/instance/activity');
+
+		if (!$curlResult->isSuccess() || ($curlResult->getBody() == '')) {
+			return $serverdata;
+		}
+
+		// The response is remote, untrusted data: anything but a non-empty list is ignored
+		$data = json_decode($curlResult->getBody(), true);
+		if (empty($data) || !is_array($data)) {
+			return $serverdata;
+		}
+
+		$now = time();
+
+		$current_week = [];
+		foreach ($data as $week) {
+			// Skip malformed entries, the timestamp is used for calculations below
+			if (!is_array($week) || empty($week['week']) || !is_numeric($week['week'])) {
+				continue;
+			}
+
+			// Use only data from a full week
+			if (($now - $week['week']) < 7 * 24 * 60 * 60) {
+				continue;
+			}
+
+			// Most likely the data is sorted correctly. But better safe than sorry
+			if (empty($current_week['week']) || ($current_week['week'] < $week['week'])) {
+				$current_week = $week;
+			}
+		}
+
+		if (!empty($current_week['logins']) && is_numeric($current_week['logins'])) {
+			// The value can arrive as a numeric string, it is stored as a non-negative integer
+			$serverdata['active-week-users'] = max((int)$current_week['logins'], 0);
+		}
+
+		return $serverdata;
+	}
+
/**
* Detects data from a given server url if it was a mastodon alike system
*
}
return $val;
- }
+ }
+
+	/**
+	 * Detect if the URL belongs to a pump.io server
+	 *
+	 * The host-meta.json document of the server is fetched and checked for a set of link
+	 * relations that are typical for pump.io. Only when every one of them is present, the
+	 * server data is updated. The version is taken from the "Server" response header.
+	 *
+	 * @param string $url        URL of the given server
+	 * @param array  $serverdata array with server data
+	 *
+	 * @return array server data
+	 */
+	private static function detectPumpIO(string $url, array $serverdata)
+	{
+		$curlResult = DI::httpClient()->get($url . '/.well-known/host-meta.json');
+		if (!$curlResult->isSuccess()) {
+			return $serverdata;
+		}
+
+		// The response is remote, untrusted data: we need a list of links to continue
+		$data = json_decode($curlResult->getBody(), true);
+		if (!is_array($data) || empty($data['links']) || !is_array($data['links'])) {
+			return $serverdata;
+		}
+
+		// We are looking for some endpoints that are typical for pump.io
+		$required = ['registration_endpoint', 'dialback', 'http://apinamespace.org/activitypub/whoami'];
+
+		// Every relation is remembered only once. Counting the link entries instead would let a
+		// duplicated relation prevent the detection or hide a missing one.
+		$found = [];
+		foreach ($data['links'] as $link) {
+			if (!is_array($link) || empty($link['rel']) || !is_string($link['rel'])) {
+				continue;
+			}
+			if (in_array($link['rel'], $required, true)) {
+				$found[$link['rel']] = true;
+			}
+		}
+
+		if (count($found) == count($required)) {
+			$serverdata['detection-method'] = self::DETECT_PUMPIO;
+
+			$serverdata['platform'] = 'pumpio';
+			$serverdata['version'] = '';
+			$serverdata['network'] = Protocol::PUMPIO;
+
+			$servers = $curlResult->getHeader('Server');
+			foreach ($servers as $server) {
+				// The dot is escaped so that only a literal "pump.io/" prefix is accepted
+				if (preg_match('#pump\.io/(.*)\s#U', $server, $matches)) {
+					$serverdata['version'] = $matches[1];
+				}
+			}
+		}
+
+		return $serverdata;
+	}
/**
* Detect if the URL belongs to a GNU Social server
return $serverdata;
}
+ // Using only body information we cannot safely detect a lot of systems.
+ // So we define a list of platforms that we can detect safely.
+ $valid_platforms = ['friendica', 'friendika', 'diaspora', 'mastodon', 'hubzilla', 'misskey', 'peertube', 'wordpress', 'write.as'];
+
$doc = new DOMDocument();
@$doc->loadHTML($curlResult->getBody());
$xpath = new DOMXPath($doc);
$serverdata['version'] = $version_part[1];
// We still do need a reliable test if some AP plugin is activated
- if (DBA::exists('apcontact', ['baseurl' => $url])) {
- $serverdata['network'] = Protocol::ACTIVITYPUB;
- } else {
- $serverdata['network'] = Protocol::FEED;
- }
+ // For now we only check for known contacts in a later process
+ $serverdata['network'] = Protocol::FEED;
if ($serverdata['detection-method'] == self::DETECT_MANUAL) {
$serverdata['detection-method'] = self::DETECT_BODY;
}
}
- if (!empty($serverdata['network']) && ($serverdata['detection-method'] == self::DETECT_MANUAL)) {
+ if (!empty($serverdata['platform']) && in_array($serverdata['detection-method'], [self::DETECT_MANUAL, self::DETECT_BODY]) && !in_array($serverdata['platform'], $valid_platforms)) {
+ $serverdata['network'] = Protocol::PHANTOM;
+ $serverdata['version'] = '';
+ $serverdata['detection-method'] = self::DETECT_MANUAL;
+ } elseif (!empty($serverdata['network']) && ($serverdata['detection-method'] == self::DETECT_MANUAL)) {
$serverdata['detection-method'] = self::DETECT_BODY;
}
if (!empty($accesstoken)) {
$api = 'https://instances.social/api/1.0/instances/list?count=0';
- $curlResult = DI::httpClient()->get($api, [HTTPClientOptions::HEADERS => ['Authorization' => ['Bearer ' . $accesstoken]]]);
+ $curlResult = DI::httpClient()->get($api, [HttpClientOptions::HEADERS => ['Authorization' => ['Bearer ' . $accesstoken]]]);
if ($curlResult->isSuccess()) {
$servers = json_decode($curlResult->getBody(), true);