]> git.mxchange.org Git - friendica.git/blob - src/Network/Probe.php
Merge pull request #12736 from MrPetovan/bug/12733-webfinger-apcontact
[friendica.git] / src / Network / Probe.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Network;
23
24 use DOMDocument;
25 use DomXPath;
26 use Exception;
27 use Friendica\Core\Hook;
28 use Friendica\Core\Logger;
29 use Friendica\Core\Protocol;
30 use Friendica\Core\System;
31 use Friendica\Database\DBA;
32 use Friendica\DI;
33 use Friendica\Model\Contact;
34 use Friendica\Model\GServer;
35 use Friendica\Model\Profile;
36 use Friendica\Model\User;
37 use Friendica\Network\HTTPClient\Client\HttpClientAccept;
38 use Friendica\Network\HTTPClient\Client\HttpClientOptions;
39 use Friendica\Protocol\ActivityNamespace;
40 use Friendica\Protocol\ActivityPub;
41 use Friendica\Protocol\Diaspora;
42 use Friendica\Protocol\Email;
43 use Friendica\Protocol\Feed;
44 use Friendica\Protocol\Salmon;
45 use Friendica\Util\Crypto;
46 use Friendica\Util\DateTimeFormat;
47 use Friendica\Util\Network;
48 use Friendica\Util\Strings;
49 use Friendica\Util\XML;
50 use GuzzleHttp\Psr7\Uri;
51
52 /**
53  * This class contains functions for probing URLs
54  */
55 class Probe
56 {
57         const HOST_META = '/.well-known/host-meta';
58         const WEBFINGER = '/.well-known/webfinger?resource={uri}';
59
60         /**
61          * @var string Base URL
62          */
63         private static $baseurl;
64
65         /**
66          * @var boolean Whether a timeout has occurred
67          */
68         private static $isTimeout;
69
70         /**
71          * Checks if the provided network can be probed
72          *
73          * @param string $network
74          *
75          * @return boolean
76          */
77         public static function isProbable(string $network): bool
78         {
79                 return (in_array($network, array_merge(Protocol::FEDERATED, [Protocol::ZOT, Protocol::PHANTOM])));
80         }
81
82         /**
83          * Remove stuff from an URI that doesn't belong there
84          *
85          * @param string $rawUri
86          * @return string Cleaned URI
87          */
88         public static function cleanURI(string $rawUri): string
89         {
90                 // At first remove leading and trailing junk
91                 $rawUri = trim($rawUri, "@#?: \t\n\r\0\x0B");
92
93                 $rawUri = Network::convertToIdn($rawUri);
94
95                 $uri = new Uri($rawUri);
96                 if (!$uri->getScheme()) {
97                         return $uri->__toString();
98                 }
99
100                 // Remove the URL fragment, since these shouldn't be part of any profile URL
101                 $uri = $uri->withFragment('');
102
103                 return $uri->__toString();
104         }
105
106         /**
107          * Rearrange the array so that it always has the same order
108          *
109          * @param array $data Unordered data
110          * @return array Ordered data
111          */
112         private static function rearrangeData(array $data): array
113         {
114                 $fields = ['name', 'given_name', 'family_name', 'nick', 'guid', 'url', 'addr', 'alias',
115                         'photo', 'photo_medium', 'photo_small', 'header',
116                                 'account-type', 'community', 'keywords', 'location', 'about', 'xmpp', 'matrix',
117                                 'hide', 'batch', 'notify', 'poll', 'request', 'confirm', 'subscribe', 'poco',
118                                 'following', 'followers', 'inbox', 'outbox', 'sharedinbox',
119                                 'priority', 'network', 'pubkey', 'manually-approve', 'baseurl', 'gsid'];
120
121                 $numeric_fields = ['gsid', 'hide', 'account-type', 'manually-approve'];
122
123                 if (!empty($data['photo']) && !Network::isValidHttpUrl($data['photo'])) {
124                         Logger::info('Invalid URL for photo', ['url' => $data['url'], 'photo' => $data['photo']]);
125                         unset($data['photo']);
126                 }
127
128                 $newdata = [];
129                 foreach ($fields as $field) {
130                         if (isset($data[$field])) {
131                                 if (in_array($field, $numeric_fields)) {
132                                         $newdata[$field] = (int)$data[$field];
133                                 } else {
134                                         $newdata[$field] = trim($data[$field]);
135                                 }
136                         } elseif (!in_array($field, $numeric_fields)) {
137                                 $newdata[$field] = '';
138                         } else {
139                                 $newdata[$field] = null;
140                         }
141                 }
142
143                 $newdata['networks'] = [];
144                 foreach ([Protocol::DIASPORA, Protocol::OSTATUS] as $network) {
145                         if (!empty($data['networks'][$network])) {
146                                 $data['networks'][$network]['subscribe'] = $newdata['subscribe'] ?? '';
147                                 if (empty($data['networks'][$network]['baseurl'])) {
148                                         $data['networks'][$network]['baseurl'] = $newdata['baseurl'] ?? '';
149                                 } else {
150                                         $newdata['baseurl'] = $data['networks'][$network]['baseurl'];
151                                 }
152                                 if (!empty($newdata['baseurl'])) {
153                                         $newdata['gsid'] = $data['networks'][$network]['gsid'] = GServer::getID($newdata['baseurl']);
154                                 } else {
155                                         $newdata['gsid'] = $data['networks'][$network]['gsid'] = null;
156                                 }
157
158                                 $newdata['networks'][$network] = self::rearrangeData($data['networks'][$network]);
159                                 unset($newdata['networks'][$network]['networks']);
160                         }
161                 }
162
163                 // We don't use the "priority" field anymore and replace it with a dummy.
164                 $newdata['priority'] = 0;
165
166                 return $newdata;
167         }
168
169         /**
170          * Check if the hostname belongs to the own server
171          *
172          * @param string $host The hostname that is to be checked
173          * @return bool Does the tested hostname belong to the own server?
174          */
175         private static function ownHost(string $host): bool
176         {
177                 $own_host = DI::baseUrl()->getHostname();
178
179                 $parts = parse_url($host);
180
181                 if (!isset($parts['scheme'])) {
182                         $parts = parse_url('http://' . $host);
183                 }
184
185                 if (!isset($parts['host'])) {
186                         return false;
187                 }
188                 return $parts['host'] == $own_host;
189         }
190
191         /**
192          * Probes for webfinger path via "host-meta"
193          *
194          * We have to check if the servers in the future still will offer this.
195          * It seems as if it was dropped from the standard.
196          *
197          * @param string $host The host part of an url
198          *
199          * @return array with template and type of the webfinger template for JSON or XML
200          * @throws HTTPException\InternalServerErrorException
201          */
202         private static function hostMeta(string $host): array
203         {
204                 // Reset the static variable
205                 self::$baseurl = '';
206
207                 // Handles the case when the hostname contains the scheme
208                 if (!parse_url($host, PHP_URL_SCHEME)) {
209                         $ssl_url = 'https://' . $host . self::HOST_META;
210                         $url = 'http://' . $host . self::HOST_META;
211                 } else {
212                         $ssl_url = $host . self::HOST_META;
213                         $url = '';
214                 }
215
216                 $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20);
217
218                 Logger::info('Probing', ['host' => $host, 'ssl_url' => $ssl_url, 'url' => $url, 'callstack' => System::callstack(20)]);
219                 $xrd = null;
220
221                 $curlResult = DI::httpClient()->get($ssl_url, HttpClientAccept::XRD_XML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
222                 $ssl_connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
223                 if ($curlResult->isSuccess()) {
224                         $xml = $curlResult->getBody();
225                         $xrd = XML::parseString($xml, true);
226                         if (!empty($url)) {
227                                 $host_url = 'https://' . $host;
228                         } else {
229                                 $host_url = $host;
230                         }
231                 } elseif ($curlResult->isTimeout()) {
232                         Logger::info('Probing timeout', ['url' => $ssl_url]);
233                         self::$isTimeout = true;
234                         return [];
235                 }
236
237                 if (!is_object($xrd) && !empty($url)) {
238                         $curlResult = DI::httpClient()->get($url, HttpClientAccept::XRD_XML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
239                         $connection_error = ($curlResult->getErrorNumber() == CURLE_COULDNT_CONNECT) || ($curlResult->getReturnCode() == 0);
240                         if ($curlResult->isTimeout()) {
241                                 Logger::info('Probing timeout', ['url' => $url]);
242                                 self::$isTimeout = true;
243                                 return [];
244                         } elseif ($connection_error && $ssl_connection_error) {
245                                 self::$isTimeout = true;
246                                 return [];
247                         }
248
249                         $xml = $curlResult->getBody();
250                         $xrd = XML::parseString($xml, true);
251                         $host_url = 'http://'.$host;
252                 }
253                 if (!is_object($xrd)) {
254                         Logger::info('No xrd object found', ['host' => $host]);
255                         return [];
256                 }
257
258                 $links = XML::elementToArray($xrd);
259                 if (!isset($links['xrd']['link'])) {
260                         Logger::info('No xrd data found', ['host' => $host]);
261                         return [];
262                 }
263
264                 $lrdd = [];
265
266                 foreach ($links['xrd']['link'] as $value => $link) {
267                         if (!empty($link['@attributes'])) {
268                                 $attributes = $link['@attributes'];
269                         } elseif ($value == '@attributes') {
270                                 $attributes = $link;
271                         } else {
272                                 continue;
273                         }
274
275                         if (!empty($attributes['rel']) && $attributes['rel'] == 'lrdd' && !empty($attributes['template'])) {
276                                 $type = (empty($attributes['type']) ? '' : $attributes['type']);
277
278                                 $lrdd[$type] = $attributes['template'];
279                         }
280                 }
281
282                 if (Network::isUrlBlocked($host_url)) {
283                         Logger::info('Domain is blocked', ['url' => $host]);
284                         return [];
285                 }
286
287                 self::$baseurl = $host_url;
288
289                 Logger::info('Probing successful', ['host' => $host]);
290
291                 return $lrdd;
292         }
293
294         /**
295          * Check an URI for LRDD data
296          *
297          * @param string $uri     Address that should be probed
298          * @return array uri data
299          * @throws HTTPException\InternalServerErrorException
300          */
301         public static function lrdd(string $uri): array
302         {
303                 $data = self::getWebfingerArray($uri);
304                 if (empty($data)) {
305                         return [];
306                 }
307                 $webfinger = $data['webfinger'];
308
309                 if (empty($webfinger['links'])) {
310                         Logger::info('No webfinger links found', ['uri' => $uri]);
311                         return [];
312                 }
313
314                 $data = [];
315
316                 foreach ($webfinger['links'] as $link) {
317                         $data[] = ['@attributes' => $link];
318                 }
319
320                 if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
321                         foreach ($webfinger['aliases'] as $alias) {
322                                 $data[] = [
323                                         '@attributes' => [
324                                                 'rel' => 'alias',
325                                                 'href' => $alias,
326                                         ]
327                                 ];
328                         }
329                 }
330
331                 return $data;
332         }
333
334         /**
335          * Fetch information (protocol endpoints and user information) about a given uri
336          *
337          * @param string  $uri     Address that should be probed
338          * @param string  $network Test for this specific network
339          * @param integer $uid     User ID for the probe (only used for mails)
340          * @param boolean $cache   Use cached values?
341          *
342          * @return array uri data
343          * @throws HTTPException\InternalServerErrorException
344          * @throws \ImagickException
345          */
346         public static function uri(string $uri, string $network = '', int $uid = -1): array
347         {
348                 // Local profiles aren't probed via network
349                 if (empty($network) && Contact::isLocal($uri)) {
350                         $data = self::localProbe($uri);
351                         if (!empty($data)) {
352                                 return $data;
353                         }
354                 }
355
356                 if ($uid == -1) {
357                         $uid = DI::userSession()->getLocalUserId();
358                 }
359
360                 if (empty($network) || ($network == Protocol::ACTIVITYPUB)) {
361                         $ap_profile = ActivityPub::probeProfile($uri);
362                 } else {
363                         $ap_profile = [];
364                 }
365
366                 self::$isTimeout = false;
367
368                 if ($network != Protocol::ACTIVITYPUB) {
369                         $data = self::detect($uri, $network, $uid, $ap_profile);
370                         if (!is_array($data)) {
371                                 $data = [];
372                         }
373                         if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) {
374                                 $networks = $data['networks'] ?? [];
375                                 unset($data['networks']);
376                                 if (!empty($data['network'])) {
377                                         $networks[$data['network']] = $data;
378                                 }
379                                 $data = $ap_profile;
380                                 $data['networks'] = $networks;
381                         } elseif (!empty($ap_profile)) {
382                                 $ap_profile['batch'] = '';
383                                 $data = array_merge($ap_profile, $data);
384                         }
385                 } else {
386                         $data = $ap_profile;
387                 }
388
389                 if (!isset($data['url'])) {
390                         $data['url'] = $uri;
391                 }
392
393                 if (empty($data['photo'])) {
394                         $data['photo'] = DI::baseUrl() . Contact::DEFAULT_AVATAR_PHOTO;
395                 }
396
397                 if (empty($data['name'])) {
398                         if (!empty($data['nick'])) {
399                                 $data['name'] = $data['nick'];
400                         }
401
402                         if (empty($data['name'])) {
403                                 $data['name'] = $data['url'];
404                         }
405                 }
406
407                 if (empty($data['nick'])) {
408                         $data['nick'] = strtolower($data['name']);
409
410                         if (strpos($data['nick'], ' ')) {
411                                 $data['nick'] = trim(substr($data['nick'], 0, strpos($data['nick'], ' ')));
412                         }
413                 }
414
415                 if (!empty($data['baseurl']) && empty($data['gsid'])) {
416                         $data['gsid'] = GServer::getID($data['baseurl']);
417                 }
418
419                 if (empty($data['network'])) {
420                         $data['network'] = Protocol::PHANTOM;
421                 }
422
423                 // Ensure that local connections always are DFRN
424                 if (($network == '') && ($data['network'] != Protocol::PHANTOM) && (self::ownHost($data['baseurl'] ?? '') || self::ownHost($data['url']))) {
425                         $data['network'] = Protocol::DFRN;
426                 }
427
428                 if (!isset($data['hide']) && in_array($data['network'], Protocol::FEDERATED)) {
429                         $data['hide'] = self::getHideStatus($data['url']);
430                 }
431
432                 return self::rearrangeData($data);
433         }
434
435
436         /**
437          * Fetches the "hide" status from the profile
438          *
439          * @param string $url URL of the profile
440          * @return boolean "hide" status
441          */
442         private static function getHideStatus(string $url): bool
443         {
444                 $curlResult = DI::httpClient()->get($url, HttpClientAccept::HTML, [HttpClientOptions::CONTENT_LENGTH => 1000000]);
445                 if (!$curlResult->isSuccess()) {
446                         return false;
447                 }
448
449                 // If it isn't a HTML file then exit
450                 if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
451                         return false;
452                 }
453
454                 $body = $curlResult->getBody();
455                 if (empty($body)) {
456                         return false;
457                 }
458
459                 $doc = new DOMDocument();
460                 @$doc->loadHTML($body);
461
462                 $xpath = new DOMXPath($doc);
463
464                 $list = $xpath->query('//meta[@name]');
465                 foreach ($list as $node) {
466                         $meta_tag = [];
467                         if ($node->attributes->length) {
468                                 foreach ($node->attributes as $attribute) {
469                                         $meta_tag[$attribute->name] = $attribute->value;
470                                 }
471                         }
472
473                         if (empty($meta_tag['content'])) {
474                                 continue;
475                         }
476
477                         $content = strtolower(trim($meta_tag['content']));
478
479                         switch (strtolower(trim($meta_tag['name']))) {
480                                 case 'dfrn-global-visibility':
481                                         if ($content == 'false') {
482                                                 return true;
483                                         }
484                                         break;
485                                 case 'robots':
486                                         if (strpos($content, 'noindex') !== false) {
487                                                 return true;
488                                         }
489                                         break;
490                         }
491                 }
492
493                 return false;
494         }
495
496         /**
497          * Fetch the "subscribe" and add it to the result
498          *
499          * @param array $result Result array
500          * @param array $webfinger Webfinger data
501          *
502          * @return array result Altered/unaltered result array
503          */
504         private static function getSubscribeLink(array $result, array $webfinger): array
505         {
506                 if (empty($webfinger['links'])) {
507                         return $result;
508                 }
509
510                 foreach ($webfinger['links'] as $link) {
511                         if (!empty($link['template']) && ($link['rel'] === ActivityNamespace::OSTATUSSUB)) {
512                                 $result['subscribe'] = $link['template'];
513                         }
514                 }
515
516                 return $result;
517         }
518
519         /**
520          * Get webfinger data from a given URI
521          *
522          * @param string $uri URI
523          *
524          * @return array Webfinger data
525          * @throws HTTPException\InternalServerErrorException
526          */
527         public static function getWebfingerArray(string $uri): array
528         {
529                 $parts = parse_url($uri);
530
531                 if (!empty($parts['scheme']) && !empty($parts['host'])) {
532                         $host = $parts['host'];
533                         if (!empty($parts['port'])) {
534                                 $host .= ':' . $parts['port'];
535                         }
536
537                         $baseurl = $parts['scheme'] . '://' . $host;
538
539                         $nick = '';
540                         $addr = '';
541
542                         $path_parts = explode('/', trim($parts['path'] ?? '', '/'));
543                         if (!empty($path_parts)) {
544                                 $nick = ltrim(end($path_parts), '@');
545                                 $addr = $nick . '@' . $host;
546                         }
547
548                         $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
549                         if (empty($webfinger)) {
550                                 $lrdd = self::hostMeta($host);
551                         }
552
553                         if (empty($webfinger) && empty($lrdd)) {
554                                 while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) {
555                                         $host    .= '/' . array_shift($path_parts);
556                                         $baseurl = $parts['scheme'] . '://' . $host;
557
558                                         if (!empty($nick)) {
559                                                 $addr = $nick . '@' . $host;
560                                         }
561
562                                         $webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
563                                         if (empty($webfinger)) {
564                                                 $lrdd = self::hostMeta($host);
565                                         }
566                                 }
567
568                                 if (empty($lrdd) && empty($webfinger)) {
569                                         return [];
570                                 }
571                         }
572                 } elseif (strstr($uri, '@')) {
573                         // Remove "acct:" from the URI
574                         $uri = str_replace('acct:', '', $uri);
575
576                         $host = substr($uri, strpos($uri, '@') + 1);
577                         $nick = substr($uri, 0, strpos($uri, '@'));
578                         $addr = $uri;
579
580                         $webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
581                         if (self::$isTimeout) {
582                                 return [];
583                         }
584
585                         if (empty($webfinger)) {
586                                 $webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, HttpClientAccept::JRD_JSON, $uri, $addr);
587                                 if (self::$isTimeout) {
588                                         return [];
589                                 }
590                         } else {
591                                 $baseurl = 'https://' . $host;
592                         }
593
594                         if (empty($webfinger)) {
595                                 $lrdd = self::hostMeta($host);
596                                 if (self::$isTimeout) {
597                                         return [];
598                                 }
599                                 $baseurl = self::$baseurl;
600                         } else {
601                                 $baseurl = 'http://' . $host;
602                         }
603                 } else {
604                         Logger::info('URI was not detectable', ['uri' => $uri]);
605                         return [];
606                 }
607
608                 if (empty($webfinger)) {
609                         foreach ($lrdd as $type => $template) {
610                                 if ($webfinger) {
611                                         continue;
612                                 }
613
614                                 $webfinger = self::getWebfinger($template, $type, $uri, $addr);
615                         }
616                 }
617
618                 if (empty($webfinger)) {
619                         return [];
620                 }
621
622                 if ($webfinger['detected'] == $addr) {
623                         $webfinger['nick'] = $nick;
624                         $webfinger['addr'] = $addr;
625                 }
626
627                 $webfinger['baseurl'] = $baseurl;
628
629                 return $webfinger;
630         }
631
632         /**
633          * Perform network request for webfinger data
634          *
635          * @param string $template
636          * @param string $type
637          * @param string $uri
638          * @param string $addr
639          *
640          * @return array webfinger results
641          */
642         private static function getWebfinger(string $template, string $type, string $uri, string $addr): array
643         {
644                 if (Network::isUrlBlocked($template)) {
645                         Logger::info('Domain is blocked', ['url' => $template]);
646                         return [];
647                 }
648
649                 // First try the address because this is the primary purpose of webfinger
650                 if (!empty($addr)) {
651                         $detected = $addr;
652                         $path = str_replace('{uri}', urlencode('acct:' . $addr), $template);
653                         $webfinger = self::webfinger($path, $type);
654                         if (self::$isTimeout) {
655                                 return [];
656                         }
657                 }
658
659                 // Then try the URI
660                 if (empty($webfinger) && $uri != $addr) {
661                         $detected = $uri;
662                         $path = str_replace('{uri}', urlencode($uri), $template);
663                         $webfinger = self::webfinger($path, $type);
664                         if (self::$isTimeout) {
665                                 return [];
666                         }
667                 }
668
669                 if (empty($webfinger)) {
670                         return [];
671                 }
672
673                 return ['webfinger' => $webfinger, 'detected' => $detected];
674         }
675
676         /**
677          * Fetch information (protocol endpoints and user information) about a given uri
678          *
679          * This function is only called by the "uri" function that adds caching and rearranging of data.
680          *
681          * @param string  $uri        Address that should be probed
682          * @param string  $network    Test for this specific network
683          * @param integer $uid        User ID for the probe (only used for mails)
684          * @param array   $ap_profile Previously probed AP profile
685          * @return array URI data
686          * @throws HTTPException\InternalServerErrorException
687          */
688         private static function detect(string $uri, string $network, int $uid, array $ap_profile): array
689         {
690                 $hookData = [
691                         'uri'     => $uri,
692                         'network' => $network,
693                         'uid'     => $uid,
694                         'result'  => null,
695                 ];
696
697                 Hook::callAll('probe_detect', $hookData);
698
699                 if (isset($hookData['result'])) {
700                         return is_array($hookData['result']) ? $hookData['result'] : [];
701                 }
702
703                 $parts = parse_url($uri);
704                 if (empty($parts['scheme']) && empty($parts['host']) && (empty($parts['path']) || strpos($parts['path'], '@') === false)) {
705                         Logger::info('URI was not detectable', ['uri' => $uri]);
706                         return [];
707                 }
708
709                 // If the URI starts with "mailto:" then jump directly to the mail detection
710                 if (strpos($uri, 'mailto:') !== false) {
711                         $uri = str_replace('mailto:', '', $uri);
712                         return self::mail($uri, $uid);
713                 }
714
715                 if ($network == Protocol::MAIL) {
716                         return self::mail($uri, $uid);
717                 }
718
719                 Logger::info('Probing start', ['uri' => $uri]);
720
721                 if (!empty($ap_profile['addr']) && ($ap_profile['addr'] != $uri)) {
722                         $data = self::getWebfingerArray($ap_profile['addr']);
723                 }
724
725                 if (empty($data)) {
726                         $data = self::getWebfingerArray($uri);
727                 }
728
729                 if (empty($data)) {
730                         if (!empty($parts['scheme'])) {
731                                 return self::feed($uri);
732                         } elseif (!empty($uid)) {
733                                 return self::mail($uri, $uid);
734                         } else {
735                                 return [];
736                         }
737                 }
738
739                 $webfinger = $data['webfinger'];
740                 $nick = $data['nick'] ?? '';
741                 $addr = $data['addr'] ?? '';
742                 $baseurl = $data['baseurl'] ?? '';
743
744                 $result = [];
745
746                 if (in_array($network, ['', Protocol::DFRN])) {
747                         $result = self::dfrn($webfinger);
748                 }
749                 if ((!$result && ($network == '')) || ($network == Protocol::DIASPORA)) {
750                         $result = self::diaspora($webfinger);
751                 } else {
752                         $result['networks'][Protocol::DIASPORA] = self::diaspora($webfinger);
753                 }
754                 if ((!$result && ($network == '')) || ($network == Protocol::OSTATUS)) {
755                         $result = self::ostatus($webfinger);
756                 } else {
757                         $result['networks'][Protocol::OSTATUS] = self::ostatus($webfinger);
758                 }
759                 if (in_array($network, ['', Protocol::ZOT])) {
760                         $result = self::zot($webfinger, $result, $baseurl);
761                 }
762                 if ((!$result && ($network == '')) || ($network == Protocol::PUMPIO)) {
763                         $result = self::pumpio($webfinger, $addr, $baseurl);
764                 }
765                 if (empty($result['network']) && empty($ap_profile['network']) || ($network == Protocol::FEED)) {
766                         $result = self::feed($uri);
767                 } else {
768                         // We overwrite the detected nick with our try if the previois routines hadn't detected it.
769                         // Additionally, it is overwritten when the nickname doesn't make sense (contains spaces).
770                         if ((empty($result['nick']) || (strstr($result['nick'], ' '))) && ($nick != '')) {
771                                 $result['nick'] = $nick;
772                         }
773
774                         if (empty($result['addr']) && ($addr != '')) {
775                                 $result['addr'] = $addr;
776                         }
777                 }
778
779                 $result = self::getSubscribeLink($result, $webfinger);
780
781                 if (empty($result['network'])) {
782                         $result['network'] = Protocol::PHANTOM;
783                 }
784
785                 if (empty($result['baseurl']) && !empty($baseurl)) {
786                         $result['baseurl'] = $baseurl;
787                 }
788
789                 if (empty($result['url'])) {
790                         $result['url'] = $uri;
791                 }
792
793                 Logger::info('Probing done', ['uri' => $uri, 'network' => $result['network']]);
794
795                 return $result;
796         }
797
798         /**
799          * Check for Zot contact
800          *
801          * @param array  $webfinger Webfinger data
802          * @param array  $data      previously probed data
803          * @param string $baseUrl   Base URL
804          *
805          * @return array Zot data
806          * @throws HTTPException\InternalServerErrorException
807          */
808         private static function zot(array $webfinger, array $data, string $baseurl): array
809         {
810                 if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
811                         foreach ($webfinger['aliases'] as $alias) {
812                                 if (substr($alias, 0, 5) == 'acct:') {
813                                         $data['addr'] = substr($alias, 5);
814                                 }
815                         }
816                 }
817
818                 if (!empty($webfinger['subject']) && (substr($webfinger['subject'], 0, 5) == 'acct:')) {
819                         $data['addr'] = substr($webfinger['subject'], 5);
820                 }
821
822                 $zot_url = '';
823                 foreach ($webfinger['links'] as $link) {
824                         if (($link['rel'] == 'http://purl.org/zot/protocol') && !empty($link['href'])) {
825                                 $zot_url = $link['href'];
826                         }
827                 }
828
829                 if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) {
830                         $condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']];
831                         if (!DBA::exists('gserver', $condition)) {
832                                 return $data;
833                         }
834                         $zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr'];
835                 }
836
837                 if (empty($zot_url)) {
838                         return $data;
839                 }
840
841                 $data = self::pollZot($zot_url, $data);
842
843                 if (!empty($data['url']) && !empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
844                         foreach ($webfinger['aliases'] as $alias) {
845                                 if (!strstr($alias, '@') && Strings::normaliseLink($alias) != Strings::normaliseLink($data['url'])) {
846                                         $data['alias'] = $alias;
847                                 }
848                         }
849                 }
850
851                 return $data;
852         }
853
854         public static function pollZot(string $url, array $data): array
855         {
856                 $curlResult = DI::httpClient()->get($url, HttpClientAccept::JSON);
857                 if ($curlResult->isTimeout()) {
858                         return $data;
859                 }
860                 $content = $curlResult->getBody();
861                 if (!$content) {
862                         return $data;
863                 }
864
865                 $json = json_decode($content, true);
866                 if (!is_array($json)) {
867                         return $data;
868                 }
869
870                 if (empty($data['network'])) {
871                         if (!empty($json['protocols']) && in_array('zot', $json['protocols'])) {
872                                 $data['network'] = Protocol::ZOT;
873                         } elseif (!isset($json['protocols'])) {
874                                 $data['network'] = Protocol::ZOT;
875                         }
876                 }
877
878                 if (!empty($json['guid']) && empty($data['guid'])) {
879                         $data['guid'] = $json['guid'];
880                 }
881                 if (!empty($json['key']) && empty($data['pubkey'])) {
882                         $data['pubkey'] = $json['key'];
883                 }
884                 if (!empty($json['name'])) {
885                         $data['name'] = $json['name'];
886                 }
887                 if (!empty($json['photo'])) {
888                         $data['photo'] = $json['photo'];
889                         if (!empty($json['photo_updated'])) {
890                                 $data['photo'] .= '?rev=' . urlencode($json['photo_updated']);
891                         }
892                 }
893                 if (!empty($json['address'])) {
894                         $data['addr'] = $json['address'];
895                 }
896                 if (!empty($json['url'])) {
897                         $data['url'] = $json['url'];
898                 }
899                 if (!empty($json['connections_url'])) {
900                         $data['poco'] = $json['connections_url'];
901                 }
902                 if (isset($json['searchable'])) {
903                         $data['hide'] = !$json['searchable'];
904                 }
905                 if (!empty($json['public_forum'])) {
906                         $data['community'] = $json['public_forum'];
907                         $data['account-type'] = User::PAGE_FLAGS_COMMUNITY;
908                 }
909
910                 if (!empty($json['profile'])) {
911                         $profile = $json['profile'];
912                         if (!empty($profile['description'])) {
913                                 $data['about'] = $profile['description'];
914                         }
915                         if (!empty($profile['keywords'])) {
916                                 $keywords = implode(', ', $profile['keywords']);
917                                 if (!empty($keywords)) {
918                                         $data['keywords'] = $keywords;
919                                 }
920                         }
921
922                         $loc = [];
923                         if (!empty($profile['region'])) {
924                                 $loc['region'] = $profile['region'];
925                         }
926                         if (!empty($profile['country'])) {
927                                 $loc['country-name'] = $profile['country'];
928                         }
929                         $location = Profile::formatLocation($loc);
930                         if (!empty($location)) {
931                                 $data['location'] = $location;
932                         }
933                 }
934
935                 return $data;
936         }
937
938         /**
939          * Perform a webfinger request.
940          *
941          * For details see RFC 7033: <https://tools.ietf.org/html/rfc7033>
942          *
943          * @param string $url  Address that should be probed
944          * @param string $type type
945          *
946          * @return array webfinger data
947          * @throws HTTPException\InternalServerErrorException
948          */
949         public static function webfinger(string $url, string $type): array
950         {
951                 $xrd_timeout = DI::config()->get('system', 'xrd_timeout', 20);
952
953                 $curlResult = DI::httpClient()->get($url, $type, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
954                 if ($curlResult->isTimeout()) {
955                         self::$isTimeout = true;
956                         return [];
957                 }
958                 $data = $curlResult->getBody();
959
960                 $webfinger = json_decode($data, true);
961                 if (!empty($webfinger)) {
962                         if (!isset($webfinger['links'])) {
963                                 Logger::info('No json webfinger links', ['url' => $url]);
964                                 return [];
965                         }
966                         return $webfinger;
967                 }
968
969                 // If it is not JSON, maybe it is XML
970                 $xrd = XML::parseString($data, true);
971                 if (!is_object($xrd)) {
972                         Logger::info('No webfinger data retrievable', ['url' => $url]);
973                         return [];
974                 }
975
976                 $xrd_arr = XML::elementToArray($xrd);
977                 if (!isset($xrd_arr['xrd']['link'])) {
978                         Logger::info('No XML webfinger links', ['url' => $url]);
979                         return [];
980                 }
981
982                 $webfinger = [];
983
984                 if (!empty($xrd_arr['xrd']['subject'])) {
985                         $webfinger['subject'] = $xrd_arr['xrd']['subject'];
986                 }
987
988                 if (!empty($xrd_arr['xrd']['alias'])) {
989                         $webfinger['aliases'] = $xrd_arr['xrd']['alias'];
990                 }
991
992                 $webfinger['links'] = [];
993
994                 foreach ($xrd_arr['xrd']['link'] as $value => $data) {
995                         if (!empty($data['@attributes'])) {
996                                 $attributes = $data['@attributes'];
997                         } elseif ($value == '@attributes') {
998                                 $attributes = $data;
999                         } else {
1000                                 continue;
1001                         }
1002
1003                         $webfinger['links'][] = $attributes;
1004                 }
1005                 return $webfinger;
1006         }
1007
1008         /**
1009          * Poll the Friendica specific noscrape page.
1010          *
1011          * "noscrape" is a faster alternative to fetch the data from the hcard.
1012          * This functionality was originally created for the directory.
1013          *
1014          * @param string $noscrape_url Link to the noscrape page
1015          * @param array  $data         The already fetched data
1016          *
1017          * @return array noscrape data
1018          * @throws HTTPException\InternalServerErrorException
1019          */
1020         private static function pollNoscrape(string $noscrape_url, array $data): array
1021         {
1022                 $curlResult = DI::httpClient()->get($noscrape_url, HttpClientAccept::JSON);
1023                 if ($curlResult->isTimeout()) {
1024                         self::$isTimeout = true;
1025                         return $data;
1026                 }
1027                 $content = $curlResult->getBody();
1028                 if (!$content) {
1029                         Logger::info('Empty body', ['url' => $noscrape_url]);
1030                         return $data;
1031                 }
1032
1033                 $json = json_decode($content, true);
1034                 if (!is_array($json)) {
1035                         Logger::info('No json data', ['url' => $noscrape_url]);
1036                         return $data;
1037                 }
1038
1039                 if (!empty($json['fn'])) {
1040                         $data['name'] = $json['fn'];
1041                 }
1042
1043                 if (!empty($json['addr'])) {
1044                         $data['addr'] = $json['addr'];
1045                 }
1046
1047                 if (!empty($json['nick'])) {
1048                         $data['nick'] = $json['nick'];
1049                 }
1050
1051                 if (!empty($json['guid'])) {
1052                         $data['guid'] = $json['guid'];
1053                 }
1054
1055                 if (!empty($json['comm'])) {
1056                         $data['community'] = $json['comm'];
1057                 }
1058
1059                 if (!empty($json['tags'])) {
1060                         $keywords = implode(', ', $json['tags']);
1061                         if ($keywords != '') {
1062                                 $data['keywords'] = $keywords;
1063                         }
1064                 }
1065
1066                 $location = Profile::formatLocation($json);
1067                 if ($location) {
1068                         $data['location'] = $location;
1069                 }
1070
1071                 if (!empty($json['about'])) {
1072                         $data['about'] = $json['about'];
1073                 }
1074
1075                 if (!empty($json['xmpp'])) {
1076                         $data['xmpp'] = $json['xmpp'];
1077                 }
1078
1079                 if (!empty($json['matrix'])) {
1080                         $data['matrix'] = $json['matrix'];
1081                 }
1082
1083                 if (!empty($json['key'])) {
1084                         $data['pubkey'] = $json['key'];
1085                 }
1086
1087                 if (!empty($json['photo'])) {
1088                         $data['photo'] = $json['photo'];
1089                 }
1090
1091                 if (!empty($json['dfrn-request'])) {
1092                         $data['request'] = $json['dfrn-request'];
1093                 }
1094
1095                 if (!empty($json['dfrn-confirm'])) {
1096                         $data['confirm'] = $json['dfrn-confirm'];
1097                 }
1098
1099                 if (!empty($json['dfrn-notify'])) {
1100                         $data['notify'] = $json['dfrn-notify'];
1101                 }
1102
1103                 if (!empty($json['dfrn-poll'])) {
1104                         $data['poll'] = $json['dfrn-poll'];
1105                 }
1106
1107                 if (isset($json['hide'])) {
1108                         $data['hide'] = (bool)$json['hide'];
1109                 } else {
1110                         $data['hide'] = false;
1111                 }
1112
1113                 return $data;
1114         }
1115
1116         /**
1117          * Check for valid DFRN data
1118          *
1119          * @param array $data DFRN data
1120          *
1121          * @return int Number of errors
1122          */
1123         public static function validDfrn(array $data): int
1124         {
1125                 $errors = 0;
1126                 if (!isset($data['key'])) {
1127                         $errors ++;
1128                 }
1129                 if (!isset($data['dfrn-request'])) {
1130                         $errors ++;
1131                 }
1132                 if (!isset($data['dfrn-confirm'])) {
1133                         $errors ++;
1134                 }
1135                 if (!isset($data['dfrn-notify'])) {
1136                         $errors ++;
1137                 }
1138                 if (!isset($data['dfrn-poll'])) {
1139                         $errors ++;
1140                 }
1141                 return $errors;
1142         }
1143
1144         /**
1145          * Fetch data from a DFRN profile page and via "noscrape"
1146          *
1147          * @param string $profile_link Link to the profile page
1148          * @return array profile data
1149          * @throws HTTPException\InternalServerErrorException
1150          * @throws \ImagickException
1151          */
1152         public static function profile(string $profile_link): array
1153         {
1154                 $data = [];
1155
1156                 Logger::info('Check profile', ['link' => $profile_link]);
1157
1158                 // Fetch data via noscrape - this is faster
1159                 $noscrape_url = str_replace(['/hcard/', '/profile/'], '/noscrape/', $profile_link);
1160                 $data = self::pollNoscrape($noscrape_url, $data);
1161
1162                 if (!isset($data['notify'])
1163                         || !isset($data['confirm'])
1164                         || !isset($data['request'])
1165                         || !isset($data['poll'])
1166                         || !isset($data['name'])
1167                         || !isset($data['photo'])
1168                 ) {
1169                         $data = self::pollHcard($profile_link, $data, true);
1170                 }
1171
1172                 $prof_data = [];
1173
1174                 if (empty($data['addr']) || empty($data['nick'])) {
1175                         $probe_data = self::uri($profile_link);
1176                         $data['addr'] = ($data['addr'] ?? '') ?: $probe_data['addr'];
1177                         $data['nick'] = ($data['nick'] ?? '') ?: $probe_data['nick'];
1178                 }
1179
1180                 $prof_data['addr']         = $data['addr'];
1181                 $prof_data['nick']         = $data['nick'];
1182                 $prof_data['dfrn-request'] = $data['request'] ?? null;
1183                 $prof_data['dfrn-confirm'] = $data['confirm'] ?? null;
1184                 $prof_data['dfrn-notify']  = $data['notify']  ?? null;
1185                 $prof_data['dfrn-poll']    = $data['poll']    ?? null;
1186                 $prof_data['photo']        = $data['photo']   ?? null;
1187                 $prof_data['fn']           = $data['name']    ?? null;
1188                 $prof_data['key']          = $data['pubkey']  ?? null;
1189
1190                 Logger::debug('Result', ['link' => $profile_link, 'data' => $prof_data]);
1191
1192                 return $prof_data;
1193         }
1194
1195         /**
1196          * Check for DFRN contact
1197          *
1198          * @param array $webfinger Webfinger data
1199          * @return array DFRN data
1200          * @throws HTTPException\InternalServerErrorException
1201          */
1202         private static function dfrn(array $webfinger): array
1203         {
1204                 $hcard_url = '';
1205                 $data = [];
1206                 // The array is reversed to take into account the order of preference for same-rel links
1207                 // See: https://tools.ietf.org/html/rfc7033#section-4.4.4
1208                 foreach (array_reverse($webfinger['links']) as $link) {
1209                         if (($link['rel'] == ActivityNamespace::DFRN) && !empty($link['href'])) {
1210                                 $data['network'] = Protocol::DFRN;
1211                         } elseif (($link['rel'] == ActivityNamespace::FEED) && !empty($link['href'])) {
1212                                 $data['poll'] = $link['href'];
1213                         } elseif (($link['rel'] == 'http://webfinger.net/rel/profile-page') && (($link['type'] ?? '') == 'text/html') && !empty($link['href'])) {
1214                                 $data['url'] = $link['href'];
1215                         } elseif (($link['rel'] == 'http://microformats.org/profile/hcard') && !empty($link['href'])) {
1216                                 $hcard_url = $link['href'];
1217                         } elseif (($link['rel'] == ActivityNamespace::POCO) && !empty($link['href'])) {
1218                                 $data['poco'] = $link['href'];
1219                         } elseif (($link['rel'] == 'http://webfinger.net/rel/avatar') && !empty($link['href'])) {
1220                                 $data['photo'] = $link['href'];
1221                         } elseif (($link['rel'] == 'http://joindiaspora.com/seed_location') && !empty($link['href'])) {
1222                                 $data['baseurl'] = trim($link['href'], '/');
1223                         } elseif (($link['rel'] == 'http://joindiaspora.com/guid') && !empty($link['href'])) {
1224                                 $data['guid'] = $link['href'];
1225                         } elseif (($link['rel'] == 'diaspora-public-key') && !empty($link['href'])) {
1226                                 $data['pubkey'] = base64_decode($link['href']);
1227
1228                                 if (strstr($data['pubkey'], 'RSA ')) {
1229                                         $data['pubkey'] = Crypto::rsaToPem($data['pubkey']);
1230                                 }
1231                         }
1232                 }
1233
1234                 if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
1235                         foreach ($webfinger['aliases'] as $alias) {
1236                                 if (empty($data['url']) && !strstr($alias, '@')) {
1237                                         $data['url'] = $alias;
1238                                 } elseif (!strstr($alias, '@') && Strings::normaliseLink($alias) != Strings::normaliseLink($data['url'])) {
1239                                         $data['alias'] = $alias;
1240                                 } elseif (substr($alias, 0, 5) == 'acct:') {
1241                                         $data['addr'] = substr($alias, 5);
1242                                 }
1243                         }
1244                 }
1245
1246                 if (!empty($webfinger['subject']) && (substr($webfinger['subject'], 0, 5) == 'acct:')) {
1247                         $data['addr'] = substr($webfinger['subject'], 5);
1248                 }
1249
1250                 if (!isset($data['network']) || ($hcard_url == '')) {
1251                         return [];
1252                 }
1253
1254                 // Fetch data via noscrape - this is faster
1255                 $noscrape_url = str_replace('/hcard/', '/noscrape/', $hcard_url);
1256                 $data = self::pollNoscrape($noscrape_url, $data);
1257
1258                 if (isset($data['notify'])
1259                         && isset($data['confirm'])
1260                         && isset($data['request'])
1261                         && isset($data['poll'])
1262                         && isset($data['name'])
1263                         && isset($data['photo'])
1264                 ) {
1265                         return $data;
1266                 }
1267
1268                 $data = self::pollHcard($hcard_url, $data, true);
1269
1270                 return $data;
1271         }
1272
1273         /**
1274          * Poll the hcard page (Diaspora and Friendica specific)
1275          *
1276          * @param string  $hcard_url Link to the hcard page
1277          * @param array   $data      The already fetched data
1278          * @param boolean $dfrn      Poll DFRN specific data
1279          * @return array hcard data
1280          * @throws HTTPException\InternalServerErrorException
1281          */
1282         private static function pollHcard(string $hcard_url, array $data, bool $dfrn = false): array
1283         {
1284                 $curlResult = DI::httpClient()->get($hcard_url, HttpClientAccept::HTML);
1285                 if ($curlResult->isTimeout()) {
1286                         self::$isTimeout = true;
1287                         return [];
1288                 }
1289                 $content = $curlResult->getBody();
1290                 if (empty($content)) {
1291                         return [];
1292                 }
1293
1294                 $doc = new DOMDocument();
1295                 if (!@$doc->loadHTML($content)) {
1296                         return [];
1297                 }
1298
1299                 $xpath = new DomXPath($doc);
1300
1301                 $vcards = $xpath->query("//div[contains(concat(' ', @class, ' '), ' vcard ')]");
1302                 if (!is_object($vcards)) {
1303                         return [];
1304                 }
1305
1306                 if (!isset($data['baseurl'])) {
1307                         $data['baseurl'] = '';
1308                 }
1309
1310                 if ($vcards->length > 0) {
1311                         $vcard = $vcards->item(0);
1312
1313                         // We have to discard the guid from the hcard in favour of the guid from lrdd
1314                         // Reason: Hubzilla doesn't use the value "uid" in the hcard like Diaspora does.
1315                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' uid ')]", $vcard); // */
1316                         if (($search->length > 0) && empty($data['guid'])) {
1317                                 $data['guid'] = $search->item(0)->nodeValue;
1318                         }
1319
1320                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' nickname ')]", $vcard); // */
1321                         if ($search->length > 0) {
1322                                 $data['nick'] = $search->item(0)->nodeValue;
1323                         }
1324
1325                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' fn ')]", $vcard); // */
1326                         if ($search->length > 0) {
1327                                 $data['name'] = $search->item(0)->nodeValue;
1328                         }
1329
1330                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' given_name ')]", $vcard); // */
1331                         if ($search->length > 0) {
1332                                 $data["given_name"] = $search->item(0)->nodeValue;
1333                         }
1334
1335                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' family_name ')]", $vcard); // */
1336                         if ($search->length > 0) {
1337                                 $data["family_name"] = $search->item(0)->nodeValue;
1338                         }
1339
1340                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' searchable ')]", $vcard); // */
1341                         if ($search->length > 0) {
1342                                 $data['hide'] = (strtolower($search->item(0)->nodeValue) != 'true');
1343                         }
1344
1345                         $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' key ')]", $vcard); // */
1346                         if ($search->length > 0) {
1347                                 $data['pubkey'] = $search->item(0)->nodeValue;
1348                                 if (strstr($data['pubkey'], 'RSA ')) {
1349                                         $data['pubkey'] = Crypto::rsaToPem($data['pubkey']);
1350                                 }
1351                         }
1352
1353                         $search = $xpath->query("//*[@id='pod_location']", $vcard); // */
1354                         if ($search->length > 0) {
1355                                 $data['baseurl'] = trim($search->item(0)->nodeValue, '/');
1356                         }
1357                 }
1358
1359                 $avatars = [];
1360                 if (!empty($vcard)) {
1361                         $photos = $xpath->query("//*[contains(concat(' ', @class, ' '), ' photo ') or contains(concat(' ', @class, ' '), ' avatar ')]", $vcard); // */
1362                         foreach ($photos as $photo) {
1363                                 $attr = [];
1364                                 foreach ($photo->attributes as $attribute) {
1365                                         $attr[$attribute->name] = trim($attribute->value);
1366                                 }
1367
1368                                 if (isset($attr['src']) && isset($attr['width'])) {
1369                                         $avatars[$attr['width']] = self::fixAvatar($attr['src'], $data['baseurl']);
1370                                 }
1371
1372                                 // We don't have a width. So we just take everything that we got.
1373                                 // This is a Hubzilla workaround which doesn't send a width.
1374                                 if (!$avatars && !empty($attr['src'])) {
1375                                         $avatars[] = self::fixAvatar($attr['src'], $data['baseurl']);
1376                                 }
1377                         }
1378                 }
1379
1380                 if ($avatars) {
1381                         ksort($avatars);
1382                         $data['photo'] = array_pop($avatars);
1383                         if ($avatars) {
1384                                 $data['photo_medium'] = array_pop($avatars);
1385                         }
1386
1387                         if ($avatars) {
1388                                 $data['photo_small'] = array_pop($avatars);
1389                         }
1390                 }
1391
1392                 if ($dfrn) {
1393                         // Poll DFRN specific data
1394                         $search = $xpath->query("//link[contains(concat(' ', @rel), ' dfrn-')]");
1395                         if ($search->length > 0) {
1396                                 foreach ($search as $link) {
1397                                         //$data['request'] = $search->item(0)->nodeValue;
1398                                         $attr = [];
1399                                         foreach ($link->attributes as $attribute) {
1400                                                 $attr[$attribute->name] = trim($attribute->value);
1401                                         }
1402
1403                                         $data[substr($attr['rel'], 5)] = $attr['href'];
1404                                 }
1405                         }
1406
1407                         // Older Friendica versions had used the "uid" field differently than newer versions
1408                         if (!empty($data['nick']) && !empty($data['guid']) && ($data['nick'] == $data['guid'])) {
1409                                 unset($data['guid']);
1410                         }
1411                 }
1412
1413                 return $data;
1414         }
1415
1416         /**
1417          * Check for Diaspora contact
1418          *
1419          * @param array $webfinger Webfinger data
1420          *
1421          * @return array Diaspora data
1422          * @throws HTTPException\InternalServerErrorException
1423          */
1424         private static function diaspora(array $webfinger): array
1425         {
1426                 $hcard_url = '';
1427                 $data = [];
1428
1429                 // The array is reversed to take into account the order of preference for same-rel links
1430                 // See: https://tools.ietf.org/html/rfc7033#section-4.4.4
1431                 foreach (array_reverse($webfinger['links']) as $link) {
1432                         if (($link['rel'] == 'http://microformats.org/profile/hcard') && !empty($link['href'])) {
1433                                 $hcard_url = $link['href'];
1434                         } elseif (($link['rel'] == 'http://joindiaspora.com/seed_location') && !empty($link['href'])) {
1435                                 $data['baseurl'] = trim($link['href'], '/');
1436                         } elseif (($link['rel'] == 'http://joindiaspora.com/guid') && !empty($link['href'])) {
1437                                 $data['guid'] = $link['href'];
1438                         } elseif (($link['rel'] == 'http://webfinger.net/rel/profile-page') && (($link['type'] ?? '') == 'text/html') && !empty($link['href'])) {
1439                                 $data['url'] = $link['href'];
1440                         } elseif (($link['rel'] == 'http://webfinger.net/rel/profile-page') && empty($link['type']) && !empty($link['href'])) {
1441                                 $profile_url = $link['href'];
1442                         } elseif (($link['rel'] == ActivityNamespace::FEED) && !empty($link['href'])) {
1443                                 $data['poll'] = $link['href'];
1444                         } elseif (($link['rel'] == ActivityNamespace::POCO) && !empty($link['href'])) {
1445                                 $data['poco'] = $link['href'];
1446                         } elseif (($link['rel'] == 'salmon') && !empty($link['href'])) {
1447                                 $data['notify'] = $link['href'];
1448                         } elseif (($link['rel'] == 'diaspora-public-key') && !empty($link['href'])) {
1449                                 $data['pubkey'] = base64_decode($link['href']);
1450
1451                                 if (strstr($data['pubkey'], 'RSA ')) {
1452                                         $data['pubkey'] = Crypto::rsaToPem($data['pubkey']);
1453                                 }
1454                         }
1455                 }
1456
1457                 if (empty($data['url']) && !empty($profile_url)) {
1458                         $data['url'] = $profile_url;
1459                 }
1460
1461                 if (empty($data['url']) || empty($hcard_url)) {
1462                         return [];
1463                 }
1464
1465                 if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
1466                         foreach ($webfinger['aliases'] as $alias) {
1467                                 if (Strings::normaliseLink($alias) != Strings::normaliseLink($data['url']) && ! strstr($alias, '@')) {
1468                                         $data['alias'] = $alias;
1469                                 } elseif (substr($alias, 0, 5) == 'acct:') {
1470                                         $data['addr'] = substr($alias, 5);
1471                                 }
1472                         }
1473                 }
1474
1475                 if (!empty($webfinger['subject']) && (substr($webfinger['subject'], 0, 5) == 'acct:')) {
1476                         $data['addr'] = substr($webfinger['subject'], 5);
1477                 }
1478
1479                 // Fetch further information from the hcard
1480                 $data = self::pollHcard($hcard_url, $data);
1481
1482                 if (!$data) {
1483                         return [];
1484                 }
1485
1486                 if (!empty($data['url'])
1487                         && !empty($data['guid'])
1488                         && !empty($data['baseurl'])
1489                         && !empty($data['pubkey'])
1490                         && !empty($hcard_url)
1491                 ) {
1492                         $data['network'] = Protocol::DIASPORA;
1493                         $data['manually-approve'] = false;
1494
1495                         // The Diaspora handle must always be lowercase
1496                         if (!empty($data['addr'])) {
1497                                 $data['addr'] = strtolower($data['addr']);
1498                         }
1499
1500                         // We have to overwrite the detected value for "notify" since Hubzilla doesn't send it
1501                         $data['notify'] = $data['baseurl'] . '/receive/users/' . $data['guid'];
1502                         $data['batch']  = $data['baseurl'] . '/receive/public';
1503                 } else {
1504                         return [];
1505                 }
1506
1507                 return $data;
1508         }
1509
1510         /**
1511          * Check for OStatus contact
1512          *
1513          * @param array $webfinger Webfinger data
1514          * @param bool  $short     Short detection mode
1515          *
1516          * @return array|bool OStatus data or "false" on error or "true" on short mode
1517          * @throws HTTPException\InternalServerErrorException
1518          */
1519         private static function ostatus(array $webfinger, bool $short = false)
1520         {
1521                 $data = [];
1522
1523                 if (!empty($webfinger['aliases']) && is_array($webfinger['aliases'])) {
1524                         foreach ($webfinger['aliases'] as $alias) {
1525                                 if (strstr($alias, '@') && !strstr(Strings::normaliseLink($alias), 'http://')) {
1526                                         $data['addr'] = str_replace('acct:', '', $alias);
1527                                 }
1528                         }
1529                 }
1530
1531                 if (!empty($webfinger['subject']) && strstr($webfinger['subject'], '@')
1532                         && !strstr(Strings::normaliseLink($webfinger['subject']), 'http://')
1533                 ) {
1534                         $data['addr'] = str_replace('acct:', '', $webfinger['subject']);
1535                 }
1536
1537                 if (!empty($webfinger['links'])) {
1538                         // The array is reversed to take into account the order of preference for same-rel links
1539                         // See: https://tools.ietf.org/html/rfc7033#section-4.4.4
1540                         foreach (array_reverse($webfinger['links']) as $link) {
1541                                 if (($link['rel'] == 'http://webfinger.net/rel/profile-page')
1542                                         && (($link['type'] ?? '') == 'text/html')
1543                                         && ($link['href'] != '')
1544                                 ) {
1545                                         $data['url'] = $data['alias'] = $link['href'];
1546                                 } elseif (($link['rel'] == 'salmon') && !empty($link['href'])) {
1547                                         $data['notify'] = $link['href'];
1548                                 } elseif (($link['rel'] == ActivityNamespace::FEED) && !empty($link['href'])) {
1549                                         $data['poll'] = $link['href'];
1550                                 } elseif (($link['rel'] == 'magic-public-key') && !empty($link['href'])) {
1551                                         $pubkey = $link['href'];
1552
1553                                         if (substr($pubkey, 0, 5) === 'data:') {
1554                                                 if (strstr($pubkey, ',')) {
1555                                                         $pubkey = substr($pubkey, strpos($pubkey, ',') + 1);
1556                                                 } else {
1557                                                         $pubkey = substr($pubkey, 5);
1558                                                 }
1559                                         } elseif (Strings::normaliseLink($pubkey) == 'http://') {
1560                                                 $curlResult = DI::httpClient()->get($pubkey, HttpClientAccept::MAGIC_KEY);
1561                                                 if ($curlResult->isTimeout()) {
1562                                                         self::$isTimeout = true;
1563                                                         return $short ? false : [];
1564                                                 }
1565                                                 Logger::debug('Fetched public key', ['Content-Type' => $curlResult->getHeader('Content-Type'), 'url' => $pubkey]);
1566                                                 $pubkey = $curlResult->getBody();
1567                                         }
1568
1569                                         try {
1570                                                 $data['pubkey'] = Salmon::magicKeyToPem($pubkey);
1571                                         } catch (\Throwable $e) {
1572
1573                                         }
1574                                 }
1575                         }
1576                 }
1577
1578                 if (isset($data['notify']) && isset($data['pubkey'])
1579                         && isset($data['poll'])
1580                         && isset($data['url'])
1581                 ) {
1582                         $data['network'] = Protocol::OSTATUS;
1583                         $data['manually-approve'] = false;
1584                 } else {
1585                         return $short ? false : [];
1586                 }
1587
1588                 if ($short) {
1589                         return true;
1590                 }
1591
1592                 // Fetch all additional data from the feed
1593                 $curlResult = DI::httpClient()->get($data['poll'], HttpClientAccept::FEED_XML);
1594                 if ($curlResult->isTimeout()) {
1595                         self::$isTimeout = true;
1596                         return [];
1597                 }
1598                 $feed = $curlResult->getBody();
1599                 $feed_data = Feed::import($feed);
1600                 if (!$feed_data) {
1601                         return [];
1602                 }
1603
1604                 if (!empty($feed_data['header']['author-name'])) {
1605                         $data['name'] = $feed_data['header']['author-name'];
1606                 }
1607                 if (!empty($feed_data['header']['author-nick'])) {
1608                         $data['nick'] = $feed_data['header']['author-nick'];
1609                 }
1610                 if (!empty($feed_data['header']['author-avatar'])) {
1611                         $data['photo'] = self::fixAvatar($feed_data['header']['author-avatar'], $data['url']);
1612                 }
1613                 if (!empty($feed_data['header']['author-id'])) {
1614                         $data['alias'] = $feed_data['header']['author-id'];
1615                 }
1616                 if (!empty($feed_data['header']['author-location'])) {
1617                         $data['location'] = $feed_data['header']['author-location'];
1618                 }
1619                 if (!empty($feed_data['header']['author-about'])) {
1620                         $data['about'] = $feed_data['header']['author-about'];
1621                 }
1622                 // OStatus has serious issues when the the url doesn't fit (ssl vs. non ssl)
1623                 // So we take the value that we just fetched, although the other one worked as well
1624                 if (!empty($feed_data['header']['author-link'])) {
1625                         $data['url'] = $feed_data['header']['author-link'];
1626                 }
1627
1628                 if ($data['url'] == $data['alias']) {
1629                         $data['alias'] = '';
1630                 }
1631
1632                 /// @todo Fetch location and "about" from the feed as well
1633                 return $data;
1634         }
1635
1636         /**
1637          * Fetch data from a pump.io profile page
1638          *
1639          * @param string $profile_link Link to the profile page
1640          *
1641          * @return array Profile data
1642          */
1643         private static function pumpioProfileData(string $profile_link, string $baseurl): array
1644         {
1645                 $curlResult = DI::httpClient()->get($profile_link, HttpClientAccept::HTML);
1646                 if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
1647                         return [];
1648                 }
1649
1650                 $doc = new DOMDocument();
1651                 if (!@$doc->loadHTML($curlResult->getBody())) {
1652                         return [];
1653                 }
1654
1655                 $xpath = new DomXPath($doc);
1656
1657                 $data = [];
1658                 $data['name'] = $xpath->query("//span[contains(@class, 'p-name')]")->item(0)->nodeValue;
1659
1660                 if ($data['name'] == '') {
1661                         // This is ugly - but pump.io doesn't seem to know a better way for it
1662                         $data['name'] = trim($xpath->query("//h1[@class='media-header']")->item(0)->nodeValue);
1663                         $pos = strpos($data['name'], chr(10));
1664                         if ($pos) {
1665                                 $data['name'] = trim(substr($data['name'], 0, $pos));
1666                         }
1667                 }
1668
1669                 $data['location'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'p-locality')]");
1670
1671                 if ($data['location'] == '') {
1672                         $data['location'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'location')]");
1673                 }
1674
1675                 $data['about'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'p-note')]");
1676
1677                 if ($data['about'] == '') {
1678                         $data['about'] = XML::getFirstNodeValue($xpath, "//p[contains(@class, 'summary')]");
1679                 }
1680
1681                 $avatar = $xpath->query("//img[contains(@class, 'u-photo')]")->item(0);
1682                 if (!$avatar) {
1683                         $avatar = $xpath->query("//img[@class='img-rounded media-object']")->item(0);
1684                 }
1685                 if ($avatar) {
1686                         foreach ($avatar->attributes as $attribute) {
1687                                 if ($attribute->name == 'src') {
1688                                         $data['photo'] = trim($attribute->value);
1689                                         if (!empty($data['photo']) && !parse_url($data['photo'], PHP_URL_SCHEME) && !parse_url($data['photo'], PHP_URL_HOST)) {
1690                                                 $data['photo'] = $baseurl . $data['photo'];
1691                                         }
1692                                 }
1693                         }
1694                 }
1695
1696                 return $data;
1697         }
1698
1699         /**
1700          * Check for pump.io contact
1701          *
1702          * @param array  $webfinger Webfinger data
1703          * @param string $addr
1704          *
1705          * @return array pump.io data
1706          */
1707         private static function pumpio(array $webfinger, string $addr, string $baseurl): array
1708         {
1709                 $data = [];
1710                 // The array is reversed to take into account the order of preference for same-rel links
1711                 // See: https://tools.ietf.org/html/rfc7033#section-4.4.4
1712                 foreach (array_reverse($webfinger['links']) as $link) {
1713                         if (($link['rel'] == 'http://webfinger.net/rel/profile-page')
1714                                 && (($link['type'] ?? '') == 'text/html')
1715                                 && ($link['href'] != '')
1716                         ) {
1717                                 $data['url'] = $link['href'];
1718                         } elseif (($link['rel'] == 'activity-inbox') && ($link['href'] != '')) {
1719                                 $data['notify'] = $link['href'];
1720                         } elseif (($link['rel'] == 'activity-outbox') && ($link['href'] != '')) {
1721                                 $data['poll'] = $link['href'];
1722                         } elseif (($link['rel'] == 'dialback') && ($link['href'] != '')) {
1723                                 $data['dialback'] = $link['href'];
1724                         }
1725                 }
1726                 if (isset($data['poll']) && isset($data['notify'])
1727                         && isset($data['dialback'])
1728                         && isset($data['url'])
1729                 ) {
1730                         // by now we use these fields only for the network type detection
1731                         // So we unset all data that isn't used at the moment
1732                         unset($data['dialback']);
1733
1734                         $data['network'] = Protocol::PUMPIO;
1735                 } else {
1736                         return [];
1737                 }
1738
1739                 $profile_data = self::pumpioProfileData($data['url'], $baseurl);
1740
1741                 if (!$profile_data) {
1742                         return [];
1743                 }
1744
1745                 $data = array_merge($data, $profile_data);
1746
1747                 if (($addr != '') && ($data['name'] != '')) {
1748                         $name = trim(str_replace($addr, '', $data['name']));
1749                         if ($name != '') {
1750                                 $data['name'] = $name;
1751                         }
1752                 }
1753
1754                 return $data;
1755         }
1756
1757         /**
1758          * Checks HTML page for RSS feed link
1759          *
1760          * @param string $url  Page link
1761          * @param string $body Page body string
1762          *
1763          * @return string|false Feed link or false if body was invalid HTML document
1764          */
1765         public static function getFeedLink(string $url, string $body)
1766         {
1767                 if (empty($body)) {
1768                         return '';
1769                 }
1770
1771                 $doc = new DOMDocument();
1772                 if (!@$doc->loadHTML($body)) {
1773                         return false;
1774                 }
1775
1776                 $xpath = new DOMXPath($doc);
1777
1778                 $feedUrl = $xpath->evaluate('string(/html/head/link[@type="application/rss+xml" and @rel="alternate"]/@href)');
1779                 $feedUrl = $feedUrl ?: $xpath->evaluate('string(/html/head/link[@type="application/atom+xml" and @rel="alternate"]/@href)');
1780
1781                 $feedUrl = $feedUrl ? self::ensureAbsoluteLinkFromHTMLDoc($feedUrl, $url, $xpath) : '';
1782
1783                 return $feedUrl;
1784         }
1785
1786         /**
1787          * Return an absolute URL in the context of a HTML document retrieved from the provided URL.
1788          *
1789          * Loosely based on RFC 1808
1790          *
1791          * @see https://tools.ietf.org/html/rfc1808
1792          *
1793          * @param string   $href  The potential relative href found in the HTML document
1794          * @param string   $base  The HTML document URL
1795          * @param DOMXPath $xpath The HTML document XPath
1796          *
1797          * @return string Absolute URL
1798          */
1799         private static function ensureAbsoluteLinkFromHTMLDoc(string $href, string $base, DOMXPath $xpath): string
1800         {
1801                 if (filter_var($href, FILTER_VALIDATE_URL)) {
1802                         return $href;
1803                 }
1804
1805                 $base = $xpath->evaluate('string(/html/head/base/@href)') ?: $base;
1806
1807                 $baseParts = parse_url($base);
1808                 if (empty($baseParts['host'])) {
1809                         return $href;
1810                 }
1811
1812                 // Naked domain case (scheme://basehost)
1813                 $path = $baseParts['path'] ?? '/';
1814
1815                 // Remove the filename part of the path if it exists (/base/path/file)
1816                 $path = implode('/', array_slice(explode('/', $path), 0, -1));
1817
1818                 $hrefParts = parse_url($href);
1819
1820                 if (!empty($hrefParts['path'])) {
1821                         // Root path case (/path) including relative scheme case (//host/path)
1822                         if ($hrefParts['path'] && $hrefParts['path'][0] == '/') {
1823                                 $path = $hrefParts['path'];
1824                         } else {
1825                                 $path = $path . '/' . $hrefParts['path'];
1826
1827                                 // Resolve arbitrary relative path
1828                                 // Lifted from https://www.php.net/manual/en/function.realpath.php#84012
1829                                 $parts = array_filter(explode('/', $path), 'strlen');
1830                                 $absolutes = [];
1831                                 foreach ($parts as $part) {
1832                                         if ('.' == $part) continue;
1833                                         if ('..' == $part) {
1834                                                 array_pop($absolutes);
1835                                         } else {
1836                                                 $absolutes[] = $part;
1837                                         }
1838                                 }
1839
1840                                 $path = '/' . implode('/', $absolutes);
1841                         }
1842                 }
1843
1844                 // Relative scheme case (//host/path)
1845                 $baseParts['host'] = $hrefParts['host'] ?? $baseParts['host'];
1846                 $baseParts['path'] = $path;
1847                 unset($baseParts['query']);
1848                 unset($baseParts['fragment']);
1849
1850                 return Network::unparseURL($baseParts);
1851         }
1852
1853         /**
1854          * Check for feed contact
1855          *
1856          * @param string  $url   Profile link
1857          * @param boolean $probe Do a probe if the page contains a feed link
1858          *
1859          * @return array feed data
1860          * @throws HTTPException\InternalServerErrorException
1861          */
1862         private static function feed(string $url, bool $probe = true): array
1863         {
1864                 try {
1865                         $curlResult = DI::httpClient()->get($url, HttpClientAccept::FEED_XML);
1866                 } catch(\Throwable $e) {
1867                         DI::logger()->info('Error requesting feed URL', ['url' => $url, 'exception' => $e]);
1868                         return [];
1869                 }
1870
1871                 if ($curlResult->isTimeout()) {
1872                         self::$isTimeout = true;
1873                         return [];
1874                 }
1875
1876                 $feed = $curlResult->getBody();
1877                 $feed_data = Feed::import($feed);
1878
1879                 if (!$feed_data) {
1880                         if (!$probe) {
1881                                 return [];
1882                         }
1883
1884                         $feed_url = self::getFeedLink($url, $feed);
1885
1886                         if (!$feed_url) {
1887                                 return [];
1888                         }
1889
1890                         return self::feed($feed_url, false);
1891                 }
1892
1893                 if (!empty($feed_data['header']['author-name'])) {
1894                         $data['name'] = $feed_data['header']['author-name'];
1895                 }
1896
1897                 if (!empty($feed_data['header']['author-nick'])) {
1898                         $data['nick'] = $feed_data['header']['author-nick'];
1899                 }
1900
1901                 if (!empty($feed_data['header']['author-avatar'])) {
1902                         $data['photo'] = $feed_data['header']['author-avatar'];
1903                 }
1904
1905                 if (!empty($feed_data['header']['author-id'])) {
1906                         $data['alias'] = $feed_data['header']['author-id'];
1907                 }
1908
1909                 $data['url'] = $url;
1910                 $data['poll'] = $url;
1911
1912                 $data['network'] = Protocol::FEED;
1913
1914                 return $data;
1915         }
1916
1917         /**
1918          * Check for mail contact
1919          *
1920          * @param string  $uri Profile link
1921          * @param integer $uid User ID
1922          *
1923          * @return array mail data
1924          * @throws \Exception
1925          */
1926         private static function mail(string $uri, int $uid): array
1927         {
1928                 if (!Network::isEmailDomainValid($uri)) {
1929                         return [];
1930                 }
1931
1932                 if ($uid == 0) {
1933                         return [];
1934                 }
1935
1936                 $user = DBA::selectFirst('user', ['prvkey'], ['uid' => $uid]);
1937
1938                 $condition = ["`uid` = ? AND `server` != ''", $uid];
1939                 $fields = ['pass', 'user', 'server', 'port', 'ssltype', 'mailbox'];
1940                 $mailacct = DBA::selectFirst('mailacct', $fields, $condition);
1941
1942                 if (!DBA::isResult($user) || !DBA::isResult($mailacct)) {
1943                         return [];
1944                 }
1945
1946                 $mailbox = Email::constructMailboxName($mailacct);
1947                 $password = '';
1948                 openssl_private_decrypt(hex2bin($mailacct['pass']), $password, $user['prvkey']);
1949                 $mbox = Email::connect($mailbox, $mailacct['user'], $password);
1950                 if (!$mbox) {
1951                         return [];
1952                 }
1953
1954                 $msgs = Email::poll($mbox, $uri);
1955                 Logger::info('Messages found', ['uri' => $uri, 'count' => count($msgs)]);
1956
1957                 if (!count($msgs)) {
1958                         return [];
1959                 }
1960
1961                 $phost = substr($uri, strpos($uri, '@') + 1);
1962
1963                 $data = [
1964                         'addr'    => $uri,
1965                         'network' => Protocol::MAIL,
1966                         'name'    => substr($uri, 0, strpos($uri, '@')),
1967                         'photo'   => Network::lookupAvatarByEmail($uri),
1968                         'url'     => 'mailto:' . $uri,
1969                         'notify'  => 'smtp ' . Strings::getRandomHex(),
1970                         'poll'    => 'email ' . Strings::getRandomHex(),
1971                 ];
1972
1973                 $data['nick']    = $data['name'];
1974
1975                 $x = Email::messageMeta($mbox, $msgs[0]);
1976
1977                 if (stristr($x[0]->from, $uri)) {
1978                         $adr = imap_rfc822_parse_adrlist($x[0]->from, '');
1979                 } elseif (stristr($x[0]->to, $uri)) {
1980                         $adr = imap_rfc822_parse_adrlist($x[0]->to, '');
1981                 }
1982
1983                 if (isset($adr)) {
1984                         foreach ($adr as $feadr) {
1985                                 if ((strcasecmp($feadr->mailbox, $data['name']) == 0)
1986                                         &&(strcasecmp($feadr->host, $phost) == 0)
1987                                         && (strlen($feadr->personal))
1988                                 ) {
1989                                         $personal = imap_mime_header_decode($feadr->personal);
1990                                         $data['name'] = '';
1991                                         foreach ($personal as $perspart) {
1992                                                 if ($perspart->charset != 'default') {
1993                                                         $data['name'] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text);
1994                                                 } else {
1995                                                         $data['name'] .= $perspart->text;
1996                                                 }
1997                                         }
1998                                 }
1999                         }
2000                 }
2001
2002                 if (!empty($mbox)) {
2003                         imap_close($mbox);
2004                 }
2005
2006                 return $data;
2007         }
2008
/**
 * Complete a possibly partial avatar URL with the components of a base URL
 *
 * Both URLs are split with parse_url(). Scheme, host, port of the base serve
 * as defaults, every component that is present in the avatar URL wins. Path,
 * query and fragment of the base are never used. User and password parts are
 * not put into the result.
 *
 * @param string $avatar Path to the avatar
 * @param string $base   Another path that is hopefully complete
 *
 * @return string fixed avatar path
 * @throws \Exception
 */
public static function fixAvatar(string $avatar, string $base): string
{
    // parse_url() returns false on seriously malformed URLs. Fall back to an
    // empty component list so that array_merge() always receives arrays.
    $base_parts = parse_url($base) ?: [];

    // Remove all parts that could create a problem
    unset($base_parts['path'], $base_parts['query'], $base_parts['fragment']);

    $avatar_parts = parse_url($avatar) ?: [];

    // Now we mix them, the components of the avatar take precedence
    $parts = array_merge($base_parts, $avatar_parts);

    // And put them together again
    $scheme   = isset($parts['scheme'])   ? $parts['scheme'] . '://' : '';
    $host     = isset($parts['host'])     ? $parts['host']           : '';
    $port     = isset($parts['port'])     ? ':' . $parts['port']     : '';
    $path     = isset($parts['path'])     ? $parts['path']           : '';
    $query    = isset($parts['query'])    ? '?' . $parts['query']    : '';
    $fragment = isset($parts['fragment']) ? '#' . $parts['fragment'] : '';

    // A relative avatar path ("photo/1.jpg") must not be glued directly to the host
    if (($host !== '') && ($path !== '') && ($path[0] !== '/')) {
        $path = '/' . $path;
    }

    $fixed = $scheme . $host . $port . $path . $query . $fragment;

    Logger::debug('Avatar fixed', ['base' => $base, 'avatar' => $avatar, 'fixed' => $fixed]);

    return $fixed;
}
2046
/**
 * Determine the date of the last (public) activity of a contact
 *
 * The sources are consulted in this order and the first hit is returned:
 * the "self" contact of the user id that User::getIdForURL() resolves for
 * the URL, the "noscrape" endpoint, the outbox and finally the feed that
 * is given in "poll".
 *
 * @param array $data probing result
 *
 * @return string last activity, empty string when no source is available
 */
public static function getLastUpdate(array $data): string
{
    $local_uid = User::getIdForURL($data['url']);
    if (!empty($local_uid)) {
        $self = Contact::selectFirst(['url', 'last-item'], ['self' => true, 'uid' => $local_uid]);
        if (!empty($self['last-item'])) {
            return $self['last-item'];
        }
    }

    $noscrape_date = self::updateFromNoScrape($data);
    if ($noscrape_date) {
        return $noscrape_date;
    }

    // A dedicated outbox always wins over the polling address
    if (!empty($data['outbox'])) {
        return self::updateFromOutbox($data['outbox'], $data);
    }

    if (empty($data['poll'])) {
        return '';
    }

    // For ActivityPub contacts the polling address is treated as an outbox, otherwise as a feed
    return ($data['network'] == Protocol::ACTIVITYPUB)
        ? self::updateFromOutbox($data['poll'], $data)
        : self::updateFromFeed($data);
}
2078
/**
 * Fetch the last activity date from the "noscrape" endpoint
 *
 * The endpoint address is taken from the "gserver" table entry that matches
 * the normalised base URL of the contact. The nickname is appended to it.
 *
 * @param array $data Probing result, "baseurl" and "nick" are evaluated
 *
 * @return string last activity, empty string when it could not be fetched
 */
private static function updateFromNoScrape(array $data): string
{
    // Without base URL or nickname the endpoint address cannot be built
    if (empty($data['baseurl']) || (($data['nick'] ?? '') === '')) {
        return '';
    }

    // Only servers with a known 'noscrape' endpoint are asked
    $gserver = DBA::selectFirst('gserver', ['noscrape'], ["`nurl` = ? AND `noscrape` != ''",
        Strings::normaliseLink($data['baseurl'])]);
    if (!DBA::isResult($gserver)) {
        return '';
    }

    $curlResult = DI::httpClient()->get($gserver['noscrape'] . '/' . $data['nick'], HttpClientAccept::JSON);
    if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
        return '';
    }

    $noscrape = json_decode($curlResult->getBody(), true);

    // The body is remote data: only accept a JSON object with a scalar "updated" value,
    // anything else must not reach the string-typed date conversion.
    if (!is_array($noscrape) || empty($noscrape['updated']) || !is_scalar($noscrape['updated'])) {
        return '';
    }

    return DateTimeFormat::utc((string)$noscrape['updated'], DateTimeFormat::MYSQL);
}
2110
/**
 * Fetch the last activity date from an ActivityPub Outbox
 *
 * When the fetched document carries no items itself, the link in "first" is
 * followed. Every address is fetched at most once and the number of followed
 * links is limited, so that looping or endless chains of "first" links on a
 * remote server cannot keep this function busy forever.
 *
 * @param string $feed Address of the outbox
 * @param array  $data Probing result (not evaluated here, kept for the callers)
 *
 * @return string last activity, empty string when none was found
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 */
private static function updateFromOutbox(string $feed, array $data): string
{
    // Upper limit for the number of documents that are fetched in one call
    $max_fetches = 10;
    $visited     = [];
    $items       = [];

    while (true) {
        $visited[$feed] = true;

        $outbox = ActivityPub::fetchContent($feed);
        if (empty($outbox)) {
            return '';
        }

        if (!empty($outbox['orderedItems'])) {
            $items = $outbox['orderedItems'];
            break;
        }

        if (!empty($outbox['first']['orderedItems'])) {
            $items = $outbox['first']['orderedItems'];
            break;
        }

        if (empty($outbox['first'])) {
            // Neither items nor a link to a first page
            break;
        }

        // "first" is either a link object with "href" or the plain address
        $first = $outbox['first'];
        $next  = is_array($first) ? ($first['href'] ?? null) : $first;

        if (!is_string($next) || empty($next) || ($next === $feed)) {
            Logger::warning('Unexpected data', ['outbox' => $outbox]);
            return '';
        }

        if (isset($visited[$next]) || (count($visited) >= $max_fetches)) {
            Logger::notice('Outbox links are looping or too deep', ['feed' => $feed, 'next' => $next]);
            return '';
        }

        $feed = $next;
    }

    if (!is_array($items)) {
        return '';
    }

    $last_updated = '';
    foreach ($items as $activity) {
        if (!empty($activity['published'])) {
            $published = DateTimeFormat::utc($activity['published']);
        } elseif (!empty($activity['object']['published'])) {
            $published = DateTimeFormat::utc($activity['object']['published']);
        } else {
            continue;
        }

        // Keep the newest date
        if ($last_updated < $published) {
            $last_updated = $published;
        }
    }

    return $last_updated;
}
2165
/**
 * Fetch the last activity date from an XML feed
 *
 * The address in "poll" is fetched and every "atom:entry" below "atom:feed"
 * is inspected. The newest value out of all "atom:published" and
 * "atom:updated" elements is returned. An entry only needs one of the two
 * dates, since "atom:published" is optional in Atom.
 *
 * @param array $data Probing result
 * @return string last activity, empty string when none was found
 */
private static function updateFromFeed(array $data): string
{
    // Search for the newest entry in the feed
    $curlResult = DI::httpClient()->get($data['poll'], HttpClientAccept::ATOM_XML);
    if (!$curlResult->isSuccess() || !$curlResult->getBody()) {
        return '';
    }

    $doc = new DOMDocument();
    if (!@$doc->loadXML($curlResult->getBody())) {
        // Not parseable, so there are no entries to look at
        return '';
    }

    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');

    $entries = $xpath->query('/atom:feed/atom:entry');

    $last_updated = '';

    foreach ($entries as $entry) {
        $published_item = $xpath->query('atom:published/text()', $entry)->item(0);
        $updated_item   = $xpath->query('atom:updated/text()', $entry)->item(0);
        $published      = !empty($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : '';
        $updated        = !empty($updated_item->nodeValue) ? DateTimeFormat::utc($updated_item->nodeValue) : '';

        // Only entries without any date are useless here
        if (empty($published) && empty($updated)) {
            Logger::notice('Invalid entry for XPath.', ['entry' => $entry, 'url' => $data['url'] ?? '']);
            continue;
        }

        // An empty string is never newer than the current value
        if ($last_updated < $published) {
            $last_updated = $published;
        }

        if ($last_updated < $updated) {
            $last_updated = $updated;
        }
    }

    return $last_updated;
}
2216
/**
 * Probe data from local profiles without network traffic
 *
 * The result is built from the owner data and the ActivityPub profile of
 * the user that User::getIdForURL() resolves for the URL. Whenever an
 * exception occurs on the way (including an unknown user), default values
 * with the network Protocol::PHANTOM are used instead.
 *
 * @param string $url
 *
 * @return array probed data
 * @throws HTTPException\InternalServerErrorException
 * @throws HTTPException\NotFoundException
 */
private static function localProbe(string $url): array
{
    try {
        $uid = User::getIdForURL($url);
        if (!$uid) {
            throw new HTTPException\NotFoundException('User not found.');
        }

        $owner     = User::getOwnerDataById($uid);
        $approfile = ActivityPub\Transmitter::getProfile($uid);

        // Empty results would only lead to type errors further down, which the
        // catch block below does not handle. So they take the same way as an unknown user.
        if (empty($owner) || empty($approfile)) {
            throw new HTTPException\NotFoundException('User not found.');
        }

        $split_name = Diaspora::splitName($owner['name']);

        if (empty($owner['gsid'])) {
            $owner['gsid'] = GServer::getID($approfile['generator']['url']);
        }

        // The guid is optional in the profile, both places below use the same fallback
        $guid    = $approfile['diaspora:guid'] ?? '';
        $baseurl = $approfile['generator']['url'];

        $data = [
            'name'             => $owner['name'],
            'nick'             => $owner['nick'],
            'guid'             => $guid,
            'url'              => $owner['url'],
            'addr'             => $owner['addr'],
            'alias'            => $owner['alias'],
            'photo'            => User::getAvatarUrl($owner),
            'header'           => $owner['header'] ? Contact::getHeaderUrlForId($owner['id'], $owner['updated']) : '',
            'account-type'     => $owner['contact-type'],
            'community'        => ($owner['contact-type'] == User::ACCOUNT_TYPE_COMMUNITY),
            'keywords'         => $owner['keywords'],
            'location'         => $owner['location'],
            'about'            => $owner['about'],
            'xmpp'             => $owner['xmpp'],
            'matrix'           => $owner['matrix'],
            'hide'             => !$owner['net-publish'],
            'batch'            => '',
            'notify'           => $owner['notify'],
            'poll'             => $owner['poll'],
            'request'          => $owner['request'],
            'confirm'          => $owner['confirm'],
            'subscribe'        => $baseurl . '/contact/follow?url={uri}',
            'poco'             => $owner['poco'],
            'following'        => $approfile['following'],
            'followers'        => $approfile['followers'],
            'inbox'            => $approfile['inbox'],
            'outbox'           => $approfile['outbox'],
            'sharedinbox'      => $approfile['endpoints']['sharedInbox'],
            'network'          => Protocol::DFRN,
            'pubkey'           => $owner['upubkey'],
            'baseurl'          => $baseurl,
            'gsid'             => $owner['gsid'],
            'manually-approve' => in_array($owner['page-flags'], [User::PAGE_FLAGS_NORMAL, User::PAGE_FLAGS_PRVGROUP]),
            'networks'         => [
                Protocol::DIASPORA => [
                    'name'         => $owner['name'],
                    'given_name'   => $split_name['first'],
                    'family_name'  => $split_name['last'],
                    'nick'         => $owner['nick'],
                    'guid'         => $guid,
                    'url'          => $owner['url'],
                    'addr'         => $owner['addr'],
                    'alias'        => $owner['alias'],
                    'photo'        => $owner['photo'],
                    'photo_medium' => $owner['thumb'],
                    'photo_small'  => $owner['micro'],
                    'batch'        => $baseurl . '/receive/public',
                    'notify'       => $owner['notify'],
                    'poll'         => $owner['poll'],
                    'poco'         => $owner['poco'],
                    'network'      => Protocol::DIASPORA,
                    'pubkey'       => $owner['upubkey'],
                ],
            ],
        ];
    } catch (Exception $e) {
        // Default values for non existing targets
        $data = [
            'name'    => $url,
            'nick'    => $url,
            'url'     => $url,
            'network' => Protocol::PHANTOM,
            'photo'   => DI::baseUrl() . Contact::DEFAULT_AVATAR_PHOTO,
        ];
    }

    return self::rearrangeData($data);
}
2291 }