git.mxchange.org Git - friendica.git/blobdiff - src/Content/PageInfo.php
Merge pull request #11531 from annando/display-polls
[friendica.git] / src / Content / PageInfo.php
index 73e2f62932148e168530732d410d4480941ceb14..41ecb3d7fa35a0ec207747a7c174fe9707e51b39 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2020, Friendica
+ * @copyright Copyright (C) 2010-2022, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
@@ -40,7 +40,7 @@ class PageInfo
         * @return string
         * @throws HTTPException\InternalServerErrorException
         */
-       public static function appendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false)
+       public static function searchAndAppendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false)
        {
                Logger::info('add_page_info_to_body: fetch page info for body', ['body' => $body]);
 
@@ -49,14 +49,34 @@ class PageInfo
                        return $body;
                }
 
-               $footer = self::getFooterFromUrl($url, $no_photos);
-               if (!$footer) {
+               $data = self::queryUrl($url);
+               if (!$data) {
                        return $body;
                }
 
-               $body = self::stripTrailingUrlFromBody($body, $url);
+               return self::appendDataToBody($body, $data, $no_photos);
+       }
 
-               $body .= "\n" . $footer;
+       /**
+        * @param string $body
+        * @param array  $data
+        * @param bool   $no_photos
+        * @return string
+        * @throws HTTPException\InternalServerErrorException
+        */
+       public static function appendDataToBody(string $body, array $data, bool $no_photos = false)
+       {
+               // Only one [attachment] tag per body is allowed
+               $existingAttachmentPos = strpos($body, '[attachment');
+               if ($existingAttachmentPos !== false) {
+                       $linkTitle = $data['title'] ?: $data['url'];
+                       // Additional link attachments are prepended before the existing [attachment] tag
+                       $body = substr_replace($body, "\n[bookmark=" . $data['url'] . ']' . $linkTitle . "[/bookmark]\n", $existingAttachmentPos, 0);
+               } else {
+                       $footer = self::getFooterFromData($data, $no_photos);
+                       $body = self::stripTrailingUrlFromBody($body, $data['url']);
+                       $body .= "\n" . $footer;
+               }
 
                return $body;
        }
@@ -109,25 +129,16 @@ class PageInfo
                }
 
                // Escape some bad characters
-               $data['url'] = str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($data['url'], ENT_QUOTES, 'UTF-8', false));
-               $data['title'] = str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($data['title'], ENT_QUOTES, 'UTF-8', false));
-
-               $text = "[attachment type='" . $data['type'] . "'";
+               $text = "[attachment";
 
-               if (empty($data['text'])) {
-                       $data['text'] = $data['title'];
+               foreach (['type', 'url', 'title', 'alternative_title', 'publisher_name', 'publisher_url', 'publisher_img', 'author_name', 'author_url', 'author_img'] as $field) {
+                       if (!empty($data[$field])) {
+                               $text .= " " . $field . "='" . str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($data[$field], ENT_QUOTES, 'UTF-8', false)) . "'";
+                       }
                }
 
                if (empty($data['text'])) {
-                       $data['text'] = $data['url'];
-               }
-
-               if (!empty($data['url'])) {
-                       $text .= " url='" . $data['url'] . "'";
-               }
-
-               if (!empty($data['title'])) {
-                       $text .= " title='" . $data['title'] . "'";
+                       $data['text'] = '';
                }
 
                // Only embedd a picture link when it seems to be a valid picture ("width" is set)
@@ -140,10 +151,18 @@ class PageInfo
                                $text .= " image='" . $preview . "'";
                        } else {
                                $text .= " preview='" . $preview . "'";
+
+                               if (empty($data['text'])) {
+                                       $data['text'] = $data['title'];
+                               }
+
+                               if (empty($data['text'])) {
+                                       $data['text'] = $data['url'];
+                               }
                        }
                }
 
-               $text .= ']' . $data['text'] . '[/attachment]';
+               $text .= ']' . str_replace(['[', ']'], ['&#91;', '&#93;'], $data['text']) . '[/attachment]';
 
                $hashtags = '';
                if (!empty($data['keywords'])) {
@@ -168,7 +187,7 @@ class PageInfo
         */
        public static function queryUrl(string $url, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
        {
-               $data = ParseUrl::getSiteinfoCached($url, true);
+               $data = ParseUrl::getSiteinfoCached($url);
 
                if ($photo != '') {
                        $data['images'][0]['src'] = $photo;
@@ -210,9 +229,7 @@ class PageInfo
                }
 
                $taglist = [];
-
                foreach ($data['keywords'] as $keyword) {
-
                        $hashtag = str_replace([' ', '+', '/', '.', '#', "'"],
                                ['', '', '', '', '', ''], $keyword);
 
@@ -229,21 +246,26 @@ class PageInfo
         * @param bool   $searchNakedUrls Whether we should pick a naked URL (outside of BBCode tags) as a last resort
         * @return string|null
         */
-       protected static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false)
+       public static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false)
        {
                $URLSearchString = 'https?://[^\[\]]*';
 
                // Fix for Mastodon where the mentions are in a different format
                $body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body);
 
-               preg_match("~(?<![!#@])\[url]($URLSearchString)\[/url]$~is", $body, $matches);
+               // Remove all hashtags and mentions
+               $body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body);
+
+               // Search for pure links
+               preg_match("/\[url\](https?:.*?)\[\/url\]/ism", $body, $matches);
 
                if (!$matches) {
-                       preg_match("~(?<![!#@])\[url=($URLSearchString)].*\[/url]$~is", $body, $matches);
+                       // Search for links with descriptions
+                       preg_match("/\[url\=(https?:.*?)\].*?\[\/url\]/ism", $body, $matches);
                }
 
                if (!$matches && $searchNakedUrls) {
-                       preg_match('~(?<=\W|^)(?<![=\]])(https?://.+)$~is', $body, $matches);
+                       preg_match(Strings::autoLinkRegEx(), $body, $matches);
                        if ($matches && !Strings::endsWith($body, $matches[1])) {
                                unset($matches);
                        }
@@ -254,22 +276,36 @@ class PageInfo
 
        /**
         * Remove the provided URL from the body if it is at the end of it.
-        * Keep the link label if it isn't the full URL.
+        * Keep the link label if it isn't the full URL or a shortened version of it.
         *
         * @param string $body
         * @param string $url
-        * @return string|string[]|null
+        * @return string
         */
        protected static function stripTrailingUrlFromBody(string $body, string $url)
        {
                $quotedUrl = preg_quote($url, '#');
-               $body = preg_replace("#(?:
+               $body = preg_replace_callback("#(?:
                        \[url]$quotedUrl\[/url]|
                        \[url=$quotedUrl]$quotedUrl\[/url]|
                        \[url=$quotedUrl]([^[]*?)\[/url]|
                        $quotedUrl
-               )$#isx", '$1', $body);
+               )$#isx", function ($match) use ($url) {
+                       // Stripping URLs with no label
+                       if (empty($match[1])) {
+                               return '';
+                       }
 
-               return $body;
+                       // Stripping link labels that include a shortened version of the URL
+                       $trimMatch = trim($match[1], '.…');
+                       if (!empty($trimMatch) && strpos($url, $trimMatch) !== false) {
+                               return '';
+                       }
+
+                       // Keep all other labels
+                       return $match[1];
+               }, $body);
+
+               return rtrim($body);
        }
 }