git.mxchange.org Git - friendica.git/commitdiff
Improved link detection
author Michael <heluecht@pirati.ca>
Sun, 2 May 2021 17:33:32 +0000 (17:33 +0000)
committer Michael <heluecht@pirati.ca>
Sun, 2 May 2021 17:33:32 +0000 (17:33 +0000)
src/Content/PageInfo.php
src/Model/Item.php
src/Model/Post/Media.php

index 7226ff2a445b55075989faf8f2d21c147b85c52c..21f32cd3c38dad350c1f6795595333c6195ec5dc 100644 (file)
@@ -253,10 +253,15 @@ class PageInfo
                // Fix for Mastodon where the mentions are in a different format
                $body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body);
 
-               preg_match("~(?<![!#@])\[url]($URLSearchString)\[/url]$~is", $body, $matches);
+               // Remove all hashtags and mentions
+               $body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body);
+
+               // Search for pure links
+               preg_match("/\[url\](.*?)\[\/url\]/ism", $body, $matches);
 
                if (!$matches) {
-                       preg_match("~(?<![!#@])\[url=($URLSearchString)].*\[/url]$~is", $body, $matches);
+                       // Search for links with descriptions
+                       preg_match("/\[url\=(.*?)\].*?\[\/url\]/ism", $body, $matches);
                }
 
                if (!$matches && $searchNakedUrls) {
index 13da8ec75b135e84f2d498fe67c00b0a21c550bf..2732330e00a3d67a7e373de7db6eaf53365a2bb9 100644 (file)
@@ -971,6 +971,8 @@ class Item
                $item['raw-body'] = Post\Media::insertFromBody($item['uri-id'], $item['raw-body']);
                $item['raw-body'] = self::setHashtags($item['raw-body']);
 
+               Post\Media::insertFromAttachmentData($item['uri-id'], $item['body']);
+
                // Check for hashtags in the body and repair or add hashtag links
                $item['body'] = self::setHashtags($item['body']);
 
@@ -2646,7 +2648,7 @@ class Item
                }
 
                $body = $item['body'] ?? '';
-               $item['body'] = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", '', $item['body']);
+               $item['body'] = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", "\n", $item['body']);
                self::putInCache($item);
                $item['body'] = $body;
                $s = $item["rendered-html"];
@@ -2722,6 +2724,12 @@ class Item
         */
        public static function containsLink(string $body, string $url)
        {
+               // Make sure that for example site parameters aren't used when testing if the link is contained in the body
+               $urlparts = parse_url($url);
+               unset($urlparts['query']);
+               unset($urlparts['fragment']);
+               $url = Network::unparseURL($urlparts);
+
                if (strpos($body, $url)) {
                        return true;
                }
index fe362f735848f8671f7a57037ef4ef16f816a27e..63e349795ba3a49e6dfd0a5e342d470a2bc2f7e3 100644 (file)
@@ -286,6 +286,8 @@ class Media
                // Simplify image codes
                $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
 
+               $unshared_body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
+
                $attachments = [];
                if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
@@ -346,7 +348,10 @@ class Media
                }
 
                foreach ($attachments as $attachment) {
-                       self::insert($attachment);
+                       // Only store attachments that are part of the unshared body
+                       if (strpos($unshared_body, $attachment['url']) !== false) {
+                               self::insert($attachment);
+                       }
                }
 
                return trim($body);
@@ -360,6 +365,9 @@ class Media
         */
        public static function insertFromAttachmentData(int $uriid, string $body)
        {
+               // Don't look at the shared content
+               $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
+
                $data = BBCode::getAttachmentData($body);
                if (empty($data))  {
                        return;
@@ -548,10 +556,18 @@ class Media
                        }
 
                        if ($media['type'] == self::IMAGE) {
-                               if (!empty($media['description'])) {
-                                       $body .= "\n[img=" . $media['url'] . ']' . $media['description'] .'[/img]';
+                               if (!empty($media['preview'])) {
+                                       if (!empty($media['description'])) {
+                                               $body .= "\n[url=" . $media['url'] . "][img=" . $media['preview'] . ']' . $media['description'] .'[/img][/url]';
+                                       } else {
+                                               $body .= "\n[url=" . $media['url'] . "][img]" . $media['preview'] .'[/img][/url]';
+                                       }
                                } else {
-                                       $body .= "\n[img]" . $media['url'] .'[/img]';
+                                       if (!empty($media['description'])) {
+                                               $body .= "\n[img=" . $media['url'] . ']' . $media['description'] .'[/img]';
+                                       } else {
+                                               $body .= "\n[img]" . $media['url'] .'[/img]';
+                                       }
                                }
                        } elseif ($media['type'] == self::AUDIO) {
                                $body .= "\n[audio]" . $media['url'] . "[/audio]\n";