]> git.mxchange.org Git - friendica.git/blobdiff - src/Model/Post/Media.php
Avoid local network communication / invalid url requests
[friendica.git] / src / Model / Post / Media.php
index 04c0db0f80aefea2ed8583a69b6dbe3fe706bbab..8216d7383719e314dd7a87e5c390bbf2182f5f6c 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2010-2022, the Friendica project
+ * @copyright Copyright (C) 2010-2023, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
@@ -30,6 +30,7 @@ use Friendica\Database\DBA;
 use Friendica\DI;
 use Friendica\Model\Contact;
 use Friendica\Model\Item;
+use Friendica\Model\ItemURI;
 use Friendica\Model\Photo;
 use Friendica\Model\Post;
 use Friendica\Network\HTTPClient\Client\HttpClientAccept;
@@ -66,6 +67,7 @@ class Media
         * Insert a post-media record
         *
         * @param array $media
+        * @param bool  $force
         * @return void
         */
        public static function insert(array $media, bool $force = false)
@@ -88,6 +90,11 @@ class Media
                        return;
                }
 
+               if (!ItemURI::exists($media['uri-id'])) {
+                       Logger::info('Media referenced URI ID not found', ['uri-id' => $media['uri-id'], 'url' => $media['url'], 'callstack' => System::callstack()]);
+                       return;
+               }
+
                $media = self::unsetEmptyFields($media);
                $media = DI::dbaDefinition()->truncateFieldsForTable('post-media', $media);
 
@@ -117,7 +124,7 @@ class Media
         */
        private static function unsetEmptyFields(array $media): array
        {
-               $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description'];
+               $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'blurhash', 'description'];
                foreach ($fields as $field) {
                        if (empty($media[$field])) {
                                unset($media[$field]);
@@ -173,7 +180,7 @@ class Media
                }
 
                // Fetch the mimetype or size if missing.
-               if (empty($media['mimetype']) || empty($media['size'])) {
+               if (Network::isValidHttpUrl($media['url']) && (empty($media['mimetype']) || empty($media['size']))) {
                        $timeout = DI::config()->get('system', 'xrd_timeout');
                        $curlResult = DI::httpClient()->head($media['url'], [HttpClientOptions::TIMEOUT => $timeout]);
 
@@ -203,6 +210,7 @@ class Media
                                $media['size'] = $imagedata['size'];
                                $media['width'] = $imagedata[0];
                                $media['height'] = $imagedata[1];
+                               $media['blurhash'] = $imagedata['blurhash'] ?? null;
                        } else {
                                Logger::notice('No image data', ['media' => $media]);
                        }
@@ -228,19 +236,9 @@ class Media
                }
 
                if ($media['type'] == self::HTML) {
-                       $data = ParseUrl::getSiteinfoCached($media['url'], false);
-                       $media['preview'] = $data['images'][0]['src'] ?? null;
-                       $media['preview-height'] = $data['images'][0]['height'] ?? null;
-                       $media['preview-width'] = $data['images'][0]['width'] ?? null;
-                       $media['description'] = $data['text'] ?? null;
-                       $media['name'] = $data['title'] ?? null;
-                       $media['author-url'] = $data['author_url'] ?? null;
-                       $media['author-name'] = $data['author_name'] ?? null;
-                       $media['author-image'] = $data['author_img'] ?? null;
-                       $media['publisher-url'] = $data['publisher_url'] ?? null;
-                       $media['publisher-name'] = $data['publisher_name'] ?? null;
-                       $media['publisher-image'] = $data['publisher_img'] ?? null;
+                       $media = self::addPage($media);
                }
+
                return $media;
        }
 
@@ -287,6 +285,7 @@ class Media
                $media['preview'] = null;
                $media['preview-height'] = null;
                $media['preview-width'] = null;
+               $media['blurhash'] = null;
                $media['description'] = $item['body'];
                $media['name'] = $item['title'];
                $media['author-url'] = $item['author-link'];
@@ -328,6 +327,7 @@ class Media
                $media['preview'] = null;
                $media['preview-height'] = null;
                $media['preview-width'] = null;
+               $media['blurhash'] = null;
                $media['description'] = $contact['about'];
                $media['name'] = $contact['name'];
                $media['author-url'] = $contact['url'];
@@ -341,6 +341,31 @@ class Media
                return $media;
        }
 
+       /**
+        * Fill the preview, blurhash, description, name, author and publisher fields of an HTML entry
+        *
+        * @param array $media Media record; its `url` is passed to ParseUrl::getSiteinfoCached()
+        * @return array The media record with the page fields set; a field is null when the site info lacks it
+        */
+       private static function addPage(array $media): array
+       {
+               $data = ParseUrl::getSiteinfoCached($media['url'], false);
+               $media['preview'] = $data['images'][0]['src'] ?? null;
+               $media['preview-height'] = $data['images'][0]['height'] ?? null;
+               $media['preview-width'] = $data['images'][0]['width'] ?? null;
+               $media['blurhash'] = $data['images'][0]['blurhash'] ?? null;
+               $media['description'] = $data['text'] ?? null;
+               $media['name'] = $data['title'] ?? null;
+               $media['author-url'] = $data['author_url'] ?? null;
+               $media['author-name'] = $data['author_name'] ?? null;
+               $media['author-image'] = $data['author_img'] ?? null;
+               $media['publisher-url'] = $data['publisher_url'] ?? null;
+               $media['publisher-name'] = $data['publisher_name'] ?? null;
+               $media['publisher-image'] = $data['publisher_img'] ?? null;
+
+               return $media;
+       }
+
        /**
         * Fetch media data from local resources
         * @param array $media
@@ -357,6 +382,7 @@ class Media
                        $media['size'] = $photo['datasize'];
                        $media['width'] = $photo['width'];
                        $media['height'] = $photo['height'];
+                       $media['blurhash'] = $photo['blurhash'];
                }
 
                if (!preg_match('|.*?/photo/(.*[a-fA-F0-9])\-(.*[0-9])\..*[\w]|', $media['preview'] ?? '', $matches)) {
@@ -441,13 +467,14 @@ class Media
         * @param string $body
         * @return string Body without media links
         */
-       public static function insertFromBody(int $uriid, string $body): string
+       public static function insertFromBody(int $uriid, string $body, bool $endmatch = false): string
        {
+               $endmatchpattern = $endmatch ? '\z' : '';
                // Simplify image codes
-               $unshared_body = $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
+               $unshared_body = $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]$endmatchpattern/ism", '[img]$3[/img]', $body);
 
                $attachments = [];
-               if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
+               if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]$endmatchpattern#ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                if (!self::isPictureLink($picture[1], $picture[2])) {
                                        continue;
@@ -459,14 +486,14 @@ class Media
                        }
                }
 
-               if (preg_match_all("/\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) {
+               if (preg_match_all("/\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]$endmatchpattern/Usi", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                $body = str_replace($picture[0], '', $body);
                                $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1], 'description' => $picture[2]];
                        }
                }
 
-               if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
+               if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]$endmatchpattern#ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                if (!self::isPictureLink($picture[1], $picture[2])) {
                                        continue;
@@ -478,41 +505,58 @@ class Media
                        }
                }
 
-               if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]/ism", $body, $pictures, PREG_SET_ORDER)) {
+               if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]$endmatchpattern/ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                $body = str_replace($picture[0], '', $body);
                                $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1]];
                        }
                }
 
-               if (preg_match_all("/\[audio\]([^\[\]]*)\[\/audio\]/ism", $body, $audios, PREG_SET_ORDER)) {
+               if (preg_match_all("/\[audio\]([^\[\]]*)\[\/audio\]$endmatchpattern/ism", $body, $audios, PREG_SET_ORDER)) {
                        foreach ($audios as $audio) {
                                $body = str_replace($audio[0], '', $body);
                                $attachments[$audio[1]] = ['uri-id' => $uriid, 'type' => self::AUDIO, 'url' => $audio[1]];
                        }
                }
 
-               if (preg_match_all("/\[video\]([^\[\]]*)\[\/video\]/ism", $body, $videos, PREG_SET_ORDER)) {
+               if (preg_match_all("/\[video\]([^\[\]]*)\[\/video\]$endmatchpattern/ism", $body, $videos, PREG_SET_ORDER)) {
                        foreach ($videos as $video) {
                                $body = str_replace($video[0], '', $body);
                                $attachments[$video[1]] = ['uri-id' => $uriid, 'type' => self::VIDEO, 'url' => $video[1]];
                        }
                }
 
-               foreach ($attachments as $attachment) {
-                       if (Post\Link::exists($uriid, $attachment['preview'] ?? $attachment['url'])) {
-                               continue;
-                       }
+               if ($uriid != 0) {
+                       foreach ($attachments as $attachment) {
+                               if (Post\Link::exists($uriid, $attachment['preview'] ?? $attachment['url'])) {
+                                       continue;
+                               }
 
-                       // Only store attachments that are part of the unshared body
-                       if (Item::containsLink($unshared_body, $attachment['preview'] ?? $attachment['url'], $attachment['type'])) {
-                               self::insert($attachment);
+                               // Only store attachments that are part of the unshared body
+                               if (Item::containsLink($unshared_body, $attachment['preview'] ?? $attachment['url'], $attachment['type'])) {
+                                       self::insert($attachment);
+                               }
                        }
                }
 
                return trim($body);
        }
 
+       /**
+        * Strip media elements from the end of the body, repeating until the body no longer changes
+        *
+        * @param string $body Body to clean; passed to insertFromBody() with uri-id 0 and end matching enabled
+        * @return string Body as returned by the last insertFromBody() pass
+        */
+       public static function removeFromEndOfBody(string $body): string
+       {
+               do {
+                       $prebody = $body;
+                       $body = self::insertFromBody(0, $body, true);
+               } while ($prebody != $body);
+               return $body;
+       }
+
        /**
         * Add media links from a relevant url in the body
         *
@@ -520,7 +564,7 @@ class Media
         * @param string $body
         * @return void
         */
-       public static function insertFromRelevantUrl(int $uriid, string $body)
+       public static function insertFromRelevantUrl(int $uriid, string $body, string $fullbody, string $network)
        {
                // Remove all hashtags and mentions
                $body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body);
@@ -529,7 +573,10 @@ class Media
                if (preg_match_all("/\[url\](https?:.*?)\[\/url\]/ism", $body, $matches)) {
                        foreach ($matches[1] as $url) {
                                Logger::info('Got page url (link without description)', ['uri-id' => $uriid, 'url' => $url]);
-                               self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url]);
+                               self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url], false, $network);
+                               if ($network == Protocol::DFRN) {
+                                       self::revertHTMLType($uriid, $url, $fullbody);
+                               }
                        }
                }
 
@@ -537,11 +584,31 @@ class Media
                if (preg_match_all("/\[url\=(https?:.*?)\].*?\[\/url\]/ism", $body, $matches)) {
                        foreach ($matches[1] as $url) {
                                Logger::info('Got page url (link with description)', ['uri-id' => $uriid, 'url' => $url]);
-                               self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url]);
+                               self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url], false, $network);
+                               if ($network == Protocol::DFRN) {
+                                       self::revertHTMLType($uriid, $url, $fullbody);
+                               }
                        }
                }
        }
 
+       /**
+        * Set a stored HTML media link back to UNKNOWN unless its url matches the attachment found in the body
+        *
+        * @param int    $uriid URI id of the post whose `post-media` row is updated
+        * @param string $url   Link url that identifies the `post-media` row
+        * @param string $body  Body that is handed to BBCode::getAttachmentData()
+        * @return void
+        */
+       private static function revertHTMLType(int $uriid, string $url, string $body)
+       {
+               $attachment = BBCode::getAttachmentData($body);
+               if (!empty($attachment['url']) && Network::getUrlMatch($attachment['url'], $url)) {
+                       return;
+               }
+               DBA::update('post-media', ['type' => self::UNKNOWN], ['uri-id' => $uriid, 'type' => self::HTML, 'url' => $url]);
+       }
+
        /**
         * Add media links from the attachment field
         *
@@ -610,7 +677,7 @@ class Media
         */
        public static function getByURIId(int $uri_id, array $types = [])
        {
-               $condition = ['uri-id' => $uri_id];
+               $condition = ["`uri-id` = ? AND `type` != ?", $uri_id, self::UNKNOWN];
 
                if (!empty($types)) {
                        $condition = DBA::mergeConditions($condition, ['type' => $types]);
@@ -629,7 +696,7 @@ class Media
         */
        public static function existsByURIId(int $uri_id, array $types = []): bool
        {
-               $condition = ['uri-id' => $uri_id];
+               $condition = ["`uri-id` = ? AND `type` != ?", $uri_id, self::UNKNOWN];
 
                if (!empty($types)) {
                        $condition = DBA::mergeConditions($condition, ['type' => $types]);
@@ -687,7 +754,7 @@ class Media
                                $previews[] = $medium['preview'];
                        }
 
-                       $type = explode('/', explode(';', $medium['mimetype'])[0]);
+                       $type = explode('/', explode(';', $medium['mimetype'] ?? '')[0]);
                        if (count($type) < 2) {
                                Logger::info('Unknown MimeType', ['type' => $type, 'media' => $medium]);
                                $filetype = 'unkn';