]> git.mxchange.org Git - friendica.git/blobdiff - src/Model/Post/Media.php
Add atachments to body for Diaspora and DFRN
[friendica.git] / src / Model / Post / Media.php
index 57668fa99e37413eb0b5200027e04619a24d31c5..06935f8067d9c3ec87ed00075ae7a3b247a625a2 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2020, Friendica
+ * @copyright Copyright (C) 2010-2021, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
 
 namespace Friendica\Model\Post;
 
+use Friendica\Content\PageInfo;
+use Friendica\Content\Text\BBCode;
 use Friendica\Core\Logger;
 use Friendica\Core\System;
+use Friendica\Database\Database;
 use Friendica\Database\DBA;
 use Friendica\DI;
+use Friendica\Model\Item;
+use Friendica\Model\Post;
 use Friendica\Util\Images;
+use Friendica\Util\ParseUrl;
+use Friendica\Util\Strings;
 
 /**
  * Class Media
@@ -35,12 +42,17 @@ use Friendica\Util\Images;
  */
 class Media
 {
-       const UNKNOWN  = 0;
-       const IMAGE    = 1;
-       const VIDEO    = 2;
-       const AUDIO    = 3;
-       const TORRENT  = 16;
-       const DOCUMENT = 128;
+       const UNKNOWN     = 0;
+       const IMAGE       = 1;
+       const VIDEO       = 2;
+       const AUDIO       = 3;
+       const TEXT        = 4;
+       const APPLICATION = 5;
+       const TORRENT     = 16;
+       const HTML        = 17;
+       const XML         = 18;
+       const PLAIN       = 19;
+       const DOCUMENT    = 128;
 
        /**
         * Insert a post-media record
@@ -50,42 +62,71 @@ class Media
         */
        public static function insert(array $media, bool $force = false)
        {
-               if (empty($media['url']) || empty($media['uri-id']) || empty($media['type'])) {
+               if (empty($media['url']) || empty($media['uri-id']) || !isset($media['type'])) {
                        Logger::warning('Incomplete media data', ['media' => $media]);
                        return;
                }
 
                // "document" has got the lowest priority. So when the same file is both attached as document
-               // and embedded as picture then we only store the picture or replace the document 
+               // and embedded as picture then we only store the picture or replace the document
                $found = DBA::selectFirst('post-media', ['type'], ['uri-id' => $media['uri-id'], 'url' => $media['url']]);
                if (!$force && !empty($found) && (($found['type'] != self::DOCUMENT) || ($media['type'] == self::DOCUMENT))) {
                        Logger::info('Media already exists', ['uri-id' => $media['uri-id'], 'url' => $media['url'], 'callstack' => System::callstack()]);
                        return;
                }
 
-               $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description'];
-               foreach ($fields as $field) {
-                       if (empty($media[$field])) {
-                               unset($media[$field]);
-                       }
-               }
+               $media = self::unsetEmptyFields($media);
 
                // We are storing as fast as possible to avoid duplicated network requests
                // when fetching additional information for pictures and other content.
-               $result = DBA::insert('post-media', $media, true);
+               $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
                Logger::info('Stored media', ['result' => $result, 'media' => $media, 'callstack' => System::callstack()]);
                $stored = $media;
 
                $media = self::fetchAdditionalData($media);
+               $media = self::unsetEmptyFields($media);
 
                if (array_diff_assoc($media, $stored)) {
-                       $result = DBA::insert('post-media', $media, true);
+                       $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
                        Logger::info('Updated media', ['result' => $result, 'media' => $media]);
                } else {
                        Logger::info('Nothing to update', ['media' => $media]);
                }
        }
 
+       /**
+        * Drop the optional media fields that hold no value
+        *
+        * @param array $media media record
+        * @return array the record without its empty optional fields
+        */
+       private static function unsetEmptyFields(array $media)
+       {
+               $optional = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description'];
+               // "Empty" follows PHP's empty(): missing, null, '', 0, '0', false and [] all count.
+               $blank = array_filter($optional, static function ($name) use ($media) {
+                       return empty($media[$name]);
+               });
+               // array_diff_key() keeps the keys and the order of every remaining entry.
+               return array_diff_key($media, array_flip($blank));
+       }
+
+       /**
+        * Duplicate the media records of one uri-id under another uri-id
+        *
+        * @param integer $from_uri_id uri-id whose records are read
+        * @param integer $to_uri_id   uri-id the copies are stored under
+        * @return void
+        */
+       public static function copy(int $from_uri_id, int $to_uri_id)
+       {
+               // NOTE(review): each record is handed on with every column getByURIId() returned;
+               // confirm that no row identifier of the source travels along into insert().
+               foreach (self::getByURIId($from_uri_id) as $record) {
+                       self::insert(array_replace($record, ['uri-id' => $to_uri_id]));
+               }
+       }
+
        /**
         * Creates the "[attach]" element from the given attributes
         *
@@ -113,23 +154,22 @@ class Media
        public static function fetchAdditionalData(array $media)
        {
                // Fetch the mimetype or size if missing.
-               // We don't do it for torrent links since they need special treatment.
-               // We don't do this for images, since we are fetching their details some lines later anyway.
-               if (!in_array($media['type'], [self::TORRENT, self::IMAGE]) && (empty($media['mimetype']) || empty($media['size']))) {
+               if (empty($media['mimetype']) || empty($media['size'])) {
                        $timeout = DI::config()->get('system', 'xrd_timeout');
                        $curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]);
                        if ($curlResult->isSuccess()) {
-                               $header = $curlResult->getHeaderArray();
-                               if (empty($media['mimetype']) && !empty($header['content-type'])) {
-                                       $media['mimetype'] = $header['content-type'];
+                               if (empty($media['mimetype'])) {
+                                       $media['mimetype'] = $curlResult->getHeader('Content-Type');
                                }
-                               if (empty($media['size']) && !empty($header['content-length'])) {
-                                       $media['size'] = $header['content-length'];
+                               if (empty($media['size'])) {
+                                       $media['size'] = (int)$curlResult->getHeader('Content-Length');
                                }
+                       } else {
+                               Logger::notice('Could not fetch head', ['media' => $media]);
                        }
                }
 
-               $filetype = !empty($media['mimetype']) ? strtolower(substr($media['mimetype'], 0, strpos($media['mimetype'], '/'))) : '';
+               $filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
 
                if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
                        $imagedata = Images::getInfoFromURLCached($media['url']);
@@ -138,6 +178,8 @@ class Media
                                $media['size'] = $imagedata['size'];
                                $media['width'] = $imagedata[0];
                                $media['height'] = $imagedata[1];
+                       } else {
+                               Logger::notice('No image data', ['media' => $media]);
                        }
                        if (!empty($media['preview'])) {
                                $imagedata = Images::getInfoFromURLCached($media['preview']);
@@ -147,9 +189,79 @@ class Media
                                }
                        }
                }
+
+               if ($media['type'] != self::DOCUMENT) {
+                       $media = self::addType($media);
+               }
+
+               if ($media['type'] == self::HTML) {
+                       $data = ParseUrl::getSiteinfoCached($media['url'], false);
+                       $media['preview'] = $data['images'][0]['src'] ?? null;
+                       $media['preview-height'] = $data['images'][0]['height'] ?? null;
+                       $media['preview-width'] = $data['images'][0]['width'] ?? null;
+                       $media['description'] = $data['text'] ?? null;
+                       $media['name'] = $data['title'] ?? null;
+                       $media['author-url'] = $data['author_url'] ?? null;
+                       $media['author-name'] = $data['author_name'] ?? null;
+                       $media['author-image'] = $data['author_img'] ?? null;
+                       $media['publisher-url'] = $data['publisher_url'] ?? null;
+                       $media['publisher-name'] = $data['publisher_name'] ?? null;
+                       $media['publisher-image'] = $data['publisher_img'] ?? null;
+               }
                return $media;
        }
 
+       /**
+        * Add the detected type to the media array
+        *
+        * @param array $data media array; without a "mimetype" entry it is returned untouched
+        * @return array data array with the detected type (self::UNKNOWN for unrecognised MIME types)
+        */
+       public static function addType(array $data)
+       {
+               if (empty($data['mimetype'])) {
+                       Logger::info('No MimeType provided', ['media' => $data]);
+                       return $data;
+               }
+               // Only the part before the first ";" names the type; parameters such as "charset" follow it.
+               $type = explode('/', current(explode(';', $data['mimetype'])));
+               if (count($type) < 2) {
+                       Logger::info('Unknown MimeType', ['type' => $type, 'media' => $data]);
+                       $data['type'] = self::UNKNOWN;
+                       return $data;
+               }
+               // Whitespace may surround the type ("text/html ; charset=utf-8"); it must not defeat the comparisons.
+               $filetype = strtolower(trim($type[0]));
+               $subtype = strtolower(trim($type[1]));
+
+               if ($filetype == 'image') {
+                       $data['type'] = self::IMAGE;
+               } elseif ($filetype == 'video') {
+                       $data['type'] = self::VIDEO;
+               } elseif ($filetype == 'audio') {
+                       $data['type'] = self::AUDIO;
+               } elseif (($filetype == 'text') && ($subtype == 'html')) {
+                       $data['type'] = self::HTML;
+               } elseif (($filetype == 'text') && ($subtype == 'xml')) {
+                       $data['type'] = self::XML;
+               } elseif (($filetype == 'text') && ($subtype == 'plain')) {
+                       $data['type'] = self::PLAIN;
+               } elseif ($filetype == 'text') {
+                       $data['type'] = self::TEXT;
+               } elseif (($filetype == 'application') && ($subtype == 'x-bittorrent')) {
+                       $data['type'] = self::TORRENT;
+               } elseif ($filetype == 'application') {
+                       $data['type'] = self::APPLICATION;
+               } else {
+                       $data['type'] = self::UNKNOWN;
+                       Logger::info('Unknown type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
+                       return $data;
+               }
+
+               Logger::debug('Detected type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
+               return $data;
+       }
+
        /**
         * Tests for path patterns that are usef for picture links in Friendica
         *
@@ -182,7 +294,7 @@ class Media
                                }
                                $body = str_replace($picture[0], '', $body);
                                $image = str_replace('-1.', '-0.', $picture[2]);
-                               $attachments[] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
+                               $attachments[$image] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
                                        'preview' => $picture[2], 'description' => $picture[3]];
                        }
                }
@@ -190,7 +302,7 @@ class Media
                if (preg_match_all("/\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                $body = str_replace($picture[0], '', $body);
-                               $attachments[] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1], 'description' => $picture[2]];
+                               $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1], 'description' => $picture[2]];
                        }
                }
 
@@ -201,7 +313,7 @@ class Media
                                }
                                $body = str_replace($picture[0], '', $body);
                                $image = str_replace('-1.', '-0.', $picture[2]);
-                               $attachments[] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
+                               $attachments[$image] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
                                        'preview' => $picture[2], 'description' => null];
                        }
                }
@@ -209,24 +321,30 @@ class Media
                if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]/ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                $body = str_replace($picture[0], '', $body);
-                               $attachments[] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1]];
+                               $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1]];
                        }
                }
 
                if (preg_match_all("/\[audio\]([^\[\]]*)\[\/audio\]/ism", $body, $audios, PREG_SET_ORDER)) {
                        foreach ($audios as $audio) {
                                $body = str_replace($audio[0], '', $body);
-                               $attachments[] = ['uri-id' => $uriid, 'type' => self::AUDIO, 'url' => $audio[1]];
+                               $attachments[$audio[1]] = ['uri-id' => $uriid, 'type' => self::AUDIO, 'url' => $audio[1]];
                        }
                }
 
                if (preg_match_all("/\[video\]([^\[\]]*)\[\/video\]/ism", $body, $videos, PREG_SET_ORDER)) {
                        foreach ($videos as $video) {
                                $body = str_replace($video[0], '', $body);
-                               $attachments[] = ['uri-id' => $uriid, 'type' => self::VIDEO, 'url' => $video[1]];
+                               $attachments[$video[1]] = ['uri-id' => $uriid, 'type' => self::VIDEO, 'url' => $video[1]];
                        }
                }
 
+               $url = PageInfo::getRelevantUrlFromBody($body);
+               if (!empty($url)) {
+                       Logger::debug('Got page url', ['url' => $url]);
+                       $attachments[$url] = ['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url];
+               }
+
                foreach ($attachments as $attachment) {
                        self::insert($attachment);
                }
@@ -234,6 +352,38 @@ class Media
                return trim($body);
        }
 
+       /**
+        * Store what BBCode::getAttachmentData() extracts from the body as a media record of type HTML
+        *
+        * @param integer $uriid uri-id the media record is stored for
+        * @param string  $body  body that is handed to BBCode::getAttachmentData()
+        */
+       public static function insertFromAttachmentData(int $uriid, string $body)
+       {
+               $data = BBCode::getAttachmentData($body);
+               if (empty($data)) {
+                       return;
+               }
+
+               Logger::info('Adding attachment data', ['data' => $data]);
+
+               // A non-empty "image" entry takes precedence over the "preview" entry.
+               $preview = !empty($data['image']) ? $data['image'] : ($data['preview'] ?? null);
+
+               self::insert([
+                       'uri-id' => $uriid,
+                       'type' => self::HTML,
+                       'url' => $data['url'],
+                       'preview' => $preview,
+                       'description' => $data['description'] ?? null,
+                       'name' => $data['title'] ?? null,
+                       'author-url' => $data['author_url'] ?? null,
+                       'author-name' => $data['author_name'] ?? null,
+                       'publisher-url' => $data['provider_url'] ?? null,
+                       'publisher-name' => $data['provider_name'] ?? null,
+               ]);
+       }
+
        /**
         * Add media links from the attach field
         *
@@ -263,11 +413,151 @@ class Media
         * Retrieves the media attachments associated with the provided item ID.
         *
         * @param int $uri_id
+        * @param array $types
         * @return array
         * @throws \Exception
         */
-       public static function getByURIId(int $uri_id)
+       public static function getByURIId(int $uri_id, array $types = [])
        {
-               return DBA::selectToArray('post-media', [], ['uri-id' => $uri_id]);
+               $condition = ['uri-id' => $uri_id];
+
+               if (!empty($types)) {
+                       $condition = DBA::mergeConditions($condition, ['type' => $types]);
+               }
+
+               return DBA::selectToArray('post-media', [], $condition);
+       }
+
+       /**
+        * Tells whether media records exist for the provided uri-id.
+        *
+        * @param int   $uri_id
+        * @param array $types  optional list of media types the check is limited to
+        * @return bool whatever DBA::exists() reports for the built condition
+        * @throws \Exception
+        */
+       public static function existsByURIId(int $uri_id, array $types = [])
+       {
+               $by_uri = ['uri-id' => $uri_id];
+
+               // Without a type list the uri-id alone forms the condition.
+               $condition = empty($types)
+                       ? $by_uri
+                       : DBA::mergeConditions($by_uri, ['type' => $types]);
+               return DBA::exists('post-media', $condition);
+       }
+
+       /**
+        * Split the attachment media in the three segments "visual", "link" and "additional"
+        *
+        * @param int    $uri_id
+        * @param string $guid   Post GUID; videos whose URL contains it are handled as resolution variants of one video
+        * @param array  $links  List of links that shouldn't be added
+        * @return array attachments, grouped under the keys "visual", "link" and "additional"
+        */
+       public static function splitAttachments(int $uri_id, string $guid = '', array $links = [])
+       {
+               $attachments = ['visual' => [], 'link' => [], 'additional' => []];
+
+               $media = self::getByURIId($uri_id);
+               if (empty($media)) {
+                       return $attachments;
+               }
+
+               $height = 0;
+               $selected = '';
+               $video = [];
+               foreach ($media as $medium) {
+                       foreach ($links as $link) {
+                               if (Strings::compareLink($link, $medium['url'])) {
+                                       continue 2;
+                               }
+                       }
+
+                       $type = explode('/', current(explode(';', $medium['mimetype'] ?? '')));
+                       if (count($type) < 2) {
+                               Logger::info('Unknown MimeType', ['type' => $type, 'media' => $medium]);
+                               $filetype = 'unkn';
+                               $subtype = 'unkn';
+                       } else {
+                               $filetype = strtolower($type[0]);
+                               $subtype = strtolower($type[1]);
+                       }
+
+                       $medium['filetype'] = $filetype;
+                       $medium['subtype'] = $subtype;
+
+                       if ($medium['type'] == self::HTML || (($filetype == 'text') && ($subtype == 'html'))) {
+                               $attachments['link'][] = $medium;
+                               continue;
+                       }
+
+                       if (in_array($medium['type'], [self::AUDIO, self::IMAGE]) ||
+                               in_array($filetype, ['audio', 'image'])) {
+                               $attachments['visual'][] = $medium;
+                       } elseif (($medium['type'] == self::VIDEO) || ($filetype == 'video')) {
+                               if (!empty($guid) && (strpos($medium['url'], $guid) !== false)) {
+                                       // Peertube videos are delivered in many different resolutions. We pick a moderate one.
+                                       // Checking against the GUID restricts this to Peertube posts, so that for example several videos shared in one Mastodon post stay untouched.
+                                       // An empty GUID must never match: strpos() with an empty needle returns 0 on PHP 8 and warns on PHP 7.
+                                       if (empty($height) || (($height > $medium['height']) && ($medium['height'] >= 480))) {
+                                               $height = $medium['height'];
+                                               $selected = $medium['url'];
+                                       }
+                                       $video[$medium['url']] = $medium;
+                               } else {
+                                       $attachments['visual'][] = $medium;
+                               }
+                       } else {
+                               $attachments['additional'][] = $medium;
+                       }
+               }
+               if (!empty($selected)) {
+                       $attachments['visual'][] = $video[$selected];
+                       unset($video[$selected]);
+                       foreach ($video as $element) {
+                               $attachments['additional'][] = $element;
+                       }
+               }
+               return $attachments;
+       }
+
+       /**
+        * Append the image, audio and video media of a post to its body
+        *
+        * @param int $uriid
+        * @return string body, or an empty string when no post was found for the uri-id
+        */
+       public static function addAttachmentsToBody(int $uriid)
+       {
+               $item = Post::selectFirst(['body'], ['uri-id' => $uriid]);
+               if (!DBA::isResult($item)) {
+                       return '';
+               }
+               // Attachment elements are cut out here; one of them is re-appended behind the media elements.
+               $body = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", '', $item['body']);
+
+               foreach (self::getByURIId($uriid, [self::IMAGE, self::AUDIO, self::VIDEO]) as $media) {
+                       $url = $media['url'];
+                       if (Item::containsLink($body, $url)) {
+                               continue;
+                       }
+                       if ($media['type'] == self::AUDIO) {
+                               $body .= "\n[audio]" . $url . "[/audio]\n";
+                       } elseif ($media['type'] == self::VIDEO) {
+                               $body .= "\n[video]" . $url . "[/video]\n";
+                       } elseif (($media['type'] == self::IMAGE) && empty($media['description'])) {
+                               $body .= "\n[img]" . $url . '[/img]';
+                       } elseif ($media['type'] == self::IMAGE) {
+                               $body .= "\n[img=" . $url . ']' . $media['description'] . '[/img]';
+                       }
+               }
+
+               // The attachment element is taken from the unmodified body of the post.
+               if (preg_match("/.*(\[attachment.*?\].*?\[\/attachment\]).*/ism", $item['body'], $match)) {
+                       $body .= "\n" . $match[1];
+               }
+
+               return $body;
        }
 }