]> git.mxchange.org Git - friendica.git/blob - src/Model/Post/Media.php
Merge pull request #10206 from annando/attachments
[friendica.git] / src / Model / Post / Media.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2021, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Model\Post;
23
24 use Friendica\Content\PageInfo;
25 use Friendica\Content\Text\BBCode;
26 use Friendica\Core\Logger;
27 use Friendica\Core\System;
28 use Friendica\Database\Database;
29 use Friendica\Database\DBA;
30 use Friendica\DI;
31 use Friendica\Model\Item;
32 use Friendica\Model\Post;
33 use Friendica\Util\Images;
34 use Friendica\Util\ParseUrl;
35 use Friendica\Util\Strings;
36
37 /**
38  * Class Media
39  *
40  * This Model class handles media interactions.
41  * This table stores media (images, videos, audio files) related to posts.
42  */
43 class Media
44 {
45         const UNKNOWN     = 0;
46         const IMAGE       = 1;
47         const VIDEO       = 2;
48         const AUDIO       = 3;
49         const TEXT        = 4;
50         const APPLICATION = 5;
51         const TORRENT     = 16;
52         const HTML        = 17;
53         const XML         = 18;
54         const PLAIN       = 19;
55         const DOCUMENT    = 128;
56
57         /**
58          * Insert a post-media record
59          *
60          * @param array $media
61          * @return void
62          */
63         public static function insert(array $media, bool $force = false)
64         {
65                 if (empty($media['url']) || empty($media['uri-id']) || !isset($media['type'])) {
66                         Logger::warning('Incomplete media data', ['media' => $media]);
67                         return;
68                 }
69
70                 // "document" has got the lowest priority. So when the same file is both attached as document
71                 // and embedded as picture then we only store the picture or replace the document
72                 $found = DBA::selectFirst('post-media', ['type'], ['uri-id' => $media['uri-id'], 'url' => $media['url']]);
73                 if (!$force && !empty($found) && (($found['type'] != self::DOCUMENT) || ($media['type'] == self::DOCUMENT))) {
74                         Logger::info('Media already exists', ['uri-id' => $media['uri-id'], 'url' => $media['url'], 'callstack' => System::callstack()]);
75                         return;
76                 }
77
78                 $media = self::unsetEmptyFields($media);
79
80                 // We are storing as fast as possible to avoid duplicated network requests
81                 // when fetching additional information for pictures and other content.
82                 $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
83                 Logger::info('Stored media', ['result' => $result, 'media' => $media, 'callstack' => System::callstack()]);
84                 $stored = $media;
85
86                 $media = self::fetchAdditionalData($media);
87                 $media = self::unsetEmptyFields($media);
88
89                 if (array_diff_assoc($media, $stored)) {
90                         $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
91                         Logger::info('Updated media', ['result' => $result, 'media' => $media]);
92                 } else {
93                         Logger::info('Nothing to update', ['media' => $media]);
94                 }
95         }
96
97         /**
98          * Remove empty media fields
99          *
100          * @param array $media
101          * @return array cleaned media array
102          */
103         private static function unsetEmptyFields(array $media)
104         {
105                 $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description'];
106                 foreach ($fields as $field) {
107                         if (empty($media[$field])) {
108                                 unset($media[$field]);
109                         }
110                 }
111                 return $media;
112         }
113
114         /**
115          * Copy attachments from one uri-id to another
116          *
117          * @param integer $from_uri_id
118          * @param integer $to_uri_id
119          * @return void
120          */
121         public static function copy(int $from_uri_id, int $to_uri_id)
122         {
123                 $attachments = self::getByURIId($from_uri_id);
124                 foreach ($attachments as $attachment) {
125                         $attachment['uri-id'] = $to_uri_id;
126                         self::insert($attachment);
127                 }
128         }
129
130         /**
131          * Creates the "[attach]" element from the given attributes
132          *
133          * @param string $href
134          * @param integer $length
135          * @param string $type
136          * @param string $title
137          * @return string "[attach]" element
138          */
139         public static function getAttachElement(string $href, int $length, string $type, string $title = '')
140         {
141                 $media = self::fetchAdditionalData(['type' => self::DOCUMENT, 'url' => $href,
142                         'size' => $length, 'mimetype' => $type, 'description' => $title]);
143
144                 return '[attach]href="' . $media['url'] . '" length="' . $media['size'] .
145                         '" type="' . $media['mimetype'] . '" title="' . $media['description'] . '"[/attach]';
146         }
147
148         /**
149          * Fetch additional data for the provided media array
150          *
151          * @param array $media
152          * @return array media array with additional data
153          */
154         public static function fetchAdditionalData(array $media)
155         {
156                 // Fetch the mimetype or size if missing.
157                 if (empty($media['mimetype']) || empty($media['size'])) {
158                         $timeout = DI::config()->get('system', 'xrd_timeout');
159                         $curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]);
160                         if ($curlResult->isSuccess()) {
161                                 if (empty($media['mimetype'])) {
162                                         $media['mimetype'] = $curlResult->getHeader('Content-Type');
163                                 }
164                                 if (empty($media['size'])) {
165                                         $media['size'] = (int)$curlResult->getHeader('Content-Length');
166                                 }
167                         } else {
168                                 Logger::notice('Could not fetch head', ['media' => $media]);
169                         }
170                 }
171
172                 $filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
173
174                 if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
175                         $imagedata = Images::getInfoFromURLCached($media['url']);
176                         if (!empty($imagedata)) {
177                                 $media['mimetype'] = $imagedata['mime'];
178                                 $media['size'] = $imagedata['size'];
179                                 $media['width'] = $imagedata[0];
180                                 $media['height'] = $imagedata[1];
181                         } else {
182                                 Logger::notice('No image data', ['media' => $media]);
183                         }
184                         if (!empty($media['preview'])) {
185                                 $imagedata = Images::getInfoFromURLCached($media['preview']);
186                                 if (!empty($imagedata)) {
187                                         $media['preview-width'] = $imagedata[0];
188                                         $media['preview-height'] = $imagedata[1];
189                                 }
190                         }
191                 }
192
193                 if ($media['type'] != self::DOCUMENT) {
194                         $media = self::addType($media);
195                 }
196
197                 if ($media['type'] == self::HTML) {
198                         $data = ParseUrl::getSiteinfoCached($media['url'], false);
199                         $media['preview'] = $data['images'][0]['src'] ?? null;
200                         $media['preview-height'] = $data['images'][0]['height'] ?? null;
201                         $media['preview-width'] = $data['images'][0]['width'] ?? null;
202                         $media['description'] = $data['text'] ?? null;
203                         $media['name'] = $data['title'] ?? null;
204                         $media['author-url'] = $data['author_url'] ?? null;
205                         $media['author-name'] = $data['author_name'] ?? null;
206                         $media['author-image'] = $data['author_img'] ?? null;
207                         $media['publisher-url'] = $data['publisher_url'] ?? null;
208                         $media['publisher-name'] = $data['publisher_name'] ?? null;
209                         $media['publisher-image'] = $data['publisher_img'] ?? null;
210                 }
211                 return $media;
212         }
213
214         /**
215          * Add the detected type to the media array
216          *
217          * @param array $data
218          * @return array data array with the detected type
219          */
220         public static function addType(array $data)
221         {
222                 if (empty($data['mimetype'])) {
223                         Logger::info('No MimeType provided', ['media' => $data]);
224                         return $data;
225                 }
226
227                 $type = explode('/', current(explode(';', $data['mimetype'])));
228                 if (count($type) < 2) {
229                         Logger::info('Unknown MimeType', ['type' => $type, 'media' => $data]);
230                         $data['type'] = self::UNKNOWN;
231                         return $data;
232                 }
233
234                 $filetype = strtolower($type[0]);
235                 $subtype = strtolower($type[1]);
236
237                 if ($filetype == 'image') {
238                         $data['type'] = self::IMAGE;
239                 } elseif ($filetype == 'video') {
240                         $data['type'] = self::VIDEO;
241                 } elseif ($filetype == 'audio') {
242                         $data['type'] = self::AUDIO;
243                 } elseif (($filetype == 'text') && ($subtype == 'html')) {
244                         $data['type'] = self::HTML;
245                 } elseif (($filetype == 'text') && ($subtype == 'xml')) {
246                         $data['type'] = self::XML;
247                 } elseif (($filetype == 'text') && ($subtype == 'plain')) {
248                         $data['type'] = self::PLAIN;
249                 } elseif ($filetype == 'text') {
250                         $data['type'] = self::TEXT;
251                 } elseif (($filetype == 'application') && ($subtype == 'x-bittorrent')) {
252                         $data['type'] = self::TORRENT;
253                 } elseif ($filetype == 'application') {
254                         $data['type'] = self::APPLICATION;
255                 } else {
256                         $data['type'] = self::UNKNOWN;
257                         Logger::info('Unknown type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
258                         return $data;
259                 }
260
261                 Logger::debug('Detected type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
262                 return $data;
263         }
264
265         /**
266          * Tests for path patterns that are usef for picture links in Friendica
267          *
268          * @param string $page    Link to the image page
269          * @param string $preview Preview picture
270          * @return boolean
271          */
272         private static function isPictureLink(string $page, string $preview)
273         {
274                 return preg_match('#/photos/.*/image/#ism', $page) && preg_match('#/photo/.*-1\.#ism', $preview);
275         }
276
277         /**
278          * Add media links and remove them from the body
279          *
280          * @param integer $uriid
281          * @param string $body
282          * @return string Body without media links
283          */
284         public static function insertFromBody(int $uriid, string $body)
285         {
286                 // Simplify image codes
287                 $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
288
289                 $unshared_body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
290
291                 $attachments = [];
292                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
293                         foreach ($pictures as $picture) {
294                                 if (!self::isPictureLink($picture[1], $picture[2])) {
295                                         continue;
296                                 }
297                                 $body = str_replace($picture[0], '', $body);
298                                 $image = str_replace('-1.', '-0.', $picture[2]);
299                                 $attachments[$image] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
300                                         'preview' => $picture[2], 'description' => $picture[3]];
301                         }
302                 }
303
304                 if (preg_match_all("/\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) {
305                         foreach ($pictures as $picture) {
306                                 $body = str_replace($picture[0], '', $body);
307                                 $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1], 'description' => $picture[2]];
308                         }
309                 }
310
311                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
312                         foreach ($pictures as $picture) {
313                                 if (!self::isPictureLink($picture[1], $picture[2])) {
314                                         continue;
315                                 }
316                                 $body = str_replace($picture[0], '', $body);
317                                 $image = str_replace('-1.', '-0.', $picture[2]);
318                                 $attachments[$image] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
319                                         'preview' => $picture[2], 'description' => null];
320                         }
321                 }
322
323                 if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]/ism", $body, $pictures, PREG_SET_ORDER)) {
324                         foreach ($pictures as $picture) {
325                                 $body = str_replace($picture[0], '', $body);
326                                 $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1]];
327                         }
328                 }
329
330                 if (preg_match_all("/\[audio\]([^\[\]]*)\[\/audio\]/ism", $body, $audios, PREG_SET_ORDER)) {
331                         foreach ($audios as $audio) {
332                                 $body = str_replace($audio[0], '', $body);
333                                 $attachments[$audio[1]] = ['uri-id' => $uriid, 'type' => self::AUDIO, 'url' => $audio[1]];
334                         }
335                 }
336
337                 if (preg_match_all("/\[video\]([^\[\]]*)\[\/video\]/ism", $body, $videos, PREG_SET_ORDER)) {
338                         foreach ($videos as $video) {
339                                 $body = str_replace($video[0], '', $body);
340                                 $attachments[$video[1]] = ['uri-id' => $uriid, 'type' => self::VIDEO, 'url' => $video[1]];
341                         }
342                 }
343
344                 foreach ($attachments as $attachment) {
345                         // Only store attachments that are part of the unshared body
346                         if (strpos($unshared_body, $attachment['url']) !== false) {
347                                 self::insert($attachment);
348                         }
349                 }
350
351                 return trim($body);
352         }
353
354         /**
355          * Add media links from a relevant url in the body
356          *
357          * @param integer $uriid
358          * @param string $body
359          */
360         public static function insertFromRelevantUrl(int $uriid, string $body)
361         {
362                 // Don't look at the shared content
363                 $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
364
365                 // Remove all hashtags and mentions
366                 $body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body);
367
368                 // Search for pure links
369                 if (preg_match_all("/\[url\](https?:.*?)\[\/url\]/ism", $body, $matches)) {
370                         foreach ($matches[1] as $url) {
371                                 Logger::info('Got page url (link without description)', ['uri-id' => $uriid, 'url' => $url]);
372                                 self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url]);
373                         }
374                 }
375
376                 // Search for links with descriptions
377                 if (preg_match_all("/\[url\=(https?:.*?)\].*?\[\/url\]/ism", $body, $matches)) {
378                         foreach ($matches[1] as $url) {
379                                 Logger::info('Got page url (link with description)', ['uri-id' => $uriid, 'url' => $url]);
380                                 self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url]);
381                         }
382                 }
383         }
384
385         /**
386          * Add media links from the attachment field
387          *
388          * @param integer $uriid
389          * @param string $body
390          */
391         public static function insertFromAttachmentData(int $uriid, string $body)
392         {
393                 // Don't look at the shared content
394                 $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
395
396                 $data = BBCode::getAttachmentData($body);
397                 if (empty($data))  {
398                         return;
399                 }
400
401                 Logger::info('Adding attachment data', ['data' => $data]);
402                 $attachment = [
403                         'uri-id' => $uriid,
404                         'type' => self::HTML,
405                         'url' => $data['url'],
406                         'preview' => $data['preview'] ?? null,
407                         'description' => $data['description'] ?? null,
408                         'name' => $data['title'] ?? null,
409                         'author-url' => $data['author_url'] ?? null,
410                         'author-name' => $data['author_name'] ?? null,
411                         'publisher-url' => $data['provider_url'] ?? null,
412                         'publisher-name' => $data['provider_name'] ?? null,
413                 ];
414                 if (!empty($data['image'])) {
415                         $attachment['preview'] = $data['image'];
416                 }
417                 self::insert($attachment);
418         }
419
420         /**
421          * Add media links from the attach field
422          *
423          * @param integer $uriid
424          * @param string $attach
425          * @return void
426          */
427         public static function insertFromAttachment(int $uriid, string $attach)
428         {
429                 if (!preg_match_all('|\[attach\]href=\"(.*?)\" length=\"(.*?)\" type=\"(.*?)\"(?: title=\"(.*?)\")?|', $attach, $matches, PREG_SET_ORDER)) {
430                         return;
431                 }
432
433                 foreach ($matches as $attachment) {
434                         $media['type'] = self::DOCUMENT;
435                         $media['uri-id'] = $uriid;
436                         $media['url'] = $attachment[1];
437                         $media['size'] = $attachment[2];
438                         $media['mimetype'] = $attachment[3];
439                         $media['description'] = $attachment[4] ?? '';
440
441                         self::insert($media);
442                 }
443         }
444
445         /**
446          * Retrieves the media attachments associated with the provided item ID.
447          *
448          * @param int $uri_id
449          * @param array $types
450          * @return array
451          * @throws \Exception
452          */
453         public static function getByURIId(int $uri_id, array $types = [])
454         {
455                 $condition = ['uri-id' => $uri_id];
456
457                 if (!empty($types)) {
458                         $condition = DBA::mergeConditions($condition, ['type' => $types]);
459                 }
460
461                 return DBA::selectToArray('post-media', [], $condition);
462         }
463
464         /**
465          * Checks if media attachments are associated with the provided item ID.
466          *
467          * @param int $uri_id
468          * @param array $types
469          * @return array
470          * @throws \Exception
471          */
472         public static function existsByURIId(int $uri_id, array $types = [])
473         {
474                 $condition = ['uri-id' => $uri_id];
475
476                 if (!empty($types)) {
477                         $condition = DBA::mergeConditions($condition, ['type' => $types]);
478                 }
479
480                 return DBA::exists('post-media', $condition);
481         }
482
483         /**
484          * Split the attachment media in the three segments "visual", "link" and "additional"
485          * 
486          * @param int    $uri_id 
487          * @param string $guid
488          * @param array  $links ist of links that shouldn't be added 
489          * @return array attachments
490          */
491         public static function splitAttachments(int $uri_id, string $guid = '', array $links = [])
492         {
493                 $attachments = ['visual' => [], 'link' => [], 'additional' => []];
494
495                 $media = self::getByURIId($uri_id);
496                 if (empty($media)) {
497                         return $attachments;
498                 }
499
500                 $height = 0;
501                 $selected = '';
502
503                 foreach ($media as $medium) {
504                         foreach ($links as $link) {
505                                 if (Strings::compareLink($link, $medium['url'])) {
506                                         continue 2;
507                                 }
508                         }
509
510                         $type = explode('/', current(explode(';', $medium['mimetype'])));
511                         if (count($type) < 2) {
512                                 Logger::info('Unknown MimeType', ['type' => $type, 'media' => $medium]);
513                                 $filetype = 'unkn';
514                                 $subtype = 'unkn';
515                         } else {
516                                 $filetype = strtolower($type[0]);
517                                 $subtype = strtolower($type[1]);
518                         }
519
520                         $medium['filetype'] = $filetype;
521                         $medium['subtype'] = $subtype;
522
523                         if ($medium['type'] == self::HTML || (($filetype == 'text') && ($subtype == 'html'))) {
524                                 $attachments['link'][] = $medium;
525                                 continue;
526                         }
527
528                         if (in_array($medium['type'], [self::AUDIO, self::IMAGE]) ||
529                                 in_array($filetype, ['audio', 'image'])) {
530                                 $attachments['visual'][] = $medium;
531                         } elseif (($medium['type'] == self::VIDEO) || ($filetype == 'video')) {
532                                 if (strpos($medium['url'], $guid) !== false) {
533                                         // Peertube videos are delivered in many different resolutions. We pick a moderate one.
534                                         // By checking against the GUID we also ensure to only work this way on Peertube posts.
535                                         // This wouldn't be executed when someone for example on Mastodon was sharing multiple videos in a single post.
536                                         if (empty($height) || ($height > $medium['height']) && ($medium['height'] >= 480)) {
537                                                 $height = $medium['height'];
538                                                 $selected = $medium['url'];
539                                         }
540                                         $video[$medium['url']] = $medium;
541                                 } else {
542                                         $attachments['visual'][] = $medium;
543                                 }
544                         } else {
545                                 $attachments['additional'][] = $medium;
546                         }
547                 }
548                 if (!empty($selected)) {
549                         $attachments['visual'][] = $video[$selected];
550                         unset($video[$selected]);
551                         foreach ($video as $element) {
552                                 $attachments['additional'][] = $element;
553                         }
554                 }
555                 return $attachments;
556         }
557
558         /**
559          * Add media attachments to the body
560          *
561          * @param int $uriid
562          * @param string $body
563          * @return string body
564          */
565         public static function addAttachmentsToBody(int $uriid, string $body = '')
566         {
567                 if (empty($body)) {
568                         $item = Post::selectFirst(['body'], ['uri-id' => $uriid]);
569                         if (!DBA::isResult($item)) {
570                                 return '';
571                         }
572                         $body = $item['body'];
573                 }
574                 $original_body = $body;
575
576                 $body = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", '', $body);
577
578                 foreach (self::getByURIId($uriid, [self::IMAGE, self::AUDIO, self::VIDEO]) as $media) {
579                         if (Item::containsLink($body, $media['url'])) {
580                                 continue;
581                         }
582
583                         if ($media['type'] == self::IMAGE) {
584                                 if (!empty($media['preview'])) {
585                                         if (!empty($media['description'])) {
586                                                 $body .= "\n[url=" . $media['url'] . "][img=" . $media['preview'] . ']' . $media['description'] .'[/img][/url]';
587                                         } else {
588                                                 $body .= "\n[url=" . $media['url'] . "][img]" . $media['preview'] .'[/img][/url]';
589                                         }
590                                 } else {
591                                         if (!empty($media['description'])) {
592                                                 $body .= "\n[img=" . $media['url'] . ']' . $media['description'] .'[/img]';
593                                         } else {
594                                                 $body .= "\n[img]" . $media['url'] .'[/img]';
595                                         }
596                                 }
597                         } elseif ($media['type'] == self::AUDIO) {
598                                 $body .= "\n[audio]" . $media['url'] . "[/audio]\n";
599                         } elseif ($media['type'] == self::VIDEO) {
600                                 $body .= "\n[video]" . $media['url'] . "[/video]\n";
601                         }
602                 }
603
604                 if (preg_match("/.*(\[attachment.*?\].*?\[\/attachment\]).*/ism", $original_body, $match)) {
605                         $body .= "\n" . $match[1];
606                 }
607
608                 return $body;
609         }
610 }