]> git.mxchange.org Git - friendica.git/blob - src/Model/Post/Media.php
6f78b09051a7de8f838db6e6ef4d14868f0778ad
[friendica.git] / src / Model / Post / Media.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2021, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Model\Post;
23
24 use Friendica\Content\Text\BBCode;
25 use Friendica\Core\Logger;
26 use Friendica\Core\System;
27 use Friendica\Database\Database;
28 use Friendica\Database\DBA;
29 use Friendica\DI;
30 use Friendica\Model\Item;
31 use Friendica\Model\Post;
32 use Friendica\Util\Images;
33 use Friendica\Util\ParseUrl;
34 use Friendica\Util\Strings;
35
36 /**
37  * Class Media
38  *
39  * This Model class handles media interactions.
40  * This table stores media (images, videos, audio files) related to posts.
41  */
42 class Media
43 {
44         const UNKNOWN     = 0;
45         const IMAGE       = 1;
46         const VIDEO       = 2;
47         const AUDIO       = 3;
48         const TEXT        = 4;
49         const APPLICATION = 5;
50         const TORRENT     = 16;
51         const HTML        = 17;
52         const XML         = 18;
53         const PLAIN       = 19;
54         const DOCUMENT    = 128;
55
56         /**
57          * Insert a post-media record
58          *
59          * @param array $media
60          * @return void
61          */
62         public static function insert(array $media, bool $force = false)
63         {
64                 if (empty($media['url']) || empty($media['uri-id']) || !isset($media['type'])) {
65                         Logger::warning('Incomplete media data', ['media' => $media]);
66                         return;
67                 }
68
69                 if (DBA::exists('post-media', ['uri-id' => $media['uri-id'], 'preview' => $media['url']])) {
70                         Logger::info('Media already exists as preview', ['uri-id' => $media['uri-id'], 'url' => $media['url'], 'callstack' => System::callstack()]);
71                         return;
72                 }
73
74                 // "document" has got the lowest priority. So when the same file is both attached as document
75                 // and embedded as picture then we only store the picture or replace the document
76                 $found = DBA::selectFirst('post-media', ['type'], ['uri-id' => $media['uri-id'], 'url' => $media['url']]);
77                 if (!$force && !empty($found) && (($found['type'] != self::DOCUMENT) || ($media['type'] == self::DOCUMENT))) {
78                         Logger::info('Media already exists', ['uri-id' => $media['uri-id'], 'url' => $media['url'], 'callstack' => System::callstack()]);
79                         return;
80                 }
81
82                 $media = self::unsetEmptyFields($media);
83
84                 // We are storing as fast as possible to avoid duplicated network requests
85                 // when fetching additional information for pictures and other content.
86                 $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
87                 Logger::info('Stored media', ['result' => $result, 'media' => $media, 'callstack' => System::callstack()]);
88                 $stored = $media;
89
90                 $media = self::fetchAdditionalData($media);
91                 $media = self::unsetEmptyFields($media);
92
93                 if (array_diff_assoc($media, $stored)) {
94                         $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
95                         Logger::info('Updated media', ['result' => $result, 'media' => $media]);
96                 } else {
97                         Logger::info('Nothing to update', ['media' => $media]);
98                 }
99         }
100
101         /**
102          * Remove empty media fields
103          *
104          * @param array $media
105          * @return array cleaned media array
106          */
107         private static function unsetEmptyFields(array $media)
108         {
109                 $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description'];
110                 foreach ($fields as $field) {
111                         if (empty($media[$field])) {
112                                 unset($media[$field]);
113                         }
114                 }
115                 return $media;
116         }
117
118         /**
119          * Copy attachments from one uri-id to another
120          *
121          * @param integer $from_uri_id
122          * @param integer $to_uri_id
123          * @return void
124          */
125         public static function copy(int $from_uri_id, int $to_uri_id)
126         {
127                 $attachments = self::getByURIId($from_uri_id);
128                 foreach ($attachments as $attachment) {
129                         $attachment['uri-id'] = $to_uri_id;
130                         self::insert($attachment);
131                 }
132         }
133
134         /**
135          * Creates the "[attach]" element from the given attributes
136          *
137          * @param string $href
138          * @param integer $length
139          * @param string $type
140          * @param string $title
141          * @return string "[attach]" element
142          */
143         public static function getAttachElement(string $href, int $length, string $type, string $title = '')
144         {
145                 $media = self::fetchAdditionalData(['type' => self::DOCUMENT, 'url' => $href,
146                         'size' => $length, 'mimetype' => $type, 'description' => $title]);
147
148                 return '[attach]href="' . $media['url'] . '" length="' . $media['size'] .
149                         '" type="' . $media['mimetype'] . '" title="' . $media['description'] . '"[/attach]';
150         }
151
152         /**
153          * Fetch additional data for the provided media array
154          *
155          * @param array $media
156          * @return array media array with additional data
157          */
158         public static function fetchAdditionalData(array $media)
159         {
160                 // Fetch the mimetype or size if missing.
161                 if (empty($media['mimetype']) || empty($media['size'])) {
162                         $timeout = DI::config()->get('system', 'xrd_timeout');
163                         $curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]);
164                         if ($curlResult->isSuccess()) {
165                                 if (empty($media['mimetype'])) {
166                                         $media['mimetype'] = $curlResult->getHeader('Content-Type');
167                                 }
168                                 if (empty($media['size'])) {
169                                         $media['size'] = (int)$curlResult->getHeader('Content-Length');
170                                 }
171                         } else {
172                                 Logger::notice('Could not fetch head', ['media' => $media]);
173                         }
174                 }
175
176                 $filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
177
178                 if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
179                         $imagedata = Images::getInfoFromURLCached($media['url']);
180                         if (!empty($imagedata)) {
181                                 $media['mimetype'] = $imagedata['mime'];
182                                 $media['size'] = $imagedata['size'];
183                                 $media['width'] = $imagedata[0];
184                                 $media['height'] = $imagedata[1];
185                         } else {
186                                 Logger::notice('No image data', ['media' => $media]);
187                         }
188                         if (!empty($media['preview'])) {
189                                 $imagedata = Images::getInfoFromURLCached($media['preview']);
190                                 if (!empty($imagedata)) {
191                                         $media['preview-width'] = $imagedata[0];
192                                         $media['preview-height'] = $imagedata[1];
193                                 }
194                         }
195                 }
196
197                 if ($media['type'] != self::DOCUMENT) {
198                         $media = self::addType($media);
199                 }
200
201                 if ($media['type'] == self::HTML) {
202                         $data = ParseUrl::getSiteinfoCached($media['url'], false);
203                         $media['preview'] = $data['images'][0]['src'] ?? null;
204                         $media['preview-height'] = $data['images'][0]['height'] ?? null;
205                         $media['preview-width'] = $data['images'][0]['width'] ?? null;
206                         $media['description'] = $data['text'] ?? null;
207                         $media['name'] = $data['title'] ?? null;
208                         $media['author-url'] = $data['author_url'] ?? null;
209                         $media['author-name'] = $data['author_name'] ?? null;
210                         $media['author-image'] = $data['author_img'] ?? null;
211                         $media['publisher-url'] = $data['publisher_url'] ?? null;
212                         $media['publisher-name'] = $data['publisher_name'] ?? null;
213                         $media['publisher-image'] = $data['publisher_img'] ?? null;
214                 }
215                 return $media;
216         }
217
218         /**
219          * Add the detected type to the media array
220          *
221          * @param array $data
222          * @return array data array with the detected type
223          */
224         public static function addType(array $data)
225         {
226                 if (empty($data['mimetype'])) {
227                         Logger::info('No MimeType provided', ['media' => $data]);
228                         return $data;
229                 }
230
231                 $type = explode('/', current(explode(';', $data['mimetype'])));
232                 if (count($type) < 2) {
233                         Logger::info('Unknown MimeType', ['type' => $type, 'media' => $data]);
234                         $data['type'] = self::UNKNOWN;
235                         return $data;
236                 }
237
238                 $filetype = strtolower($type[0]);
239                 $subtype = strtolower($type[1]);
240
241                 if ($filetype == 'image') {
242                         $data['type'] = self::IMAGE;
243                 } elseif ($filetype == 'video') {
244                         $data['type'] = self::VIDEO;
245                 } elseif ($filetype == 'audio') {
246                         $data['type'] = self::AUDIO;
247                 } elseif (($filetype == 'text') && ($subtype == 'html')) {
248                         $data['type'] = self::HTML;
249                 } elseif (($filetype == 'text') && ($subtype == 'xml')) {
250                         $data['type'] = self::XML;
251                 } elseif (($filetype == 'text') && ($subtype == 'plain')) {
252                         $data['type'] = self::PLAIN;
253                 } elseif ($filetype == 'text') {
254                         $data['type'] = self::TEXT;
255                 } elseif (($filetype == 'application') && ($subtype == 'x-bittorrent')) {
256                         $data['type'] = self::TORRENT;
257                 } elseif ($filetype == 'application') {
258                         $data['type'] = self::APPLICATION;
259                 } else {
260                         $data['type'] = self::UNKNOWN;
261                         Logger::info('Unknown type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
262                         return $data;
263                 }
264
265                 Logger::debug('Detected type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
266                 return $data;
267         }
268
269         /**
270          * Tests for path patterns that are usef for picture links in Friendica
271          *
272          * @param string $page    Link to the image page
273          * @param string $preview Preview picture
274          * @return boolean
275          */
276         private static function isPictureLink(string $page, string $preview)
277         {
278                 return preg_match('#/photos/.*/image/#ism', $page) && preg_match('#/photo/.*-1\.#ism', $preview);
279         }
280
281         /**
282          * Add media links and remove them from the body
283          *
284          * @param integer $uriid
285          * @param string $body
286          * @return string Body without media links
287          */
288         public static function insertFromBody(int $uriid, string $body)
289         {
290                 // Simplify image codes
291                 $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
292
293                 $unshared_body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
294
295                 $attachments = [];
296                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
297                         foreach ($pictures as $picture) {
298                                 if (!self::isPictureLink($picture[1], $picture[2])) {
299                                         continue;
300                                 }
301                                 $body = str_replace($picture[0], '', $body);
302                                 $image = str_replace('-1.', '-0.', $picture[2]);
303                                 $attachments[$image] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
304                                         'preview' => $picture[2], 'description' => $picture[3]];
305                         }
306                 }
307
308                 if (preg_match_all("/\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) {
309                         foreach ($pictures as $picture) {
310                                 $body = str_replace($picture[0], '', $body);
311                                 $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1], 'description' => $picture[2]];
312                         }
313                 }
314
315                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
316                         foreach ($pictures as $picture) {
317                                 if (!self::isPictureLink($picture[1], $picture[2])) {
318                                         continue;
319                                 }
320                                 $body = str_replace($picture[0], '', $body);
321                                 $image = str_replace('-1.', '-0.', $picture[2]);
322                                 $attachments[$image] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
323                                         'preview' => $picture[2], 'description' => null];
324                         }
325                 }
326
327                 if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]/ism", $body, $pictures, PREG_SET_ORDER)) {
328                         foreach ($pictures as $picture) {
329                                 $body = str_replace($picture[0], '', $body);
330                                 $attachments[$picture[1]] = ['uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $picture[1]];
331                         }
332                 }
333
334                 if (preg_match_all("/\[audio\]([^\[\]]*)\[\/audio\]/ism", $body, $audios, PREG_SET_ORDER)) {
335                         foreach ($audios as $audio) {
336                                 $body = str_replace($audio[0], '', $body);
337                                 $attachments[$audio[1]] = ['uri-id' => $uriid, 'type' => self::AUDIO, 'url' => $audio[1]];
338                         }
339                 }
340
341                 if (preg_match_all("/\[video\]([^\[\]]*)\[\/video\]/ism", $body, $videos, PREG_SET_ORDER)) {
342                         foreach ($videos as $video) {
343                                 $body = str_replace($video[0], '', $body);
344                                 $attachments[$video[1]] = ['uri-id' => $uriid, 'type' => self::VIDEO, 'url' => $video[1]];
345                         }
346                 }
347
348                 foreach ($attachments as $attachment) {
349                         // Only store attachments that are part of the unshared body
350                         if (strpos($unshared_body, $attachment['url']) !== false) {
351                                 self::insert($attachment);
352                         }
353                 }
354
355                 return trim($body);
356         }
357
358         /**
359          * Add media links from a relevant url in the body
360          *
361          * @param integer $uriid
362          * @param string $body
363          */
364         public static function insertFromRelevantUrl(int $uriid, string $body)
365         {
366                 // Don't look at the shared content
367                 $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
368
369                 // Remove all hashtags and mentions
370                 $body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body);
371
372                 // Search for pure links
373                 if (preg_match_all("/\[url\](https?:.*?)\[\/url\]/ism", $body, $matches)) {
374                         foreach ($matches[1] as $url) {
375                                 Logger::info('Got page url (link without description)', ['uri-id' => $uriid, 'url' => $url]);
376                                 self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url]);
377                         }
378                 }
379
380                 // Search for links with descriptions
381                 if (preg_match_all("/\[url\=(https?:.*?)\].*?\[\/url\]/ism", $body, $matches)) {
382                         foreach ($matches[1] as $url) {
383                                 Logger::info('Got page url (link with description)', ['uri-id' => $uriid, 'url' => $url]);
384                                 self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url]);
385                         }
386                 }
387         }
388
389         /**
390          * Add media links from the attachment field
391          *
392          * @param integer $uriid
393          * @param string $body
394          */
395         public static function insertFromAttachmentData(int $uriid, string $body)
396         {
397                 // Don't look at the shared content
398                 $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
399
400                 $data = BBCode::getAttachmentData($body);
401                 if (empty($data))  {
402                         return;
403                 }
404
405                 Logger::info('Adding attachment data', ['data' => $data]);
406                 $attachment = [
407                         'uri-id' => $uriid,
408                         'type' => self::HTML,
409                         'url' => $data['url'],
410                         'preview' => $data['preview'] ?? null,
411                         'description' => $data['description'] ?? null,
412                         'name' => $data['title'] ?? null,
413                         'author-url' => $data['author_url'] ?? null,
414                         'author-name' => $data['author_name'] ?? null,
415                         'publisher-url' => $data['provider_url'] ?? null,
416                         'publisher-name' => $data['provider_name'] ?? null,
417                 ];
418                 if (!empty($data['image'])) {
419                         $attachment['preview'] = $data['image'];
420                 }
421                 self::insert($attachment);
422         }
423
424         /**
425          * Add media links from the attach field
426          *
427          * @param integer $uriid
428          * @param string $attach
429          * @return void
430          */
431         public static function insertFromAttachment(int $uriid, string $attach)
432         {
433                 if (!preg_match_all('|\[attach\]href=\"(.*?)\" length=\"(.*?)\" type=\"(.*?)\"(?: title=\"(.*?)\")?|', $attach, $matches, PREG_SET_ORDER)) {
434                         return;
435                 }
436
437                 foreach ($matches as $attachment) {
438                         $media['type'] = self::DOCUMENT;
439                         $media['uri-id'] = $uriid;
440                         $media['url'] = $attachment[1];
441                         $media['size'] = $attachment[2];
442                         $media['mimetype'] = $attachment[3];
443                         $media['description'] = $attachment[4] ?? '';
444
445                         self::insert($media);
446                 }
447         }
448
449         /**
450          * Retrieves the media attachments associated with the provided item ID.
451          *
452          * @param int $uri_id
453          * @param array $types
454          * @return array
455          * @throws \Exception
456          */
457         public static function getByURIId(int $uri_id, array $types = [])
458         {
459                 $condition = ['uri-id' => $uri_id];
460
461                 if (!empty($types)) {
462                         $condition = DBA::mergeConditions($condition, ['type' => $types]);
463                 }
464
465                 return DBA::selectToArray('post-media', [], $condition);
466         }
467
468         /**
469          * Checks if media attachments are associated with the provided item ID.
470          *
471          * @param int $uri_id
472          * @param array $types
473          * @return array
474          * @throws \Exception
475          */
476         public static function existsByURIId(int $uri_id, array $types = [])
477         {
478                 $condition = ['uri-id' => $uri_id];
479
480                 if (!empty($types)) {
481                         $condition = DBA::mergeConditions($condition, ['type' => $types]);
482                 }
483
484                 return DBA::exists('post-media', $condition);
485         }
486
487         /**
488          * Split the attachment media in the three segments "visual", "link" and "additional"
489          * 
490          * @param int    $uri_id 
491          * @param string $guid
492          * @param array  $links ist of links that shouldn't be added 
493          * @return array attachments
494          */
495         public static function splitAttachments(int $uri_id, string $guid = '', array $links = [])
496         {
497                 $attachments = ['visual' => [], 'link' => [], 'additional' => []];
498
499                 $media = self::getByURIId($uri_id);
500                 if (empty($media)) {
501                         return $attachments;
502                 }
503
504                 $height = 0;
505                 $selected = '';
506                 $previews = [];
507
508                 foreach ($media as $medium) {
509                         foreach ($links as $link) {
510                                 if (Strings::compareLink($link, $medium['url'])) {
511                                         continue 2;
512                                 }
513                         }
514
515                         // Avoid adding separate media entries for previews
516                         foreach ($previews as $preview) {
517                                 if (Strings::compareLink($preview, $medium['url'])) {
518                                         continue 2;
519                                 }
520                         }
521                         
522                         if (!empty($medium['preview'])) {
523                                 $previews[] = $medium['preview'];
524                         }
525
526                         $type = explode('/', current(explode(';', $medium['mimetype'])));
527                         if (count($type) < 2) {
528                                 Logger::info('Unknown MimeType', ['type' => $type, 'media' => $medium]);
529                                 $filetype = 'unkn';
530                                 $subtype = 'unkn';
531                         } else {
532                                 $filetype = strtolower($type[0]);
533                                 $subtype = strtolower($type[1]);
534                         }
535
536                         $medium['filetype'] = $filetype;
537                         $medium['subtype'] = $subtype;
538
539                         if ($medium['type'] == self::HTML || (($filetype == 'text') && ($subtype == 'html'))) {
540                                 $attachments['link'][] = $medium;
541                                 continue;
542                         }
543
544                         if (in_array($medium['type'], [self::AUDIO, self::IMAGE]) ||
545                                 in_array($filetype, ['audio', 'image'])) {
546                                 $attachments['visual'][] = $medium;
547                         } elseif (($medium['type'] == self::VIDEO) || ($filetype == 'video')) {
548                                 if (strpos($medium['url'], $guid) !== false) {
549                                         // Peertube videos are delivered in many different resolutions. We pick a moderate one.
550                                         // By checking against the GUID we also ensure to only work this way on Peertube posts.
551                                         // This wouldn't be executed when someone for example on Mastodon was sharing multiple videos in a single post.
552                                         if (empty($height) || ($height > $medium['height']) && ($medium['height'] >= 480)) {
553                                                 $height = $medium['height'];
554                                                 $selected = $medium['url'];
555                                         }
556                                         $video[$medium['url']] = $medium;
557                                 } else {
558                                         $attachments['visual'][] = $medium;
559                                 }
560                         } else {
561                                 $attachments['additional'][] = $medium;
562                         }
563                 }
564                 if (!empty($selected)) {
565                         $attachments['visual'][] = $video[$selected];
566                         unset($video[$selected]);
567                         foreach ($video as $element) {
568                                 $attachments['additional'][] = $element;
569                         }
570                 }
571                 return $attachments;
572         }
573
574         /**
575          * Add media attachments to the body
576          *
577          * @param int $uriid
578          * @param string $body
579          * @return string body
580          */
581         public static function addAttachmentsToBody(int $uriid, string $body = '')
582         {
583                 if (empty($body)) {
584                         $item = Post::selectFirst(['body'], ['uri-id' => $uriid]);
585                         if (!DBA::isResult($item)) {
586                                 return '';
587                         }
588                         $body = $item['body'];
589                 }
590                 $original_body = $body;
591
592                 $body = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", '', $body);
593
594                 foreach (self::getByURIId($uriid, [self::IMAGE, self::AUDIO, self::VIDEO]) as $media) {
595                         if (Item::containsLink($body, $media['url'])) {
596                                 continue;
597                         }
598
599                         if ($media['type'] == self::IMAGE) {
600                                 if (!empty($media['preview'])) {
601                                         if (!empty($media['description'])) {
602                                                 $body .= "\n[url=" . $media['url'] . "][img=" . $media['preview'] . ']' . $media['description'] .'[/img][/url]';
603                                         } else {
604                                                 $body .= "\n[url=" . $media['url'] . "][img]" . $media['preview'] .'[/img][/url]';
605                                         }
606                                 } else {
607                                         if (!empty($media['description'])) {
608                                                 $body .= "\n[img=" . $media['url'] . ']' . $media['description'] .'[/img]';
609                                         } else {
610                                                 $body .= "\n[img]" . $media['url'] .'[/img]';
611                                         }
612                                 }
613                         } elseif ($media['type'] == self::AUDIO) {
614                                 $body .= "\n[audio]" . $media['url'] . "[/audio]\n";
615                         } elseif ($media['type'] == self::VIDEO) {
616                                 $body .= "\n[video]" . $media['url'] . "[/video]\n";
617                         }
618                 }
619
620                 if (preg_match("/.*(\[attachment.*?\].*?\[\/attachment\]).*/ism", $original_body, $match)) {
621                         $body .= "\n" . $match[1];
622                 }
623
624                 return $body;
625         }
626 }