]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Media elements added
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
29 /**
30  * Tumblr Neue Post Format
31  * @see https://www.tumblr.com/docs/npf
32  */
33 class NPF
34 {
        /**
         * NPF heading subtype ('heading1' or 'heading2') per HTML heading tag name ('h1' to 'h6').
         * Rebuilt by setHeadingSubStyles() for each document and read by getSubTypeByCallstack().
         *
         * @var array
         */
        static $heading_subtype = [];
36
37         /**
38          * Convert BBCode into NPF (Tumblr Neue Post Format)
39          *
40          * @param string $bbcode
41          * @param integer $uri_id
42          * @return array NPF
43          */
44         static public function fromBBCode(string $bbcode, int $uri_id): array
45         {
46                 $bbcode = self::prepareBody($bbcode);
47
48                 $html = BBCode::convert($bbcode, false, BBCode::NPF);
49                 if (empty($html)) {
50                         return [];
51                 }
52
53                 $doc = new DOMDocument();
54                 $doc->formatOutput = true;
55                 if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
56                         return [];
57                 }
58
59                 self::setHeadingSubStyles($doc);
60
61                 $element = $doc->getElementsByTagName('body')->item(0);
62
63                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, true, []);
64
65                 return self::addLinkBlockForUriId($uri_id, 0, $npf);
66         }
67
68         /**
69          * Fetch the heading types
70          *
71          * @param DOMDocument $doc
72          * @return void
73          */
74         static function setHeadingSubStyles(DOMDocument $doc)
75         {
76                 self::$heading_subtype = [];
77                 foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) {
78                         if ($doc->getElementsByTagName($element)->count() > 0) {
79                                 if (empty(self::$heading_subtype)) {
80                                         self::$heading_subtype[$element] = 'heading1';
81                                 } else {
82                                         self::$heading_subtype[$element] = 'heading2';
83                                 }
84                         }
85                 }
86         }
87
88         /**
89          * Prepare the BBCode for the NPF conversion
90          *
91          * @param string $bbcode
92          * @return string
93          */
94         static private function prepareBody(string $bbcode): string
95         {
96                 $shared = BBCode::fetchShareAttributes($bbcode);
97                 if (!empty($shared)) {
98                         $bbcode = $shared['shared'];
99                 }
100
101                 $bbcode = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $bbcode);
102
103                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) {
104                         foreach ($pictures as $picture) {
105                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
106                                         $bbcode = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $bbcode);
107                                 }
108                         }
109                 }
110
111                 $bbcode = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $bbcode);
112
113                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) {
114                         foreach ($pictures as $picture) {
115                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
116                                         $bbcode = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $bbcode);
117                                 }
118                         }
119                 }
120
121                 $bbcode = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $bbcode);
122
123                 do {
124                         $oldbbcode = $bbcode;
125                         $bbcode = str_replace(["\n\n\n"], ["\n\n"], $bbcode);
126                 } while ($oldbbcode != $bbcode);
127
128                 return trim($bbcode);
129         }
130
131         /**
132          * Walk recursively through the HTML
133          *
134          * @param DOMElement $element
135          * @param integer $uri_id
136          * @param boolean $parse_structure
137          * @param array $callstack
138          * @param array $npf
139          * @param string $text
140          * @param array $formatting
141          * @return array
142          */
143         static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
144         {
145                 if ($parse_structure && $text) {
146                         list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
147                 }
148
149                 $callstack[] = $element->nodeName;
150                 $level = self::getLevelByCallstack($callstack);
151
152                 foreach ($element->childNodes as $child) {
153                         switch ($child->nodeName) {
154                                 case 'b':
155                                 case 'strong':
156                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
157                                         break;
158         
159                                 case 'i':
160                                 case 'em':
161                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
162                                         break;
163         
164                                 case 's':
165                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
166                                         break;
167
168                                 case 'u':
169                                 case 'span':
170                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
171                                         break;
172
173                                 case 'hr':
174                                 case 'br':
175                                         if (!empty($text)) {
176                                                 $text .= "\n";
177                                         }
178                                         break;
179                 
180                                 case '#text':
181                                         $text .= $child->textContent;
182                                         break;
183
184                                 case 'table':
185                                 case 'summary':
186                                         // Ignore tables and spoilers
187                                         break;
188
189                                 case 'a':
190                                         list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
191                                         break;
192
193                                 case 'img':
194                                         $npf = self::addImageBlock($child, $uri_id, $level, $npf);
195                                         break;
196
197                                 case 'audio':
198                                 case 'video':
199                                         $npf = self::addMediaBlock($child, $uri_id, $level, $npf);
200                                         break;
201         
202                                 default:
203                                         list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
204                                         break;
205                         }
206                 }
207
208                 if ($parse_structure && $text) {
209                         list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
210                 }
211                 return [$npf, $text, $formatting];
212         }
213
214         /**
215          * Return the correct indent level
216          *
217          * @param array $callstack
218          * @return integer
219          */
220         static private function getLevelByCallstack(array $callstack): int
221         {
222                 $level = 0;
223                 foreach ($callstack as $entry) {
224                         if (in_array($entry, ['ol', 'ul', 'blockquote'])) {
225                                 ++$level;
226                         }
227                 }
228                 return max(0, $level - 1);
229         }
230
231         /**
232          * Detect the subtype via the HTML element callstack
233          *
234          * @param array $callstack
235          * @param string $text
236          * @return string
237          */
238         static private function getSubTypeByCallstack(array $callstack, string $text): string
239         {
240                 $subtype = '';
241                 foreach ($callstack as $entry) {
242                         switch ($entry) {
243                                 case 'ol':
244                                         $subtype = 'ordered-list-item';
245                                         break;
246
247                                 case 'ul':
248                                         $subtype = 'unordered-list-item';
249                                         break;
250
251                                 case 'h1':
252                                         $subtype = self::$heading_subtype[$entry];
253                                         break;
254         
255                                 case 'h2':
256                                         $subtype = self::$heading_subtype[$entry];
257                                         break;
258         
259                                 case 'h3':
260                                         $subtype = self::$heading_subtype[$entry];
261                                         break;
262         
263                                 case 'h4':
264                                         $subtype = self::$heading_subtype[$entry];
265                                         break;
266         
267                                 case 'h5':
268                                         $subtype = self::$heading_subtype[$entry];
269                                         break;
270         
271                                 case 'h6':
272                                         $subtype = self::$heading_subtype[$entry];
273                                         break;
274         
275                                 case 'blockquote':                                      
276                                         $subtype = mb_strlen($text) < 100 ? 'quote' : 'indented';
277                                         break;
278
279                                 case 'pre':
280                                         $subtype = 'indented';
281                                         break;
282
283                                 case 'code':
284                                         $subtype = 'chat';
285                                         break;
286                         }
287                 }
288                 return $subtype;
289         }
290
291         /**
292          * Add formatting for a text block
293          *
294          * @param DOMElement $element
295          * @param integer $uri_id
296          * @param string $type
297          * @param array $callstack
298          * @param array $npf
299          * @param string $text
300          * @param array $formatting
301          * @return array
302          */
303         static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
304         {
305                 $start = mb_strlen($text);
306                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
307
308                 if (!empty($type)) {
309                         $formatting[] = [
310                                 'start' => $start,
311                                 'end'   => mb_strlen($text),
312                                 'type'  => $type
313                         ];
314                 }
315                 return [$npf, $text, $formatting];
316         }
317
318         /**
319          * Add an inline link for a text block
320          *
321          * @param DOMElement $element
322          * @param integer $uri_id
323          * @param array $callstack
324          * @param array $npf
325          * @param string $text
326          * @param array $formatting
327          * @return array
328          */
329         static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
330         {
331                 $start = mb_strlen($text);
332                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
333
334                 $attributes = [];
335                 foreach ($element->attributes as $key => $attribute) {
336                         $attributes[$key] = trim($attribute->value);
337                 }
338                 if (!empty($attributes['href'])) {
339                         $formatting[] = [
340                                 'start' => $start,
341                                 'end'   => mb_strlen($text),
342                                 'type'  => 'link',
343                                 'url'   => $attributes['href']
344                         ];
345                 }
346                 return [$npf, $text, $formatting];
347         }
348
349         /**
350          * Add a text block
351          *
352          * @param string $text
353          * @param array $formatting
354          * @param array $npf
355          * @param array $callstack
356          * @return array
357          */
358         static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array
359         {
360                 $block = [
361                         'type'      => 'text',
362                         'subtype'   => '',
363                         'text'      => $text,
364                 ];
365
366                 if (!empty($formatting)) {
367                         $block['formatting'] = $formatting;
368                 }
369
370                 $level = self::getLevelByCallstack($callstack);
371                 if ($level > 0) {
372                         $block['indent_level'] = $level;
373                 }
374
375                 $subtype = self::getSubTypeByCallstack($callstack, $text);
376                 if ($subtype) {
377                         $block['subtype'] = $subtype;
378                 } else {
379                         unset($block['subtype']);
380                 }
381
382                 $npf[] = $block;
383                 $text = '';
384                 $formatting = [];
385                 return [$npf, $text, $formatting];
386         }
387
388         /**
389          * Add a block for a preview picture
390          *
391          * @param array $media
392          * @param array $block
393          * @return array
394          */
395         static private function addPoster(array $media, array $block): array
396         {
397                 $poster = [];
398                 if (!empty($media['preview'])) {
399                         $poster['url'] = $media['preview'];
400                 }
401                 if (!empty($media['preview-width'])) {
402                         $poster['width'] = $media['preview-width'];
403                 }
404                 if (!empty($media['preview-height'])) {
405                         $poster['height'] = $media['preview-height'];
406                 }
407                 if (!empty($poster)) {
408                         $block['poster'] = [$poster];
409                 }
410                 return $block;
411         }
412
413         /**
414          * Add a link block from the HTML attachment of a given post uri-id
415          *
416          * @param integer $uri_id
417          * @param integer $level
418          * @param array $npf
419          * @return array
420          */
421         static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
422         {
423                 foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
424                         $host = parse_url($link['url'], PHP_URL_HOST);
425                         if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
426                                 $block = [
427                                         'type'     => 'video',
428                                         'provider' => 'youtube',
429                                         'url'      => $link['url'],
430                                 ];
431                         } elseif (in_array($host, ['vimeo.com'])) {
432                                 $block = [
433                                         'type'     => 'video',
434                                         'provider' => 'vimeo',
435                                         'url'      => $link['url'],
436                                 ];
437                         } elseif (in_array($host, ['open.spotify.com'])) {
438                                 $block = [
439                                         'type'     => 'audio',
440                                         'provider' => 'spotify',
441                                         'url'      => $link['url'],
442                                 ];
443                         } else {
444                                 $block = [
445                                         'type' => 'link',
446                                         'url'  => $link['url'],
447                                 ];
448                                 if (!empty($link['name'])) {
449                                         $block['title'] = $link['name'];
450                                 }
451                                 if (!empty($link['description'])) {
452                                         $block['description'] = $link['description'];
453                                 }
454                                 if (!empty($link['author-name'])) {
455                                         $block['author'] = $link['author-name'];
456                                 }
457                                 if (!empty($link['publisher-name'])) {
458                                         $block['site_name'] = $link['publisher-name'];
459                                 }
460                         }
461
462                         if ($level > 0) {
463                                 $block['indent_level'] = $level;
464                         }
465
466                         $npf[] = self::addPoster($link, $block);
467                 }
468                 return $npf;
469         }
470
471         /**
472          * Add an image block
473          *
474          * @param DOMElement $element
475          * @param integer $uri_id
476          * @param integer $level
477          * @param array $npf
478          * @return array
479          */
480         static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
481         {
482                 $attributes = [];
483                 foreach ($element->attributes as $key => $attribute) {
484                         $attributes[$key] = trim($attribute->value);
485                 }
486                 if (empty($attributes['src'])) {
487                         return $npf;
488                 }
489
490                 $block = [
491                         'type'  => 'image',
492                         'media' => [],
493                 ];
494
495                 if (!empty($attributes['alt'])) {
496                         $block['alt_text'] = $attributes['alt'];
497                 }
498
499                 if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
500                         $block['caption'] = $attributes['title'];
501                 }
502
503                 $rid = Photo::ridFromURI($attributes['src']);
504                 if (!empty($rid)) {
505                         $photos = Photo::selectToArray([], ['resource-id' => $rid]);
506                         foreach ($photos as $photo) {
507                                 $block['media'][] = [
508                                         'type'   => $photo['type'],
509                                         'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
510                                         'width'  => $photo['width'],
511                                         'height' => $photo['height'],
512                                 ];
513                         }
514                         if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
515                                 $block['alt_text'] = $photos[0]['desc'];
516                         }
517                 } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
518                         $block['media'][] = [
519                                 'type'   => $media['mimetype'],
520                                 'url'    => $media['url'],
521                                 'width'  => $media['width'],
522                                 'height' => $media['height'],
523                         ];
524                         if (empty($attributes['alt']) && !empty($media['description'])) {
525                                 $block['alt_text'] = $media['description'];
526                         }
527                 } else {
528                         $block['media'][] = ['url' => $attributes['src']];
529                 }
530
531                 if ($level > 0) {
532                         $block['indent_level'] = $level;
533                 }
534
535                 $npf[] = $block;
536
537                 return $npf;
538         }
539
540         /**
541          * Add an audio or video block
542          *
543          * @param DOMElement $element
544          * @param integer $uri_id
545          * @param integer $level
546          * @param array $npf
547          * @return array
548          */
549         static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
550         {
551                 $attributes = [];
552                 foreach ($element->attributes as $key => $attribute) {
553                         $attributes[$key] = trim($attribute->value);
554                 }
555                 if (empty($attributes['src'])) {
556                         return $npf;
557                 }
558
559                 $media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::AUDIO, Post\Media::VIDEO]);
560                 if (!empty($media)) {
561                         switch ($media['type']) {
562                                 case Post\Media::AUDIO:
563                                         $block = [
564                                                 'type' => 'audio',
565                                                 'media' => [
566                                                         'type' => $media['mimetype'],
567                                                         'url'  => $media['url'],
568                                                 ]
569                                         ];
570
571                                         if (!empty($media['name'])) {
572                                                 $block['title'] = $media['name'];
573                                         } elseif (!empty($media['description'])) {
574                                                 $block['title'] = $media['description'];
575                                         }
576
577                                         $block = self::addPoster($media, $block);
578                                         break;
579
580                                 case Post\Media::VIDEO:
581                                         $block = [
582                                                 'type' => 'video',
583                                                 'media' => [
584                                                         'type' => $media['mimetype'],
585                                                         'url'  => $media['url'],
586                                                 ]
587                                         ];
588
589                                         $block = self::addPoster($media, $block);
590                                         break;
591                         }
592                 } else {
593                         $block = [
594                                 'type' => 'text',
595                                 'text' => $element->textContent,
596                                 'formatting' => [
597                                         [
598                                                 'start' => 0,
599                                                 'end'   => mb_strlen($element->textContent),
600                                                 'type'  => 'link',
601                                                 'url'   => $attributes['src']
602                                         ]
603                                 ]
604                         ];
605                 }
606
607                 if ($level > 0) {
608                         $block['indent_level'] = $level;
609                 }
610
611                 $npf[] = $block;
612
613                 return $npf;
614         }
615 }