]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Merge remote-tracking branch 'upstream/2023.03-rc' into npf2
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
29 /**
30  * Tumblr Neue Post Format
31  * @see https://www.tumblr.com/docs/npf
32  */
33 class NPF
34 {
35         static public function fromBBCode(string $bbcode, int $uri_id): array
36         {
37                 $npf = [];
38
39                 $bbcode = self::prepareBody($bbcode);
40
41                 $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
42                 if (empty($html)) {
43                         return [];
44                 }
45
46                 $doc = new DOMDocument();
47                 if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
48                         return [];
49                 }
50
51                 $element = $doc->getElementsByTagName('body')->item(0);
52                 $npf = self::routeChildren($element, $uri_id, 0, $npf);
53
54                 return self::addLinkBlock($uri_id, 0, $npf);
55         }
56
57         static private function prepareBody(string $body): string
58         {
59                 $shared = BBCode::fetchShareAttributes($body);
60                 if (!empty($shared)) {
61                         $body = $shared['shared'];
62                 }
63
64                 $body = BBCode::removeAttachment($body);
65
66                 $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
67
68                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
69                         foreach ($pictures as $picture) {
70                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
71                                         $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
72                                 }
73                         }
74                 }
75
76                 $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
77
78                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
79                         foreach ($pictures as $picture) {
80                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
81                                         $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
82                                 }
83                         }
84                 }
85
86                 $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
87                 $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
88                 $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
89
90                 do {
91                         $oldbody = $body;
92                         $body = str_replace(["\n\n\n"], ["\n\n"], $body);
93                 } while ($oldbody != $body);
94
95                 return trim($body);
96         }
97
98         static private function routeChildren(DOMElement $element, int $uri_id, int $level, array $npf): array
99         {
100                 $text       = '';
101                 $formatting = [];
102
103                 foreach ($element->childNodes as $child) {
104                         switch ($child->nodeName) {
105                                 case 'blockquote':
106                                         $npf = self::addText($text, $formatting, $npf);
107                                         $npf = self::addQuoteBlock($child, $uri_id, $level, $npf);
108                                         break;
109         
110                                 case 'h1':
111                                         $npf = self::addText($text, $formatting, $npf);
112                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1');
113                                         break;
114         
115                                 case 'h2':
116                                         $npf = self::addText($text, $formatting, $npf);
117                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1');
118                                         break;
119         
120                                 case 'h3':
121                                         $npf = self::addText($text, $formatting, $npf);
122                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1');
123                                         break;
124         
125                                 case 'h4':
126                                         $npf = self::addText($text, $formatting, $npf);
127                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2');
128                                         break;
129         
130                                 case 'h5':
131                                         $npf = self::addText($text, $formatting, $npf);
132                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2');
133                                         break;
134         
135                                 case 'h6':
136                                         $npf = self::addText($text, $formatting, $npf);
137                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2');
138                                         break;
139         
140                                 case 'ul':
141                                         $npf = self::addText($text, $formatting, $npf);
142                                         $npf = self::addListBlock($child, $uri_id, $level, $npf, false, 0);
143                                         break;
144         
145                                 case 'ol':
146                                         $npf = self::addText($text, $formatting, $npf);
147                                         $npf = self::addListBlock($child, $uri_id, $level, $npf, true, 0);
148                                         break;
149         
150                                 case 'hr':
151                                 case 'br':
152                                         $text .= "\n";
153                                         break;
154         
155                                 case 'pre':
156                                 case 'code':
157                                         $npf = self::addText($text, $formatting, $npf);
158                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'indented');
159                                         break;
160         
161                                 case 'a':
162                                         $npf = self::addText($text, $formatting, $npf);
163                                         $npf = self::addMediaBlock($child, $uri_id, $level, $npf);
164                                         break;
165         
166                                 case 'table':
167                                         // Unsupported
168                                         // $child->ownerDocument->saveHTML($child)
169                                         break;
170         
171                                 case 'img':
172                                         $npf = self::addText($text, $formatting, $npf);
173                                         $npf = self::addImageBlock($child, $uri_id, $level, $npf);
174                                         break;
175
176                                 case 'p':
177                                 case 'div':
178                                         $npf = self::addText($text, $formatting, $npf);
179                                         $npf = self::addTextBlock($child, $uri_id, $level, $npf);
180                                         break;
181
182                                 default:
183                                         $text .= $child->textContent;
184                                         break;
185                         }
186                 }
187                 return $npf;
188         }
189
190         static private function addText(string $text, array $formatting, array $npf): array
191         {
192                 if (empty($text)) {
193                         return $npf;
194                 }
195                 $block = [
196                         'type' => 'text',
197                         'text' => $text,
198                 ];
199
200                 if (!empty($formatting)) {
201                         $block['formatting'] = $formatting;
202                 }
203
204                 $npf[] = $block;
205
206                 return $npf;
207         }
208
209         static private function routeElement(DOMElement $element, int $uri_id, int $level, array $npf): array
210         {
211                 switch ($element->nodeName) {
212                         case 'blockquote':
213                                 $npf = self::addQuoteBlock($element, $uri_id, $level, $npf);
214                                 break;
215
216                         case 'h1':
217                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1');
218                                 break;
219
220                         case 'h2':
221                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1');
222                                 break;
223
224                         case 'h3':
225                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1');
226                                 break;
227
228                         case 'h4':
229                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2');
230                                 break;
231
232                         case 'h5':
233                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2');
234                                 break;
235
236                         case 'h6':
237                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2');
238                                 break;
239
240                         case 'ul':
241                                 $npf = self::addListBlock($element, $uri_id, $level, $npf, false, 0);
242                                 break;
243
244                         case 'ol':
245                                 $npf = self::addListBlock($element, $uri_id, $level, $npf, true, 0);
246                                 break;
247
248                         case 'hr':
249                         case 'br':
250                                 break;
251
252                         case 'pre':
253                         case 'code':
254                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'indented');
255                                 break;
256
257                         case 'a':
258                                 $npf = self::addMediaBlock($element, $uri_id, $level, $npf);
259                                 break;
260
261                         case 'table':
262                                 // Unsupported
263                                 // $element->ownerDocument->saveHTML($element)
264                                 break;
265
266                         case 'img':
267                                 $npf = self::addImageBlock($element, $uri_id, $level, $npf);
268                                 break;
269
270                         default:
271                                 $npf = self::addTextBlock($element, $uri_id, $level, $npf);
272                                 break;
273                 }
274                 return $npf;
275         }
276
277         static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
278         {
279                 $attributes = [];
280                 foreach ($element->attributes as $key => $attribute) {
281                         $attributes[$key] = $attribute->value;
282                 }
283                 if (empty($attributes['src'])) {
284                         return $npf;
285                 }
286
287                 $block = [
288                         'type'  => 'image',
289                         'media' => [],
290                 ];
291
292                 if (!empty($attributes['alt'])) {
293                         $block['alt_text'] = $attributes['alt'];
294                 }
295
296                 if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) {
297                         $block['caption'] = $attributes['title'];
298                 }
299
300                 $rid = Photo::ridFromURI($attributes['src']);
301                 if (!empty($rid)) {
302                         $photos = Photo::selectToArray([], ['resource-id' => $rid]);
303                         foreach ($photos as $photo) {
304                                 $block['media'][] = [
305                                         'type'   => $photo['type'],
306                                         'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
307                                         'width'  => $photo['width'],
308                                         'height' => $photo['height'],
309                                 ];
310                         }
311                         if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
312                                 $block['alt_text'] = $photos[0]['desc'];
313                         }
314                 } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
315                         $block['media'][] = [
316                                 'type'   => $media['mimetype'],
317                                 'url'    => $media['url'],
318                                 'width'  => $media['width'],
319                                 'height' => $media['height'],
320                         ];
321                         if (empty($attributes['alt']) && !empty($media['description'])) {
322                                 $block['alt_text'] = $media['description'];
323                         }
324                 } else {
325                         $block['media'][] = ['url' => $attributes['src']];
326                 }
327
328                 if ($level > 0) {
329                         $block['indent_level'] = $level;
330                 }
331
332                 $npf[] = $block;
333
334                 return $npf;
335         }
336
337         static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
338         {
339                 $attributes = [];
340                 foreach ($element->attributes as $key => $attribute) {
341                         $attributes[$key] = $attribute->value;
342                 }
343                 if (empty($attributes['href'])) {
344                         return $npf;
345                 }
346
347                 $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
348                 if (!empty($media)) {
349                         switch ($media['type']) {
350                                 case Post\Media::AUDIO:
351                                         $block = [
352                                                 'type' => 'audio',
353                                                 'media' => [
354                                                         'type' => $media['mimetype'],
355                                                         'url'  => $media['url'],
356                                                 ]
357                                         ];
358
359                                         if (!empty($media['name'])) {
360                                                 $block['title'] = $media['name'];
361                                         } elseif (!empty($media['description'])) {
362                                                 $block['title'] = $media['description'];
363                                         }
364
365                                         $block = self::addPoster($media, $block);
366                                         break;
367
368                                 case Post\Media::VIDEO:
369                                         $block = [
370                                                 'type' => 'video',
371                                                 'media' => [
372                                                         'type' => $media['mimetype'],
373                                                         'url'  => $media['url'],
374                                                 ]
375                                         ];
376
377                                         $block = self::addPoster($media, $block);
378                                         break;
379                         }
380                 } else {
381                         $block = [
382                                 'type' => 'text',
383                                 'text' => $element->textContent,
384                                 'formatting' => [
385                                         'start' => 0,
386                                         'end'   => strlen($element->textContent),
387                                         'type'  => 'link',
388                                         'url'   => $attributes['href']
389                                 ]
390                         ];
391                 }
392
393                 if ($level > 0) {
394                         $block['indent_level'] = $level;
395                 }
396
397                 $npf[] = $block;
398
399                 return $npf;
400         }
401
402         static private function addPoster(array $media, array $block): array
403         {
404                 $poster = [];
405                 if (!empty($media['preview'])) {
406                         $poster['url'] = $media['preview'];
407                 }
408                 if (!empty($media['preview-width'])) {
409                         $poster['width'] = $media['preview-width'];
410                 }
411                 if (!empty($media['preview-height'])) {
412                         $poster['height'] = $media['preview-height'];
413                 }
414                 if (!empty($poster)) {
415                         $block['poster'] = $poster;
416                 }
417                 return $block;
418         }
419
420         static private function getTypeForNodeName(string $nodename): string
421         {
422                 switch ($nodename) {
423                         case 'b':
424                         case 'strong':
425                                 return 'bold';
426
427                         case 'i':
428                         case 'em':
429                                 return 'italic';
430
431                         case 's':
432                                 return 'strikethrough';
433                 }
434                 return '';
435         }
436
437         static private function fetchText(DOMElement $element, array $text = ['text' => '', 'formatting' => []]): array
438         {
439                 foreach ($element->childNodes as $child) {
440                         $start = strlen($text['text']);
441
442                         $type = self::getTypeForNodeName($child->nodeName);
443
444                         if ($child->nodeName == 'br') {
445                                 $text['text'] .= "\n";
446                         } elseif (($type != '') || in_array($child->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) {
447                                 $text['text'] .= $child->textContent;
448                         } else {
449                                 echo $element->ownerDocument->saveHTML($element) . "\n";
450                                 die($child->nodeName . "\n");
451                         }
452                         if (!empty($type)) {
453                                 $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type];
454                         }
455                 }
456                 return $text;
457         }
458
459         static private function addQuoteBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
460         {
461                 $block = ['type' => 'text', 'subtype' => 'indented'];
462
463                 if ($level > 0) {
464                         $block['indent_level'] = $level;
465                 }
466
467                 $npf[] = $block;
468
469                 $npf = self::routeChildren($element, $uri_id, 0, $npf);
470
471                 return $npf;
472         }
473
474         static private function addTextBlock(DOMElement $element, int $uri_id, int $level, array $npf, string $subtype = ''): array
475         {
476                 if (empty($subtype) && ($element->textContent == $element->firstChild->textContent) && ($element->firstChild->nodeName != '#text')) {
477                         return self::routeElement($element->firstChild, $uri_id, $level, $npf);
478                 }
479
480                 $block = ['type' => 'text'];
481
482                 if (!empty($subtype)) {
483                         $block['subtype'] = $subtype;
484                 }
485
486                 $text = self::fetchText($element);
487
488                 $block['text']       = $text['text'];
489                 $block['formatting'] = $text['formatting'];
490
491                 if (empty($subtype)) {
492                         $type = self::getTypeForNodeName($element->nodeName);
493                         if (!empty($type)) {
494                                 $block['formatting'][] = ['start' => 0, 'end' => strlen($block['text']), 'type' => $type];
495                         }
496                 }
497
498                 if (empty($block['formatting'])) {
499                         unset($block['formatting']);
500                 }
501
502                 if ($level > 0) {
503                         $block['indent_level'] = $level;
504                 }
505
506                 $npf[] = $block;
507
508                 return $npf;
509         }
510
511         static private function addListBlock(DOMElement $element, int $uri_id, int $level, array $npf, bool $ordered): array
512         {
513                 foreach ($element->childNodes as $child) {
514                         switch ($child->nodeName) {
515                                 case 'ul':
516                                         $npf = self::addListBlock($child, $uri_id, $level++, $npf, false);
517                                 case 'ol':
518                                         $npf = self::addListBlock($child, $uri_id, $level++, $npf, true);
519                                 case 'li':
520                                         $text = self::fetchText($child);
521
522                                         $block = [
523                                                 'type'    => 'text',
524                                                 'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
525                                                 'text'    => $text['text']
526                                         ];
527                                         if ($level > 0) {
528                                                 $block['indent_level'] = $level;
529                                         }
530                                         if (!empty($text['formatting'])) {
531                                                 $block['formatting'] = $text['formatting'];
532                                         }
533                                         $npf[] = $block;
534                         }
535                 }
536
537                 return $npf;
538         }
539
540         static private function addLinkBlock(int $uri_id, int $level, array $npf): array
541         {
542                 foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
543                         $host = parse_url($link['url'], PHP_URL_HOST);
544                         if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
545                                 $block = [
546                                         'type'     => 'video',
547                                         'provider' => 'youtube',
548                                         'url'      => $link['url'],
549                                 ];
550                         } elseif (in_array($host, ['vimeo.com'])) {
551                                 $block = [
552                                         'type'     => 'video',
553                                         'provider' => 'vimeo',
554                                         'url'      => $link['url'],
555                                 ];
556                         } elseif (in_array($host, ['open.spotify.com'])) {
557                                 $block = [
558                                         'type'     => 'audio',
559                                         'provider' => 'spotify',
560                                         'url'      => $link['url'],
561                                 ];
562                         } else {
563                                 $block = [
564                                         'type' => 'link',
565                                         'url'  => $link['url'],
566                                 ];
567                                 if (!empty($link['name'])) {
568                                         $block['title'] = $link['name'];
569                                 }
570                                 if (!empty($link['description'])) {
571                                         $block['description'] = $link['description'];
572                                 }
573                                 if (!empty($link['author-name'])) {
574                                         $block['author'] = $link['author-name'];
575                                 }
576                                 if (!empty($link['publisher-name'])) {
577                                         $block['site_name'] = $link['publisher-name'];
578                                 }
579                         }
580
581                         if ($level > 0) {
582                                 $block['indent_level'] = $level;
583                         }
584
585                         $npf[] = self::addPoster($link, $block);
586                 }
587                 return $npf;
588         }
589 }