]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Improved NPF
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
/**
 * Converts Friendica BBCode into content blocks of the Tumblr Neue Post Format (NPF)
 *
 * The BBCode is first rendered to HTML, then the top level HTML elements are
 * translated one by one into NPF blocks (text, image, audio, video, link).
 *
 * @see https://www.tumblr.com/docs/npf
 */
class NPF
{
        /**
         * Converts a BBCode body into a list of NPF content blocks
         *
         * @param string $bbcode BBCode body of the post
         * @param int    $uri_id URI id of the post, used to look up the attached media
         * @return array List of NPF blocks, empty when the body cannot be rendered or parsed
         */
        public static function fromBBCode(string $bbcode, int $uri_id): array
        {
                $bbcode = self::prepareBody($bbcode);

                $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
                if (empty($html)) {
                        return [];
                }

                $doc = new DOMDocument();
                if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
                        return [];
                }

                $body = $doc->getElementsByTagName('body')->item(0);
                if ($body === null) {
                        // Nothing renderable was parsed
                        return [];
                }

                $npf = [];
                foreach ($body->childNodes as $child) {
                        if ($child->nodeName === '#text') {
                                $npf[] = [
                                        'type' => 'text',
                                        'text' => $child->textContent,
                                ];
                        } elseif ($child instanceof DOMElement) {
                                $npf = self::routeElements($child, $uri_id, $npf);
                        }
                        // Any other node kind (e.g. comments) carries no content and is skipped
                }

                return self::addLinkBlock($uri_id, $npf);
        }

        /**
         * Prepares a BBCode body for the conversion
         *
         * - a shared post is replaced by its shared content
         * - the attachment is removed
         * - image dimensions are dropped
         * - preview pictures linking to their full size photo are replaced by the full size photo
         * - images, audio and video elements are isolated on their own paragraphs
         *
         * @param string $body BBCode body
         * @return string Prepared BBCode body
         */
        public static function prepareBody(string $body): string
        {
                $shared = BBCode::fetchShareAttributes($body);
                if (!empty($shared)) {
                        $body = $shared['shared'];
                }

                $body = BBCode::removeAttachment($body);

                $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);

                // Linked pictures with a description: [url=link][img=picture]description[/img][/url]
                if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                if (self::isLinkedPhoto($picture[2], $picture[1])) {
                                        $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
                                }
                        }
                }

                $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);

                // Linked pictures without a description: [url=link][img]picture[/img][/url]
                if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
                        foreach ($pictures as $picture) {
                                if (self::isLinkedPhoto($picture[2], $picture[1])) {
                                        $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
                                }
                        }
                }

                $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
                $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
                $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);

                // Collapse every run of three or more line feeds into exactly two
                do {
                        $oldbody = $body;
                        $body = str_replace("\n\n\n", "\n\n", $body);
                } while ($oldbody !== $body);

                return trim($body);
        }

        /**
         * Checks if a picture is a photo preview (scale 0 or 1) that links to its full size photo
         *
         * @param string $image URL of the embedded picture
         * @param string $link  URL the picture links to
         * @return bool
         */
        private static function isLinkedPhoto(string $image, string $link): bool
        {
                return preg_match('#/photo/.*-[01]\.#ism', $image)
                        && (preg_match('#/photo/.*-0\.#ism', $link) || preg_match('#/photos/.*/image/#ism', $link));
        }

        /**
         * Dispatches an HTML element to the matching block builder
         *
         * @param DOMElement $child  HTML element
         * @param int        $uri_id URI id of the post
         * @param array      $npf    Blocks collected so far
         * @return array Blocks including the ones created for the element
         */
        private static function routeElements(DOMElement $child, int $uri_id, array $npf): array
        {
                switch ($child->nodeName) {
                        case 'blockquote':
                        case 'pre':
                        case 'code':
                                return self::addTextBlock($child, $uri_id, $npf, 'indented');

                        case 'h1':
                        case 'h2':
                        case 'h3':
                                return self::addTextBlock($child, $uri_id, $npf, 'heading1');

                        case 'h4':
                        case 'h5':
                        case 'h6':
                                return self::addTextBlock($child, $uri_id, $npf, 'heading2');

                        case 'ul':
                                return self::addListBlock($child, $uri_id, $npf, false, 0);

                        case 'ol':
                                return self::addListBlock($child, $uri_id, $npf, true, 0);

                        case 'hr':
                        case 'br':
                        case 'table':
                                // No block is created for these elements (tables are unsupported)
                                return $npf;

                        case 'a':
                                return self::addMediaBlock($child, $uri_id, $npf);

                        case 'img':
                                return self::addImageBlock($child, $uri_id, $npf);

                        default:
                                return self::addTextBlock($child, $uri_id, $npf);
                }
        }

        /**
         * Returns the attributes of an element as "name => value" array
         *
         * @param DOMElement $element
         * @return array
         */
        private static function getAttributes(DOMElement $element): array
        {
                $attributes = [];
                foreach ($element->attributes as $key => $attribute) {
                        $attributes[$key] = $attribute->value;
                }
                return $attributes;
        }

        /**
         * Adds an image block for an "img" element
         *
         * The media list is filled from the local photo (all scales), from the post
         * media or - as last resort - with the bare source URL.
         *
         * @param DOMElement $child  "img" element
         * @param int        $uri_id URI id of the post
         * @param array      $npf    Blocks collected so far
         * @return array Blocks including the image block, unchanged when the element has no source
         */
        private static function addImageBlock(DOMElement $child, int $uri_id, array $npf): array
        {
                $attributes = self::getAttributes($child);
                if (empty($attributes['src'])) {
                        return $npf;
                }

                $entry = [
                        'type'  => 'image',
                        'media' => [],
                ];

                if (!empty($attributes['alt'])) {
                        $entry['alt_text'] = $attributes['alt'];
                }

                // The parentheses around the "??" expression are required: "!=" binds tighter than "??",
                // without them the caption was added even when it only repeated the alternative text.
                if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
                        $entry['caption'] = $attributes['title'];
                }

                $rid = Photo::ridFromURI($attributes['src']);
                if (!empty($rid)) {
                        $photos = Photo::selectToArray([], ['resource-id' => $rid]);
                        foreach ($photos as $photo) {
                                $entry['media'][] = [
                                        'type'   => $photo['type'],
                                        'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
                                        'width'  => $photo['width'],
                                        'height' => $photo['height'],
                                ];
                        }
                        if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
                                $entry['alt_text'] = $photos[0]['desc'];
                        }
                } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
                        $entry['media'][] = [
                                'type'   => $media['mimetype'],
                                'url'    => $media['url'],
                                'width'  => $media['width'],
                                'height' => $media['height'],
                        ];
                        if (empty($attributes['alt']) && !empty($media['description'])) {
                                $entry['alt_text'] = $media['description'];
                        }
                } else {
                        $entry['media'][] = ['url' => $attributes['src']];
                }

                $npf[] = $entry;

                return $npf;
        }

        /**
         * Adds a block for an "a" element
         *
         * Links to audio or video media of the post become audio/video blocks.
         * Every other link becomes a text block that is formatted as link.
         *
         * @param DOMElement $child  "a" element
         * @param int        $uri_id URI id of the post
         * @param array      $npf    Blocks collected so far
         * @return array Blocks including the ones created for the link
         */
        private static function addMediaBlock(DOMElement $child, int $uri_id, array $npf): array
        {
                $attributes = self::getAttributes($child);
                if (empty($attributes['href'])) {
                        return $npf;
                }

                $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
                if (!empty($media)) {
                        switch ($media['type']) {
                                case Post\Media::AUDIO:
                                        $entry = [
                                                'type'  => 'audio',
                                                'media' => [
                                                        'type' => $media['mimetype'],
                                                        'url'  => $media['url'],
                                                ]
                                        ];

                                        if (!empty($media['name'])) {
                                                $entry['title'] = $media['name'];
                                        } elseif (!empty($media['description'])) {
                                                $entry['title'] = $media['description'];
                                        }

                                        $npf[] = self::addPoster($media, $entry);
                                        break;

                                case Post\Media::VIDEO:
                                        $entry = [
                                                'type'  => 'video',
                                                'media' => [
                                                        'type' => $media['mimetype'],
                                                        'url'  => $media['url'],
                                                ]
                                        ];

                                        $npf[] = self::addPoster($media, $entry);
                                        break;
                        }
                        return $npf;
                }

                $text = $child->textContent;
                if ($text === '') {
                        // A link without text (e.g. a linked picture) would result in an empty text block.
                        // Its child elements are converted instead so that the content isn't lost.
                        foreach ($child->childNodes as $node) {
                                if ($node instanceof DOMElement) {
                                        $npf = self::routeElements($node, $uri_id, $npf);
                                }
                        }
                        return $npf;
                }

                $npf[] = [
                        'type'       => 'text',
                        'text'       => $text,
                        // "formatting" is a list of ranges, even when there is only a single one
                        'formatting' => [
                                [
                                        'start' => 0,
                                        'end'   => mb_strlen($text, 'UTF-8'),
                                        'type'  => 'link',
                                        'url'   => $attributes['href']
                                ]
                        ]
                ];

                return $npf;
        }

        /**
         * Adds the preview picture of a media entry as poster to a block
         *
         * The poster is stored as a list with a single media object. It is only
         * added when a preview URL exists, since a poster without URL is of no use.
         *
         * @param array $media Media entry, the fields "preview", "preview-width" and "preview-height" are used
         * @param array $entry NPF block
         * @return array NPF block, extended by "poster" when a preview exists
         */
        private static function addPoster(array $media, array $entry): array
        {
                if (empty($media['preview'])) {
                        return $entry;
                }

                $poster = ['url' => $media['preview']];

                if (!empty($media['preview-width'])) {
                        $poster['width'] = $media['preview-width'];
                }
                if (!empty($media['preview-height'])) {
                        $poster['height'] = $media['preview-height'];
                }

                $entry['poster'] = [$poster];

                return $entry;
        }

        /**
         * Returns the NPF inline formatting type for an HTML element name
         *
         * @param string $nodename Name of the HTML element
         * @return string "bold", "italic", "strikethrough" or an empty string for every other element
         */
        private static function getTypeForNodeName(string $nodename): string
        {
                switch ($nodename) {
                        case 'b':
                        case 'strong':
                                return 'bold';

                        case 'i':
                        case 'em':
                                return 'italic';

                        case 's':
                                return 'strikethrough';
                }
                return '';
        }

        /**
         * Collects the text and the inline formatting of all children of an element
         *
         * Child elements are processed recursively, so nested formatting and line
         * breaks inside of child elements are preserved. Elements without a known
         * formatting simply contribute their text. Range offsets are counted in
         * characters, not in bytes.
         *
         * @param DOMElement $child Element whose children are collected
         * @param array      $text  Accumulator with the keys "text" and "formatting"
         * @param array      $skip  Names of direct child nodes that are ignored (not applied to deeper levels)
         * @return array Accumulator extended by the content of the element
         */
        private static function fetchText(DOMElement $child, array $text = ['text' => '', 'formatting' => []], array $skip = []): array
        {
                foreach ($child->childNodes as $node) {
                        if (in_array($node->nodeName, $skip, true)) {
                                continue;
                        }

                        if ($node->nodeName === 'br') {
                                $text['text'] .= "\n";
                                continue;
                        }

                        if (!$node instanceof DOMElement) {
                                // Only text carrying nodes are of interest, comments and the like are ignored
                                if (in_array($node->nodeName, ['#text', '#cdata-section'], true)) {
                                        $text['text'] .= $node->textContent;
                                }
                                continue;
                        }

                        $start = mb_strlen($text['text'], 'UTF-8');
                        $text  = self::fetchText($node, $text);
                        $end   = mb_strlen($text['text'], 'UTF-8');

                        if ($end === $start) {
                                // Nothing was added, so there is nothing to format
                                continue;
                        }

                        $type = self::getTypeForNodeName($node->nodeName);
                        if ($type !== '') {
                                $text['formatting'][] = ['start' => $start, 'end' => $end, 'type' => $type];
                        } elseif (($node->nodeName === 'a') && ($node->getAttribute('href') !== '')) {
                                $text['formatting'][] = ['start' => $start, 'end' => $end, 'type' => 'link', 'url' => $node->getAttribute('href')];
                        }
                }
                return $text;
        }

        /**
         * Adds a text block for an element
         *
         * An element without subtype that only wraps a single child element
         * is replaced by that child, so that wrapped images, links, lists, ...
         * are converted by their own block builder.
         *
         * @param DOMElement $child   HTML element
         * @param int        $uri_id  URI id of the post
         * @param array      $npf     Blocks collected so far
         * @param string     $subtype NPF text subtype ("heading1", "indented", ...), empty for plain text
         * @return array Blocks including the ones created for the element, unchanged for an element without children
         */
        private static function addTextBlock(DOMElement $child, int $uri_id, array $npf, string $subtype = ''): array
        {
                $first = $child->firstChild;
                if ($first === null) {
                        // An element without any child has no content
                        return $npf;
                }

                if (empty($subtype) && ($first instanceof DOMElement) && ($child->textContent === $first->textContent)) {
                        return self::routeElements($first, $uri_id, $npf);
                }

                $element = ['type' => 'text'];

                if (!empty($subtype)) {
                        $element['subtype'] = $subtype;
                }

                $text = self::fetchText($child);

                $element['text']       = $text['text'];
                $element['formatting'] = $text['formatting'];

                if (empty($subtype)) {
                        // A formatting element on the top level (e.g. "<b>") formats the whole block
                        $type = self::getTypeForNodeName($child->nodeName);
                        if ($type !== '') {
                                $element['formatting'][] = ['start' => 0, 'end' => mb_strlen($element['text'], 'UTF-8'), 'type' => $type];
                        }
                }

                if (empty($element['formatting'])) {
                        unset($element['formatting']);
                }

                $npf[] = $element;

                return $npf;
        }

        /**
         * Adds the list item blocks for an "ul" or "ol" element
         *
         * Nested lists - placed inside of a list item or directly inside of the
         * list - are added with an increased indent level.
         *
         * @param DOMElement $child   "ul" or "ol" element
         * @param int        $uri_id  URI id of the post
         * @param array      $npf     Blocks collected so far
         * @param bool       $ordered true for an ordered list
         * @param int        $level   Indent level of the list, 0 for a top level list
         * @return array Blocks including the list items
         */
        private static function addListBlock(DOMElement $child, int $uri_id, array $npf, bool $ordered, int $level): array
        {
                foreach ($child->childNodes as $node) {
                        if (!$node instanceof DOMElement) {
                                continue;
                        }

                        switch ($node->nodeName) {
                                case 'ul':
                                        $npf = self::addListBlock($node, $uri_id, $npf, false, $level + 1);
                                        break;

                                case 'ol':
                                        $npf = self::addListBlock($node, $uri_id, $npf, true, $level + 1);
                                        break;

                                case 'li':
                                        $sublists = [];
                                        foreach ($node->childNodes as $subnode) {
                                                if (($subnode instanceof DOMElement) && in_array($subnode->nodeName, ['ul', 'ol'], true)) {
                                                        $sublists[] = $subnode;
                                                }
                                        }

                                        // The text of directly nested lists belongs to their own items
                                        $text = self::fetchText($node, ['text' => '', 'formatting' => []], ['ul', 'ol']);

                                        // An item that solely consists of nested lists doesn't get an (empty) block on its own
                                        if (($text['text'] !== '') || empty($sublists)) {
                                                $entry = [
                                                        'type'    => 'text',
                                                        'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
                                                        'text'    => $text['text']
                                                ];
                                                if ($level > 0) {
                                                        $entry['indent_level'] = $level;
                                                }
                                                if (!empty($text['formatting'])) {
                                                        $entry['formatting'] = $text['formatting'];
                                                }
                                                $npf[] = $entry;
                                        }

                                        foreach ($sublists as $sublist) {
                                                $npf = self::addListBlock($sublist, $uri_id, $npf, $sublist->nodeName === 'ol', $level + 1);
                                        }
                                        break;
                        }
                }

                return $npf;
        }

        /**
         * Adds blocks for the links that are attached to the post
         *
         * Links to known media providers become video or audio blocks,
         * every other link becomes a link block with its available meta data.
         *
         * @param int   $uri_id URI id of the post
         * @param array $npf    Blocks collected so far
         * @return array Blocks including the link blocks
         */
        private static function addLinkBlock(int $uri_id, array $npf): array
        {
                // host => [block type, provider]
                $providers = [
                        'www.youtube.com'  => ['video', 'youtube'],
                        'youtu.be'         => ['video', 'youtube'],
                        'vimeo.com'        => ['video', 'vimeo'],
                        'open.spotify.com' => ['audio', 'spotify'],
                ];

                foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
                        // Host names are case insensitive, parse_url() keeps the original case
                        $host = strtolower((string)parse_url($link['url'], PHP_URL_HOST));

                        if (isset($providers[$host])) {
                                $entry = [
                                        'type'     => $providers[$host][0],
                                        'provider' => $providers[$host][1],
                                        'url'      => $link['url'],
                                ];
                        } else {
                                $entry = [
                                        'type' => 'link',
                                        'url'  => $link['url'],
                                ];
                                if (!empty($link['name'])) {
                                        $entry['title'] = $link['name'];
                                }
                                if (!empty($link['description'])) {
                                        $entry['description'] = $link['description'];
                                }
                                if (!empty($link['author-name'])) {
                                        $entry['author'] = $link['author-name'];
                                }
                                if (!empty($link['publisher-name'])) {
                                        $entry['site_name'] = $link['publisher-name'];
                                }
                        }

                        $npf[] = self::addPoster($link, $entry);
                }
                return $npf;
        }
}