]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Merge remote-tracking branch 'upstream/2023.03-rc' into npf2
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use DOMNode;
27 use Friendica\Model\Photo;
28 use Friendica\Model\Post;
29
30 /**
31  * Tumblr Neue Post Format
32  * @see https://www.tumblr.com/docs/npf
33  */
34 class NPF
35 {
36         static public function fromBBCode(string $bbcode, int $uri_id): array
37         {
38                 $npf = [];
39
40                 $bbcode = self::prepareBody($bbcode);
41
42                 $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
43                 if (empty($html)) {
44                         return [];
45                 }
46
47                 $doc = new DOMDocument();
48                 if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
49                         return [];
50                 }
51
52                 $node = $doc->getElementsByTagName('body')->item(0);
53                 foreach ($node->childNodes as $child) {
54                         $npf = self::routeElements($child, $uri_id, $npf);
55                 }
56
57                 return self::addLinkBlock($uri_id, $npf);
58         }
59
60         public static function prepareBody(string $body): string
61         {
62                 $shared = BBCode::fetchShareAttributes($body);
63                 if (!empty($shared)) {
64                         $body = $shared['shared'];
65                 }
66
67                 $body = BBCode::removeAttachment($body);
68
69                 $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
70
71                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
72                         foreach ($pictures as $picture) {
73                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
74                                         $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
75                                 }
76                         }
77                 }
78
79                 $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
80
81                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
82                         foreach ($pictures as $picture) {
83                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
84                                         $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
85                                 }
86                         }
87                 }
88
89                 $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
90                 $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
91                 $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
92
93                 do {
94                         $oldbody = $body;
95                         $body = str_replace(["\n\n\n"], ["\n\n"], $body);
96                 } while ($oldbody != $body);
97
98                 return trim($body);
99         }
100
101         static private function routeElements(DOMElement|DOMNode $child, int $uri_id, array $npf): array
102         {
103                 switch ($child->tagName ?? '') {
104                         case 'blockquote':
105                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'indented');
106                                 break;
107
108                         case 'h1':
109                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1');
110                                 break;
111
112                         case 'h2':
113                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1');
114                                 break;
115
116                         case 'h3':
117                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1');
118                                 break;
119
120                         case 'h4':
121                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2');
122                                 break;
123
124                         case 'h5':
125                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2');
126                                 break;
127
128                         case 'h6':
129                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2');
130                                 break;
131
132                         case 'ul':
133                                 $npf = self::addListBlock($child, $uri_id, $npf, false, 0);
134                                 break;
135
136                         case 'ol':
137                                 $npf = self::addListBlock($child, $uri_id, $npf, true, 0);
138                                 break;
139
140                         case 'hr':
141                         case 'br':
142                                 break;
143
144                         case 'pre':
145                         case 'code':
146                                 $npf = self::addTextBlock($child, $uri_id, $npf, 'indented');
147                                 break;
148
149                         case 'a':
150                                 $npf = self::addMediaBlock($child, $uri_id, $npf);
151                                 break;
152
153                         case 'table':
154                                 // $child->ownerDocument->saveHTML($child)
155                                 break;
156
157                         case 'img':
158                                 $npf = self::addImageBlock($child, $uri_id, $npf);
159                                 break;
160
161                         default:
162                                 $npf = self::addTextBlock($child, $uri_id, $npf);
163                                 break;
164                 }
165                 return $npf;
166         }
167
168         static private function addImageBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array
169         {
170                 $attributes = [];
171                 foreach ($child->attributes as $key => $attribute) {
172                         $attributes[$key] = $attribute->value;
173                 }
174                 if (empty($attributes['src'])) {
175                         return $npf;
176                 }
177
178                 $entry = [
179                         'type'  => 'image',
180                         'media' => [],
181                 ];
182
183                 if (!empty($attributes['alt'])) {
184                         $entry['alt_text'] = $attributes['alt'];
185                 }
186
187                 if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) {
188                         $entry['caption'] = $attributes['title'];
189                 }
190
191                 $rid = Photo::ridFromURI($attributes['src']);
192                 if (!empty($rid)) {
193                         $photos = Photo::selectToArray([], ['resource-id' => $rid]);
194                         foreach ($photos as $photo) {
195                                 $entry['media'][] = [
196                                         'type'   => $photo['type'],
197                                         'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
198                                         'width'  => $photo['width'],
199                                         'height' => $photo['height'],
200                                 ];
201                         }
202                         if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
203                                 $entry['alt_text'] = $photos[0]['desc'];
204                         }
205                 } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
206                         $entry['media'][] = [
207                                 'type'   => $media['mimetype'],
208                                 'url'    => $media['url'],
209                                 'width'  => $media['width'],
210                                 'height' => $media['height'],
211                         ];
212                         if (empty($attributes['alt']) && !empty($media['description'])) {
213                                 $entry['alt_text'] = $media['description'];
214                         }
215                 } else {
216                         $entry['media'][] = ['url' => $attributes['src']];
217                 }
218
219                 $npf[] = $entry;
220
221                 return $npf;
222         }
223
224         static private function addMediaBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array
225         {
226                 $attributes = [];
227                 foreach ($child->attributes as $key => $attribute) {
228                         $attributes[$key] = $attribute->value;
229                 }
230                 if (empty($attributes['href'])) {
231                         return $npf;
232                 }
233
234                 $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
235                 if (!empty($media)) {
236                         switch ($media['type']) {
237                                 case Post\Media::AUDIO:
238                                         $entry = [
239                                                 'type' => 'audio',
240                                                 'media' => [
241                                                         'type' => $media['mimetype'],
242                                                         'url'  => $media['url'],
243                                                 ]
244                                         ];
245
246                                         if (!empty($media['name'])) {
247                                                 $entry['title'] = $media['name'];
248                                         } elseif (!empty($media['description'])) {
249                                                 $entry['title'] = $media['description'];
250                                         }
251
252                                         $npf[] = self::addPoster($media, $entry);
253                                         break;
254
255                                 case Post\Media::VIDEO:
256                                         $entry = [
257                                                 'type' => 'video',
258                                                 'media' => [
259                                                         'type' => $media['mimetype'],
260                                                         'url'  => $media['url'],
261                                                 ]
262                                         ];
263
264                                         $npf[] = self::addPoster($media, $entry);
265                                         break;
266                         }
267                 } else {
268                         $npf[] = [
269                                 'type' => 'text',
270                                 'text' => $child->textContent,
271                                 'formatting' => [
272                                         'start' => 0,
273                                         'end'   => strlen($child->textContent),
274                                         'type'  => 'link',
275                                         'url'   => $attributes['href']
276                                 ]
277                         ];
278                 }
279                 return $npf;
280         }
281
282         static private function addPoster(array $media, array $entry): array
283         {
284                 $poster = [];
285                 if (!empty($media['preview'])) {
286                         $poster['url'] = $media['preview'];
287                 }
288                 if (!empty($media['preview-width'])) {
289                         $poster['width'] = $media['preview-width'];
290                 }
291                 if (!empty($media['preview-height'])) {
292                         $poster['height'] = $media['preview-height'];
293                 }
294                 if (!empty($poster)) {
295                         $entry['poster'] = $poster;
296                 }
297                 return $entry;
298         }
299
300         static private function fetchText(DOMElement|DOMNode $child, array $text = ['text' => '', 'formatting' => []]): array
301         {
302                 foreach ($child->childNodes as $node) {
303                         $start = strlen($text['text']);
304
305                         switch ($node->nodeName) {
306                                 case 'b':
307                                 case 'strong':
308                                         $type = 'bold';
309                                         break;
310
311                                 case 'i':
312                                 case 'em':
313                                         $type = 'italic';
314                                         break;
315         
316                                 case 's':
317                                         $type = 'strikethrough';
318                                         break;
319                                                                                                                 
320                                 default:
321                                         $type = '';
322                                         break;
323                         }
324                         if ($node->nodeName == 'br') {
325                                 $text['text'] .= "\n";
326                         } else {
327                                 $text['text'] .= $node->textContent;
328                         }
329                         if (!empty($type)) {
330                                 $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type];
331                         }
332                 }
333                 return $text;
334         }
335
336         static private function addTextBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, string $subtype = ''): array
337         {
338                 if (empty($subtype) && (($child->childElementCount) ?? 0 == 1) && ($child->textContent == $child->firstChild->textContent)) {
339                         return self::routeElements($child->firstChild, $uri_id, $npf);
340                 }
341
342                 $element = ['type' => 'text'];
343
344                 if (!empty($subtype)) {
345                         $element['subtype'] = $subtype;
346                 }
347
348                 $text = self::fetchText($child);
349
350                 $element['text']       = $text['text'];
351                 $element['formatting'] = $text['formatting'];
352
353                 if (empty($subtype)) {
354                         switch ($child->tagName ?? '') {
355                                 case 'b':
356                                 case 'strong':
357                                         $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'bold'];
358                                         break;
359
360                                 case 'i':
361                                 case 'em':
362                                         $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'italic'];
363                                         break;
364
365                                 case 's':
366                                         $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'strikethrough'];
367                                         break;
368
369                                 case 'span':
370                                 case 'p':
371                                 case 'div':
372                                 case 'details';
373                                 case '':
374                                         break;
375                                 default:
376                                         print_r($element);
377                                         die($child->tagName . "\n");
378                                         break;
379                         }
380                 }
381
382                 if (empty($element['formatting'])) {
383                         unset($element['formatting']);
384                 }
385
386                 $npf[] = $element;
387
388                 return $npf;
389         }
390
391         static private function addListBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, bool $ordered, int $level): array
392         {
393                 foreach ($child->childNodes as $node) {
394                         switch ($node->nodeName) {
395                                 case 'ul':
396                                         $npf = self::addListBlock($node, $uri_id, $npf, false, $level++);
397                                 case 'ol':
398                                         $npf = self::addListBlock($node, $uri_id, $npf, true, $level++);
399                                 case 'li':
400                                         $text = self::fetchText($node);
401
402                                         $entry = [
403                                                 'type'    => 'text',
404                                                 'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
405                                                 'text'    => $text['text']
406                                         ];
407                                         if ($level > 0) {
408                                                 $entry['indent_level'] = $level;
409                                         }
410                                         if (!empty($text['formatting'])) {
411                                                 $entry['formatting'] = $text['formatting'];
412                                         }
413                                         $npf[] = $entry;
414                         }
415                 }
416
417                 return $npf;
418         }
419
420         static private function addLinkBlock(int $uri_id, array $npf): array
421         {
422                 foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
423                         $host = parse_url($link['url'], PHP_URL_HOST);
424                         if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
425                                 $entry = [
426                                         'type'     => 'video',
427                                         'provider' => 'youtube',
428                                         'url'      => $link['url'],
429                                 ];
430                         } elseif (in_array($host, ['vimeo.com'])) {
431                                 $entry = [
432                                         'type'     => 'video',
433                                         'provider' => 'vimeo',
434                                         'url'      => $link['url'],
435                                 ];
436                         } elseif (in_array($host, ['open.spotify.com'])) {
437                                 $entry = [
438                                         'type'     => 'audio',
439                                         'provider' => 'spotify',
440                                         'url'      => $link['url'],
441                                 ];
442                         } else {
443                                 $entry = [
444                                         'type' => 'link',
445                                         'url'  => $link['url'],
446                                 ];
447                                 if (!empty($link['name'])) {
448                                         $entry['title'] = $link['name'];
449                                 }
450                                 if (!empty($link['description'])) {
451                                         $entry['description'] = $link['description'];
452                                 }
453                                 if (!empty($link['author-name'])) {
454                                         $entry['author'] = $link['author-name'];
455                                 }
456                                 if (!empty($link['publisher-name'])) {
457                                         $entry['site_name'] = $link['publisher-name'];
458                                 }
459                         }
460
461                         $npf[] = self::addPoster($link, $entry);
462                 }
463                 return $npf;
464         }
465 }