]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
No call by reference anymore
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
/**
 * Tumblr Neue Post Format
 *
 * Converts a BBCode body into a list of NPF content blocks. The BBCode is
 * rendered to HTML first; the resulting DOM tree is then walked recursively
 * and translated into "text", "image", "audio", "video" and "link" blocks.
 *
 * @see https://www.tumblr.com/docs/npf
 */
class NPF
{
	/**
	 * Embeddable hosts and the NPF block type/provider they are mapped to.
	 */
	private const EMBED_PROVIDERS = [
		'www.youtube.com'  => ['type' => 'video', 'provider' => 'youtube'],
		'youtu.be'         => ['type' => 'video', 'provider' => 'youtube'],
		'vimeo.com'        => ['type' => 'video', 'provider' => 'vimeo'],
		'open.spotify.com' => ['type' => 'audio', 'provider' => 'spotify'],
	];

	/**
	 * Heading tag name ("h1" .. "h6") => NPF subtype ("heading1"/"heading2")
	 * for the document that is currently converted.
	 * (Re)built by setHeadingSubStyles().
	 *
	 * @var array
	 */
	public static $heading_subtype = [];

	/**
	 * Convert a BBCode body into NPF content blocks.
	 *
	 * @param string $bbcode BBCode body of the post
	 * @param int    $uri_id URI id of the post, used to look up attached media
	 *
	 * @return array List of NPF blocks; empty when the body renders to nothing
	 *               or the generated HTML cannot be parsed
	 */
	public static function fromBBCode(string $bbcode, int $uri_id): array
	{
		$bbcode = self::prepareBody($bbcode);

		$html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
		if ((string)$html === '') {
			return [];
		}

		$doc = new DOMDocument();
		$doc->formatOutput = true;
		// loadHTML() raises warnings on imperfect markup, these are not of interest here
		if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
			return [];
		}

		self::setHeadingSubStyles($doc);

		$npf  = [];
		$body = $doc->getElementsByTagName('body')->item(0);
		// A document without a body element simply has no content blocks
		if ($body instanceof DOMElement) {
			[$npf] = self::routeChildren($body, $uri_id, true, []);
		}

		return self::addLinkBlockForUriId($uri_id, 0, $npf);
	}

	/**
	 * Decide which heading tags of the document become "heading1" or "heading2".
	 *
	 * The first heading tag (checked in the order h1 .. h6) that occurs in the
	 * document is mapped to "heading1", every further occurring heading tag to
	 * "heading2". The result is stored in self::$heading_subtype.
	 *
	 * @param DOMDocument $doc Parsed HTML document
	 *
	 * @return void
	 */
	public static function setHeadingSubStyles(DOMDocument $doc): void
	{
		self::$heading_subtype = [];
		foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $tag) {
			if ($doc->getElementsByTagName($tag)->length === 0) {
				continue;
			}
			self::$heading_subtype[$tag] = empty(self::$heading_subtype) ? 'heading1' : 'heading2';
		}
	}

	/**
	 * Normalize the BBCode before it is converted to HTML.
	 *
	 * - a shared post is replaced by its shared content
	 * - attachments are removed
	 * - image size hints are dropped
	 * - linked preview photos are replaced by the plain full size image
	 * - images, audio and video elements are isolated by blank lines
	 * - runs of three or more line feeds are collapsed to two
	 *
	 * @param string $body BBCode body
	 *
	 * @return string Prepared, trimmed BBCode body
	 */
	private static function prepareBody(string $body): string
	{
		$shared = BBCode::fetchShareAttributes($body);
		if (!empty($shared)) {
			$body = $shared['shared'];
		}

		$body = BBCode::removeAttachment($body);

		// preg_replace() returns null on a PCRE error, in that case the body is kept untouched
		$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body) ?? $body;

		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
			foreach ($pictures as $picture) {
				if (self::isLinkedPhoto($picture[2], $picture[1])) {
					$body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
				}
			}
		}

		$body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body) ?? $body;

		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
			foreach ($pictures as $picture) {
				if (self::isLinkedPhoto($picture[2], $picture[1])) {
					$body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
				}
			}
		}

		$body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body) ?? $body;
		$body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body) ?? $body;
		$body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body) ?? $body;

		// Collapse every run of three or more line feeds to exactly two
		$body = preg_replace("/\n{3,}/", "\n\n", $body) ?? $body;

		return trim($body);
	}

	/**
	 * Check whether an image that is wrapped in a link is a photo preview
	 * linking to its own full size version or photo page.
	 *
	 * @param string $image URL of the embedded image
	 * @param string $link  URL the image links to
	 *
	 * @return bool
	 */
	private static function isLinkedPhoto(string $image, string $link): bool
	{
		if (!preg_match('#/photo/.*-[01]\.#ism', $image)) {
			return false;
		}

		return preg_match('#/photo/.*-0\.#ism', $link) || preg_match('#/photos/.*/image/#ism', $link);
	}

	/**
	 * Walk the child nodes of an element and translate them into NPF blocks.
	 *
	 * Text and inline formatting are collected in $text/$formatting. When
	 * $parse_structure is set, collected text is flushed into a text block
	 * before the element is entered and after all children were processed.
	 *
	 * @param DOMElement $element         Element whose children are processed
	 * @param int        $uri_id          URI id of the post
	 * @param bool       $parse_structure true for block level elements, false for inline elements
	 * @param array      $callstack       Node names of the ancestors of $element
	 * @param array      $npf             Blocks created so far
	 * @param string     $text            Text collected for the current block
	 * @param array      $formatting      Formatting collected for the current block
	 *
	 * @return array [$npf, $text, $formatting]
	 */
	private static function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
	{
		// Explicit comparison: a text consisting of "0" is valid content
		if ($parse_structure && ($text !== '')) {
			[$npf, $text, $formatting] = self::addBlock($text, $formatting, $npf, $callstack);
		}

		$callstack[] = $element->nodeName;
		$level = self::getLevelByCallstack($callstack);

		foreach ($element->childNodes as $child) {
			switch ($child->nodeName) {
				case 'b':
				case 'strong':
					[$npf, $text, $formatting] = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
					break;

				case 'i':
				case 'em':
					[$npf, $text, $formatting] = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
					break;

				case 's':
					[$npf, $text, $formatting] = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
					break;

				case 'u':
				case 'span':
					// No NPF formatting is emitted for these, only their content is kept
					[$npf, $text, $formatting] = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
					break;

				case 'hr':
				case 'br':
					if ($text !== '') {
						$text .= "\n";
					}
					break;

				case '#text':
					$text .= $child->textContent;
					break;

				case 'table':
				case 'summary':
					// Ignore tables and spoilers
					break;

				case 'a':
					// A link following some text is an inline link, otherwise it gets its own block
					if ($text !== '') {
						[$npf, $text, $formatting] = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
					} else {
						$npf = self::addLinkBlock($child, $uri_id, $level, $npf);
					}
					break;

				case 'img':
					$npf = self::addImageBlock($child, $uri_id, $level, $npf);
					break;

				case 'ol':
				case 'div':
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
				case 'blockquote':
				case 'p':
				case 'pre':
				case 'code':
				case 'ul':
				case 'li':
				case 'details':
					[$npf, $text, $formatting] = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
					break;

				default:
					// Unknown elements are handled as transparent inline containers so that
					// their text isn't lost. Any other node type (comments, CDATA sections,
					// processing instructions) carries no displayable content and is skipped.
					if ($child instanceof DOMElement) {
						[$npf, $text, $formatting] = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
					}
					break;
			}
		}

		if ($parse_structure && ($text !== '')) {
			[$npf, $text, $formatting] = self::addBlock($text, $formatting, $npf, $callstack);
		}

		return [$npf, $text, $formatting];
	}

	/**
	 * Calculate the indent level from the nesting of lists and quotes.
	 *
	 * @param array $callstack Node names of the current element and its ancestors
	 *
	 * @return int Number of "ol", "ul" and "blockquote" entries minus one, never below zero
	 */
	private static function getLevelByCallstack(array $callstack): int
	{
		$nesting = count(array_intersect($callstack, ['ol', 'ul', 'blockquote']));

		return max(0, $nesting - 1);
	}

	/**
	 * Determine the subtype of a text block from the elements it is nested in.
	 *
	 * The innermost (last) relevant entry of the callstack wins.
	 *
	 * @param array  $callstack Node names of the current element and its ancestors
	 * @param string $text      Text of the block, its length decides between "quote" and "indented"
	 *
	 * @return string NPF subtype or an empty string when no subtype applies
	 */
	private static function getSubTypeByCallstack(array $callstack, string $text): string
	{
		$subtype = '';
		foreach ($callstack as $entry) {
			switch ($entry) {
				case 'ol':
					$subtype = 'ordered-list-item';
					break;

				case 'ul':
					$subtype = 'unordered-list-item';
					break;

				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					// Fallback for headings that weren't registered via setHeadingSubStyles()
					$subtype = self::$heading_subtype[$entry] ?? 'heading2';
					break;

				case 'blockquote':
					// Short quotes are displayed as "quote", longer ones as "indented".
					// The length is measured in characters, not in bytes.
					$subtype = mb_strlen($text) < 100 ? 'quote' : 'indented';
					break;

				case 'pre':
					$subtype = 'indented';
					break;

				case 'code':
					$subtype = 'chat';
					break;
			}
		}

		return $subtype;
	}

	/**
	 * Process an inline element and record a formatting range for its content.
	 *
	 * @param DOMElement $element    Inline element
	 * @param int        $uri_id     URI id of the post
	 * @param string     $type       NPF formatting type; with an empty value only the content is processed
	 * @param array      $callstack  Node names of the ancestors of $element
	 * @param array      $npf        Blocks created so far
	 * @param string     $text       Text collected for the current block
	 * @param array      $formatting Formatting collected for the current block
	 *
	 * @return array [$npf, $text, $formatting]
	 */
	private static function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
	{
		// Offsets are character positions inside the collected text
		$start = mb_strlen($text);
		[$npf, $text, $formatting] = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);

		if ($type !== '') {
			$formatting[] = [
				'start' => $start,
				'end'   => mb_strlen($text),
				'type'  => $type,
			];
		}

		return [$npf, $text, $formatting];
	}

	/**
	 * Process a link inside of a text and record a "link" formatting range.
	 *
	 * @param DOMElement $element    Anchor element
	 * @param int        $uri_id     URI id of the post
	 * @param array      $callstack  Node names of the ancestors of $element
	 * @param array      $npf        Blocks created so far
	 * @param string     $text       Text collected for the current block
	 * @param array      $formatting Formatting collected for the current block
	 *
	 * @return array [$npf, $text, $formatting]
	 */
	private static function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
	{
		$start = mb_strlen($text);
		[$npf, $text, $formatting] = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);

		$attributes = self::getAttributes($element);
		if (!empty($attributes['href'])) {
			$formatting[] = [
				'start' => $start,
				'end'   => mb_strlen($text),
				'type'  => 'link',
				'url'   => $attributes['href'],
			];
		}

		return [$npf, $text, $formatting];
	}

	/**
	 * Append the collected text as a text block and reset the collectors.
	 *
	 * @param string $text       Text of the block
	 * @param array  $formatting Formatting ranges of the text
	 * @param array  $npf        Blocks created so far
	 * @param array  $callstack  Node names used to derive subtype and indent level
	 *
	 * @return array [$npf, '', []]
	 */
	private static function addBlock(string $text, array $formatting, array $npf, array $callstack): array
	{
		$block = ['type' => 'text'];

		$subtype = self::getSubTypeByCallstack($callstack, $text);
		if ($subtype !== '') {
			$block['subtype'] = $subtype;
		}

		$block['text'] = $text;

		if (!empty($formatting)) {
			$block['formatting'] = $formatting;
		}

		$level = self::getLevelByCallstack($callstack);
		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$npf[] = $block;

		return [$npf, '', []];
	}

	/**
	 * Add the preview image of a media entry as poster to a block.
	 *
	 * The poster is added as a list containing a single media object.
	 * NOTE(review): list form follows the block examples in the NPF
	 * documentation - confirm against the current specification.
	 *
	 * @param array $media Media entry, the keys "preview", "preview-width" and "preview-height" are used
	 * @param array $block NPF block
	 *
	 * @return array The block, extended by "poster" when preview data exists
	 */
	private static function addPoster(array $media, array $block): array
	{
		$poster = [];
		if (!empty($media['preview'])) {
			$poster['url'] = $media['preview'];
		}
		if (!empty($media['preview-width'])) {
			$poster['width'] = $media['preview-width'];
		}
		if (!empty($media['preview-height'])) {
			$poster['height'] = $media['preview-height'];
		}

		if (!empty($poster)) {
			$block['poster'] = [$poster];
		}

		return $block;
	}

	/**
	 * Append a block for every HTML media entry (link preview) of the post.
	 *
	 * Links to known embed providers become "video" or "audio" blocks,
	 * everything else becomes a "link" block.
	 *
	 * @param int   $uri_id URI id of the post
	 * @param int   $level  Indent level of the new blocks
	 * @param array $npf    Blocks created so far
	 *
	 * @return array Extended list of blocks
	 */
	private static function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
	{
		foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
			$host = parse_url($link['url'], PHP_URL_HOST);

			if (is_string($host) && isset(self::EMBED_PROVIDERS[$host])) {
				$block = self::EMBED_PROVIDERS[$host] + ['url' => $link['url']];
			} else {
				$block = [
					'type' => 'link',
					'url'  => $link['url'],
				];
				if (!empty($link['name'])) {
					$block['title'] = $link['name'];
				}
				if (!empty($link['description'])) {
					$block['description'] = $link['description'];
				}
				if (!empty($link['author-name'])) {
					$block['author'] = $link['author-name'];
				}
				if (!empty($link['publisher-name'])) {
					$block['site_name'] = $link['publisher-name'];
				}
			}

			if ($level > 0) {
				$block['indent_level'] = $level;
			}

			$npf[] = self::addPoster($link, $block);
		}

		return $npf;
	}

	/**
	 * Append an image block for an "img" element.
	 *
	 * Media data is taken from the local photo (all stored scales), from the
	 * media entries of the post or - as last resort - only from the "src" URL.
	 *
	 * @param DOMElement $element Image element
	 * @param int        $uri_id  URI id of the post
	 * @param int        $level   Indent level of the new block
	 * @param array      $npf     Blocks created so far
	 *
	 * @return array Extended list of blocks; unchanged when the image has no "src"
	 */
	private static function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
	{
		$attributes = self::getAttributes($element);
		if (empty($attributes['src'])) {
			return $npf;
		}

		$block = [
			'type'  => 'image',
			'media' => [],
		];

		if (!empty($attributes['alt'])) {
			$block['alt_text'] = $attributes['alt'];
		}

		if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
			$block['caption'] = $attributes['title'];
		}

		$rid = Photo::ridFromURI($attributes['src']);
		if (!empty($rid)) {
			$photos = Photo::selectToArray([], ['resource-id' => $rid]);
			foreach ($photos as $photo) {
				$block['media'][] = [
					'type'   => $photo['type'],
					'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
					'width'  => $photo['width'],
					'height' => $photo['height'],
				];
			}
			if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
				$block['alt_text'] = $photos[0]['desc'];
			}
		} elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
			$block['media'][] = [
				'type'   => $media['mimetype'],
				'url'    => $media['url'],
				'width'  => $media['width'],
				'height' => $media['height'],
			];
			if (empty($attributes['alt']) && !empty($media['description'])) {
				$block['alt_text'] = $media['description'];
			}
		}

		// Neither source delivered data (this includes a resource id without
		// stored photos): reference the image by its plain URL
		if (empty($block['media'])) {
			$block['media'][] = ['url' => $attributes['src']];
		}

		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$npf[] = $block;

		return $npf;
	}

	/**
	 * Append a block for a link that isn't part of a text.
	 *
	 * Links to audio or video media entries of the post become "audio" or
	 * "video" blocks, all other links become a text block that is completely
	 * formatted as link.
	 *
	 * @param DOMElement $element Anchor element
	 * @param int        $uri_id  URI id of the post
	 * @param int        $level   Indent level of the new block
	 * @param array      $npf     Blocks created so far
	 *
	 * @return array Extended list of blocks; unchanged when the link has no "href"
	 */
	private static function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
	{
		$attributes = self::getAttributes($element);
		if (empty($attributes['href'])) {
			return $npf;
		}

		$block = [];

		$media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
		if (!empty($media)) {
			switch ($media['type']) {
				case Post\Media::AUDIO:
					$block = [
						'type'  => 'audio',
						'media' => [
							'type' => $media['mimetype'],
							'url'  => $media['url'],
						],
					];

					if (!empty($media['name'])) {
						$block['title'] = $media['name'];
					} elseif (!empty($media['description'])) {
						$block['title'] = $media['description'];
					}

					$block = self::addPoster($media, $block);
					break;

				case Post\Media::VIDEO:
					$block = [
						'type'  => 'video',
						'media' => [
							'type' => $media['mimetype'],
							'url'  => $media['url'],
						],
					];

					$block = self::addPoster($media, $block);
					break;
			}
		}

		// No media entry or an unexpected media type: plain text link
		if (empty($block)) {
			$text  = $element->textContent;
			$block = [
				'type'       => 'text',
				'text'       => $text,
				// "formatting" is a list of ranges, like the one built by addInlineLink()
				'formatting' => [
					[
						'start' => 0,
						'end'   => mb_strlen($text),
						'type'  => 'link',
						'url'   => $attributes['href'],
					],
				],
			];
		}

		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$npf[] = $block;

		return $npf;
	}

	/**
	 * Collect the attributes of an element as name => trimmed value.
	 *
	 * @param DOMElement $element
	 *
	 * @return array
	 */
	private static function getAttributes(DOMElement $element): array
	{
		$attributes = [];
		foreach ($element->attributes as $key => $attribute) {
			$attributes[$key] = trim($attribute->value);
		}

		return $attributes;
	}
}