]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Deactivate the attributes
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
29 /**
30  * Tumblr Neue Post Format
31  * @see https://www.tumblr.com/docs/npf
32  */
33 class NPF
34 {
35         static $heading_subtype = [];
36
37         static public function fromBBCode(string $bbcode, int $uri_id): array
38         {
39                 $bbcode = self::prepareBody($bbcode);
40
41                 $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
42                 if (empty($html)) {
43                         return [];
44                 }
45
46                 $doc = new DOMDocument();
47                 $doc->formatOutput = true;
48                 if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
49                         return [];
50                 }
51
52                 self::setHeadingSubStyles($doc);
53
54                 $element = $doc->getElementsByTagName('body')->item(0);
55 //              echo $element->ownerDocument->saveHTML($element) . "\n";
56
57                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, true, []);
58
59                 return self::addLinkBlockForUriId($uri_id, 0, $npf);
60         }
61
62         static function setHeadingSubStyles($doc)
63         {
64                 self::$heading_subtype = [];
65                 foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) {
66                         if ($doc->getElementsByTagName($element)->count() > 0) {
67                                 if (empty(self::$heading_subtype)) {
68                                         self::$heading_subtype[$element] = 'heading1';
69                                 } else {
70                                         self::$heading_subtype[$element] = 'heading2';
71                                 }
72                         }
73                 }
74         }
75
76         static private function prepareBody(string $body): string
77         {
78                 $shared = BBCode::fetchShareAttributes($body);
79                 if (!empty($shared)) {
80                         $body = $shared['shared'];
81                 }
82
83                 $body = BBCode::removeAttachment($body);
84
85                 $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
86
87                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
88                         foreach ($pictures as $picture) {
89                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
90                                         $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
91                                 }
92                         }
93                 }
94
95                 $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
96
97                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
98                         foreach ($pictures as $picture) {
99                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
100                                         $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
101                                 }
102                         }
103                 }
104
105                 $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
106                 $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
107                 $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
108
109                 do {
110                         $oldbody = $body;
111                         $body = str_replace(["\n\n\n"], ["\n\n"], $body);
112                 } while ($oldbody != $body);
113
114                 return trim($body);
115         }
116
117         static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
118         {
119                 if ($parse_structure && $text) {
120                         list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
121                 }
122
123                 $callstack[] = $element->nodeName;
124                 $level = self::getLevelByCallstack($callstack);
125
126                 foreach ($element->childNodes as $child) {
127                         switch ($child->nodeName) {
128                                 case 'b':
129                                 case 'strong':
130                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
131                                         break;
132         
133                                 case 'i':
134                                 case 'em':
135                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
136                                         break;
137         
138                                 case 's':
139                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
140                                         break;
141
142                                 case 'u':
143                                 case 'span':
144                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
145                                         break;
146
147                                 case 'hr':
148                                 case 'br':
149                                         if (!empty($text)) {
150                                                 $text .= "\n";
151                                         }
152                                         break;
153                 
154                                 case '#text':
155                                         $text .= $child->textContent;
156                                         break;
157
158                                 case 'table':
159                                 case 'summary':
160                                         // Ignore tables and spoilers
161                                         break;
162
163                                 case 'a':
164                                         if ($text) {
165                                                 list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
166                                         } else {
167                                                 $npf = self::addLinkBlock($child, $uri_id, $level, $npf);
168                                         }
169                                         break;
170
171                                 case 'img':
172                                         $npf = self::addImageBlock($child, $uri_id, $level, $npf);
173                                         break;
174
175                                         default:
176                                         list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
177                                         break;
178                         }
179                 }
180
181                 if ($parse_structure && $text) {
182                         list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
183                 }
184                 return [$npf, $text, $formatting];
185         }
186
187         static private function getLevelByCallstack($callstack): int
188         {
189                 $level = 0;
190                 foreach ($callstack as $entry) {
191                         if (in_array($entry, ['ol', 'ul', 'blockquote'])) {
192                                 ++$level;
193                         }
194                 }
195                 return max(0, $level - 1);
196         }
197
198         static private function getSubTypeByCallstack($callstack, string $text): string
199         {
200                 $subtype = '';
201                 foreach ($callstack as $entry) {
202                         switch ($entry) {
203                                 case 'ol':
204                                         $subtype = 'ordered-list-item';
205                                         break;
206
207                                 case 'ul':
208                                         $subtype = 'unordered-list-item';
209                                         break;
210
211                                 case 'h1':
212                                         $subtype = self::$heading_subtype[$entry];
213                                         break;
214         
215                                 case 'h2':
216                                         $subtype = self::$heading_subtype[$entry];
217                                         break;
218         
219                                 case 'h3':
220                                         $subtype = self::$heading_subtype[$entry];
221                                         break;
222         
223                                 case 'h4':
224                                         $subtype = self::$heading_subtype[$entry];
225                                         break;
226         
227                                 case 'h5':
228                                         $subtype = self::$heading_subtype[$entry];
229                                         break;
230         
231                                 case 'h6':
232                                         $subtype = self::$heading_subtype[$entry];
233                                         break;
234         
235                                 case 'blockquote':                                      
236                                         $subtype = strlen($text) < 100 ? 'quote' : 'indented';
237                                         break;
238
239                                 case 'pre':
240                                         $subtype = 'indented';
241                                         break;
242
243                                 case 'code':
244                                         $subtype = 'chat';
245                                         break;
246                         }
247                 }
248                 return $subtype;
249         }
250
251         static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
252         {
253                 $start = mb_strlen($text);
254                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
255
256                 if (!empty($type)) {
257                         $formatting[] = [
258                                 'start' => $start,
259                                 'end'   => mb_strlen($text),
260                                 'type'  => $type
261                         ];
262                 }
263                 return [$npf, $text, $formatting];
264         }
265
266         static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
267         {
268                 $start = mb_strlen($text);
269                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
270
271                 $attributes = [];
272                 foreach ($element->attributes as $key => $attribute) {
273                         $attributes[$key] = trim($attribute->value);
274                 }
275                 if (!empty($attributes['href'])) {
276                         $formatting[] = [
277                                 'start' => $start,
278                                 'end'   => mb_strlen($text),
279                                 'type'  => 'link',
280                                 'url'   => $attributes['href']
281                         ];
282                 }
283                 return [$npf, $text, $formatting];
284         }
285
286         static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array
287         {
288                 $block = [
289                         'type'      => 'text',
290                         'subtype'   => '',
291                         'text'      => $text,
292                 ];
293
294                 // Deactivated since Tumblr has got issues with it
295                 //if (!empty($formatting)) {
296                 //      $block['formatting'] = $formatting;
297                 //}
298
299                 $level = self::getLevelByCallstack($callstack);
300                 if ($level > 0) {
301                         $block['indent_level'] = $level;
302                 }
303
304                 $subtype = self::getSubTypeByCallstack($callstack, $text);
305                 if ($subtype) {
306                         $block['subtype'] = $subtype;
307                 } else {
308                         unset($block['subtype']);
309                 }
310
311                 $npf[] = $block;
312                 $text = '';
313                 $formatting = [];
314                 return [$npf, $text, $formatting];
315         }
316
317         static private function addPoster(array $media, array $block): array
318         {
319                 $poster = [];
320                 if (!empty($media['preview'])) {
321                         $poster['url'] = $media['preview'];
322                 }
323                 if (!empty($media['preview-width'])) {
324                         $poster['width'] = $media['preview-width'];
325                 }
326                 if (!empty($media['preview-height'])) {
327                         $poster['height'] = $media['preview-height'];
328                 }
329                 if (!empty($poster)) {
330                         $block['poster'] = [$poster];
331                 }
332                 return $block;
333         }
334
335         static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
336         {
337                 foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
338                         $host = parse_url($link['url'], PHP_URL_HOST);
339                         if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
340                                 $block = [
341                                         'type'     => 'video',
342                                         'provider' => 'youtube',
343                                         'url'      => $link['url'],
344                                 ];
345                         } elseif (in_array($host, ['vimeo.com'])) {
346                                 $block = [
347                                         'type'     => 'video',
348                                         'provider' => 'vimeo',
349                                         'url'      => $link['url'],
350                                 ];
351                         } elseif (in_array($host, ['open.spotify.com'])) {
352                                 $block = [
353                                         'type'     => 'audio',
354                                         'provider' => 'spotify',
355                                         'url'      => $link['url'],
356                                 ];
357                         } else {
358                                 $block = [
359                                         'type' => 'link',
360                                         'url'  => $link['url'],
361                                 ];
362                                 if (!empty($link['name'])) {
363                                         $block['title'] = $link['name'];
364                                 }
365                                 if (!empty($link['description'])) {
366                                         $block['description'] = $link['description'];
367                                 }
368                                 if (!empty($link['author-name'])) {
369                                         $block['author'] = $link['author-name'];
370                                 }
371                                 if (!empty($link['publisher-name'])) {
372                                         $block['site_name'] = $link['publisher-name'];
373                                 }
374                         }
375
376                         if ($level > 0) {
377                                 $block['indent_level'] = $level;
378                         }
379
380                         $npf[] = self::addPoster($link, $block);
381                 }
382                 return $npf;
383         }
384
385         static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
386         {
387                 $attributes = [];
388                 foreach ($element->attributes as $key => $attribute) {
389                         $attributes[$key] = trim($attribute->value);
390                 }
391                 if (empty($attributes['src'])) {
392                         return $npf;
393                 }
394
395                 $block = [
396                         'type'  => 'image',
397                         'media' => [],
398                 ];
399
400                 if (!empty($attributes['alt'])) {
401                         $block['alt_text'] = $attributes['alt'];
402                 }
403
404                 if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
405                         $block['caption'] = $attributes['title'];
406                 }
407
408                 $rid = Photo::ridFromURI($attributes['src']);
409                 if (!empty($rid)) {
410                         $photos = Photo::selectToArray([], ['resource-id' => $rid]);
411                         foreach ($photos as $photo) {
412                                 $block['media'][] = [
413                                         'type'   => $photo['type'],
414                                         'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
415                                         'width'  => $photo['width'],
416                                         'height' => $photo['height'],
417                                 ];
418                         }
419                         if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
420                                 $block['alt_text'] = $photos[0]['desc'];
421                         }
422                 } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
423                         $block['media'][] = [
424                                 'type'   => $media['mimetype'],
425                                 'url'    => $media['url'],
426                                 'width'  => $media['width'],
427                                 'height' => $media['height'],
428                         ];
429                         if (empty($attributes['alt']) && !empty($media['description'])) {
430                                 $block['alt_text'] = $media['description'];
431                         }
432                 } else {
433                         $block['media'][] = ['url' => $attributes['src']];
434                 }
435
436                 if ($level > 0) {
437                         $block['indent_level'] = $level;
438                 }
439
440                 $npf[] = $block;
441
442                 return $npf;
443         }
444
445         static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
446         {
447                 $attributes = [];
448                 foreach ($element->attributes as $key => $attribute) {
449                         $attributes[$key] = trim($attribute->value);
450                 }
451                 if (empty($attributes['href'])) {
452                         return $npf;
453                 }
454
455                 $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
456                 if (!empty($media)) {
457                         switch ($media['type']) {
458                                 case Post\Media::AUDIO:
459                                         $block = [
460                                                 'type' => 'audio',
461                                                 'media' => [
462                                                         'type' => $media['mimetype'],
463                                                         'url'  => $media['url'],
464                                                 ]
465                                         ];
466
467                                         if (!empty($media['name'])) {
468                                                 $block['title'] = $media['name'];
469                                         } elseif (!empty($media['description'])) {
470                                                 $block['title'] = $media['description'];
471                                         }
472
473                                         $block = self::addPoster($media, $block);
474                                         break;
475
476                                 case Post\Media::VIDEO:
477                                         $block = [
478                                                 'type' => 'video',
479                                                 'media' => [
480                                                         'type' => $media['mimetype'],
481                                                         'url'  => $media['url'],
482                                                 ]
483                                         ];
484
485                                         $block = self::addPoster($media, $block);
486                                         break;
487                         }
488                 } else {
489                         $block = [
490                                 'type' => 'text',
491                                 'text' => $element->textContent,
492                                 'formatting' => [
493                                         'start' => 0,
494                                         'end'   => strlen($element->textContent),
495                                         'type'  => 'link',
496                                         'url'   => $attributes['href']
497                                 ]
498                         ];
499                 }
500
501                 if ($level > 0) {
502                         $block['indent_level'] = $level;
503                 }
504
505                 $npf[] = $block;
506
507                 return $npf;
508         }
509 }