]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Fixed NPF, new OAuth library added
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
29 /**
30  * Tumblr Neue Post Format
31  * @see https://www.tumblr.com/docs/npf
32  */
class NPF
{
	/**
	 * Maps a heading tag name ('h1' … 'h6') to the NPF text subtype used for it
	 * ('heading1' or 'heading2'). Rebuilt for every document by
	 * setHeadingSubStyles() and read by getSubTypeByCallstack().
	 *
	 * @var array<string, string>
	 */
	public static $heading_subtype = [];

	/**
	 * Converts a BBCode body into a list of NPF content blocks.
	 *
	 * The BBCode is normalised, rendered to HTML, parsed into a DOM and then
	 * walked recursively. Link blocks for the media attached to the given
	 * uri-id are appended at the end.
	 *
	 * @param string $bbcode BBCode body
	 * @param int    $uri_id uri-id used for the media lookups
	 *
	 * @return array List of NPF blocks; empty when nothing could be converted
	 */
	static public function fromBBCode(string $bbcode, int $uri_id): array
	{
		$bbcode = self::prepareBody($bbcode);

		$html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
		if (empty($html)) {
			return [];
		}

		$doc = new DOMDocument();
		$doc->formatOutput = true;
		if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
			return [];
		}

		self::setHeadingSubStyles($doc);

		$element = $doc->getElementsByTagName('body')->item(0);
		if (!($element instanceof DOMElement)) {
			// Without a body element there is nothing to walk; routeChildren() only accepts elements.
			return [];
		}

		// Only the block list is of interest here; text and formatting have been flushed into it.
		list($npf) = self::routeChildren($element, $uri_id, true, []);

		return self::addLinkBlockForUriId($uri_id, 0, $npf);
	}

	/**
	 * Decides which NPF heading subtype each heading level of a document gets.
	 *
	 * The highest heading level that occurs in the document becomes 'heading1',
	 * every other occurring level becomes 'heading2'. The result is stored in
	 * self::$heading_subtype, replacing the mapping of any previous document.
	 *
	 * @param DOMDocument $doc Parsed document (anything offering getElementsByTagName())
	 */
	public static function setHeadingSubStyles($doc)
	{
		self::$heading_subtype = [];
		foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) {
			if ($doc->getElementsByTagName($element)->count() > 0) {
				if (empty(self::$heading_subtype)) {
					self::$heading_subtype[$element] = 'heading1';
				} else {
					self::$heading_subtype[$element] = 'heading2';
				}
			}
		}
	}

	/**
	 * Normalises a BBCode body before it is rendered to HTML.
	 *
	 * Media elements are moved onto their own paragraphs so that they end up
	 * as separate blocks later on.
	 *
	 * @param string $body BBCode body
	 *
	 * @return string Prepared BBCode body
	 */
	static private function prepareBody(string $body): string
	{
		// For a share, only its 'shared' part is processed.
		$shared = BBCode::fetchShareAttributes($body);
		if (!empty($shared)) {
			$body = $shared['shared'];
		}

		$body = BBCode::removeAttachment($body);

		// [img=WIDTHxHEIGHT]url[/img] => [img]url[/img]
		$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);

		// Replace a linked photo ([url=...][img=...]...[/img][/url]) by the plain image,
		// pointing at the "-0." variant instead of the "-1." one.
		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
			foreach ($pictures as $picture) {
				if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
					$body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
				}
			}
		}

		// Put every remaining [img=...] element on its own paragraph.
		$body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);

		// Same replacement as above for the attribute-less form [url=...][img]...[/img][/url].
		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
			foreach ($pictures as $picture) {
				if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
					$body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
				}
			}
		}

		// Put images, audio and video elements on their own paragraphs.
		$body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
		$body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
		$body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);

		// The replacements above can stack up line breaks: collapse runs of three or more to two.
		do {
			$oldbody = $body;
			$body = str_replace(["\n\n\n"], ["\n\n"], $body);
		} while ($oldbody != $body);

		return trim($body);
	}

	/**
	 * Walks the child nodes of an element and collects NPF blocks.
	 *
	 * Inline content is accumulated in $text / $formatting. When
	 * $parse_structure is set, the accumulated text is flushed into a text
	 * block before and after the children are processed.
	 *
	 * @param DOMElement $element         Element whose children are processed
	 * @param int        $uri_id          uri-id used for the media lookups
	 * @param bool       $parse_structure Whether pending text is flushed into blocks by this call
	 * @param array      $callstack       Tag names of the ancestors of $element
	 * @param array      $npf             Blocks collected so far
	 * @param string     $text            Pending text of the current block
	 * @param array      $formatting      Pending formatting ranges of the current block
	 *
	 * @return array [$npf, $text, $formatting]
	 */
	static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
	{
		// Compared against '' on purpose: a plain truthiness check would drop a text of "0".
		if ($parse_structure && ($text !== '')) {
			list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
		}

		$callstack[] = $element->nodeName;
		$level = self::getLevelByCallstack($callstack);

		foreach ($element->childNodes as $child) {
			switch ($child->nodeName) {
				case 'b':
				case 'strong':
					list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
					break;

				case 'i':
				case 'em':
					list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
					break;

				case 's':
					list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
					break;

				case 'u':
				case 'span':
					// No formatting type is recorded for these, only their content is collected
					list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
					break;

				case 'hr':
				case 'br':
					// A line break is only added when there already is text in the current block
					if ($text !== '') {
						$text .= "\n";
					}
					break;

				case '#text':
					$text .= $child->textContent;
					break;

				case 'table':
				case 'summary':
					// Ignore tables and spoilers
					break;

				case 'a':
					// A link following text becomes an inline link, otherwise a block of its own
					if ($text !== '') {
						list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
					} else {
						$npf = self::addLinkBlock($child, $uri_id, $level, $npf);
					}
					break;

				case 'img':
					$npf = self::addImageBlock($child, $uri_id, $level, $npf);
					break;

				default:
					if (!($child instanceof DOMElement)) {
						// Comments, CDATA sections and the like are no elements and cannot be descended into
						break;
					}
					list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
					break;
			}
		}

		if ($parse_structure && ($text !== '')) {
			list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
		}
		return [$npf, $text, $formatting];
	}

	/**
	 * Calculates the indent level from the tag names in the callstack.
	 *
	 * Every 'ol', 'ul' and 'blockquote' entry counts as one nesting step; the
	 * outermost one has level 0.
	 *
	 * @param array $callstack Tag names from the outermost to the current element
	 *
	 * @return int Indent level, never negative
	 */
	static private function getLevelByCallstack(array $callstack): int
	{
		$level = 0;
		foreach ($callstack as $entry) {
			if (in_array($entry, ['ol', 'ul', 'blockquote'], true)) {
				++$level;
			}
		}
		return max(0, $level - 1);
	}

	/**
	 * Determines the subtype of a text block from the tag names in the callstack.
	 *
	 * When several entries of the callstack define a subtype, the innermost
	 * (last) one wins.
	 *
	 * @param array  $callstack Tag names from the outermost to the current element
	 * @param string $text      Text of the block; its length selects the blockquote subtype
	 *
	 * @return string Subtype, or an empty string when no entry defines one
	 */
	static private function getSubTypeByCallstack(array $callstack, string $text): string
	{
		$subtype = '';
		foreach ($callstack as $entry) {
			switch ($entry) {
				case 'ol':
					$subtype = 'ordered-list-item';
					break;

				case 'ul':
					$subtype = 'unordered-list-item';
					break;

				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					// The mapping is normally complete for the current document. The fallback
					// only prevents a null return when it has not been built for this tag.
					$subtype = self::$heading_subtype[$entry] ?? 'heading1';
					break;

				case 'blockquote':
					$subtype = mb_strlen($text) < 100 ? 'quote' : 'indented';
					break;

				case 'pre':
					$subtype = 'indented';
					break;

				case 'code':
					$subtype = 'chat';
					break;
			}
		}
		return $subtype;
	}

	/**
	 * Collects the attributes of an element as a name => trimmed value array.
	 *
	 * @param DOMElement $element
	 *
	 * @return array<string, string>
	 */
	static private function getAttributes(DOMElement $element): array
	{
		$attributes = [];
		foreach ($element->attributes as $key => $attribute) {
			$attributes[$key] = trim($attribute->value);
		}
		return $attributes;
	}

	/**
	 * Processes the content of an inline formatting element.
	 *
	 * The content is appended to $text. When $type is not empty, a formatting
	 * range covering the appended part is recorded.
	 *
	 * @param DOMElement $element    Formatting element
	 * @param int        $uri_id     uri-id used for the media lookups
	 * @param string     $type       Formatting type, or an empty string to record none
	 * @param array      $callstack  Tag names of the ancestors of $element
	 * @param array      $npf        Blocks collected so far
	 * @param string     $text       Pending text of the current block
	 * @param array      $formatting Pending formatting ranges of the current block
	 *
	 * @return array [$npf, $text, $formatting]
	 */
	static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
	{
		// Ranges are character offsets into the pending text
		$start = mb_strlen($text);
		list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);

		if (!empty($type)) {
			$formatting[] = [
				'start' => $start,
				'end'   => mb_strlen($text),
				'type'  => $type
			];
		}
		return [$npf, $text, $formatting];
	}

	/**
	 * Processes a link that is placed inside running text.
	 *
	 * The link content is appended to $text. When the element has a non-empty
	 * "href", a 'link' formatting range covering the appended part is recorded.
	 *
	 * @param DOMElement $element    Link element
	 * @param int        $uri_id     uri-id used for the media lookups
	 * @param array      $callstack  Tag names of the ancestors of $element
	 * @param array      $npf        Blocks collected so far
	 * @param string     $text       Pending text of the current block
	 * @param array      $formatting Pending formatting ranges of the current block
	 *
	 * @return array [$npf, $text, $formatting]
	 */
	static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
	{
		$start = mb_strlen($text);
		list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);

		$attributes = self::getAttributes($element);
		if (!empty($attributes['href'])) {
			$formatting[] = [
				'start' => $start,
				'end'   => mb_strlen($text),
				'type'  => 'link',
				'url'   => $attributes['href']
			];
		}
		return [$npf, $text, $formatting];
	}

	/**
	 * Appends the pending text as a text block and resets the pending state.
	 *
	 * @param string $text       Text of the block
	 * @param array  $formatting Formatting ranges of the block
	 * @param array  $npf        Blocks collected so far
	 * @param array  $callstack  Tag names used to derive indent level and subtype
	 *
	 * @return array [$npf, '', []]
	 */
	static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array
	{
		// 'subtype' is reserved here so that it keeps its position in front of 'text'
		$block = [
			'type'      => 'text',
			'subtype'   => '',
			'text'      => $text,
		];

		if (!empty($formatting)) {
			$block['formatting'] = $formatting;
		}

		$level = self::getLevelByCallstack($callstack);
		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$subtype = self::getSubTypeByCallstack($callstack, $text);
		if ($subtype) {
			$block['subtype'] = $subtype;
		} else {
			unset($block['subtype']);
		}

		$npf[] = $block;
		$text = '';
		$formatting = [];
		return [$npf, $text, $formatting];
	}

	/**
	 * Adds a 'poster' entry to a block when the media row has preview data.
	 *
	 * @param array $media Media row; 'preview', 'preview-width' and 'preview-height' are used
	 * @param array $block Block to extend
	 *
	 * @return array The block, with 'poster' added when at least one preview value is set
	 */
	static private function addPoster(array $media, array $block): array
	{
		$poster = [];
		if (!empty($media['preview'])) {
			$poster['url'] = $media['preview'];
		}
		if (!empty($media['preview-width'])) {
			$poster['width'] = $media['preview-width'];
		}
		if (!empty($media['preview-height'])) {
			$poster['height'] = $media['preview-height'];
		}
		if (!empty($poster)) {
			$block['poster'] = [$poster];
		}
		return $block;
	}

	/**
	 * Appends one block per HTML media entry that is stored for the uri-id.
	 *
	 * Links to YouTube, Vimeo and Spotify hosts become provider video/audio
	 * blocks, everything else becomes a link block.
	 *
	 * @param int   $uri_id uri-id whose media entries are fetched
	 * @param int   $level  Indent level, only added to the blocks when greater than zero
	 * @param array $npf    Blocks collected so far
	 *
	 * @return array Blocks including the appended ones
	 */
	static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
	{
		foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
			// parse_url() returns null or false for URLs without a (valid) host, hence the strict checks
			$host = parse_url($link['url'], PHP_URL_HOST);
			if (in_array($host, ['www.youtube.com', 'youtu.be'], true)) {
				$block = [
					'type'     => 'video',
					'provider' => 'youtube',
					'url'      => $link['url'],
				];
			} elseif (in_array($host, ['vimeo.com'], true)) {
				$block = [
					'type'     => 'video',
					'provider' => 'vimeo',
					'url'      => $link['url'],
				];
			} elseif (in_array($host, ['open.spotify.com'], true)) {
				$block = [
					'type'     => 'audio',
					'provider' => 'spotify',
					'url'      => $link['url'],
				];
			} else {
				$block = [
					'type' => 'link',
					'url'  => $link['url'],
				];
				if (!empty($link['name'])) {
					$block['title'] = $link['name'];
				}
				if (!empty($link['description'])) {
					$block['description'] = $link['description'];
				}
				if (!empty($link['author-name'])) {
					$block['author'] = $link['author-name'];
				}
				if (!empty($link['publisher-name'])) {
					$block['site_name'] = $link['publisher-name'];
				}
			}

			if ($level > 0) {
				$block['indent_level'] = $level;
			}

			$npf[] = self::addPoster($link, $block);
		}
		return $npf;
	}

	/**
	 * Appends an image block for an "img" element.
	 *
	 * The media list is taken from the photo rows of the resource the "src"
	 * refers to, else from the media entry stored for the uri-id, else it only
	 * consists of the "src" URL. Elements without "src" are skipped.
	 *
	 * @param DOMElement $element Image element
	 * @param int        $uri_id  uri-id used for the media lookup
	 * @param int        $level   Indent level, only added when greater than zero
	 * @param array      $npf     Blocks collected so far
	 *
	 * @return array Blocks including the appended one
	 */
	static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
	{
		$attributes = self::getAttributes($element);
		if (empty($attributes['src'])) {
			return $npf;
		}

		$block = [
			'type'  => 'image',
			'media' => [],
		];

		if (!empty($attributes['alt'])) {
			$block['alt_text'] = $attributes['alt'];
		}

		// A title that only repeats the alternative text is not used as caption
		if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
			$block['caption'] = $attributes['title'];
		}

		$rid = Photo::ridFromURI($attributes['src']);
		if (!empty($rid)) {
			// One media entry per stored photo row, its URL derived from the "-0." source URL
			$photos = Photo::selectToArray([], ['resource-id' => $rid]);
			foreach ($photos as $photo) {
				$block['media'][] = [
					'type'   => $photo['type'],
					'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
					'width'  => $photo['width'],
					'height' => $photo['height'],
				];
			}
			if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
				$block['alt_text'] = $photos[0]['desc'];
			}
		} elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
			$block['media'][] = [
				'type'   => $media['mimetype'],
				'url'    => $media['url'],
				'width'  => $media['width'],
				'height' => $media['height'],
			];
			if (empty($attributes['alt']) && !empty($media['description'])) {
				$block['alt_text'] = $media['description'];
			}
		} else {
			$block['media'][] = ['url' => $attributes['src']];
		}

		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$npf[] = $block;

		return $npf;
	}

	/**
	 * Appends a block for a link that is not preceded by text.
	 *
	 * When an audio or video media entry is stored for the "href", an audio
	 * or video block is created. Otherwise the link becomes a text block whose
	 * whole text is formatted as a link. Elements without "href" are skipped.
	 *
	 * @param DOMElement $element Link element
	 * @param int        $uri_id  uri-id used for the media lookup
	 * @param int        $level   Indent level, only added when greater than zero
	 * @param array      $npf     Blocks collected so far
	 *
	 * @return array Blocks including the appended one
	 */
	static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
	{
		$attributes = self::getAttributes($element);
		if (empty($attributes['href'])) {
			return $npf;
		}

		$block = [];

		$media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
		if (!empty($media)) {
			switch ($media['type']) {
				case Post\Media::AUDIO:
					$block = [
						'type' => 'audio',
						'media' => [
							'type' => $media['mimetype'],
							'url'  => $media['url'],
						]
					];

					if (!empty($media['name'])) {
						$block['title'] = $media['name'];
					} elseif (!empty($media['description'])) {
						$block['title'] = $media['description'];
					}

					$block = self::addPoster($media, $block);
					break;

				case Post\Media::VIDEO:
					$block = [
						'type' => 'video',
						'media' => [
							'type' => $media['mimetype'],
							'url'  => $media['url'],
						]
					];

					$block = self::addPoster($media, $block);
					break;
			}
		}

		if (empty($block)) {
			// No media entry, or one of an unexpected type: use a text block that is a link as a whole
			$block = [
				'type' => 'text',
				'text' => $element->textContent,
				'formatting' => [
					[
						'start' => 0,
						'end'   => mb_strlen($element->textContent),
						'type'  => 'link',
						'url'   => $attributes['href']
					]
				]
			];
		}

		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$npf[] = $block;

		return $npf;
	}
}