]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/NPF.php
Deactivated not working stuff
[friendica.git] / src / Content / Text / NPF.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use DOMDocument;
25 use DOMElement;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28
29 /**
30  * Tumblr Neue Post Format
31  * @see https://www.tumblr.com/docs/npf
32  */
33 class NPF
34 {
        /**
         * Heading tag name ('h1' ... 'h6') => NPF text subtype ('heading1' or 'heading2').
         * Reset and filled by setHeadingSubStyles(), read by getSubTypeByCallstack().
         *
         * @var array
         */
        static $heading_subtype = [];
36
37         static public function fromBBCode(string $bbcode, int $uri_id): array
38         {
39                 $bbcode = self::prepareBody($bbcode);
40
41                 $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
42                 if (empty($html)) {
43                         return [];
44                 }
45
46                 $doc = new DOMDocument();
47                 $doc->formatOutput = true;
48                 if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
49                         return [];
50                 }
51
52                 self::setHeadingSubStyles($doc);
53
54                 $element = $doc->getElementsByTagName('body')->item(0);
55
56                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, true, []);
57
58                 return self::addLinkBlockForUriId($uri_id, 0, $npf);
59         }
60
61         static function setHeadingSubStyles($doc)
62         {
63                 self::$heading_subtype = [];
64                 foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) {
65                         if ($doc->getElementsByTagName($element)->count() > 0) {
66                                 if (empty(self::$heading_subtype)) {
67                                         self::$heading_subtype[$element] = 'heading1';
68                                 } else {
69                                         self::$heading_subtype[$element] = 'heading2';
70                                 }
71                         }
72                 }
73         }
74
75         static private function prepareBody(string $body): string
76         {
77                 $shared = BBCode::fetchShareAttributes($body);
78                 if (!empty($shared)) {
79                         $body = $shared['shared'];
80                 }
81
82                 $body = BBCode::removeAttachment($body);
83
84                 $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
85
86                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
87                         foreach ($pictures as $picture) {
88                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
89                                         $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
90                                 }
91                         }
92                 }
93
94                 $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
95
96                 if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
97                         foreach ($pictures as $picture) {
98                                 if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
99                                         $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
100                                 }
101                         }
102                 }
103
104                 $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
105                 $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
106                 $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
107
108                 do {
109                         $oldbody = $body;
110                         $body = str_replace(["\n\n\n"], ["\n\n"], $body);
111                 } while ($oldbody != $body);
112
113                 return trim($body);
114         }
115
116         static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
117         {
118                 if ($parse_structure && $text) {
119                         list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
120                 }
121
122                 $callstack[] = $element->nodeName;
123                 $level = self::getLevelByCallstack($callstack);
124
125                 foreach ($element->childNodes as $child) {
126                         switch ($child->nodeName) {
127                                 case 'b':
128                                 case 'strong':
129                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
130                                         break;
131         
132                                 case 'i':
133                                 case 'em':
134                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
135                                         break;
136         
137                                 case 's':
138                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
139                                         break;
140
141                                 case 'u':
142                                 case 'span':
143                                         list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
144                                         break;
145
146                                 case 'hr':
147                                 case 'br':
148                                         if (!empty($text)) {
149                                                 $text .= "\n";
150                                         }
151                                         break;
152                 
153                                 case '#text':
154                                         $text .= $child->textContent;
155                                         break;
156
157                                 case 'table':
158                                 case 'summary':
159                                         // Ignore tables and spoilers
160                                         break;
161
162                                 case 'a':
163                                         if ($text) {
164                                                 list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
165                                         } else {
166                                                 $npf = self::addLinkBlock($child, $uri_id, $level, $npf);
167                                         }
168                                         break;
169
170                                 case 'img':
171                                         $npf = self::addImageBlock($child, $uri_id, $level, $npf);
172                                         break;
173
174                                         default:
175                                         list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
176                                         break;
177                         }
178                 }
179
180                 if ($parse_structure && $text) {
181                         list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
182                 }
183                 return [$npf, $text, $formatting];
184         }
185
186         static private function getLevelByCallstack($callstack): int
187         {
188                 // Deactivated, since Tumblr seems to have issues with the indent level
189                 return 0;
190
191                 $level = 0;
192                 foreach ($callstack as $entry) {
193                         if (in_array($entry, ['ol', 'ul', 'blockquote'])) {
194                                 ++$level;
195                         }
196                 }
197                 return max(0, $level - 1);
198         }
199
200         static private function getSubTypeByCallstack($callstack, string $text): string
201         {
202                 $subtype = '';
203                 foreach ($callstack as $entry) {
204                         switch ($entry) {
205                                 case 'ol':
206                                         $subtype = 'ordered-list-item';
207                                         break;
208
209                                 case 'ul':
210                                         $subtype = 'unordered-list-item';
211                                         break;
212
213                                 case 'h1':
214                                         $subtype = self::$heading_subtype[$entry];
215                                         break;
216         
217                                 case 'h2':
218                                         $subtype = self::$heading_subtype[$entry];
219                                         break;
220         
221                                 case 'h3':
222                                         $subtype = self::$heading_subtype[$entry];
223                                         break;
224         
225                                 case 'h4':
226                                         $subtype = self::$heading_subtype[$entry];
227                                         break;
228         
229                                 case 'h5':
230                                         $subtype = self::$heading_subtype[$entry];
231                                         break;
232         
233                                 case 'h6':
234                                         $subtype = self::$heading_subtype[$entry];
235                                         break;
236         
237                                 case 'blockquote':                                      
238                                         $subtype = strlen($text) < 100 ? 'quote' : 'indented';
239                                         break;
240
241                                 case 'pre':
242                                         $subtype = 'indented';
243                                         break;
244
245                                 case 'code':
246                                         $subtype = 'chat';
247                                         break;
248                         }
249                 }
250                 return $subtype;
251         }
252
253         static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
254         {
255                 $start = mb_strlen($text);
256                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
257
258                 if (!empty($type)) {
259                         $formatting[] = [
260                                 'start' => $start,
261                                 'end'   => mb_strlen($text),
262                                 'type'  => $type
263                         ];
264                 }
265                 return [$npf, $text, $formatting];
266         }
267
268         static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
269         {
270                 $start = mb_strlen($text);
271                 list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
272
273                 $attributes = [];
274                 foreach ($element->attributes as $key => $attribute) {
275                         $attributes[$key] = trim($attribute->value);
276                 }
277                 if (!empty($attributes['href'])) {
278                         $formatting[] = [
279                                 'start' => $start,
280                                 'end'   => mb_strlen($text),
281                                 'type'  => 'link',
282                                 'url'   => $attributes['href']
283                         ];
284                 }
285                 return [$npf, $text, $formatting];
286         }
287
288         static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array
289         {
290                 $block = [
291                         'type'      => 'text',
292                         'subtype'   => '',
293                         'text'      => $text,
294                 ];
295
296                 // Deactivated since Tumblr has got issues with it
297                 //if (!empty($formatting)) {
298                 //      $block['formatting'] = $formatting;
299                 //}
300
301                 $level = self::getLevelByCallstack($callstack);
302                 if ($level > 0) {
303                         $block['indent_level'] = $level;
304                 }
305
306                 $subtype = self::getSubTypeByCallstack($callstack, $text);
307                 if ($subtype) {
308                         $block['subtype'] = $subtype;
309                 } else {
310                         unset($block['subtype']);
311                 }
312
313                 $npf[] = $block;
314                 $text = '';
315                 $formatting = [];
316                 return [$npf, $text, $formatting];
317         }
318
319         static private function addPoster(array $media, array $block): array
320         {
321                 $poster = [];
322                 if (!empty($media['preview'])) {
323                         $poster['url'] = $media['preview'];
324                 }
325                 if (!empty($media['preview-width'])) {
326                         $poster['width'] = $media['preview-width'];
327                 }
328                 if (!empty($media['preview-height'])) {
329                         $poster['height'] = $media['preview-height'];
330                 }
331                 if (!empty($poster)) {
332                         $block['poster'] = [$poster];
333                 }
334                 return $block;
335         }
336
337         static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
338         {
339                 foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
340                         $host = parse_url($link['url'], PHP_URL_HOST);
341                         if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
342                                 $block = [
343                                         'type'     => 'video',
344                                         'provider' => 'youtube',
345                                         'url'      => $link['url'],
346                                 ];
347                         } elseif (in_array($host, ['vimeo.com'])) {
348                                 $block = [
349                                         'type'     => 'video',
350                                         'provider' => 'vimeo',
351                                         'url'      => $link['url'],
352                                 ];
353                         } elseif (in_array($host, ['open.spotify.com'])) {
354                                 $block = [
355                                         'type'     => 'audio',
356                                         'provider' => 'spotify',
357                                         'url'      => $link['url'],
358                                 ];
359                         } else {
360                                 $block = [
361                                         'type' => 'link',
362                                         'url'  => $link['url'],
363                                 ];
364                                 if (!empty($link['name'])) {
365                                         $block['title'] = $link['name'];
366                                 }
367                                 if (!empty($link['description'])) {
368                                         $block['description'] = $link['description'];
369                                 }
370                                 if (!empty($link['author-name'])) {
371                                         $block['author'] = $link['author-name'];
372                                 }
373                                 if (!empty($link['publisher-name'])) {
374                                         $block['site_name'] = $link['publisher-name'];
375                                 }
376                         }
377
378                         if ($level > 0) {
379                                 $block['indent_level'] = $level;
380                         }
381
382                         $npf[] = self::addPoster($link, $block);
383                 }
384                 return $npf;
385         }
386
387         static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
388         {
389                 $attributes = [];
390                 foreach ($element->attributes as $key => $attribute) {
391                         $attributes[$key] = trim($attribute->value);
392                 }
393                 if (empty($attributes['src'])) {
394                         return $npf;
395                 }
396
397                 $block = [
398                         'type'  => 'image',
399                         'media' => [],
400                 ];
401
402                 if (!empty($attributes['alt'])) {
403                         $block['alt_text'] = $attributes['alt'];
404                 }
405
406                 if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
407                         $block['caption'] = $attributes['title'];
408                 }
409
410                 $rid = Photo::ridFromURI($attributes['src']);
411                 if (!empty($rid)) {
412                         $photos = Photo::selectToArray([], ['resource-id' => $rid]);
413                         foreach ($photos as $photo) {
414                                 $block['media'][] = [
415                                         'type'   => $photo['type'],
416                                         'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
417                                         'width'  => $photo['width'],
418                                         'height' => $photo['height'],
419                                 ];
420                         }
421                         if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
422                                 $block['alt_text'] = $photos[0]['desc'];
423                         }
424                 } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
425                         $block['media'][] = [
426                                 'type'   => $media['mimetype'],
427                                 'url'    => $media['url'],
428                                 'width'  => $media['width'],
429                                 'height' => $media['height'],
430                         ];
431                         if (empty($attributes['alt']) && !empty($media['description'])) {
432                                 $block['alt_text'] = $media['description'];
433                         }
434                 } else {
435                         $block['media'][] = ['url' => $attributes['src']];
436                 }
437
438                 if ($level > 0) {
439                         $block['indent_level'] = $level;
440                 }
441
442                 $npf[] = $block;
443
444                 return $npf;
445         }
446
447         static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
448         {
449                 $attributes = [];
450                 foreach ($element->attributes as $key => $attribute) {
451                         $attributes[$key] = trim($attribute->value);
452                 }
453                 if (empty($attributes['href'])) {
454                         return $npf;
455                 }
456
457                 $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
458                 if (!empty($media)) {
459                         switch ($media['type']) {
460                                 case Post\Media::AUDIO:
461                                         $block = [
462                                                 'type' => 'audio',
463                                                 'media' => [
464                                                         'type' => $media['mimetype'],
465                                                         'url'  => $media['url'],
466                                                 ]
467                                         ];
468
469                                         if (!empty($media['name'])) {
470                                                 $block['title'] = $media['name'];
471                                         } elseif (!empty($media['description'])) {
472                                                 $block['title'] = $media['description'];
473                                         }
474
475                                         $block = self::addPoster($media, $block);
476                                         break;
477
478                                 case Post\Media::VIDEO:
479                                         $block = [
480                                                 'type' => 'video',
481                                                 'media' => [
482                                                         'type' => $media['mimetype'],
483                                                         'url'  => $media['url'],
484                                                 ]
485                                         ];
486
487                                         $block = self::addPoster($media, $block);
488                                         break;
489                         }
490                 } else {
491                         $block = [
492                                 'type' => 'text',
493                                 'text' => $element->textContent,
494                                 // Deactivated, since Tumblr has got issues with the formatting
495                                 //'formatting' => [
496                                 //      'start' => 0,
497                                 //      'end'   => strlen($element->textContent),
498                                 //      'type'  => 'link',
499                                 //      'url'   => $attributes['href']
500                                 //]
501                         ];
502                 }
503
504                 if ($level > 0) {
505                         $block['indent_level'] = $level;
506                 }
507
508                 $npf[] = $block;
509
510                 return $npf;
511         }
512 }