]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/HTML.php
893b59c3efb85e407a92f62b09793ec94a6e6fdf
[friendica.git] / src / Content / Text / HTML.php
1 <?php
2 /**
3  * @file src/Content/Text/HTML.php
4  */
5
6 namespace Friendica\Content\Text;
7
8 use DOMDocument;
9 use DOMXPath;
10 use Friendica\Core\Addon;
11 use Friendica\Core\L10n;
12 use Friendica\Core\Config;
13 use Friendica\Core\PConfig;
14 use Friendica\Core\Renderer;
15 use Friendica\Database\DBA;
16 use Friendica\Model\Contact;
17 use Friendica\Util\Network;
18 use Friendica\Util\Proxy as ProxyUtils;
19 use Friendica\Util\XML;
20 use League\HTMLToMarkdown\HtmlConverter;
21
22 class HTML
23 {
24         public static function sanitizeCSS($input)
25         {
26                 $cleaned = "";
27
28                 $input = strtolower($input);
29
30                 for ($i = 0; $i < strlen($input); $i++) {
31                         $char = substr($input, $i, 1);
32
33                         if (($char >= "a") && ($char <= "z")) {
34                                 $cleaned .= $char;
35                         }
36
37                         if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
38                                 $cleaned .= $char;
39                         }
40                 }
41
42                 return $cleaned;
43         }
44
45         private static function tagToBBCode(DOMDocument $doc, $tag, $attributes, $startbb, $endbb)
46         {
47                 do {
48                         $done = self::tagToBBCodeSub($doc, $tag, $attributes, $startbb, $endbb);
49                 } while ($done);
50         }
51
52         private static function tagToBBCodeSub(DOMDocument $doc, $tag, $attributes, $startbb, $endbb)
53         {
54                 $savestart = str_replace('$', '\x01', $startbb);
55                 $replace = false;
56
57                 $xpath = new DOMXPath($doc);
58
59                 $list = $xpath->query("//" . $tag);
60                 foreach ($list as $node) {
61                         $attr = [];
62                         if ($node->attributes->length) {
63                                 foreach ($node->attributes as $attribute) {
64                                         $attr[$attribute->name] = $attribute->value;
65                                 }
66                         }
67
68                         $replace = true;
69
70                         $startbb = $savestart;
71
72                         $i = 0;
73
74                         foreach ($attributes as $attribute => $value) {
75                                 $startbb = str_replace('\x01' . ++$i, '$1', $startbb);
76                                 if (strpos('*' . $startbb, '$1') > 0) {
77                                         if ($replace && (@$attr[$attribute] != '')) {
78                                                 $startbb = preg_replace($value, $startbb, $attr[$attribute], -1, $count);
79
80                                                 // If nothing could be changed
81                                                 if ($count == 0) {
82                                                         $replace = false;
83                                                 }
84                                         } else {
85                                                 $replace = false;
86                                         }
87                                 } else {
88                                         if (@$attr[$attribute] != $value) {
89                                                 $replace = false;
90                                         }
91                                 }
92                         }
93
94                         if ($replace) {
95                                 $StartCode = $doc->createTextNode($startbb);
96                                 $EndCode = $doc->createTextNode($endbb);
97
98                                 $node->parentNode->insertBefore($StartCode, $node);
99
100                                 if ($node->hasChildNodes()) {
101                                         foreach ($node->childNodes as $child) {
102                                                 $newNode = $child->cloneNode(true);
103                                                 $node->parentNode->insertBefore($newNode, $node);
104                                         }
105                                 }
106
107                                 $node->parentNode->insertBefore($EndCode, $node);
108                                 $node->parentNode->removeChild($node);
109                         }
110                 }
111
112                 return $replace;
113         }
114
115         /**
116          * Made by: ike@piratenpartei.de
117          * Originally made for the syncom project: http://wiki.piratenpartei.de/Syncom
118          *                                      https://github.com/annando/Syncom
119          *
120          * @brief Converter for HTML to BBCode
121          * @param string $message
122          * @param string $basepath
123          * @return string
124          */
125         public static function toBBCode($message, $basepath = '')
126         {
127                 $message = str_replace("\r", "", $message);
128
129                 // Removing code blocks before the whitespace removal processing below
130                 $codeblocks = [];
131                 $message = preg_replace_callback(
132                         '#<pre><code(?: class="language-([^"]*)")?>(.*)</code></pre>#iUs',
133                         function ($matches) use (&$codeblocks) {
134                                 $return = '[codeblock-' . count($codeblocks) . ']';
135
136                                 $prefix = '[code]';
137                                 if ($matches[1] != '') {
138                                         $prefix = '[code=' . $matches[1] . ']';
139                                 }
140
141                                 $codeblocks[] = $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]';
142                                 return $return;
143                         },
144                         $message
145                 );
146
147                 $message = str_replace(
148                         [
149                                 "<li><p>",
150                                 "</p></li>",
151                         ],
152                         [
153                                 "<li>",
154                                 "</li>",
155                         ],
156                         message
157                 );
158
159                 // remove namespaces
160                 $message = preg_replace('=<(\w+):(.+?)>=', '<removeme>', $message);
161                 $message = preg_replace('=</(\w+):(.+?)>=', '</removeme>', $message);
162
163                 $doc = new DOMDocument();
164                 $doc->preserveWhiteSpace = false;
165
166                 $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");
167
168                 @$doc->loadHTML($message);
169
170                 XML::deleteNode($doc, 'style');
171                 XML::deleteNode($doc, 'head');
172                 XML::deleteNode($doc, 'title');
173                 XML::deleteNode($doc, 'meta');
174                 XML::deleteNode($doc, 'xml');
175                 XML::deleteNode($doc, 'removeme');
176
177                 $xpath = new DomXPath($doc);
178                 $list = $xpath->query("//pre");
179                 foreach ($list as $node) {
180                         $node->nodeValue = str_replace("\n", "\r", $node->nodeValue);
181                 }
182
183                 $message = $doc->saveHTML();
184                 $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "<br />", " ", ""], $message);
185                 $message = preg_replace('= [\s]*=i', " ", $message);
186                 @$doc->loadHTML($message);
187
188                 self::tagToBBCode($doc, 'html', [], "", "");
189                 self::tagToBBCode($doc, 'body', [], "", "");
190
191                 // Outlook-Quote - Variant 1
192                 self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]');
193
194                 // Outlook-Quote - Variant 2
195                 self::tagToBBCode(
196                         $doc,
197                         'div',
198                         ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'],
199                         '[quote]',
200                         '[/quote]'
201                 );
202
203                 // MyBB-Stuff
204                 self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]');
205                 self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]');
206                 self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]');
207
208                 /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]');
209                   self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]');
210                   self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]');
211                   self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]');
212                   self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]');
213                   self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]');
214                   self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]');
215                  */
216                 // Untested
217                 //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]');
218                 //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]');
219                 //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]');
220
221                 self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]');
222
223                 //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]');
224                 //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]');
225                 //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]');
226                 //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]');
227                 // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica
228                 // Test
229                 //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]');
230                 self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]');
231                 self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]');
232
233                 self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]');
234                 self::tagToBBCode($doc, 'em', [], '[i]', '[/i]');
235                 self::tagToBBCode($doc, 'b', [], '[b]', '[/b]');
236                 self::tagToBBCode($doc, 'i', [], '[i]', '[/i]');
237                 self::tagToBBCode($doc, 'u', [], '[u]', '[/u]');
238                 self::tagToBBCode($doc, 's', [], '[s]', '[/s]');
239                 self::tagToBBCode($doc, 'del', [], '[s]', '[/s]');
240                 self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]');
241
242                 self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]");
243                 self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]");
244
245                 self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]');
246
247                 self::tagToBBCode($doc, 'br', [], "\n", '');
248
249                 self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", "");
250                 self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", "");
251
252                 self::tagToBBCode($doc, 'span', [], "", "");
253
254                 self::tagToBBCode($doc, 'span', [], "", "");
255                 self::tagToBBCode($doc, 'pre', [], "", "");
256
257                 self::tagToBBCode($doc, 'div', [], "\r", "\r");
258                 self::tagToBBCode($doc, 'p', [], "\n", "\n");
259
260                 self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]");
261                 self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]");
262                 self::tagToBBCode($doc, 'li', [], "[*]", "");
263
264                 self::tagToBBCode($doc, 'hr', [], "[hr]", "");
265
266                 self::tagToBBCode($doc, 'table', [], "", "");
267                 self::tagToBBCode($doc, 'tr', [], "\n", "");
268                 self::tagToBBCode($doc, 'td', [], "\t", "");
269                 //self::node2BBCode($doc, 'table', array(), "[table]", "[/table]");
270                 //self::node2BBCode($doc, 'th', array(), "[th]", "[/th]");
271                 //self::node2BBCode($doc, 'tr', array(), "[tr]", "[/tr]");
272                 //self::node2BBCode($doc, 'td', array(), "[td]", "[/td]");
273                 //self::node2BBCode($doc, 'h1', array(), "\n\n[size=xx-large][b]", "[/b][/size]\n");
274                 //self::node2BBCode($doc, 'h2', array(), "\n\n[size=x-large][b]", "[/b][/size]\n");
275                 //self::node2BBCode($doc, 'h3', array(), "\n\n[size=large][b]", "[/b][/size]\n");
276                 //self::node2BBCode($doc, 'h4', array(), "\n\n[size=medium][b]", "[/b][/size]\n");
277                 //self::node2BBCode($doc, 'h5', array(), "\n\n[size=small][b]", "[/b][/size]\n");
278                 //self::node2BBCode($doc, 'h6', array(), "\n\n[size=x-small][b]", "[/b][/size]\n");
279
280                 self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]");
281                 self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]");
282                 self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]");
283                 self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]");
284                 self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]");
285                 self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]");
286
287                 self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]');
288                 self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]');
289
290                 self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]');
291                 self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]');
292
293
294                 self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]');
295                 self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]');
296                 self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]');
297
298                 self::tagToBBCode($doc, 'key', [], '[code]', '[/code]');
299                 self::tagToBBCode($doc, 'code', [], '[code]', '[/code]');
300
301                 $message = $doc->saveHTML();
302
303                 // I'm removing something really disturbing
304                 // Don't know exactly what it is
305                 $message = str_replace(chr(194) . chr(160), ' ', $message);
306
307                 $message = str_replace("&nbsp;", " ", $message);
308
309                 // removing multiple DIVs
310                 $message = preg_replace('=\r *\r=i', "\n", $message);
311                 $message = str_replace("\r", "\n", $message);
312
313                 Addon::callHooks('html2bbcode', $message);
314
315                 $message = strip_tags($message);
316
317                 $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8');
318
319                 $message = str_replace(["<"], ["&lt;"], $message);
320
321                 // remove quotes if they don't make sense
322                 $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message);
323
324                 $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message);
325                 $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message);
326
327                 do {
328                         $oldmessage = $message;
329                         $message = str_replace("\n \n", "\n\n", $message);
330                 } while ($oldmessage != $message);
331
332                 do {
333                         $oldmessage = $message;
334                         $message = str_replace("\n\n\n", "\n\n", $message);
335                 } while ($oldmessage != $message);
336
337                 do {
338                         $oldmessage = $message;
339                         $message = str_replace(
340                                 [
341                                 "[/size]\n\n",
342                                 "\n[hr]",
343                                 "[hr]\n",
344                                 "\n[list",
345                                 "[/list]\n",
346                                 "\n[/",
347                                 "[list]\n",
348                                 "[list=1]\n",
349                                 "\n[*]"],
350                                 [
351                                 "[/size]\n",
352                                 "[hr]",
353                                 "[hr]",
354                                 "[list",
355                                 "[/list]",
356                                 "[/",
357                                 "[list]",
358                                 "[list=1]",
359                                 "[*]"],
360                                 $message
361                         );
362                 } while ($message != $oldmessage);
363
364                 $message = str_replace(
365                         ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'],
366                         ['[b]', '[/b]', '[i]', '[/i]'],
367                         $message
368                 );
369
370                 // Handling Yahoo style of mails
371                 $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message);
372
373                 // Restore code blocks
374                 $message = preg_replace_callback(
375                         '#\[codeblock-([0-9]+)\]#iU',
376                         function ($matches) use ($codeblocks) {
377                                 $return = '';
378                                 if (isset($codeblocks[intval($matches[1])])) {
379                                         $return = $codeblocks[$matches[1]];
380                                 }
381                                 return $return;
382                         },
383                         $message
384                 );
385
386                 $message = trim($message);
387
388                 if ($basepath != '') {
389                         $message = self::qualifyURLs($message, $basepath);
390                 }
391
392                 return $message;
393         }
394
395         /**
396          * @brief Sub function to complete incomplete URL
397          *
398          * @param array  $matches  Result of preg_replace_callback
399          * @param string $basepath Basepath that is used to complete the URL
400          *
401          * @return string The expanded URL
402          */
403         private static function qualifyURLsSub($matches, $basepath)
404         {
405                 $base = parse_url($basepath);
406                 unset($base['query']);
407                 unset($base['fragment']);
408
409                 $link = $matches[0];
410                 $url = $matches[1];
411
412                 $parts = array_merge($base, parse_url($url));
413                 $url2 = Network::unparseURL($parts);
414
415                 return str_replace($url, $url2, $link);
416         }
417
418         /**
419          * @brief Complete incomplete URLs in BBCode
420          *
421          * @param string $body     Body with URLs
422          * @param string $basepath Base path that is used to complete the URL
423          *
424          * @return string Body with expanded URLs
425          */
426         private static function qualifyURLs($body, $basepath)
427         {
428                 $URLSearchString = "^\[\]";
429
430                 $matches = ["/\[url\=([$URLSearchString]*)\].*?\[\/url\]/ism",
431                         "/\[url\]([$URLSearchString]*)\[\/url\]/ism",
432                         "/\[img\=[0-9]*x[0-9]*\](.*?)\[\/img\]/ism",
433                         "/\[img\](.*?)\[\/img\]/ism",
434                         "/\[zmg\=[0-9]*x[0-9]*\](.*?)\[\/img\]/ism",
435                         "/\[zmg\](.*?)\[\/zmg\]/ism",
436                         "/\[video\](.*?)\[\/video\]/ism",
437                         "/\[audio\](.*?)\[\/audio\]/ism",
438                 ];
439
440                 foreach ($matches as $match) {
441                         $body = preg_replace_callback(
442                                 $match,
443                                 function ($match) use ($basepath) {
444                                         return self::qualifyURLsSub($match, $basepath);
445                                 },
446                                 $body
447                         );
448                 }
449                 return $body;
450         }
451
452         private static function breakLines($line, $level, $wraplength = 75)
453         {
454                 if ($wraplength == 0) {
455                         $wraplength = 2000000;
456                 }
457
458                 $wraplen = $wraplength - $level;
459
460                 $newlines = [];
461
462                 do {
463                         $oldline = $line;
464
465                         $subline = substr($line, 0, $wraplen);
466
467                         $pos = strrpos($subline, ' ');
468
469                         if ($pos == 0) {
470                                 $pos = strpos($line, ' ');
471                         }
472
473                         if (($pos > 0) && strlen($line) > $wraplen) {
474                                 $newline = trim(substr($line, 0, $pos));
475                                 if ($level > 0) {
476                                         $newline = str_repeat(">", $level) . ' ' . $newline;
477                                 }
478
479                                 $newlines[] = $newline . " ";
480                                 $line = substr($line, $pos + 1);
481                         }
482                 } while ((strlen($line) > $wraplen) && !($oldline == $line));
483
484                 if ($level > 0) {
485                         $line = str_repeat(">", $level) . ' ' . $line;
486                 }
487
488                 $newlines[] = $line;
489
490                 return implode($newlines, "\n");
491         }
492
493         private static function quoteLevel($message, $wraplength = 75)
494         {
495                 $lines = explode("\n", $message);
496
497                 $newlines = [];
498                 $level = 0;
499                 foreach ($lines as $line) {
500                         $line = trim($line);
501                         $startquote = false;
502                         while (strpos("*" . $line, '[quote]') > 0) {
503                                 $level++;
504                                 $pos = strpos($line, '[quote]');
505                                 $line = substr($line, 0, $pos) . substr($line, $pos + 7);
506                                 $startquote = true;
507                         }
508
509                         $currlevel = $level;
510
511                         while (strpos("*" . $line, '[/quote]') > 0) {
512                                 $level--;
513                                 if ($level < 0) {
514                                         $level = 0;
515                                 }
516
517                                 $pos = strpos($line, '[/quote]');
518                                 $line = substr($line, 0, $pos) . substr($line, $pos + 8);
519                         }
520
521                         if (!$startquote || ($line != '')) {
522                                 $newlines[] = self::breakLines($line, $currlevel, $wraplength);
523                         }
524                 }
525
526                 return implode($newlines, "\n");
527         }
528
529         private static function collectURLs($message)
530         {
531                 $pattern = '/<a.*?href="(.*?)".*?>(.*?)<\/a>/is';
532                 preg_match_all($pattern, $message, $result, PREG_SET_ORDER);
533
534                 $urls = [];
535                 foreach ($result as $treffer) {
536                         $ignore = false;
537
538                         // A list of some links that should be ignored
539                         $list = ["/user/", "/tag/", "/group/", "/profile/", "/search?search=", "/search?tag=", "mailto:", "/u/", "/node/",
540                                 "//plus.google.com/", "//twitter.com/"];
541                         foreach ($list as $listitem) {
542                                 if (strpos($treffer[1], $listitem) !== false) {
543                                         $ignore = true;
544                                 }
545                         }
546
547                         if ((strpos($treffer[1], "//twitter.com/") !== false) && (strpos($treffer[1], "/status/") !== false)) {
548                                 $ignore = false;
549                         }
550
551                         if ((strpos($treffer[1], "//plus.google.com/") !== false) && (strpos($treffer[1], "/posts") !== false)) {
552                                 $ignore = false;
553                         }
554
555                         if ((strpos($treffer[1], "//plus.google.com/") !== false) && (strpos($treffer[1], "/photos") !== false)) {
556                                 $ignore = false;
557                         }
558
559                         if (!$ignore) {
560                                 $urls[$treffer[1]] = $treffer[1];
561                         }
562                 }
563
564                 return $urls;
565         }
566
567         public static function toPlaintext($html, $wraplength = 75, $compact = false)
568         {
569                 $message = str_replace("\r", "", $html);
570
571                 $doc = new DOMDocument();
572                 $doc->preserveWhiteSpace = false;
573
574                 $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");
575
576                 @$doc->loadHTML($message);
577
578                 $xpath = new DOMXPath($doc);
579                 $list = $xpath->query("//pre");
580                 foreach ($list as $node) {
581                         $node->nodeValue = str_replace("\n", "\r", $node->nodeValue);
582                 }
583
584                 $message = $doc->saveHTML();
585                 $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "<br>", " ", ""], $message);
586                 $message = preg_replace('= [\s]*=i', " ", $message);
587
588                 // Collecting all links
589                 $urls = self::collectURLs($message);
590
591                 @$doc->loadHTML($message);
592
593                 self::tagToBBCode($doc, 'html', [], '', '');
594                 self::tagToBBCode($doc, 'body', [], '', '');
595
596                 // MyBB-Auszeichnungen
597                 /*
598                   self::node2BBCode($doc, 'span', array('style'=>'text-decoration: underline;'), '_', '_');
599                   self::node2BBCode($doc, 'span', array('style'=>'font-style: italic;'), '/', '/');
600                   self::node2BBCode($doc, 'span', array('style'=>'font-weight: bold;'), '*', '*');
601
602                   self::node2BBCode($doc, 'strong', array(), '*', '*');
603                   self::node2BBCode($doc, 'b', array(), '*', '*');
604                   self::node2BBCode($doc, 'i', array(), '/', '/');
605                   self::node2BBCode($doc, 'u', array(), '_', '_');
606                  */
607
608                 if ($compact) {
609                         self::tagToBBCode($doc, 'blockquote', [], "»", "«");
610                 } else {
611                         self::tagToBBCode($doc, 'blockquote', [], '[quote]', "[/quote]\n");
612                 }
613
614                 self::tagToBBCode($doc, 'br', [], "\n", '');
615
616                 self::tagToBBCode($doc, 'span', [], "", "");
617                 self::tagToBBCode($doc, 'pre', [], "", "");
618                 self::tagToBBCode($doc, 'div', [], "\r", "\r");
619                 self::tagToBBCode($doc, 'p', [], "\n", "\n");
620
621                 //self::node2BBCode($doc, 'ul', array(), "\n[list]", "[/list]\n");
622                 //self::node2BBCode($doc, 'ol', array(), "\n[list=1]", "[/list]\n");
623                 self::tagToBBCode($doc, 'li', [], "\n* ", "\n");
624
625                 self::tagToBBCode($doc, 'hr', [], "\n" . str_repeat("-", 70) . "\n", "");
626
627                 self::tagToBBCode($doc, 'tr', [], "\n", "");
628                 self::tagToBBCode($doc, 'td', [], "\t", "");
629
630                 self::tagToBBCode($doc, 'h1', [], "\n\n*", "*\n");
631                 self::tagToBBCode($doc, 'h2', [], "\n\n*", "*\n");
632                 self::tagToBBCode($doc, 'h3', [], "\n\n*", "*\n");
633                 self::tagToBBCode($doc, 'h4', [], "\n\n*", "*\n");
634                 self::tagToBBCode($doc, 'h5', [], "\n\n*", "*\n");
635                 self::tagToBBCode($doc, 'h6', [], "\n\n*", "*\n");
636
637                 // Problem: there is no reliable way to detect if it is a link to a tag or profile
638                 //self::node2BBCode($doc, 'a', array('href'=>'/(.+)/'), ' $1 ', ' ', true);
639                 //self::node2BBCode($doc, 'a', array('href'=>'/(.+)/', 'rel'=>'oembed'), ' $1 ', '', true);
640                 //self::node2BBCode($doc, 'img', array('alt'=>'/(.+)/'), '$1', '');
641                 //self::node2BBCode($doc, 'img', array('title'=>'/(.+)/'), '$1', '');
642                 //self::node2BBCode($doc, 'img', array(), '', '');
643                 if (!$compact) {
644                         self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], ' [img]$1', '[/img] ');
645                 } else {
646                         self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], ' ', ' ');
647                 }
648
649                 self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], ' $1 ', '');
650
651                 $message = $doc->saveHTML();
652
653                 if (!$compact) {
654                         $message = str_replace("[img]", "", $message);
655                         $message = str_replace("[/img]", "", $message);
656                 }
657
658                 // was ersetze ich da?
659                 // Irgendein stoerrisches UTF-Zeug
660                 $message = str_replace(chr(194) . chr(160), ' ', $message);
661
662                 $message = str_replace("&nbsp;", " ", $message);
663
664                 // Aufeinanderfolgende DIVs
665                 $message = preg_replace('=\r *\r=i', "\n", $message);
666                 $message = str_replace("\r", "\n", $message);
667
668                 $message = strip_tags($message);
669
670                 $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8');
671
672                 if (!$compact && ($message != '')) {
673                         foreach ($urls as $id => $url) {
674                                 if ($url != '' && strpos($message, $url) === false) {
675                                         $message .= "\n" . $url . ' ';
676                                 }
677                         }
678                 }
679
680                 $message = str_replace("\n«", "«\n", $message);
681                 $message = str_replace("»\n", "\n»", $message);
682
683                 do {
684                         $oldmessage = $message;
685                         $message = str_replace("\n\n\n", "\n\n", $message);
686                 } while ($oldmessage != $message);
687
688                 $message = self::quoteLevel(trim($message), $wraplength);
689
690                 return trim($message);
691         }
692
/**
 * Converts the provided HTML code to Markdown with the League HtmlConverter.
 *
 * The converter is created with its 'hard_break' option switched on; this is
 * done to maximize compatibility with Diaspora in spite of the Markdown standards.
 *
 * @param string $html HTML code to convert
 * @return string Whatever the converter produces for the given HTML
 */
public static function toMarkdown($html)
{
        $options = ['hard_break' => true];

        return (new HtmlConverter($options))->convert($html);
}
707
/**
 * @brief Convert video HTML to BBCode tags
 *
 * Three kinds of embeds are recognized and replaced:
 *  - <object> elements referencing www.youtube.com/v/... or /cp/...  => [youtube]
 *  - <iframe> elements referencing www.youtube.com/embed/...         => [youtube]
 *  - <iframe> elements referencing player.vimeo.com/video/...        => [vimeo]
 * Everything else in the string is left untouched.
 *
 * @param string $s HTML that may contain embedded video players
 * @return string The input with the recognized embeds replaced by BBCode tags
 */
public static function htmlToBBVideo($s)
{
        // The dots of the host names are escaped so that only the literal hosts
        // match (an unescaped dot would also accept e.g. "wwwXyoutube.com").
        $rules = [
                '#<object[^>]+>(.*?)https?://www\.youtube\.com/((?:v|cp)/[A-Za-z0-9\-_=]+)(.*?)</object>#ism'
                        => '[youtube]$2[/youtube]',
                '#<iframe[^>](.*?)https?://www\.youtube\.com/embed/([A-Za-z0-9\-_=]+)(.*?)</iframe>#ism'
                        => '[youtube]$2[/youtube]',
                '#<iframe[^>](.*?)https?://player\.vimeo\.com/video/([0-9]+)(.*?)</iframe>#ism'
                        => '[vimeo]$2[/vimeo]',
        ];

        foreach ($rules as $pattern => $bbcode) {
                $converted = preg_replace($pattern, $bbcode, $s);

                // preg_replace() returns null on a PCRE error (e.g. backtrack limit);
                // keep the current text instead of silently discarding it.
                if ($converted !== null) {
                        $s = $converted;
                }
        }

        return $s;
}
735         
/**
 * Transform link href and img src from relative to absolute
 *
 * Only attributes written as ` href="..."` inside a tag starting with `<a` and
 * ` src="..."` inside a tag starting with `<img` are considered. Targets are
 * handled as follows:
 *  - starting with "http", with "//" or with a URI scheme such as "mailto:":
 *    left unchanged
 *  - starting with "/": prefixed with the base url
 *  - anything else: prefixed with the base url and a "/"
 *
 * Each attribute is rewritten in a single pass, so a base url that does not
 * itself start with "http" is never prepended twice, and the base url is
 * inserted literally (characters like "$" or "\" in it have no special meaning).
 *
 * @param string $text HTML text
 * @param string $base base url; trailing slashes are ignored
 * @return string The text with absolute link and image targets
 */
public static function relToAbs($text, $base)
{
        if (empty($base)) {
                return $text;
        }

        $base = rtrim($base, '/');

        foreach (['a' => 'href', 'img' => 'src'] as $tag => $attribute) {
                $pattern = '/<' . $tag . '([^>]*) ' . $attribute . '="(?!http)([^"]*)"/';

                $rewritten = preg_replace_callback(
                        $pattern,
                        function (array $match) use ($base, $tag, $attribute) {
                                $target = $match[2];

                                // Protocol-relative urls and urls carrying their own scheme
                                // (mailto:, data:, tel:, ...) are already absolute.
                                if (strpos($target, '//') === 0 || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $target)) {
                                        return $match[0];
                                }

                                // Root-relative targets already bring their leading slash
                                $separator = (strpos($target, '/') === 0) ? '' : '/';

                                return '<' . $tag . $match[1] . ' ' . $attribute . '="' . $base . $separator . $target . '"';
                        },
                        $text
                );

                // preg_replace_callback() returns null on a PCRE error; keep the text then
                if ($rewritten !== null) {
                        $text = $rewritten;
                }
        }

        return $text;
}
775
/**
 * Returns the markup of an empty div element carrying the CSS class 'clear'.
 *
 * @return string
 * @deprecated
 */
public static function clearDiv()
{
        $markup = '<div class="clear"></div>';

        return $markup;
}
785
/**
 * Renders the loader used for infinite scrolling.
 *
 * The "scroll_loader.tpl" template is filled with two translated labels:
 * 'wait' (shown while more entries are loading) and 'end'.
 *
 * @return string html for loader
 */
public static function scrollLoader()
{
        $template = Renderer::getMarkupTemplate("scroll_loader.tpl");

        $labels = [
                'wait' => L10n::t('Loading more entries...'),
                'end' => L10n::t('The end')
        ];

        return Renderer::replaceMacros($template, $labels);
}
798
/**
 * Get html for contact block.
 *
 * Shows a random selection of the profile owner's DFRN, OStatus and Diaspora
 * contacts (excluding self, blocked, pending, hidden and archived ones). The
 * number of contacts shown is read from the 'system' / 'display_friend_count'
 * user setting (default 24). An empty string is returned when that setting
 * is 0, when no profile is loaded or when the profile hides its friends.
 *
 * @template contact_block.tpl
 * @hook contact_block_end (contacts=>array, output=>string)
 * @return string
 */
public static function contactBlock()
{
        $o = '';
        $a = get_app();

        // Validate the profile before any of its fields is read
        if (!is_array($a->profile) || $a->profile['hide-friends']) {
                return $o;
        }

        $shown = PConfig::get($a->profile['uid'], 'system', 'display_friend_count', 24);
        if ($shown == 0) {
                return $o;
        }

        // Protocol is not part of this file's "use" block, so the class is
        // referenced by its fully qualified name.
        $dfrn = DBA::escape(\Friendica\Core\Protocol::DFRN);
        $ostatus = DBA::escape(\Friendica\Core\Protocol::OSTATUS);
        $diaspora = DBA::escape(\Friendica\Core\Protocol::DIASPORA);

        $r = q("SELECT COUNT(*) AS `total` FROM `contact`
                        WHERE `uid` = %d AND NOT `self` AND NOT `blocked`
                                AND NOT `pending` AND NOT `hidden` AND NOT `archive`
                                AND `network` IN ('%s', '%s', '%s')",
                intval($a->profile['uid']),
                $dfrn,
                $ostatus,
                $diaspora
        );

        // Stays 0 when the count query yields no result
        $total = 0;
        if (DBA::isResult($r)) {
                $total = intval($r[0]['total']);
        }

        // No thumbnails unless the contact rows can actually be loaded below
        $micropro = null;

        if (!$total) {
                $contacts = L10n::t('No contacts');
        } else {
                $contacts = L10n::tt('%d Contact', '%d Contacts', $total);

                // Splitting the query in two parts makes it much faster
                $r = q("SELECT `id` FROM `contact`
                                WHERE `uid` = %d AND NOT `self` AND NOT `blocked`
                                        AND NOT `pending` AND NOT `hidden` AND NOT `archive`
                                        AND `network` IN ('%s', '%s', '%s')
                                ORDER BY RAND() LIMIT %d",
                        intval($a->profile['uid']),
                        $dfrn,
                        $ostatus,
                        $diaspora,
                        intval($shown)
                );

                if (DBA::isResult($r)) {
                        $ids = [];
                        foreach ($r as $contact) {
                                $ids[] = intval($contact["id"]);
                        }

                        $r = q("SELECT `id`, `uid`, `addr`, `url`, `name`, `thumb`, `network` FROM `contact` WHERE `id` IN (%s)",
                                DBA::escape(implode(",", $ids))
                        );

                        if (DBA::isResult($r)) {
                                $micropro = [];
                                foreach ($r as $rr) {
                                        // Use the sibling method of this class rather than a global function
                                        $micropro[] = self::micropro($rr, true, 'mpfriend');
                                }
                        }
                }
        }

        $tpl = Renderer::getMarkupTemplate('contact_block.tpl');
        $o = Renderer::replaceMacros($tpl, [
                '$contacts' => $contacts,
                '$nickname' => $a->profile['nickname'],
                '$viewcontacts' => L10n::t('View Contacts'),
                '$micropro' => $micropro,
        ]);

        $arr = ['contacts' => $r, 'output' => $o];

        Addon::callHooks('contact_block_end', $arr);

        // NOTE(review): $o is returned, not $arr['output'], so changes a hook makes
        // to the 'output' entry do not reach the caller - confirm this is intended.
        return $o;
}
885
/**
 * @brief Format a contact as a picture link or as a text link
 *
 * @param array $contact Contact data, with the following entries:
 *      int 'id' => The ID of the contact
 *      int 'uid' => The user ID of the user who owns this data
 *      string 'name' => The name of the contact
 *      string 'url' => The url to the profile page of the contact
 *      string 'addr' => The webbie of the contact (e.g. username@friendica.com);
 *              falls back to 'url' when missing
 *      string 'network' => The network to which the contact belongs to
 *      string 'thumb' => The contact picture
 *      string 'click' => js code which is performed when clicking on the contact
 * @param boolean $redirect If true, link to the magic link of the contact url
 * @param string $class CSS class passed to the template as '$class'
 * @param boolean $textmode If true display the contact as text link (micropro_txt.tpl),
 *      if false display the contact as picture link (micropro_img.tpl)
 *
 * @return string Formatted html
 */
public static function micropro($contact, $redirect = false, $class = '', $textmode = false)
{
        // Use the contact URL if no address is available
        if (!x($contact, "addr")) {
                $contact["addr"] = $contact["url"];
        }

        $url = $contact['url'];
        $sparkle = '';
        $redir = false;

        if ($redirect) {
                $url = Contact::magicLink($contact['url']);
                // Only links that start with 'redir/' are flagged with the sparkle class
                if (strpos($url, 'redir/') === 0) {
                        $sparkle = ' sparkle';
                }
        }

        // If there is some js available we don't need the url
        if (x($contact, 'click')) {
                $url = '';
        }

        $tpl = Renderer::getMarkupTemplate($textmode ? 'micropro_txt.tpl' : 'micropro_img.tpl');

        return Renderer::replaceMacros($tpl, [
                '$click' => defaults($contact, 'click', ''),
                '$class' => $class,
                '$url' => $url,
                '$photo' => ProxyUtils::proxifyUrl($contact['thumb'], false, ProxyUtils::SIZE_THUMB),
                '$name' => $contact['name'],
                'title' => $contact['name'] . ' [' . $contact['addr'] . ']',
                // The key is spelled like the variable it carries. The former
                // misspelling '$parkle' is still passed so that a template which
                // relied on it keeps working.
                '$sparkle' => $sparkle,
                '$parkle' => $sparkle,
                '$redir' => $redir
        ]);
}
939
/**
 * Search box.
 *
 * A query starting with '#' switches the box to tag mode, in which the save
 * button is labelled "Follow" instead of "Save".
 *
 * @param string $s     Search query.
 * @param string $id    HTML id
 * @param string $url   Search url.
 * @param bool   $save  Show save search button.
 * @param bool   $aside Display the search widget aside. If false, a list of
 *                      search options is passed to the template as well.
 *
 * @return string Formatted HTML.
 */
public static function search($s, $id = 'search-box', $url = 'search', $save = false, $aside = true)
{
        $mode = 'text';

        if (strpos($s, '#') === 0) {
                $mode = 'tag';
        }
        $save_label = $mode === 'text' ? L10n::t('Save') : L10n::t('Follow');

        // Feature is not part of this file's "use" block, so the class is
        // referenced by its fully qualified name.
        $savedsearch = local_user() && \Friendica\Content\Feature::isEnabled(local_user(), 'savedsearch');

        $values = [
                '$s' => htmlspecialchars($s),
                '$id' => $id,
                '$action_url' => $url,
                '$search_label' => L10n::t('Search'),
                '$save_label' => $save_label,
                '$savedsearch' => $savedsearch,
                '$search_hint' => L10n::t('@name, !forum, #tags, content'),
                '$mode' => $mode
        ];

        if (!$aside) {
                $values['$searchoption'] = [
                        L10n::t("Full Text"),
                        L10n::t("Tags"),
                        L10n::t("Contacts")
                ];

                // The forum option is only offered when the local poco search is enabled
                if (Config::get('system', 'poco_local_search')) {
                        $values['$searchoption'][] = L10n::t("Forums");
                }
        }

        return Renderer::replaceMacros(Renderer::getMarkupTemplate('searchbox.tpl'), $values);
}
984
/**
 * Replace naked text hyperlinks with HTML formatted hyperlinks
 *
 * Every http:// or https:// url found in the text is wrapped in an anchor
 * (preceded by a space) that opens in a new window. Afterwards an "&amp;"
 * entity inside a tag carrying a src or href attribute is turned back into "&".
 *
 * @param string $s Text containing naked urls
 * @return string Text with the urls turned into anchors
 */
public static function toLink($s)
{
        $url_pattern = "/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\'\%\$\!\+]*)/";
        $anchor = ' <a href="$1" target="_blank">$1</a>';
        $linked = preg_replace($url_pattern, $anchor, $s);

        $entity_pattern = "/\<(.*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism";

        return preg_replace($entity_pattern, '<$1$2=$3&$4>', $linked);
}
996
/**
 * Wraps a HTML text in a content hiding header listing the given filtering reasons
 *
 * Reasons are expected to have been translated already. Without any reason
 * the HTML is handed back as it was received.
 *
 * @param string $html
 * @param array  $reasons
 * @return string
 */
public static function applyContentFilter($html, array $reasons)
{
        // Nothing to hide when no reason was given
        if (!count($reasons)) {
                return $html;
        }

        $template = Renderer::getMarkupTemplate('wall/content_filter.tpl');

        $macros = [
                '$reasons'   => $reasons,
                '$rnd'       => random_string(8),
                '$openclose' => L10n::t('Click to open/close'),
                '$html'      => $html
        ];

        return Renderer::replaceMacros($template, $macros);
}
1020
/**
 * Turns every "&amp;" HTML entity of a text back into a plain "&" character
 *
 * @param string $s
 * @return string
 */
public static function unamp($s)
{
        $entity = '&amp;';
        $character = '&';

        return str_replace($entity, $character, $s);
}
1030 }