X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FHTML.php;h=0e117a2205b9a7676897dd9f15e2f9a35a61d69a;hb=e8becc153856debb927d7afac17bae5c80a0abb7;hp=8116fa637813ccf9da5c60a61d8fe878a151dd02;hpb=ab8997f9db910ba70c592bf106a7f5fc41a55b2d;p=friendica.git diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 8116fa6378..0e117a2205 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -1,6 +1,22 @@ . + * */ namespace Friendica\Content\Text; @@ -9,39 +25,17 @@ use DOMDocument; use DOMXPath; use Friendica\Content\Widget\ContactBlock; use Friendica\Core\Hook; -use Friendica\Core\Config; use Friendica\Core\Renderer; +use Friendica\Core\Search; use Friendica\DI; use Friendica\Model\Contact; use Friendica\Util\Network; -use Friendica\Util\Proxy as ProxyUtils; use Friendica\Util\Strings; use Friendica\Util\XML; use League\HTMLToMarkdown\HtmlConverter; class HTML { - public static function sanitizeCSS($input) - { - $cleaned = ""; - - $input = strtolower($input); - - for ($i = 0; $i < strlen($input); $i++) { - $char = substr($input, $i, 1); - - if (($char >= "a") && ($char <= "z")) { - $cleaned .= $char; - } - - if (!(strpos(" #;:0123456789-_.%", $char) === false)) { - $cleaned .= $char; - } - } - - return $cleaned; - } - /** * Search all instances of a specific HTML tag node in the provided DOM document and replaces them with BBCode text nodes. * @@ -149,254 +143,250 @@ class HTML */ public static function toBBCode($message, $basepath = '') { + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $message); - // Removing code blocks before the whitespace removal processing below - $codeblocks = []; - $message = preg_replace_callback( - '#
(.*)
#iUs', - function ($matches) use (&$codeblocks) { - $return = '[codeblock-' . count($codeblocks) . ']'; - - $prefix = '[code]'; - if ($matches[1] != '') { - $prefix = '[code=' . $matches[1] . ']'; - } - - $codeblocks[] = $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; - return $return; - }, - $message - ); - - $message = str_replace( - [ - "
  • ", - "

  • ", - ], - [ - "
  • ", - "
  • ", - ], - $message - ); - - // remove namespaces - $message = preg_replace('=<(\w+):(.+?)>=', '', $message); - $message = preg_replace('==', '', $message); - - $doc = new DOMDocument(); - $doc->preserveWhiteSpace = false; - - $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); - - @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - - XML::deleteNode($doc, 'style'); - XML::deleteNode($doc, 'head'); - XML::deleteNode($doc, 'title'); - XML::deleteNode($doc, 'meta'); - XML::deleteNode($doc, 'xml'); - XML::deleteNode($doc, 'removeme'); - - $xpath = new DomXPath($doc); - $list = $xpath->query("//pre"); - foreach ($list as $node) { - // Ensure to escape unescaped & - they will otherwise raise a warning - $safe_value = preg_replace('/&(?!\w+;)/', '&', $node->nodeValue); - $node->nodeValue = str_replace("\n", "\r", $safe_value); - } - - $message = $doc->saveHTML(); - $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
    ", " ", ""], $message); - $message = preg_replace('= [\s]*=i', " ", $message); - - @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - - self::tagToBBCode($doc, 'html', [], "", ""); - self::tagToBBCode($doc, 'body', [], "", ""); - - // Outlook-Quote - Variant 1 - self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]'); - - // Outlook-Quote - Variant 2 - self::tagToBBCode( - $doc, - 'div', - ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'], - '[quote]', - '[/quote]' - ); - - // MyBB-Stuff - self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]'); - self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]'); - self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]'); - - /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]'); - self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]'); - self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); - self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); - */ - // Untested - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); - - self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]'); - - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); - // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica - // Test - //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]'); - self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); - self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); - - self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); - self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); - - self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); - self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]"); - - self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]'); - - self::tagToBBCode($doc, 'br', [], "\n", ''); - - self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", ""); - self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", ""); - - self::tagToBBCode($doc, 'span', [], "", ""); - - self::tagToBBCode($doc, 'span', [], "", ""); - self::tagToBBCode($doc, 'pre', [], "", ""); - - self::tagToBBCode($doc, 'div', [], "\r", "\r"); - self::tagToBBCode($doc, 'p', [], "\n", "\n"); - - self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); - self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); - self::tagToBBCode($doc, 'li', [], "[*]", ""); - - self::tagToBBCode($doc, 'hr', [], "[hr]", ""); - - self::tagToBBCode($doc, 'table', [], "[table]", "[/table]"); - self::tagToBBCode($doc, 'th', [], "[th]", "[/th]"); - self::tagToBBCode($doc, 'tr', [], "[tr]", "[/tr]"); - self::tagToBBCode($doc, 'td', [], "[td]", "[/td]"); - - self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]"); - self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]"); - self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]"); - self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]"); - self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]"); - self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]"); + $message = Strings::performWithEscapedBlocks($message, '#
    #iUs', function ($message) { + $message = str_replace( + [ + "
  • ", + "

  • ", + ], + [ + "
  • ", + "
  • ", + ], + $message + ); - self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]'); - self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]'); + // remove namespaces + $message = preg_replace('=<(\w+):(.+?)>=', '', $message); + $message = preg_replace('==', '', $message); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true); + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; + $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); - self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); - self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); - self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true); + if (empty($message)) { + return ''; + } - self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); - self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); + + XML::deleteNode($doc, 'style'); + XML::deleteNode($doc, 'head'); + XML::deleteNode($doc, 'title'); + XML::deleteNode($doc, 'meta'); + XML::deleteNode($doc, 'xml'); + XML::deleteNode($doc, 'removeme'); + + $xpath = new DomXPath($doc); + $list = $xpath->query("//pre"); + foreach ($list as $node) { + // Ensure to escape unescaped & - they will otherwise raise a warning + $safe_value = preg_replace('/&(?!\w+;)/', '&', $node->nodeValue); + $node->nodeValue = str_replace("\n", "\r", $safe_value); + } - $message = $doc->saveHTML(); + $message = $doc->saveHTML(); + $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
    ", " ", ""], $message); + $message = preg_replace('= [\s]*=i', " ", $message); - // I'm removing something really disturbing - // Don't know exactly what it is - $message = str_replace(chr(194) . chr(160), ' ', $message); + if (empty($message)) { + return ''; + } - $message = str_replace(" ", " ", $message); + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - // removing multiple DIVs - $message = preg_replace('=\r *\r=i', "\n", $message); - $message = str_replace("\r", "\n", $message); + self::tagToBBCode($doc, 'html', [], "", ""); + self::tagToBBCode($doc, 'body', [], "", ""); - Hook::callAll('html2bbcode', $message); + // Outlook-Quote - Variant 1 + self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]'); - $message = strip_tags($message); + // Outlook-Quote - Variant 2 + self::tagToBBCode( + $doc, + 'div', + ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'], + '[quote]', + '[/quote]' + ); - $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); + // MyBB-Stuff + self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]'); + self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]'); + self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]'); - // remove quotes if they don't make sense - $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message); + /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]'); + self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]'); + self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); + self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); + */ + // Untested + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); - $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message); - $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message); + self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]'); - do { - $oldmessage = $message; - $message = str_replace("\n \n", "\n\n", $message); - } while ($oldmessage != $message); + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica + // Test + //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]'); + self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); + self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); - do { - $oldmessage = $message; - $message = str_replace("\n\n\n", "\n\n", $message); - } while ($oldmessage != $message); + self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); + self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); + self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); + self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); + self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); + self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); + self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); + self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); + + self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); + self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]"); + + self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]'); + + self::tagToBBCode($doc, 'br', [], "\n", ''); + + self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", ""); + self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", ""); + + self::tagToBBCode($doc, 'span', [], "", ""); + + self::tagToBBCode($doc, 'span', [], "", ""); + self::tagToBBCode($doc, 'pre', [], "", ""); + + self::tagToBBCode($doc, 'div', [], "\r", "\r"); + self::tagToBBCode($doc, 'p', [], "\n", "\n"); + + self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); + self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); + self::tagToBBCode($doc, 'li', [], "[*]", ""); + + self::tagToBBCode($doc, 'hr', [], "[hr]", ""); + + self::tagToBBCode($doc, 'table', [], "[table]", "[/table]"); + self::tagToBBCode($doc, 'th', [], "[th]", "[/th]"); + self::tagToBBCode($doc, 'tr', [], "[tr]", "[/tr]"); + self::tagToBBCode($doc, 'td', [], "[td]", "[/td]"); + + self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]"); + self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]"); + self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]"); + self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]"); + self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]"); + self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]"); + + self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]'); + self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]'); + + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true); + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true); + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true); + + + self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); + self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); + // Backward compatibility, [iframe] support has been removed in version 2020.12 + self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[url]$1', '[/url]', true); + + self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); + self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); + + $message = $doc->saveHTML(); + + // I'm removing something really disturbing + // Don't know exactly what it is + $message = str_replace(chr(194) . chr(160), ' ', $message); + + $message = str_replace(" ", " ", $message); + + // removing multiple DIVs + $message = preg_replace('=\r *\r=i', "\n", $message); + $message = str_replace("\r", "\n", $message); + + Hook::callAll('html2bbcode', $message); + + $message = strip_tags($message); + + $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); + + // remove quotes if they don't make sense + $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message); + + $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message); + $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message); + + do { + $oldmessage = $message; + $message = str_replace("\n \n", "\n\n", $message); + } while ($oldmessage != $message); + + do { + $oldmessage = $message; + $message = str_replace("\n\n\n", "\n\n", $message); + } while ($oldmessage != $message); + + do { + $oldmessage = $message; + $message = str_replace( + [ + "[/size]\n\n", + "\n[hr]", + "[hr]\n", + "\n[list", + "[/list]\n", + "\n[/", + "[list]\n", + "[list=1]\n", + "\n[*]"], + [ + "[/size]\n", + "[hr]", + "[hr]", + "[list", + "[/list]", + "[/", + "[list]", + "[list=1]", + "[*]"], + $message + ); + } while ($message != $oldmessage); - do { - $oldmessage = $message; $message = str_replace( - [ - "[/size]\n\n", - "\n[hr]", - "[hr]\n", - "\n[list", - "[/list]\n", - "\n[/", - "[list]\n", - "[list=1]\n", - "\n[*]"], - [ - "[/size]\n", - "[hr]", - "[hr]", - "[list", - "[/list]", - "[/", - "[list]", - "[list=1]", - "[*]"], + ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], + ['[b]', '[/b]', '[i]', '[/i]'], $message ); - } while ($message != $oldmessage); - $message = str_replace( - ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], - ['[b]', '[/b]', '[i]', '[/i]'], - $message - ); + // Handling Yahoo style of mails + $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); - // Handling Yahoo style of mails - $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); + return $message; + }); - // Restore code blocks $message = preg_replace_callback( - '#\[codeblock-([0-9]+)\]#iU', - function ($matches) use ($codeblocks) { - $return = ''; - if (isset($codeblocks[intval($matches[1])])) { - $return = $codeblocks[$matches[1]]; + '#
    (.*)
    #iUs', + function ($matches) { + $prefix = '[code]'; + if ($matches[1] != '') { + $prefix = '[code=' . $matches[1] . ']'; } - return $return; + + return $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; }, $message ); @@ -407,6 +397,7 @@ class HTML $message = self::qualifyURLs($message, $basepath); } + DI::profiler()->stopRecording(); return $message; } @@ -596,6 +587,7 @@ class HTML */ public static function toPlaintext(string $html, $wraplength = 75, $compact = false) { + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $html); $doc = new DOMDocument(); @@ -603,6 +595,11 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + if (empty($message)) { + DI::profiler()->stopRecording(); + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); $message = $doc->saveHTML(); @@ -612,6 +609,11 @@ class HTML // Collecting all links $urls = self::collectURLs($message); + if (empty($message)) { + DI::profiler()->stopRecording(); + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); self::tagToBBCode($doc, 'html', [], '', ''); @@ -650,6 +652,7 @@ class HTML self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], ' ', ' '); } + // Backward compatibility, [iframe] support has been removed in version 2020.12 self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], ' $1 ', ''); $message = $doc->saveHTML(); @@ -691,6 +694,7 @@ class HTML $message = self::quoteLevel(trim($message), $wraplength); + DI::profiler()->stopRecording(); return trim($message); } @@ -703,9 +707,11 @@ class HTML */ public static function toMarkdown($html) { + DI::profiler()->startRecording('rendering'); $converter = new HtmlConverter(['hard_break' => true]); $markdown = $converter->convert($html); + DI::profiler()->stopRecording(); return $markdown; } @@ -803,22 +809,6 @@ class HTML ]); } - /** - * Get html for contact block. - * - * @deprecated since version 2019.03 - * @see ContactBlock::getHTML() - * @return string - * @throws \Friendica\Network\HTTPException\InternalServerErrorException - * @throws \ImagickException - */ - public static function contactBlock() - { - $a = DI::app(); - - return ContactBlock::getHTML($a->profile); - } - /** * Format contacts as picture links or as text links * @@ -851,7 +841,7 @@ class HTML $redir = false; if ($redirect) { - $url = Contact::magicLink($contact['url']); + $url = Contact::magicLinkByContact($contact); if (strpos($url, 'redir/') === 0) { $sparkle = ' sparkle'; } @@ -866,7 +856,7 @@ class HTML '$click' => $contact['click'] ?? '', '$class' => $class, '$url' => $url, - '$photo' => ProxyUtils::proxifyUrl($contact['thumb'], false, ProxyUtils::SIZE_THUMB), + '$photo' => Contact::getThumb($contact), '$name' => $contact['name'], 'title' => $contact['name'] . ' [' . $contact['addr'] . ']', '$parkle' => $sparkle, @@ -902,7 +892,7 @@ class HTML '$save_label' => $save_label, '$search_hint' => DI::l10n()->t('@name, !forum, #tags, content'), '$mode' => $mode, - '$return_url' => urlencode('search?q=' . urlencode($s)), + '$return_url' => urlencode(Search::getSearchPath($s)), ]; if (!$aside) { @@ -912,7 +902,7 @@ class HTML 'contacts' => DI::l10n()->t('Contacts') ]; - if (Config::get('system', 'poco_local_search')) { + if (DI::config()->get('system', 'poco_local_search')) { $values['$searchoption']['forums'] = DI::l10n()->t('Forums'); } } @@ -928,7 +918,7 @@ class HTML */ public static function toLink($s) { - $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\'\%\$\!\+]*)/", ' $1', $s); + $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\'\%\$\!\+]*)/", ' $1', $s); $s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism", '<$1$2=$3&$4>', $s); return $s; } @@ -967,4 +957,69 @@ class HTML { return str_replace('&', '&', $s); } + + /** + * Clean an HTML text for potentially harmful code + * + * @param string $text + * @param array $allowedIframeDomains List of allowed iframe source domains without the scheme + * @return string + */ + public static function purify(string $text, array $allowedIframeDomains = []): string + { + // Allows cid: URL scheme + \HTMLPurifier_URISchemeRegistry::instance()->register('cid', new HTMLPurifier_URIScheme_cid()); + + $config = \HTMLPurifier_HTML5Config::createDefault(); + $config->set('HTML.Doctype', 'HTML5'); + + // Used to remove iframe with src attribute filtered out + $config->set('AutoFormat.RemoveEmpty', true); + + $config->set('HTML.SafeIframe', true); + + array_walk($allowedIframeDomains, function (&$domain) { + // Allow the domain and all its eventual sub-domains + $domain = '(?:(?!-)[A-Za-z0-9-]{1,63}(?set('URI.SafeIframeRegexp', + '%^https://(?: + ' . implode('|', $allowedIframeDomains) . ' + ) + (?:/|$) # Prevents bogus domains like youtube.com.fake.tld + %xi' + ); + + $config->set('Attr.AllowedRel', [ + 'noreferrer' => true, + 'noopener' => true, + ]); + $config->set('Attr.AllowedFrameTargets', [ + '_blank' => true, + ]); + + $config->set('AutoFormat.RemoveEmpty.Predicate', [ + 'colgroup' => [], // | + 'th' => [], // | + 'td' => [], // | + 'iframe' => ['src'], // ↳ Default HTMLPurify values + 'i' => ['class'], // Allows forkawesome icons + ]); + + // Uncomment to debug HTMLPurifier behavior + //$config->set('Core.CollectErrors', true); + //$config->set('Core.MaintainLineNumbers', true); + + $HTMLPurifier = new \HTMLPurifier($config); + + $text = $HTMLPurifier->purify($text); + + /** @var \HTMLPurifier_ErrorCollector $errorCollector */ + // Uncomment to debug HTML Purifier behavior + //$errorCollector = $HTMLPurifier->context->get('ErrorCollector'); + //var_dump($errorCollector->getRaw()); + + return $text; + } }