X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FHTML.php;h=ae9452abb0fd7d273be064048efb87346ad726dd;hb=1b73e4d267761d2b717fb30fb972c6e450ecf85e;hp=7b8153b8a8e735df5b7813834ed83437b8e0fa67;hpb=c6b45a958e3b09bc8f3950a718c181dfc9e0b910;p=friendica.git diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 7b8153b8a8..ae9452abb0 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -1,6 +1,6 @@ = "a") && ($char <= "z")) { - $cleaned .= $char; - } - - if (!(strpos(" #;:0123456789-_.%", $char) === false)) { - $cleaned .= $char; - } - } - - return $cleaned; - } - /** * Search all instances of a specific HTML tag node in the provided DOM document and replaces them with BBCode text nodes. * @@ -164,6 +143,7 @@ class HTML */ public static function toBBCode($message, $basepath = '') { + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $message); $message = Strings::performWithEscapedBlocks($message, '#
#iUs', function ($message) { @@ -188,6 +168,10 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + if (empty($message)) { + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); XML::deleteNode($doc, 'style'); @@ -209,6 +193,10 @@ class HTML $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
", " ", ""], $message); $message = preg_replace('= [\s]*=i', " ", $message); + if (empty($message)) { + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); self::tagToBBCode($doc, 'html', [], "", ""); @@ -311,7 +299,8 @@ class HTML self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); - self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true); + // Backward compatibility, [iframe] support has been removed in version 2020.12 + self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[url]$1', '[/url]', true); self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); @@ -397,7 +386,7 @@ class HTML $prefix = '[code=' . $matches[1] . ']'; } - return $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; + return $prefix . "\n" . html_entity_decode($matches[2]) . "\n" . '[/code]'; }, $message ); @@ -408,6 +397,7 @@ class HTML $message = self::qualifyURLs($message, $basepath); } + DI::profiler()->stopRecording(); return $message; } @@ -597,6 +587,7 @@ class HTML */ public static function toPlaintext(string $html, $wraplength = 75, $compact = false) { + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $html); $doc = new DOMDocument(); @@ -604,6 +595,11 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + if (empty($message)) { + DI::profiler()->stopRecording(); + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); $message = $doc->saveHTML(); @@ -613,6 +609,11 @@ class HTML // Collecting all links $urls = self::collectURLs($message); + if (empty($message)) { + DI::profiler()->stopRecording(); + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); self::tagToBBCode($doc, 'html', [], '', ''); @@ -651,6 +652,7 @@ class HTML self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], ' ', ' '); } + // Backward compatibility, [iframe] support has been removed in version 2020.12 self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], ' $1 ', ''); $message = $doc->saveHTML(); @@ -692,6 +694,7 @@ class HTML $message = self::quoteLevel(trim($message), $wraplength); + DI::profiler()->stopRecording(); return trim($message); } @@ -704,9 +707,11 @@ class HTML */ public static function toMarkdown($html) { + DI::profiler()->startRecording('rendering'); $converter = new HtmlConverter(['hard_break' => true]); $markdown = $converter->convert($html); + DI::profiler()->stopRecording(); return $markdown; } @@ -779,16 +784,6 @@ class HTML return $text; } - /** - * return div element with class 'clear' - * @return string - * @deprecated - */ - public static function clearDiv() - { - return '
'; - } - /** * Loader for infinite scrolling * @@ -804,22 +799,6 @@ class HTML ]); } - /** - * Get html for contact block. - * - * @deprecated since version 2019.03 - * @see ContactBlock::getHTML() - * @return string - * @throws \Friendica\Network\HTTPException\InternalServerErrorException - * @throws \ImagickException - */ - public static function contactBlock() - { - $a = DI::app(); - - return ContactBlock::getHTML($a->profile); - } - /** * Format contacts as picture links or as text links * @@ -852,7 +831,7 @@ class HTML $redir = false; if ($redirect) { - $url = Contact::magicLink($contact['url']); + $url = Contact::magicLinkByContact($contact); if (strpos($url, 'redir/') === 0) { $sparkle = ' sparkle'; } @@ -968,4 +947,70 @@ class HTML { return str_replace('&', '&', $s); } + + /** + * Clean an HTML text for potentially harmful code + * + * @param string $text + * @param array $allowedIframeDomains List of allowed iframe source domains without the scheme + * @return string + */ + public static function purify(string $text, array $allowedIframeDomains = []): string + { + // Allows cid: URL scheme + \HTMLPurifier_URISchemeRegistry::instance()->register('cid', new HTMLPurifier_URIScheme_cid()); + + $config = \HTMLPurifier_HTML5Config::createDefault(); + $config->set('HTML.Doctype', 'HTML5'); + + // Used to remove iframe with src attribute filtered out + $config->set('AutoFormat.RemoveEmpty', true); + + $config->set('HTML.SafeIframe', true); + + array_walk($allowedIframeDomains, function (&$domain) { + // Allow the domain and all its eventual sub-domains + $domain = '(?:(?!-)[A-Za-z0-9-]{1,63}(?set('URI.SafeIframeRegexp', + '%^https://(?: + ' . implode('|', $allowedIframeDomains) . ' + ) + (?:/|$) # Prevents bogus domains like youtube.com.fake.tld + %xi' + ); + + $config->set('Attr.AllowedRel', [ + 'noreferrer' => true, + 'noopener' => true, + 'tag' => true, + ]); + $config->set('Attr.AllowedFrameTargets', [ + '_blank' => true, + ]); + + $config->set('AutoFormat.RemoveEmpty.Predicate', [ + 'colgroup' => [], // | + 'th' => [], // | + 'td' => [], // | + 'iframe' => ['src'], // ↳ Default HTMLPurify values + 'i' => ['class'], // Allows forkawesome icons + ]); + + // Uncomment to debug HTMLPurifier behavior + //$config->set('Core.CollectErrors', true); + //$config->set('Core.MaintainLineNumbers', true); + + $HTMLPurifier = new \HTMLPurifier($config); + + $text = $HTMLPurifier->purify($text); + + /** @var \HTMLPurifier_ErrorCollector $errorCollector */ + // Uncomment to debug HTML Purifier behavior + //$errorCollector = $HTMLPurifier->context->get('ErrorCollector'); + //var_dump($errorCollector->getRaw()); + + return $text; + } }