X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FHTML.php;h=f9f340135cf51cf22cab2bf62c941166305b2f4c;hb=7e618856ab09ac74a3760e238c73ecb9515f6701;hp=6f2d7c79053ad66b52c13e6cf82ec3d51c8bc67d;hpb=ec0c9dcdb160750c4346579a9fab21e323106ede;p=friendica.git diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 6f2d7c7905..f9f340135c 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -1,6 +1,6 @@ childNodes as $key => $child) { /* Remove empty text nodes at the start or at the end of the children list */ - if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue)) { + if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue) !== '') { $newNode = $child->cloneNode(true); $node->parentNode->insertBefore($newNode, $node); } @@ -141,8 +143,17 @@ class HTML * @return string * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function toBBCode($message, $basepath = '') + public static function toBBCode(string $message, string $basepath = ''): string { + /* + * Check if message is empty to prevent a lot of code below from being executed + * for just an empty message. + */ + if ($message === '') { + return ''; + } + + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $message); $message = Strings::performWithEscapedBlocks($message, '#
#iUs', function ($message) { @@ -167,6 +178,10 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + if (empty($message)) { + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); XML::deleteNode($doc, 'style'); @@ -188,6 +203,10 @@ class HTML $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
", " ", ""], $message); $message = preg_replace('= [\s]*=i', " ", $message); + if (empty($message)) { + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); self::tagToBBCode($doc, 'html', [], "", ""); @@ -377,7 +396,7 @@ class HTML $prefix = '[code=' . $matches[1] . ']'; } - return $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; + return $prefix . "\n" . html_entity_decode($matches[2]) . "\n" . '[/code]'; }, $message ); @@ -388,6 +407,7 @@ class HTML $message = self::qualifyURLs($message, $basepath); } + DI::profiler()->stopRecording(); return $message; } @@ -399,7 +419,7 @@ class HTML * * @return string The expanded URL */ - private static function qualifyURLsSub($matches, $basepath) + private static function qualifyURLsSub(array $matches, string $basepath): string { $base = parse_url($basepath); unset($base['query']); @@ -426,7 +446,7 @@ class HTML * * @return string Body with expanded URLs */ - private static function qualifyURLs($body, $basepath) + private static function qualifyURLs(string $body, string $basepath): string { $URLSearchString = "^\[\]"; @@ -452,7 +472,7 @@ class HTML return $body; } - private static function breakLines($line, $level, $wraplength = 75) + private static function breakLines(string $line, int $level, int $wraplength = 75): string { if ($wraplength == 0) { $wraplength = 2000000; @@ -493,7 +513,7 @@ class HTML return implode("\n", $newlines); } - private static function quoteLevel($message, $wraplength = 75) + private static function quoteLevel(string $message, int $wraplength = 75): string { $lines = explode("\n", $message); @@ -529,7 +549,7 @@ class HTML return implode("\n", $newlines); } - private static function collectURLs($message) + private static function collectURLs(string $message): array { $pattern = '/(.*?)<\/a>/is'; preg_match_all($pattern, $message, $result, PREG_SET_ORDER); @@ -575,8 +595,9 @@ class HTML * @param bool $compact True: Completely strips image tags; False: Keeps image URLs * @return string */ - public static function toPlaintext(string $html, $wraplength = 75, $compact = false) + public static function toPlaintext(string $html, int $wraplength = 75, bool $compact = false): string { + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $html); $doc = new DOMDocument(); @@ -584,6 +605,11 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + if (empty($message)) { + DI::profiler()->stopRecording(); + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); $message = $doc->saveHTML(); @@ -593,6 +619,11 @@ class HTML // Collecting all links $urls = self::collectURLs($message); + if (empty($message)) { + DI::profiler()->stopRecording(); + return ''; + } + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); self::tagToBBCode($doc, 'html', [], '', ''); @@ -673,6 +704,7 @@ class HTML $message = self::quoteLevel(trim($message), $wraplength); + DI::profiler()->stopRecording(); return trim($message); } @@ -683,11 +715,13 @@ class HTML * @param string $html * @return string */ - public static function toMarkdown($html) + public static function toMarkdown(string $html): string { + DI::profiler()->startRecording('rendering'); $converter = new HtmlConverter(['hard_break' => true]); $markdown = $converter->convert($html); + DI::profiler()->stopRecording(); return $markdown; } @@ -697,29 +731,29 @@ class HTML * @param string $s * @return string */ - public static function toBBCodeVideo($s) + public static function toBBCodeVideo(string $s): string { $s = preg_replace( '#]+>(.*?)https?://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+)(.*?)#ism', '[youtube]$2[/youtube]', $s ); - + $s = preg_replace( '#](.*?)https?://www.youtube.com/embed/([A-Za-z0-9\-_=]+)(.*?)#ism', '[youtube]$2[/youtube]', $s ); - + $s = preg_replace( '#](.*?)https?://player.vimeo.com/video/([0-9]+)(.*?)#ism', '[vimeo]$2[/vimeo]', $s ); - + return $s; } - + /** * transform link href and img src from relative to absolute * @@ -727,56 +761,46 @@ class HTML * @param string $base base url * @return string */ - public static function relToAbs($text, $base) + public static function relToAbs(string $text, string $base): string { if (empty($base)) { return $text; } - + $base = rtrim($base, '/'); - + $base2 = $base . "/"; - + // Replace links $pattern = "/]*) href=\"(?!http|https|\/)([^\"]*)\"/"; $replace = "'; - } - /** * Loader for infinite scrolling * * @return string html for loader * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function scrollLoader() + public static function scrollLoader(): string { $tpl = Renderer::getMarkupTemplate("scroll_loader.tpl"); return Renderer::replaceMacros($tpl, [ @@ -785,22 +809,6 @@ class HTML ]); } - /** - * Get html for contact block. - * - * @deprecated since version 2019.03 - * @see ContactBlock::getHTML() - * @return string - * @throws \Friendica\Network\HTTPException\InternalServerErrorException - * @throws \ImagickException - */ - public static function contactBlock() - { - $a = DI::app(); - - return ContactBlock::getHTML($a->profile); - } - /** * Format contacts as picture links or as text links * @@ -821,7 +829,7 @@ class HTML * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \ImagickException */ - public static function micropro($contact, $redirect = false, $class = '', $textmode = false) + public static function micropro(array $contact, bool $redirect = false, string $class = '', bool $textmode = false): string { // Use the contact URL if no address is available if (empty($contact['addr'])) { @@ -833,8 +841,8 @@ class HTML $redir = false; if ($redirect) { - $url = Contact::magicLink($contact['url']); - if (strpos($url, 'redir/') === 0) { + $url = Contact::magicLinkByContact($contact); + if (strpos($url, 'contact/redir/') === 0) { $sparkle = ' sparkle'; } } @@ -861,13 +869,12 @@ class HTML * * @param string $s Search query. * @param string $id HTML id - * @param string $url Search url. * @param bool $aside Display the search widgit aside. * * @return string Formatted HTML. * @throws \Exception */ - public static function search($s, $id = 'search-box', $aside = true) + public static function search(string $s, string $id = 'search-box', bool $aside = true): string { $mode = 'text'; @@ -902,19 +909,6 @@ class HTML return Renderer::replaceMacros(Renderer::getMarkupTemplate('searchbox.tpl'), $values); } - /** - * Replace naked text hyperlink with HTML formatted hyperlink - * - * @param string $s - * @return string - */ - public static function toLink($s) - { - $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\'\%\$\!\+]*)/", ' $1', $s); - $s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism", '<$1$2=$3&$4>', $s); - return $s; - } - /** * Given a HTML text and a set of filtering reasons, adds a content hiding header with the provided reasons * @@ -925,7 +919,7 @@ class HTML * @return string * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function applyContentFilter($html, array $reasons) + public static function applyContentFilter(string $html, array $reasons): string { if (count($reasons)) { $tpl = Renderer::getMarkupTemplate('wall/content_filter.tpl'); @@ -945,8 +939,147 @@ class HTML * @param string $s * @return string */ - public static function unamp($s) + public static function unamp(string $s): string { return str_replace('&', '&', $s); } + + /** + * Clean an HTML text for potentially harmful code + * + * @param string $text + * @param array $allowedIframeDomains List of allowed iframe source domains without the scheme + * @return string + */ + public static function purify(string $text, array $allowedIframeDomains = []): string + { + // Allows cid: URL scheme + \HTMLPurifier_URISchemeRegistry::instance()->register('cid', new HTMLPurifier_URIScheme_cid()); + + $config = \HTMLPurifier_HTML5Config::createDefault(); + $config->set('HTML.Doctype', 'HTML5'); + + // Used to remove iframe with src attribute filtered out + $config->set('AutoFormat.RemoveEmpty', true); + + $config->set('HTML.SafeIframe', true); + + array_walk($allowedIframeDomains, function (&$domain) { + // Allow the domain and all its eventual sub-domains + $domain = '(?:(?!-)[A-Za-z0-9-]{1,63}(?set('URI.SafeIframeRegexp', + '%^https://(?: + ' . implode('|', $allowedIframeDomains) . ' + ) + (?:/|$) # Prevents bogus domains like youtube.com.fake.tld + %xi' + ); + + $config->set('Attr.AllowedRel', [ + 'noreferrer' => true, + 'noopener' => true, + 'tag' => true, + ]); + $config->set('Attr.AllowedFrameTargets', [ + '_blank' => true, + ]); + + $config->set('AutoFormat.RemoveEmpty.Predicate', [ + 'colgroup' => [], // | + 'th' => [], // | + 'td' => [], // | + 'iframe' => ['src'], // ↳ Default HTMLPurify values + 'i' => ['class'], // Allows forkawesome icons + ]); + + // Uncomment to debug HTMLPurifier behavior + //$config->set('Core.CollectErrors', true); + //$config->set('Core.MaintainLineNumbers', true); + + $HTMLPurifier = new \HTMLPurifier($config); + + $text = $HTMLPurifier->purify($text); + + /** @var \HTMLPurifier_ErrorCollector $errorCollector */ + // Uncomment to debug HTML Purifier behavior + //$errorCollector = $HTMLPurifier->context->get('ErrorCollector'); + //var_dump($errorCollector->getRaw()); + + return $text; + } + + /** + * XPath arbitrary string quoting + * + * @see https://stackoverflow.com/a/45228168 + * @param string $value + * @return string + */ + public static function xpathQuote(string $value): string + { + if (false === strpos($value, '"')) { + return '"' . $value . '"'; + } + + if (false === strpos($value, "'")) { + return "'" . $value . "'"; + } + + // if the value contains both single and double quotes, construct an + // expression that concatenates all non-double-quote substrings with + // the quotes, e.g.: + // + // concat("'foo'", '"', "bar") + return 'concat(' . implode(', \'"\', ', array_map([self::class, 'xpathQuote'], explode('"', $value))) . ')'; + } + + /** + * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute + * + * XHTML Friends Network http://gmpg.org/xfn/ + * + * @param DOMDocument $doc + * @param UriInterface $meUrl + * @return bool + */ + public static function checkRelMeLink(DOMDocument $doc, UriInterface $meUrl): bool + { + $xpath = new \DOMXpath($doc); + + // This expression checks that "me" is among the space-delimited values of the "rel" attribute. + // And that the href attribute contains exactly the provided URL + $expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote($meUrl) . "]"; + + $result = $xpath->query($expression); + + return $result !== false && $result->length > 0; + } + + /** + * @param DOMDocument $doc + * @return string|null Lowercase charset + */ + public static function extractCharset(DOMDocument $doc): ?string + { + $xpath = new DOMXPath($doc); + + $expression = "string(//meta[@charset]/@charset)"; + if ($charset = $xpath->evaluate($expression)) { + return strtolower($charset); + } + + try { + // This expression looks for a meta tag with the http-equiv attribute set to "content-type" ignoring case + // whose content attribute contains a "charset" string and returns its value + $expression = "string(//meta[@http-equiv][translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'content-type'][contains(translate(@content, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'charset')]/@content)"; + $mediaType = MediaType::fromContentType($xpath->evaluate($expression)); + if (isset($mediaType->parameters['charset'])) { + return strtolower($mediaType->parameters['charset']); + } + } catch(\InvalidArgumentException $e) {} + + return null; + } }