X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FHTML.php;h=d30e7ead038cf2edae4f516af511d298ba7002d2;hb=f23ecaff6af1982112469f90d6bcdf0408b0f22e;hp=51515137e56ddd00f3d576e93f4998b7652bf025;hpb=2bda6980bbd36ef011e6c19502ea2a0fe3376d02;p=friendica.git diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 51515137e5..d30e7ead03 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -1,6 +1,6 @@ childNodes as $key => $child) { /* Remove empty text nodes at the start or at the end of the children list */ - if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue)) { + if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue) !== '') { $newNode = $child->cloneNode(true); $node->parentNode->insertBefore($newNode, $node); } @@ -141,8 +142,17 @@ class HTML * @return string * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function toBBCode($message, $basepath = '') + public static function toBBCode(string $message, string $basepath = ''): string { + /* + * Check if message is empty to prevent a lot of code below from being executed + * for just an empty message. + */ + if ($message === '') { + return ''; + } + + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $message); $message = Strings::performWithEscapedBlocks($message, '#
#iUs', function ($message) { @@ -270,9 +280,9 @@ class HTML self::tagToBBCode($doc, 'div', [], "\r", "\r"); self::tagToBBCode($doc, 'p', [], "\n", "\n"); - self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); - self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); - self::tagToBBCode($doc, 'li', [], "[*]", ""); + self::tagToBBCode($doc, 'ul', [], "[ul]", "\n[/ul]"); + self::tagToBBCode($doc, 'ol', [], "[ol]", "\n[/ol]"); + self::tagToBBCode($doc, 'li', [], "\n[li]", "[/li]"); self::tagToBBCode($doc, 'hr', [], "[hr]", ""); @@ -338,33 +348,6 @@ class HTML $message = str_replace("\n\n\n", "\n\n", $message); } while ($oldmessage != $message); - do { - $oldmessage = $message; - $message = str_replace( - [ - "[/size]\n\n", - "\n[hr]", - "[hr]\n", - "\n[list", - "[/list]\n", - "\n[/", - "[list]\n", - "[list=1]\n", - "\n[*]"], - [ - "[/size]\n", - "[hr]", - "[hr]", - "[list", - "[/list]", - "[/", - "[list]", - "[list=1]", - "[*]"], - $message - ); - } while ($message != $oldmessage); - $message = str_replace( ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], ['[b]', '[/b]', '[i]', '[/i]'], @@ -385,7 +368,7 @@ class HTML $prefix = '[code=' . $matches[1] . ']'; } - return $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; + return $prefix . "\n" . html_entity_decode($matches[2]) . "\n" . '[/code]'; }, $message ); @@ -396,6 +379,7 @@ class HTML $message = self::qualifyURLs($message, $basepath); } + DI::profiler()->stopRecording(); return $message; } @@ -407,7 +391,7 @@ class HTML * * @return string The expanded URL */ - private static function qualifyURLsSub($matches, $basepath) + private static function qualifyURLsSub(array $matches, string $basepath): string { $base = parse_url($basepath); unset($base['query']); @@ -434,11 +418,12 @@ class HTML * * @return string Body with expanded URLs */ - private static function qualifyURLs($body, $basepath) + private static function qualifyURLs(string $body, string $basepath): string { $URLSearchString = "^\[\]"; - $matches = ["/\[url\=([$URLSearchString]*)\].*?\[\/url\]/ism", + $matches = [ + "/\[url\=([$URLSearchString]*)\].*?\[\/url\]/ism", "/\[url\]([$URLSearchString]*)\[\/url\]/ism", "/\[img\=[0-9]*x[0-9]*\](.*?)\[\/img\]/ism", "/\[img\](.*?)\[\/img\]/ism", @@ -460,7 +445,7 @@ class HTML return $body; } - private static function breakLines($line, $level, $wraplength = 75) + private static function breakLines(string $line, int $level, int $wraplength = 75): string { if ($wraplength == 0) { $wraplength = 2000000; @@ -501,7 +486,7 @@ class HTML return implode("\n", $newlines); } - private static function quoteLevel($message, $wraplength = 75) + private static function quoteLevel(string $message, int $wraplength = 75): string { $lines = explode("\n", $message); @@ -537,7 +522,7 @@ class HTML return implode("\n", $newlines); } - private static function collectURLs($message) + private static function collectURLs(string $message): array { $pattern = '/(.*?)<\/a>/is'; preg_match_all($pattern, $message, $result, PREG_SET_ORDER); @@ -547,8 +532,10 @@ class HTML $ignore = false; // A list of some links that should be ignored - $list = ["/user/", "/tag/", "/group/", "/profile/", "/search?search=", "/search?tag=", "mailto:", "/u/", "/node/", - "//plus.google.com/", "//twitter.com/"]; + $list = [ + "/user/", "/tag/", "/group/", "/circle/", "/profile/", "/search?search=", "/search?tag=", "mailto:", "/u/", "/node/", + "//plus.google.com/", "//twitter.com/" + ]; foreach ($list as $listitem) { if (strpos($treffer[1], $listitem) !== false) { $ignore = true; @@ -583,8 +570,9 @@ class HTML * @param bool $compact True: Completely strips image tags; False: Keeps image URLs * @return string */ - public static function toPlaintext(string $html, $wraplength = 75, $compact = false) + public static function toPlaintext(string $html, int $wraplength = 75, bool $compact = false): string { + DI::profiler()->startRecording('rendering'); $message = str_replace("\r", "", $html); $doc = new DOMDocument(); @@ -593,6 +581,7 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); if (empty($message)) { + DI::profiler()->stopRecording(); return ''; } @@ -606,6 +595,7 @@ class HTML $urls = self::collectURLs($message); if (empty($message)) { + DI::profiler()->stopRecording(); return ''; } @@ -689,6 +679,7 @@ class HTML $message = self::quoteLevel(trim($message), $wraplength); + DI::profiler()->stopRecording(); return trim($message); } @@ -699,11 +690,13 @@ class HTML * @param string $html * @return string */ - public static function toMarkdown($html) + public static function toMarkdown(string $html): string { + DI::profiler()->startRecording('rendering'); $converter = new HtmlConverter(['hard_break' => true]); $markdown = $converter->convert($html); + DI::profiler()->stopRecording(); return $markdown; } @@ -713,29 +706,29 @@ class HTML * @param string $s * @return string */ - public static function toBBCodeVideo($s) + public static function toBBCodeVideo(string $s): string { $s = preg_replace( '#]+>(.*?)https?://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+)(.*?)#ism', '[youtube]$2[/youtube]', $s ); - + $s = preg_replace( '#](.*?)https?://www.youtube.com/embed/([A-Za-z0-9\-_=]+)(.*?)#ism', '[youtube]$2[/youtube]', $s ); - + $s = preg_replace( '#](.*?)https?://player.vimeo.com/video/([0-9]+)(.*?)#ism', '[vimeo]$2[/vimeo]', $s ); - + return $s; } - + /** * transform link href and img src from relative to absolute * @@ -743,56 +736,46 @@ class HTML * @param string $base base url * @return string */ - public static function relToAbs($text, $base) + public static function relToAbs(string $text, string $base): string { if (empty($base)) { return $text; } - + $base = rtrim($base, '/'); - + $base2 = $base . "/"; - + // Replace links $pattern = "/]*) href=\"(?!http|https|\/)([^\"]*)\"/"; $replace = "'; - } - /** * Loader for infinite scrolling * * @return string html for loader * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function scrollLoader() + public static function scrollLoader(): string { $tpl = Renderer::getMarkupTemplate("scroll_loader.tpl"); return Renderer::replaceMacros($tpl, [ @@ -801,22 +784,6 @@ class HTML ]); } - /** - * Get html for contact block. - * - * @deprecated since version 2019.03 - * @see ContactBlock::getHTML() - * @return string - * @throws \Friendica\Network\HTTPException\InternalServerErrorException - * @throws \ImagickException - */ - public static function contactBlock() - { - $a = DI::app(); - - return ContactBlock::getHTML($a->profile); - } - /** * Format contacts as picture links or as text links * @@ -837,7 +804,7 @@ class HTML * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \ImagickException */ - public static function micropro($contact, $redirect = false, $class = '', $textmode = false) + public static function micropro(array $contact, bool $redirect = false, string $class = '', bool $textmode = false): string { // Use the contact URL if no address is available if (empty($contact['addr'])) { @@ -850,7 +817,7 @@ class HTML if ($redirect) { $url = Contact::magicLinkByContact($contact); - if (strpos($url, 'redir/') === 0) { + if (strpos($url, 'contact/redir/') === 0) { $sparkle = ' sparkle'; } } @@ -877,13 +844,12 @@ class HTML * * @param string $s Search query. * @param string $id HTML id - * @param string $url Search url. - * @param bool $aside Display the search widgit aside. + * @param bool $aside Display the search widget aside. * * @return string Formatted HTML. * @throws \Exception */ - public static function search($s, $id = 'search-box', $aside = true) + public static function search(string $s, string $id = 'search-box', bool $aside = true): string { $mode = 'text'; @@ -898,7 +864,7 @@ class HTML '$id' => $id, '$search_label' => DI::l10n()->t('Search'), '$save_label' => $save_label, - '$search_hint' => DI::l10n()->t('@name, !forum, #tags, content'), + '$search_hint' => DI::l10n()->t('@name, !group, #tags, content'), '$mode' => $mode, '$return_url' => urlencode(Search::getSearchPath($s)), ]; @@ -911,26 +877,13 @@ class HTML ]; if (DI::config()->get('system', 'poco_local_search')) { - $values['$searchoption']['forums'] = DI::l10n()->t('Forums'); + $values['$searchoption']['groups'] = DI::l10n()->t('Groups'); } } return Renderer::replaceMacros(Renderer::getMarkupTemplate('searchbox.tpl'), $values); } - /** - * Replace naked text hyperlink with HTML formatted hyperlink - * - * @param string $s - * @return string - */ - public static function toLink($s) - { - $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\'\%\$\!\+]*)/", ' $1', $s); - $s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism", '<$1$2=$3&$4>', $s); - return $s; - } - /** * Given a HTML text and a set of filtering reasons, adds a content hiding header with the provided reasons * @@ -941,7 +894,7 @@ class HTML * @return string * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function applyContentFilter($html, array $reasons) + public static function applyContentFilter(string $html, array $reasons): string { if (count($reasons)) { $tpl = Renderer::getMarkupTemplate('wall/content_filter.tpl'); @@ -961,7 +914,7 @@ class HTML * @param string $s * @return string */ - public static function unamp($s) + public static function unamp(string $s): string { return str_replace('&', '&', $s); } @@ -991,7 +944,8 @@ class HTML $domain = '(?:(?!-)[A-Za-z0-9-]{1,63}(?set('URI.SafeIframeRegexp', + $config->set( + 'URI.SafeIframeRegexp', '%^https://(?: ' . implode('|', $allowedIframeDomains) . ' ) @@ -1002,6 +956,7 @@ class HTML $config->set('Attr.AllowedRel', [ 'noreferrer' => true, 'noopener' => true, + 'tag' => true, ]); $config->set('Attr.AllowedFrameTargets', [ '_blank' => true, @@ -1030,4 +985,78 @@ class HTML return $text; } + + /** + * XPath arbitrary string quoting + * + * @see https://stackoverflow.com/a/45228168 + * @param string $value + * @return string + */ + public static function xpathQuote(string $value): string + { + if (false === strpos($value, '"')) { + return '"' . $value . '"'; + } + + if (false === strpos($value, "'")) { + return "'" . $value . "'"; + } + + // if the value contains both single and double quotes, construct an + // expression that concatenates all non-double-quote substrings with + // the quotes, e.g.: + // + // concat("'foo'", '"', "bar") + return 'concat(' . implode(', \'"\', ', array_map([self::class, 'xpathQuote'], explode('"', $value))) . ')'; + } + + /** + * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute + * + * XHTML Friends Network http://gmpg.org/xfn/ + * + * @param DOMDocument $doc + * @param UriInterface $meUrl + * @return bool + */ + public static function checkRelMeLink(DOMDocument $doc, UriInterface $meUrl): bool + { + $xpath = new \DOMXpath($doc); + + // This expression checks that "me" is among the space-delimited values of the "rel" attribute. + // And that the href attribute contains exactly the provided URL + $expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote($meUrl) . "]"; + + $result = $xpath->query($expression); + + return $result !== false && $result->length > 0; + } + + /** + * @param DOMDocument $doc + * @return string|null Lowercase charset + */ + public static function extractCharset(DOMDocument $doc): ?string + { + $xpath = new DOMXPath($doc); + + $expression = "string(//meta[@charset]/@charset)"; + if ($charset = $xpath->evaluate($expression)) { + return strtolower($charset); + } + + try { + // This expression looks for a meta tag with the http-equiv attribute set to "content-type" ignoring case + // whose content attribute contains a "charset" string and returns its value + $expression = "string(//meta[@http-equiv][translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'content-type'][contains(translate(@content, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'charset')]/@content)"; + $mediaType = MediaType::fromContentType($xpath->evaluate($expression)); + if (isset($mediaType->parameters['charset'])) { + return strtolower($mediaType->parameters['charset']); + } + } catch (\InvalidArgumentException $e) { + } + + return null; + } }