X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FHTML.php;h=f9f340135cf51cf22cab2bf62c941166305b2f4c;hb=7e618856ab09ac74a3760e238c73ecb9515f6701;hp=9121087296a977d539e89018546af79c822615d0;hpb=8b7cb5d9efeab580c2592e0fbe301f7142b73a3d;p=friendica.git diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 9121087296..f9f340135c 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -1,6 +1,6 @@ childNodes as $key => $child) { /* Remove empty text nodes at the start or at the end of the children list */ - if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue)) { + if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue) !== '') { $newNode = $child->cloneNode(true); $node->parentNode->insertBefore($newNode, $node); } @@ -144,10 +146,10 @@ class HTML public static function toBBCode(string $message, string $basepath = ''): string { /* - * Check if message is empty to prevent a lot code below being executed + * Check if message is empty to prevent a lot of code below from being executed * for just an empty message. */ - if (empty($message)) { + if ($message === '') { return ''; } @@ -736,22 +738,22 @@ class HTML '[youtube]$2[/youtube]', $s ); - + $s = preg_replace( '#](.*?)https?://www.youtube.com/embed/([A-Za-z0-9\-_=]+)(.*?)#ism', '[youtube]$2[/youtube]', $s ); - + $s = preg_replace( '#](.*?)https?://player.vimeo.com/video/([0-9]+)(.*?)#ism', '[vimeo]$2[/vimeo]', $s ); - + return $s; } - + /** * transform link href and img src from relative to absolute * @@ -764,30 +766,30 @@ class HTML if (empty($base)) { return $text; } - + $base = rtrim($base, '/'); - + $base2 = $base . "/"; - + // Replace links $pattern = "/]*) href=\"(?!http|https|\/)([^\"]*)\"/"; $replace = "$1', $s); - $s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism", '<$1$2=$3&$4>', $s); - return $s; - } - /** * Given a HTML text and a set of filtering reasons, adds a content hiding header with the provided reasons * @@ -1020,4 +1009,77 @@ class HTML return $text; } + + /** + * XPath arbitrary string quoting + * + * @see https://stackoverflow.com/a/45228168 + * @param string $value + * @return string + */ + public static function xpathQuote(string $value): string + { + if (false === strpos($value, '"')) { + return '"' . $value . '"'; + } + + if (false === strpos($value, "'")) { + return "'" . $value . "'"; + } + + // if the value contains both single and double quotes, construct an + // expression that concatenates all non-double-quote substrings with + // the quotes, e.g.: + // + // concat("'foo'", '"', "bar") + return 'concat(' . implode(', \'"\', ', array_map([self::class, 'xpathQuote'], explode('"', $value))) . ')'; + } + + /** + * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute + * + * XHTML Friends Network http://gmpg.org/xfn/ + * + * @param DOMDocument $doc + * @param UriInterface $meUrl + * @return bool + */ + public static function checkRelMeLink(DOMDocument $doc, UriInterface $meUrl): bool + { + $xpath = new \DOMXpath($doc); + + // This expression checks that "me" is among the space-delimited values of the "rel" attribute. + // And that the href attribute contains exactly the provided URL + $expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote($meUrl) . "]"; + + $result = $xpath->query($expression); + + return $result !== false && $result->length > 0; + } + + /** + * @param DOMDocument $doc + * @return string|null Lowercase charset + */ + public static function extractCharset(DOMDocument $doc): ?string + { + $xpath = new DOMXPath($doc); + + $expression = "string(//meta[@charset]/@charset)"; + if ($charset = $xpath->evaluate($expression)) { + return strtolower($charset); + } + + try { + // This expression looks for a meta tag with the http-equiv attribute set to "content-type" ignoring case + // whose content attribute contains a "charset" string and returns its value + $expression = "string(//meta[@http-equiv][translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'content-type'][contains(translate(@content, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'charset')]/@content)"; + $mediaType = MediaType::fromContentType($xpath->evaluate($expression)); + if (isset($mediaType->parameters['charset'])) { + return strtolower($mediaType->parameters['charset']); + } + } catch(\InvalidArgumentException $e) {} + + return null; + } }