/**
 * Extracts the character set declared in the <meta> tags of an HTML document.
 *
 * Two declarations are checked, in this order:
 *  1. <meta charset="...">
 *  2. <meta http-equiv="Content-Type" content="...; charset=..."> where the http-equiv value and the
 *     "charset" token are matched case-insensitively
 *
 * Surrounding whitespace is stripped from the declared value, so charset=" UTF-8 " yields "utf-8".
 *
 * @param DOMDocument $doc
 * @return string|null Lowercase charset, or null if the document declares none
 */
public static function extractCharset(DOMDocument $doc): ?string
{
    $xpath = new DOMXPath($doc);

    // string() returns an empty string when no node matches, so an explicit comparison is enough here
    $charset = trim((string)$xpath->evaluate("string(//meta[@charset]/@charset)"));
    if ($charset !== '') {
        return strtolower($charset);
    }

    // This expression looks for a meta tag with the http-equiv attribute set to "content-type" ignoring case
    // whose content attribute contains a "charset" string and returns its value
    $expression = "string(//meta[@http-equiv][translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'content-type'][contains(translate(@content, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'charset')]/@content)";
    $contentType = trim((string)$xpath->evaluate($expression));
    if ($contentType === '') {
        // No matching meta tag: nothing to parse, skip the media type parser entirely
        return null;
    }

    try {
        $mediaType = MediaType::fromContentType($contentType);
    } catch (\InvalidArgumentException $e) {
        // Best effort: a content value the parser rejects is treated as "no charset declared"
        return null;
    }

    if (!isset($mediaType->parameters['charset'])) {
        return null;
    }

    $charset = trim((string)$mediaType->parameters['charset']);

    // An empty charset parameter carries no information, report it as absent
    return $charset !== '' ? strtolower($charset) : null;
}