]> git.mxchange.org Git - friendica.git/blobdiff - library/HTML5/Parser.php
Merge pull request #2126 from annando/1511-duplicates
[friendica.git] / library / HTML5 / Parser.php
index c7faf875ad326304c44f65aa3f64ff63b07924c6..e101d3e545ad5e1f7245fba9be5b5b6c3663de86 100644 (file)
@@ -20,7 +20,12 @@ class HTML5_Parser
 
        // Cleanup invalid HTML
        $doc = new DOMDocument();
-       @$doc->loadHTML($text);
+
+       if (mb_detect_encoding($text, "UTF-8", true) == "UTF-8")
+               @$doc->loadHTML('<?xml encoding="UTF-8" ?>'.$text);
+       else
+               @$doc->loadHTML($text);
+
        $text = $doc->saveHTML();
 
         $tokenizer = new HTML5_Tokenizer($text, $builder);