]> git.mxchange.org Git - friendica.git/blobdiff - library/HTML5/Parser.php
Merge pull request #2126 from annando/1511-duplicates
[friendica.git] / library / HTML5 / Parser.php
index 5f9ca560e594c2732ac4c0157ef8b61d420e14e0..e101d3e545ad5e1f7245fba9be5b5b6c3663de86 100644 (file)
@@ -17,6 +17,17 @@ class HTML5_Parser
      * @return DOMDocument Parsed HTML
      */
     static public function parse($text, $builder = null) {
+
+       // Cleanup invalid HTML
+       $doc = new DOMDocument();
+
+       if (mb_detect_encoding($text, "UTF-8", true) == "UTF-8")
+               @$doc->loadHTML('<?xml encoding="UTF-8" ?>'.$text);
+       else
+               @$doc->loadHTML($text);
+
+       $text = $doc->saveHTML();
+
         $tokenizer = new HTML5_Tokenizer($text, $builder);
         $tokenizer->parse();
         return $tokenizer->save();