X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=library%2FHTML5%2FParser.php;h=e101d3e545ad5e1f7245fba9be5b5b6c3663de86;hb=146f386e37670b64c588c65d25c9f068a0e8e066;hp=5f9ca560e594c2732ac4c0157ef8b61d420e14e0;hpb=8a80ea24d1d8039be6dc532c9db23c60e71eb467;p=friendica.git diff --git a/library/HTML5/Parser.php b/library/HTML5/Parser.php index 5f9ca560e5..e101d3e545 100644 --- a/library/HTML5/Parser.php +++ b/library/HTML5/Parser.php @@ -17,6 +17,17 @@ class HTML5_Parser * @return Parsed HTML as DOMDocument */ static public function parse($text, $builder = null) { + + // Cleanup invalid HTML + $doc = new DOMDocument(); + + if (mb_detect_encoding($text, "UTF-8", true) == "UTF-8") + @$doc->loadHTML(''.$text); + else + @$doc->loadHTML($text); + + $text = $doc->saveHTML(); + $tokenizer = new HTML5_Tokenizer($text, $builder); $tokenizer->parse(); return $tokenizer->save();