3 require_once dirname(__FILE__) . '/Data.php';
4 require_once dirname(__FILE__) . '/InputStream.php';
5 require_once dirname(__FILE__) . '/TreeBuilder.php';
6 require_once dirname(__FILE__) . '/Tokenizer.php';
/** Outward-facing interface for HTML5 parsing. */
/**
 * Parses a full HTML document.
 *
 * The markup is first round-tripped through DOMDocument (loadHTML followed
 * by saveHTML) as a best-effort cleanup of invalid HTML; the normalised
 * markup is then handed to HTML5_Tokenizer.
 *
 * @param string $text    HTML text to parse
 * @param mixed  $builder Custom builder implementation (optional); passed
 *                        straight through to the HTML5_Tokenizer constructor
 * @return mixed Whatever HTML5_Tokenizer::save() returns; described by the
 *               original documentation as the parsed HTML as a DOMDocument
 */
static public function parse($text, $builder = null) {
    // Cleanup invalid HTML
    $doc = new DOMDocument();

    // The @ operator is deliberate: the input is expected to be malformed at
    // times, and the warnings loadHTML() raises for it are not actionable here.
    if (mb_detect_encoding($text, "UTF-8", true) === "UTF-8") {
        // Prepend an XML encoding declaration as a hint so that loadHTML()
        // reads the bytes as UTF-8.
        @$doc->loadHTML('<?xml encoding="UTF-8" ?>'.$text);
    } else {
        // Load exactly once: without this else-branch the plain load would
        // always run and throw away the UTF-8-aware result above.
        @$doc->loadHTML($text);
    }

    $text = $doc->saveHTML();

    $tokenizer = new HTML5_Tokenizer($text, $builder);
    // Run the tokenizer before collecting its result, mirroring how
    // parseFragment() calls parseFragment() ahead of save().
    $tokenizer->parse();

    return $tokenizer->save();
}
/**
 * Parses an HTML fragment.
 *
 * @param string      $text    HTML text to parse
 * @param string|null $context Name of the context element the fragment
 *                             should be parsed as if it were inside
 * @param mixed       $builder Custom builder implementation (optional);
 *                             passed straight through to HTML5_Tokenizer
 * @return mixed Whatever HTML5_Tokenizer::save() returns; described by the
 *               original documentation as the parsed HTML as a DOMDocument
 */
static public function parseFragment($text, $context = null, $builder = null) {
    $fragmentTokenizer = new HTML5_Tokenizer($text, $builder);

    // Tokenize in fragment mode for the requested context element.
    $fragmentTokenizer->parseFragment($context);

    return $fragmentTokenizer->save();
}