5 Copyright 2007 Jeroen van der Meer <http://jero.net/>
6 Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice shall be included
17 in all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 // Tags for FIX ME!!!: (in order of priority)
30 // XXX - should be fixed NAO!
31 // XERROR - with regards to parse errors
32 // XSCRIPT - with regards to scripting mode
33 // XENCODING - with regards to encoding (for reparsing tests)
35 class HTML5_TreeBuilder {
36 public $stack = array();
37 public $content_model;
40 private $original_mode;
41 private $secondary_mode;
43 // Whether or not normal insertion of nodes should actually foster
44 // parent (used in one case in spec)
45 private $foster_parent = false;
46 private $a_formatting = array();
48 private $head_pointer = null;
49 private $form_pointer = null;
51 private $flag_frameset_ok = true;
52 private $flag_force_quirks = false;
53 private $ignored = false;
54 private $quirks_mode = null;
55 // This is set to 2 when we want to ignore the next LF character, and
56 // is decremented at the beginning of each processed token (this way,
57 // code can check (bool)$ignore_lf_token for the very next token only; after that the counter has dropped to 0 and the flag has expired by itself).
59 private $ignore_lf_token = 0;
60 private $fragment = false;
63 private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
64 private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
65 private $special = array('address','area','article','aside','base','basefont','bgsound',
66 'blockquote','body','br','center','col','colgroup','command','dd','details','dialog','dir','div','dl',
67 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
68 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
69 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
70 'p','param','plaintext','pre','script','select','spacer','style',
71 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
73 // Tree construction modes
75 const BEFORE_HTML = 1;
76 const BEFORE_HEAD = 2;
78 const IN_HEAD_NOSCRIPT = 4;
81 const IN_CDATA_RCDATA = 7;
84 const IN_COLUMN_GROUP = 10;
85 const IN_TABLE_BODY = 11;
89 const IN_SELECT_IN_TABLE= 15;
90 const IN_FOREIGN_CONTENT= 16;
91 const AFTER_BODY = 17;
92 const IN_FRAMESET = 18;
93 const AFTER_FRAMESET = 19;
94 const AFTER_AFTER_BODY = 20;
95 const AFTER_AFTER_FRAMESET = 21;
98 * Converts a magic number to a readable name. Use for debugging.
// Debugging helper: translates one of this class's constant values (for
// example an insertion-mode number) back into the name of the constant.
// $number is the constant value to look up; the return value is the matching
// constant name.
// NOTE(review): the embedded line numbering jumps inside this method, so some
// original statements and the closing brace may be missing from this view.
100 private function strConst($number) {
// Reflect on the class to obtain its constants as a name => value map.
103 $r = new ReflectionClass('HTML5_TreeBuilder');
// Flip the map to value => name so it can be indexed by the number.
// NOTE(review): array_flip() only accepts int/string values, and NS_HTML is
// null, so that entry is presumably skipped with a warning - confirm.
104 $lookup = array_flip($r->getConstants());
// No fallback: $number has to be the value of a declared constant.
106 return $lookup[$number];
109 // The different types of elements.
112 const FORMATTING = 102;
113 const PHRASING = 103;
115 // Quirks modes in $quirks_mode
116 const NO_QUIRKS = 200;
117 const QUIRKS_MODE = 201;
118 const LIMITED_QUIRKS_MODE = 202;
120 // Marker to be placed in $a_formatting
123 // Namespaces for foreign content
124 const NS_HTML = null; // to prevent DOM from requiring NS on everything
125 const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
126 const NS_SVG = 'http://www.w3.org/2000/svg';
127 const NS_XLINK = 'http://www.w3.org/1999/xlink';
128 const NS_XML = 'http://www.w3.org/XML/1998/namespace';
129 const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
// Sets up a fresh tree builder: selects the initial insertion mode and
// creates the DOMDocument that parsed nodes are appended to.
131 public function __construct() {
// NOTE(review): self::INITIAL, $this->mode and $this->dom are not declared in
// the visible part of the class - presumably on lines missing from this view;
// confirm.
132 $this->mode = self::INITIAL;
133 $this->dom = new DOMDocument;
// Configure the document: UTF-8 encoding, whitespace preserved, entity
// substitution enabled, strict DOM error checking disabled.
135 $this->dom->encoding = 'UTF-8';
136 $this->dom->preserveWhiteSpace = true;
137 $this->dom->substituteEntities = true;
138 $this->dom->strictErrorChecking = false;
141 // Process tag tokens
142 public function emitToken($token, $mode = null) {
143 // XXX: ignore parse errors... why are we emitting them, again?
144 if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
145 if ($mode === null) $mode = $this->mode;
148 $backtrace = debug_backtrace();
149 if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
150 echo $this->strConst($mode);
151 if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
155 $this->printActiveFormattingElements();
156 if ($this->foster_parent) echo " -> this is a foster parent mode\n";
159 if ($this->ignore_lf_token) $this->ignore_lf_token--;
160 $this->ignored = false;
161 // indenting is a little wonky, this can be changed later on
166 /* A character token that is one of U+0009 CHARACTER TABULATION,
167 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
168 if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
169 /* Ignore the token. */
170 $this->ignored = true;
171 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
173 $token['name'] !== 'html' || !empty($token['public']) ||
174 !empty($token['system']) || $token !== 'about:legacy-compat'
176 /* If the DOCTYPE token's name is not a case-sensitive match
177 * for the string "html", or if the token's public identifier
178 * is not missing, or if the token's system identifier is
179 * neither missing nor a case-sensitive match for the string
180 * "about:legacy-compat", then there is a parse error (this
181 * is the DOCTYPE parse error). */
182 // DOCTYPE parse error
184 /* Append a DocumentType node to the Document node, with the name
185 * attribute set to the name given in the DOCTYPE token, or the
186 * empty string if the name was missing; the publicId attribute
187 * set to the public identifier given in the DOCTYPE token, or
188 * the empty string if the public identifier was missing; the
189 * systemId attribute set to the system identifier given in the
190 * DOCTYPE token, or the empty string if the system identifier
191 * was missing; and the other attributes specific to
192 * DocumentType objects set to null and empty lists as
193 * appropriate. Associate the DocumentType node with the
194 * Document object so that it is returned as the value of the
195 * doctype attribute of the Document object. */
196 if (!isset($token['public'])) $token['public'] = null;
197 if (!isset($token['system'])) $token['system'] = null;
198 // Yes this is hacky. I'm kind of annoyed that I can't appendChild
199 // a doctype to DOMDocument. Maybe I haven't chanted the right
201 $impl = new DOMImplementation();
202 // This call can fail for particularly pathological cases (namely,
203 // the qualifiedName parameter ($token['name']) could be missing).
204 if ($token['name']) {
205 $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
206 $this->dom->appendChild($doctype);
208 // It looks like libxml's not actually *able* to express this case.
210 $this->dom->emptyDoctype = true;
212 $public = is_null($token['public']) ? false : strtolower($token['public']);
213 $system = is_null($token['system']) ? false : strtolower($token['system']);
214 $publicStartsWithForQuirks = array(
215 "+//silmaril//dtd html pro v0r11 19970101//",
216 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
217 "-//as//dtd html 3.0 aswedit + extensions//",
218 "-//ietf//dtd html 2.0 level 1//",
219 "-//ietf//dtd html 2.0 level 2//",
220 "-//ietf//dtd html 2.0 strict level 1//",
221 "-//ietf//dtd html 2.0 strict level 2//",
222 "-//ietf//dtd html 2.0 strict//",
223 "-//ietf//dtd html 2.0//",
224 "-//ietf//dtd html 2.1e//",
225 "-//ietf//dtd html 3.0//",
226 "-//ietf//dtd html 3.2 final//",
227 "-//ietf//dtd html 3.2//",
228 "-//ietf//dtd html 3//",
229 "-//ietf//dtd html level 0//",
230 "-//ietf//dtd html level 1//",
231 "-//ietf//dtd html level 2//",
232 "-//ietf//dtd html level 3//",
233 "-//ietf//dtd html strict level 0//",
234 "-//ietf//dtd html strict level 1//",
235 "-//ietf//dtd html strict level 2//",
236 "-//ietf//dtd html strict level 3//",
237 "-//ietf//dtd html strict//",
238 "-//ietf//dtd html//",
239 "-//metrius//dtd metrius presentational//",
240 "-//microsoft//dtd internet explorer 2.0 html strict//",
241 "-//microsoft//dtd internet explorer 2.0 html//",
242 "-//microsoft//dtd internet explorer 2.0 tables//",
243 "-//microsoft//dtd internet explorer 3.0 html strict//",
244 "-//microsoft//dtd internet explorer 3.0 html//",
245 "-//microsoft//dtd internet explorer 3.0 tables//",
246 "-//netscape comm. corp.//dtd html//",
247 "-//netscape comm. corp.//dtd strict html//",
248 "-//o'reilly and associates//dtd html 2.0//",
249 "-//o'reilly and associates//dtd html extended 1.0//",
250 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
251 "-//spyglass//dtd html 2.0 extended//",
252 "-//sq//dtd html 2.0 hotmetal + extensions//",
253 "-//sun microsystems corp.//dtd hotjava html//",
254 "-//sun microsystems corp.//dtd hotjava strict html//",
255 "-//w3c//dtd html 3 1995-03-24//",
256 "-//w3c//dtd html 3.2 draft//",
257 "-//w3c//dtd html 3.2 final//",
258 "-//w3c//dtd html 3.2//",
259 "-//w3c//dtd html 3.2s draft//",
260 "-//w3c//dtd html 4.0 frameset//",
261 "-//w3c//dtd html 4.0 transitional//",
262 "-//w3c//dtd html experimental 19960712//",
263 "-//w3c//dtd html experimental 970421//",
264 "-//w3c//dtd w3 html//",
265 "-//w3o//dtd w3 html 3.0//",
266 "-//webtechs//dtd mozilla html 2.0//",
267 "-//webtechs//dtd mozilla html//",
269 $publicSetToForQuirks = array(
270 "-//w3o//dtd w3 html strict 3.0//",
271 "-/w3c/dtd html 4.0 transitional/en",
274 $publicStartsWithAndSystemForQuirks = array(
275 "-//w3c//dtd html 4.01 frameset//",
276 "-//w3c//dtd html 4.01 transitional//",
278 $publicStartsWithForLimitedQuirks = array(
279 "-//w3c//dtd xhtml 1.0 frameset//",
280 "-//w3c//dtd xhtml 1.0 transitional//",
282 $publicStartsWithAndSystemForLimitedQuirks = array(
283 "-//w3c//dtd html 4.01 frameset//",
284 "-//w3c//dtd html 4.01 transitional//",
286 // first, do easy checks
288 !empty($token['force-quirks']) ||
289 strtolower($token['name']) !== 'html'
291 $this->quirks_mode = self::QUIRKS_MODE;
295 foreach ($publicStartsWithAndSystemForQuirks as $x) {
296 if (strncmp($public, $x, strlen($x)) === 0) {
297 $this->quirks_mode = self::QUIRKS_MODE;
301 if (!is_null($this->quirks_mode)) break;
302 foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
303 if (strncmp($public, $x, strlen($x)) === 0) {
304 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
308 if (!is_null($this->quirks_mode)) break;
310 foreach ($publicSetToForQuirks as $x) {
311 if ($public === $x) {
312 $this->quirks_mode = self::QUIRKS_MODE;
316 if (!is_null($this->quirks_mode)) break;
317 foreach ($publicStartsWithForLimitedQuirks as $x) {
318 if (strncmp($public, $x, strlen($x)) === 0) {
319 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
322 if (!is_null($this->quirks_mode)) break;
323 if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
324 $this->quirks_mode = self::QUIRKS_MODE;
327 foreach ($publicStartsWithForQuirks as $x) {
328 if (strncmp($public, $x, strlen($x)) === 0) {
329 $this->quirks_mode = self::QUIRKS_MODE;
333 if (is_null($this->quirks_mode)) {
334 $this->quirks_mode = self::NO_QUIRKS;
338 $this->mode = self::BEFORE_HTML;
341 /* Switch the insertion mode to "before html", then reprocess the
343 $this->mode = self::BEFORE_HTML;
344 $this->quirks_mode = self::QUIRKS_MODE;
345 $this->emitToken($token);
349 case self::BEFORE_HTML:
351 /* A DOCTYPE token */
352 if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
353 // Parse error. Ignore the token.
354 $this->ignored = true;
356 /* A comment token */
357 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
358 /* Append a Comment node to the Document object with the data
359 attribute set to the data given in the comment token. */
360 $comment = $this->dom->createComment($token['data']);
361 $this->dom->appendChild($comment);
363 /* A character token that is one of U+0009 CHARACTER TABULATION,
364 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
366 } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
367 /* Ignore the token. */
368 $this->ignored = true;
370 /* A start tag whose tag name is "html" */
371 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
372 /* Create an element for the token in the HTML namespace. Append it
373 * to the Document object. Put this element in the stack of open
375 $html = $this->insertElement($token, false);
376 $this->dom->appendChild($html);
377 $this->stack[] = $html;
379 $this->mode = self::BEFORE_HEAD;
382 /* Create an html element. Append it to the Document object. Put
383 * this element in the stack of open elements. */
384 $html = $this->dom->createElementNS(self::NS_HTML, 'html');
385 $this->dom->appendChild($html);
386 $this->stack[] = $html;
388 /* Switch the insertion mode to "before head", then reprocess the
390 $this->mode = self::BEFORE_HEAD;
391 $this->emitToken($token);
395 case self::BEFORE_HEAD:
397 /* A character token that is one of U+0009 CHARACTER TABULATION,
398 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
400 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
401 /* Ignore the token. */
402 $this->ignored = true;
404 /* A comment token */
405 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
406 /* Append a Comment node to the current node with the data attribute
407 set to the data given in the comment token. */
408 $this->insertComment($token['data']);
410 /* A DOCTYPE token */
411 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
412 /* Parse error. Ignore the token */
413 $this->ignored = true;
416 /* A start tag token with the tag name "html" */
417 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
418 /* Process the token using the rules for the "in body"
420 $this->processWithRulesFor($token, self::IN_BODY);
422 /* A start tag token with the tag name "head" */
423 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
424 /* Insert an HTML element for the token. */
425 $element = $this->insertElement($token);
427 /* Set the head element pointer to this new element node. */
428 $this->head_pointer = $element;
430 /* Change the insertion mode to "in head". */
431 $this->mode = self::IN_HEAD;
433 /* An end tag whose tag name is one of: "head", "body", "html", "br" */
435 $token['type'] === HTML5_Tokenizer::ENDTAG && (
436 $token['name'] === 'head' || $token['name'] === 'body' ||
437 $token['name'] === 'html' || $token['name'] === 'br'
439 /* Act as if a start tag token with the tag name "head" and no
440 * attributes had been seen, then reprocess the current token. */
441 $this->emitToken(array(
443 'type' => HTML5_Tokenizer::STARTTAG,
446 $this->emitToken($token);
448 /* Any other end tag */
449 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
450 /* Parse error. Ignore the token. */
451 $this->ignored = true;
454 /* Act as if a start tag token with the tag name "head" and no
455 * attributes had been seen, then reprocess the current token.
456 * Note: This will result in an empty head element being
457 * generated, with the current token being reprocessed in the
458 * "after head" insertion mode. */
459 $this->emitToken(array(
461 'type' => HTML5_Tokenizer::STARTTAG,
464 $this->emitToken($token);
470 /* A character token that is one of U+0009 CHARACTER TABULATION,
471 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
473 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
474 /* Insert the character into the current node. */
475 $this->insertText($token['data']);
477 /* A comment token */
478 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
479 /* Append a Comment node to the current node with the data attribute
480 set to the data given in the comment token. */
481 $this->insertComment($token['data']);
483 /* A DOCTYPE token */
484 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
485 /* Parse error. Ignore the token. */
486 $this->ignored = true;
489 /* A start tag whose tag name is "html" */
490 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
491 $token['name'] === 'html') {
492 $this->processWithRulesFor($token, self::IN_BODY);
494 /* A start tag whose tag name is one of: "base", "command", "link" */
495 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
496 ($token['name'] === 'base' || $token['name'] === 'command' ||
497 $token['name'] === 'link')) {
498 /* Insert an HTML element for the token. Immediately pop the
499 * current node off the stack of open elements. */
500 $this->insertElement($token);
501 array_pop($this->stack);
503 // YYY: Acknowledge the token's self-closing flag, if it is set.
505 /* A start tag whose tag name is "meta" */
506 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
507 /* Insert an HTML element for the token. Immediately pop the
508 * current node off the stack of open elements. */
509 $this->insertElement($token);
510 array_pop($this->stack);
512 // XERROR: Acknowledge the token's self-closing flag, if it is set.
514 // XENCODING: If the element has a charset attribute, and its value is a
515 // supported encoding, and the confidence is currently tentative,
516 // then change the encoding to the encoding given by the value of
517 // the charset attribute.
519 // Otherwise, if the element has a content attribute, and applying
520 // the algorithm for extracting an encoding from a Content-Type to
521 // its value returns a supported encoding encoding, and the
522 // confidence is currently tentative, then change the encoding to
523 // the encoding encoding.
525 /* A start tag with the tag name "title" */
526 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
527 $this->insertRCDATAElement($token);
529 /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
530 * A start tag whose tag name is one of: "noframes", "style" */
531 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
532 ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
533 // XSCRIPT: Scripting flag not respected
534 $this->insertCDATAElement($token);
536 // XSCRIPT: Scripting flag disable not implemented
538 /* A start tag with the tag name "script" */
539 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
540 /* 1. Create an element for the token in the HTML namespace. */
541 $node = $this->insertElement($token, false);
543 /* 2. Mark the element as being "parser-inserted" */
546 /* 3. If the parser was originally created for the HTML
547 * fragment parsing algorithm, then mark the script element as
548 * "already executed". (fragment case) */
551 /* 4. Append the new element to the current node and push it onto
552 * the stack of open elements. */
553 end($this->stack)->appendChild($node);
554 $this->stack[] = $node;
555 // I guess we could squash these together
557 /* 6. Let the original insertion mode be the current insertion mode. */
558 $this->original_mode = $this->mode;
559 /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
560 $this->mode = self::IN_CDATA_RCDATA;
561 /* 5. Switch the tokeniser's content model flag to the CDATA state. */
562 $this->content_model = HTML5_Tokenizer::CDATA;
564 /* An end tag with the tag name "head" */
565 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
566 /* Pop the current node (which will be the head element) off the stack of open elements. */
567 array_pop($this->stack);
569 /* Change the insertion mode to "after head". */
570 $this->mode = self::AFTER_HEAD;
572 // Slight logic inversion here to minimize duplication
573 /* A start tag with the tag name "head". */
574 /* An end tag whose tag name is not one of: "body", "html", "br" */
575 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
576 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
577 $token['name'] !== 'body' && $token['name'] !== 'br')) {
578 // Parse error. Ignore the token.
579 $this->ignored = true;
583 /* Act as if an end tag token with the tag name "head" had been
584 * seen, and reprocess the current token. */
585 $this->emitToken(array(
587 'type' => HTML5_Tokenizer::ENDTAG
590 /* Then, reprocess the current token. */
591 $this->emitToken($token);
595 case self::IN_HEAD_NOSCRIPT:
596 if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
598 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
599 $this->processWithRulesFor($token, self::IN_BODY);
600 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
601 /* Pop the current node (which will be a noscript element) from the
602 * stack of open elements; the new current node will be a head
604 array_pop($this->stack);
605 $this->mode = self::IN_HEAD;
607 ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
608 ($token['type'] === HTML5_Tokenizer::COMMENT) ||
609 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
610 $token['name'] === 'link' || $token['name'] === 'meta' ||
611 $token['name'] === 'noframes' || $token['name'] === 'style'))) {
612 $this->processWithRulesFor($token, self::IN_HEAD);
615 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
616 $token['name'] === 'head' || $token['name'] === 'noscript')) ||
617 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
618 $token['name'] !== 'br')) {
622 $this->emitToken(array(
623 'type' => HTML5_Tokenizer::ENDTAG,
624 'name' => 'noscript',
626 $this->emitToken($token);
630 case self::AFTER_HEAD:
631 /* Handle the token as follows: */
633 /* A character token that is one of U+0009 CHARACTER TABULATION,
634 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
636 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
637 /* Append the character to the current node. */
638 $this->insertText($token['data']);
640 /* A comment token */
641 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
642 /* Append a Comment node to the current node with the data attribute
643 set to the data given in the comment token. */
644 $this->insertComment($token['data']);
646 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
649 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
650 $this->processWithRulesFor($token, self::IN_BODY);
652 /* A start tag token with the tag name "body" */
653 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
654 $this->insertElement($token);
656 /* Set the frameset-ok flag to "not ok". */
657 $this->flag_frameset_ok = false;
659 /* Change the insertion mode to "in body". */
660 $this->mode = self::IN_BODY;
662 /* A start tag token with the tag name "frameset" */
663 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
664 /* Insert a frameset element for the token. */
665 $this->insertElement($token);
667 /* Change the insertion mode to "in frameset". */
668 $this->mode = self::IN_FRAMESET;
670 /* A start tag token whose tag name is one of: "base", "link", "meta",
671 "noframes", "script", "style", "title" */
672 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
673 array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
675 /* Push the node pointed to by the head element pointer onto the
676 * stack of open elements. */
677 $this->stack[] = $this->head_pointer;
678 $this->processWithRulesFor($token, self::IN_HEAD);
679 array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
681 // inversion of specification
683 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
684 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
685 $token['name'] !== 'body' && $token['name'] !== 'html' &&
686 $token['name'] !== 'br')) {
691 $this->emitToken(array(
693 'type' => HTML5_Tokenizer::STARTTAG,
696 $this->flag_frameset_ok = true;
697 $this->emitToken($token);
702 /* Handle the token as follows: */
704 switch($token['type']) {
705 /* A character token */
706 case HTML5_Tokenizer::CHARACTER:
707 case HTML5_Tokenizer::SPACECHARACTER:
708 /* Reconstruct the active formatting elements, if any. */
709 $this->reconstructActiveFormattingElements();
711 /* Append the token's character to the current node. */
712 $this->insertText($token['data']);
714 /* If the token is not one of U+0009 CHARACTER TABULATION,
715 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
716 * SPACE, then set the frameset-ok flag to "not ok". */
717 // i.e., if any of the characters is not whitespace
718 if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
719 $this->flag_frameset_ok = false;
723 /* A comment token */
724 case HTML5_Tokenizer::COMMENT:
725 /* Append a Comment node to the current node with the data
726 attribute set to the data given in the comment token. */
727 $this->insertComment($token['data']);
730 case HTML5_Tokenizer::DOCTYPE:
734 case HTML5_Tokenizer::STARTTAG:
735 switch($token['name']) {
738 /* For each attribute on the token, check to see if the
739 * attribute is already present on the top element of the
740 * stack of open elements. If it is not, add the attribute
741 * and its corresponding value to that element. */
742 foreach($token['attr'] as $attr) {
743 if(!$this->stack[0]->hasAttribute($attr['name'])) {
744 $this->stack[0]->setAttribute($attr['name'], $attr['value']);
749 case 'base': case 'command': case 'link': case 'meta': case 'noframes':
750 case 'script': case 'style': case 'title':
751 /* Process the token as if the insertion mode had been "in
753 $this->processWithRulesFor($token, self::IN_HEAD);
756 /* A start tag token with the tag name "body" */
758 /* Parse error. If the second element on the stack of open
759 elements is not a body element, or, if the stack of open
760 elements has only one node on it, then ignore the token.
762 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
763 $this->ignored = true;
766 /* Otherwise, for each attribute on the token, check to see
767 if the attribute is already present on the body element (the
768 second element) on the stack of open elements. If it is not,
769 add the attribute and its corresponding value to that
772 foreach($token['attr'] as $attr) {
773 if(!$this->stack[1]->hasAttribute($attr['name'])) {
774 $this->stack[1]->setAttribute($attr['name'], $attr['value']);
782 /* If the second element on the stack of open elements is
783 * not a body element, or, if the stack of open elements
784 * has only one node on it, then ignore the token.
786 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
787 $this->ignored = true;
789 } elseif (!$this->flag_frameset_ok) {
790 $this->ignored = true;
793 /* 1. Remove the second element on the stack of open
794 * elements from its parent node, if it has one. */
795 if($this->stack[1]->parentNode) {
796 $this->stack[1]->parentNode->removeChild($this->stack[1]);
799 /* 2. Pop all the nodes from the bottom of the stack of
800 * open elements, from the current node up to the root
802 array_splice($this->stack, 1);
804 $this->insertElement($token);
805 $this->mode = self::IN_FRAMESET;
809 // in spec, there is a diversion here
811 case 'address': case 'article': case 'aside': case 'blockquote':
812 case 'center': case 'datagrid': case 'details': case 'dialog': case 'dir':
813 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
814 case 'header': case 'hgroup': case 'menu': case 'nav':
815 case 'ol': case 'p': case 'section': case 'ul':
816 /* If the stack of open elements has a p element in scope,
817 then act as if an end tag with the tag name p had been
819 if($this->elementInScope('p')) {
820 $this->emitToken(array(
822 'type' => HTML5_Tokenizer::ENDTAG
826 /* Insert an HTML element for the token. */
827 $this->insertElement($token);
830 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
832 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
833 /* If the stack of open elements has a p element in scope,
834 then act as if an end tag with the tag name p had been seen. */
835 if($this->elementInScope('p')) {
836 $this->emitToken(array(
838 'type' => HTML5_Tokenizer::ENDTAG
842 /* If the current node is an element whose tag name is one
843 * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
844 * parse error; pop the current node off the stack of open
846 $peek = array_pop($this->stack);
847 if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
850 $this->stack[] = $peek;
853 /* Insert an HTML element for the token. */
854 $this->insertElement($token);
857 case 'pre': case 'listing':
858 /* If the stack of open elements has a p element in scope,
859 then act as if an end tag with the tag name p had been seen. */
860 if($this->elementInScope('p')) {
861 $this->emitToken(array(
863 'type' => HTML5_Tokenizer::ENDTAG
866 $this->insertElement($token);
867 /* If the next token is a U+000A LINE FEED (LF) character
868 * token, then ignore that token and move on to the next
869 * one. (Newlines at the start of pre blocks are ignored as
870 * an authoring convenience.) */
871 $this->ignore_lf_token = 2;
872 $this->flag_frameset_ok = false;
875 /* A start tag whose tag name is "form" */
877 /* If the form element pointer is not null, ignore the
878 token with a parse error. */
879 if($this->form_pointer !== null) {
880 $this->ignored = true;
885 /* If the stack of open elements has a p element in
886 scope, then act as if an end tag with the tag name p
888 if($this->elementInScope('p')) {
889 $this->emitToken(array(
891 'type' => HTML5_Tokenizer::ENDTAG
895 /* Insert an HTML element for the token, and set the
896 form element pointer to point to the element created. */
897 $element = $this->insertElement($token);
898 $this->form_pointer = $element;
902 // condensed specification
903 case 'li': case 'dd': case 'dt':
904 /* 1. Set the frameset-ok flag to "not ok". */
905 $this->flag_frameset_ok = false;
907 $stack_length = count($this->stack) - 1;
908 for($n = $stack_length; 0 <= $n; $n--) {
909 /* 2. Initialise node to be the current node (the
910 bottommost node of the stack). */
912 $node = $this->stack[$n];
913 $cat = $this->getElementCategory($node);
916 /* 3. If node is an li element, then act as if an end
917 * tag with the tag name "li" had been seen, then jump
918 * to the last step. */
919 // for case 'dd': case 'dt':
920 /* If node is a dd or dt element, then act as if an end
921 * tag with the same tag name as node had been seen, then
922 * jump to the last step. */
923 if(($token['name'] === 'li' && $node->tagName === 'li') ||
924 ($token['name'] !== 'li' && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { // limited conditional
925 $this->emitToken(array(
926 'type' => HTML5_Tokenizer::ENDTAG,
927 'name' => $node->tagName,
932 /* 4. If node is not in the formatting category, and is
933 not in the phrasing category, and is not an address,
934 div or p element, then stop this algorithm. */
935 if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
936 $node->tagName !== 'address' && $node->tagName !== 'div' &&
937 $node->tagName !== 'p') {
941 /* 5. Otherwise, set node to the previous entry in the
942 * stack of open elements and return to step 2. */
945 /* 6. This is the last step. */
947 /* If the stack of open elements has a p element in scope,
948 then act as if an end tag with the tag name p had been
950 if($this->elementInScope('p')) {
951 $this->emitToken(array(
953 'type' => HTML5_Tokenizer::ENDTAG
957 /* Finally, insert an HTML element with the same tag
958 name as the token's. */
959 $this->insertElement($token);
962 /* A start tag token whose tag name is "plaintext" */
964 /* If the stack of open elements has a p element in scope,
965 then act as if an end tag with the tag name p had been
967 if($this->elementInScope('p')) {
968 $this->emitToken(array(
970 'type' => HTML5_Tokenizer::ENDTAG
974 /* Insert an HTML element for the token. */
975 $this->insertElement($token);
977 $this->content_model = HTML5_Tokenizer::PLAINTEXT;
982 /* A start tag whose tag name is "a" */
984 /* If the list of active formatting elements contains
985 an element whose tag name is "a" between the end of the
986 list and the last marker on the list (or the start of
987 the list if there is no marker on the list), then this
988 is a parse error; act as if an end tag with the tag name
989 "a" had been seen, then remove that element from the list
990 of active formatting elements and the stack of open
991 elements if the end tag didn't already remove it (it
992 might not have if the element is not in table scope). */
993 $leng = count($this->a_formatting);
995 for($n = $leng - 1; $n >= 0; $n--) {
996 if($this->a_formatting[$n] === self::MARKER) {
999 } elseif($this->a_formatting[$n]->tagName === 'a') {
1000 $a = $this->a_formatting[$n];
1001 $this->emitToken(array(
1003 'type' => HTML5_Tokenizer::ENDTAG
1005 if (in_array($a, $this->a_formatting)) {
1006 $a_i = array_search($a, $this->a_formatting, true);
1007 if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
1009 if (in_array($a, $this->stack)) {
1010 $a_i = array_search($a, $this->stack, true);
1011 if ($a_i !== false) array_splice($this->stack, $a_i, 1);
1017 /* Reconstruct the active formatting elements, if any. */
1018 $this->reconstructActiveFormattingElements();
1020 /* Insert an HTML element for the token. */
1021 $el = $this->insertElement($token);
1023 /* Add that element to the list of active formatting
1025 $this->a_formatting[] = $el;
1028 case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1029 case 's': case 'small': case 'strike':
1030 case 'strong': case 'tt': case 'u':
1031 /* Reconstruct the active formatting elements, if any. */
1032 $this->reconstructActiveFormattingElements();
1034 /* Insert an HTML element for the token. */
1035 $el = $this->insertElement($token);
1037 /* Add that element to the list of active formatting
1039 $this->a_formatting[] = $el;
1043 /* Reconstruct the active formatting elements, if any. */
1044 $this->reconstructActiveFormattingElements();
1046 /* If the stack of open elements has a nobr element in
1047 * scope, then this is a parse error; act as if an end tag
1048 * with the tag name "nobr" had been seen, then once again
1049 * reconstruct the active formatting elements, if any. */
1050 if ($this->elementInScope('nobr')) {
1051 $this->emitToken(array(
1053 'type' => HTML5_Tokenizer::ENDTAG,
1055 $this->reconstructActiveFormattingElements();
1058 /* Insert an HTML element for the token. */
1059 $el = $this->insertElement($token);
1061 /* Add that element to the list of active formatting
1063 $this->a_formatting[] = $el;
1066 // another diversion
1068 /* A start tag token whose tag name is "button" */
1070 /* If the stack of open elements has a button element in scope,
1071 then this is a parse error; act as if an end tag with the tag
1072 name "button" had been seen, then reprocess the token. (We don't
1073 do that. Unnecessary.) (I hope you're right! -- ezyang) */
1074 if($this->elementInScope('button')) {
1075 $this->emitToken(array(
1077 'type' => HTML5_Tokenizer::ENDTAG
1081 /* Reconstruct the active formatting elements, if any. */
1082 $this->reconstructActiveFormattingElements();
1084 /* Insert an HTML element for the token. */
1085 $this->insertElement($token);
1087 /* Insert a marker at the end of the list of active
1088 formatting elements. */
1089 $this->a_formatting[] = self::MARKER;
1091 $this->flag_frameset_ok = false;
1094 case 'applet': case 'marquee': case 'object':
1095 /* Reconstruct the active formatting elements, if any. */
1096 $this->reconstructActiveFormattingElements();
1098 /* Insert an HTML element for the token. */
1099 $this->insertElement($token);
1101 /* Insert a marker at the end of the list of active
1102 formatting elements. */
1103 $this->a_formatting[] = self::MARKER;
1105 $this->flag_frameset_ok = false;
1110 /* A start tag whose tag name is "table" */
1112 /* If the stack of open elements has a p element in scope,
1113 then act as if an end tag with the tag name p had been seen. */
1114 if($this->quirks_mode !== self::QUIRKS_MODE &&
1115 $this->elementInScope('p')) {
1116 $this->emitToken(array(
1118 'type' => HTML5_Tokenizer::ENDTAG
1122 /* Insert an HTML element for the token. */
1123 $this->insertElement($token);
1125 $this->flag_frameset_ok = false;
1127 /* Change the insertion mode to "in table". */
1128 $this->mode = self::IN_TABLE;
1131 /* A start tag whose tag name is one of: "area", "basefont",
1132 "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1133 case 'area': case 'basefont': case 'bgsound': case 'br':
1134 case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1136 /* Reconstruct the active formatting elements, if any. */
1137 $this->reconstructActiveFormattingElements();
1139 /* Insert an HTML element for the token. */
1140 $this->insertElement($token);
1142 /* Immediately pop the current node off the stack of open elements. */
1143 array_pop($this->stack);
1145 // YYY: Acknowledge the token's self-closing flag, if it is set.
1147 $this->flag_frameset_ok = false;
1150 case 'param': case 'source':
1151 /* Insert an HTML element for the token. */
1152 $this->insertElement($token);
1154 /* Immediately pop the current node off the stack of open elements. */
1155 array_pop($this->stack);
1157 // YYY: Acknowledge the token's self-closing flag, if it is set.
1160 /* A start tag whose tag name is "hr" */
1162 /* If the stack of open elements has a p element in scope,
1163 then act as if an end tag with the tag name p had been seen. */
1164 if($this->elementInScope('p')) {
1165 $this->emitToken(array(
1167 'type' => HTML5_Tokenizer::ENDTAG
1171 /* Insert an HTML element for the token. */
1172 $this->insertElement($token);
1174 /* Immediately pop the current node off the stack of open elements. */
1175 array_pop($this->stack);
1177 // YYY: Acknowledge the token's self-closing flag, if it is set.
1179 $this->flag_frameset_ok = false;
1182 /* A start tag whose tag name is "image" */
1184 /* Parse error. Change the token's tag name to "img" and
1185 reprocess it. (Don't ask.) */
1186 $token['name'] = 'img';
1187 $this->emitToken($token);
1190 /* A start tag whose tag name is "isindex" */
1194 /* If the form element pointer is not null,
1195 then ignore the token. */
1196 if($this->form_pointer === null) {
1197 /* Act as if a start tag token with the tag name "form" had
1199 /* If the token has an attribute called "action", set
1200 * the action attribute on the resulting form
1201 * element to the value of the "action" attribute of
1204 $action = $this->getAttr($token, 'action');
1205 if ($action !== false) {
1206 $attr[] = array('name' => 'action', 'value' => $action);
1208 $this->emitToken(array(
1210 'type' => HTML5_Tokenizer::STARTTAG,
1214 /* Act as if a start tag token with the tag name "hr" had
1216 $this->emitToken(array(
1218 'type' => HTML5_Tokenizer::STARTTAG,
1222 /* Act as if a start tag token with the tag name "p" had
1224 $this->emitToken(array(
1226 'type' => HTML5_Tokenizer::STARTTAG,
1230 /* Act as if a start tag token with the tag name "label"
1232 $this->emitToken(array(
1234 'type' => HTML5_Tokenizer::STARTTAG,
1238 /* Act as if a stream of character tokens had been seen. */
1239 $prompt = $this->getAttr($token, 'prompt');
1240 if ($prompt === false) {
1241 $prompt = 'This is a searchable index. '.
1242 'Insert your search keywords here: ';
1244 $this->emitToken(array(
1246 'type' => HTML5_Tokenizer::CHARACTER,
1249 /* Act as if a start tag token with the tag name "input"
1250 had been seen, with all the attributes from the "isindex"
1251 token, except with the "name" attribute set to the value
1252 "isindex" (ignoring any explicit "name" attribute). */
1254 foreach ($token['attr'] as $keypair) {
1255 if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1256 $keypair['name'] === 'prompt') continue;
1259 $attr[] = array('name' => 'name', 'value' => 'isindex');
1261 $this->emitToken(array(
1263 'type' => HTML5_Tokenizer::STARTTAG,
1267 /* Act as if an end tag token with the tag name "label"
1269 $this->emitToken(array(
1271 'type' => HTML5_Tokenizer::ENDTAG
1274 /* Act as if an end tag token with the tag name "p" had
1276 $this->emitToken(array(
1278 'type' => HTML5_Tokenizer::ENDTAG
1281 /* Act as if a start tag token with the tag name "hr" had
1283 $this->emitToken(array(
1285 'type' => HTML5_Tokenizer::STARTTAG
1288 /* Act as if an end tag token with the tag name "form" had
1290 $this->emitToken(array(
1292 'type' => HTML5_Tokenizer::ENDTAG
1295 $this->ignored = true;
1299 /* A start tag whose tag name is "textarea" */
1301 $this->insertElement($token);
1303 /* If the next token is a U+000A LINE FEED (LF)
1304 * character token, then ignore that token and move on to
1305 * the next one. (Newlines at the start of textarea
1306 * elements are ignored as an authoring convenience.)
1307 * need flag, see also <pre> */
1308 $this->ignore_lf_token = 2;
1310 $this->original_mode = $this->mode;
1311 $this->flag_frameset_ok = false;
1312 $this->mode = self::IN_CDATA_RCDATA;
1314 /* Switch the tokeniser's content model flag to the
1316 $this->content_model = HTML5_Tokenizer::RCDATA;
1319 /* A start tag token whose tag name is "xmp" */
1321 /* Reconstruct the active formatting elements, if any. */
1322 $this->reconstructActiveFormattingElements();
1324 $this->flag_frameset_ok = false;
1326 $this->insertCDATAElement($token);
1330 $this->flag_frameset_ok = false;
1331 $this->insertCDATAElement($token);
1334 case 'noembed': case 'noscript':
1335 // XSCRIPT: should check scripting flag
1336 $this->insertCDATAElement($token);
1339 /* A start tag whose tag name is "select" */
1341 /* Reconstruct the active formatting elements, if any. */
1342 $this->reconstructActiveFormattingElements();
1344 /* Insert an HTML element for the token. */
1345 $this->insertElement($token);
1347 $this->flag_frameset_ok = false;
1349 /* If the insertion mode is one of "in table", "in caption",
1350 * "in column group", "in table body", "in row", or "in
1351 * cell", then switch the insertion mode to "in select in
1352 * table". Otherwise, switch the insertion mode to "in
1355 $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
1356 $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
1357 $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
1359 $this->mode = self::IN_SELECT_IN_TABLE;
1361 $this->mode = self::IN_SELECT;
1365 case 'option': case 'optgroup':
1366 if ($this->elementInScope('option')) {
1367 $this->emitToken(array(
1369 'type' => HTML5_Tokenizer::ENDTAG,
1372 $this->reconstructActiveFormattingElements();
1373 $this->insertElement($token);
1376 case 'rp': case 'rt':
1377 /* If the stack of open elements has a ruby element in scope, then generate
1378 * implied end tags. If the current node is not then a ruby element, this is
1379 * a parse error; pop all the nodes from the current node up to the node
1380 * immediately before the bottommost ruby element on the stack of open elements.
1382 if ($this->elementInScope('ruby')) {
1383 $this->generateImpliedEndTags();
1390 $peek = array_pop($this->stack);
1391 } while ($peek->tagName !== 'ruby');
1392 $this->stack[] = $peek; // we popped one too many
1393 $this->insertElement($token);
1399 $this->reconstructActiveFormattingElements();
1400 $token = $this->adjustMathMLAttributes($token);
1401 $token = $this->adjustForeignAttributes($token);
1402 $this->insertForeignElement($token, self::NS_MATHML);
1403 if (isset($token['self-closing'])) {
1404 // XERROR: acknowledge the token's self-closing flag
1405 array_pop($this->stack);
1407 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1408 $this->secondary_mode = $this->mode;
1409 $this->mode = self::IN_FOREIGN_CONTENT;
1414 $this->reconstructActiveFormattingElements();
1415 $token = $this->adjustSVGAttributes($token);
1416 $token = $this->adjustForeignAttributes($token);
1417 $this->insertForeignElement($token, self::NS_SVG);
1418 if (isset($token['self-closing'])) {
1419 // XERROR: acknowledge the token's self-closing flag
1420 array_pop($this->stack);
1422 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1423 $this->secondary_mode = $this->mode;
1424 $this->mode = self::IN_FOREIGN_CONTENT;
1428 case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1429 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1433 /* A start tag token not covered by the previous entries */
1435 /* Reconstruct the active formatting elements, if any. */
1436 $this->reconstructActiveFormattingElements();
1438 $this->insertElement($token);
1439 /* This element will be a phrasing element. */
1444 case HTML5_Tokenizer::ENDTAG:
1445 switch($token['name']) {
1446 /* An end tag with the tag name "body" */
1448 /* If the second element in the stack of open elements is
1449 not a body element, this is a parse error. Ignore the token.
1451 if(count($this->stack) < 2 || $this->stack[1]->tagName !== 'body') {
1452 $this->ignored = true;
1454 /* Otherwise, if there is a node in the stack of open
1455 * elements that is not either a dd element, a dt
1456 * element, an li element, an optgroup element, an
1457 * option element, a p element, an rp element, an rt
1458 * element, a tbody element, a td element, a tfoot
1459 * element, a th element, a thead element, a tr element,
1460 * the body element, or the html element, then this is a
1463 // XERROR: implement this check for parse error
1466 /* Change the insertion mode to "after body". */
1467 $this->mode = self::AFTER_BODY;
1470 /* An end tag with the tag name "html" */
1472 /* Act as if an end tag with tag name "body" had been seen,
1473 then, if that token wasn't ignored, reprocess the current
1475 $this->emitToken(array(
1477 'type' => HTML5_Tokenizer::ENDTAG
1480 if (!$this->ignored) $this->emitToken($token);
1483 case 'address': case 'article': case 'aside': case 'blockquote':
1484 case 'center': case 'datagrid': case 'details': case 'dir':
1485 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
1486 case 'header': case 'hgroup': case 'listing': case 'menu':
1487 case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
1488 /* If the stack of open elements has an element in scope
1489 with the same tag name as that of the token, then generate
1490 implied end tags. */
1491 if($this->elementInScope($token['name'])) {
1492 $this->generateImpliedEndTags();
1494 /* Now, if the current node is not an element with
1495 the same tag name as that of the token, then this
1496 is a parse error. */
1497 // XERROR: implement parse error logic
1499 /* If the stack of open elements has an element in
1500 scope with the same tag name as that of the token,
1501 then pop elements from this stack until an element
1502 with that tag name has been popped from the stack. */
1504 $node = array_pop($this->stack);
1505 } while ($node->tagName !== $token['name']);
1511 /* An end tag whose tag name is "form" */
1513 /* Let node be the element that the form element pointer is set to. */
1514 $node = $this->form_pointer;
1515 /* Set the form element pointer to null. */
1516 $this->form_pointer = null;
1517 /* If node is null or the stack of open elements does not
1518 * have node in scope, then this is a parse error; ignore the token. */
1519 if ($node === null || !in_array($node, $this->stack)) {
1521 $this->ignored = true;
1523 /* 1. Generate implied end tags. */
1524 $this->generateImpliedEndTags();
1525 /* 2. If the current node is not node, then this is a parse error. */
1526 if (end($this->stack) !== $node) {
1529 /* 3. Remove node from the stack of open elements. */
1530 array_splice($this->stack, array_search($node, $this->stack, true), 1);
1535 /* An end tag whose tag name is "p" */
1537 /* If the stack of open elements has a p element in scope,
1538 then generate implied end tags, except for p elements. */
1539 if($this->elementInScope('p')) {
1540 /* Generate implied end tags, except for elements with
1541 * the same tag name as the token. */
1542 $this->generateImpliedEndTags(array('p'));
1544 /* If the current node is not a p element, then this is
1546 // XERROR: implement
1548 /* Pop elements from the stack of open elements until
1549 * an element with the same tag name as the token has
1550 * been popped from the stack. */
1552 $node = array_pop($this->stack);
1553 } while ($node->tagName !== 'p');
1557 $this->emitToken(array(
1559 'type' => HTML5_Tokenizer::STARTTAG,
1561 $this->emitToken($token);
1565 /* An end tag whose tag name is "dd", "dt", or "li" */
1566 case 'dd': case 'dt': case 'li':
1567 if($this->elementInScope($token['name'])) {
1568 $this->generateImpliedEndTags(array($token['name']));
1570 /* If the current node is not an element with the same
1571 tag name as the token, then this is a parse error. */
1572 // XERROR: implement parse error
1574 /* Pop elements from the stack of open elements until
1575 * an element with the same tag name as the token has
1576 * been popped from the stack. */
1578 $node = array_pop($this->stack);
1579 } while ($node->tagName !== $token['name']);
1586 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1588 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1589 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1591 /* If the stack of open elements has in scope an element whose
1592 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1593 generate implied end tags. */
1594 if($this->elementInScope($elements)) {
1595 $this->generateImpliedEndTags();
1597 /* Now, if the current node is not an element with the same
1598 tag name as that of the token, then this is a parse error. */
1599 // XERROR: implement parse error
1601 /* If the stack of open elements has in scope an element
1602 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1603 "h6", then pop elements from the stack until an element
1604 with one of those tag names has been popped from the stack. */
1606 $node = array_pop($this->stack);
1607 } while (!in_array($node->tagName, $elements));
1613 /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
1614 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1615 case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1616 case 'i': case 'nobr': case 's': case 'small': case 'strike':
1617 case 'strong': case 'tt': case 'u':
1618 // XERROR: generally speaking this needs parse error logic
1619 /* 1. Let the formatting element be the last element in
1620 the list of active formatting elements that:
1621 * is between the end of the list and the last scope
1622 marker in the list, if any, or the start of the list
1624 * has the same tag name as the token.
1627 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
1628 if($this->a_formatting[$a] === self::MARKER) {
1631 } elseif($this->a_formatting[$a]->tagName === $token['name']) {
1632 $formatting_element = $this->a_formatting[$a];
1633 $in_stack = in_array($formatting_element, $this->stack, true);
1639 /* If there is no such node, or, if that node is
1640 also in the stack of open elements but the element
1641 is not in scope, then this is a parse error. Abort
1642 these steps. The token is ignored. */
1643 if(!isset($formatting_element) || ($in_stack &&
1644 !$this->elementInScope($token['name']))) {
1645 $this->ignored = true;
1648 /* Otherwise, if there is such a node, but that node
1649 is not in the stack of open elements, then this is a
1650 parse error; remove the element from the list, and
1651 abort these steps. */
1652 } elseif(isset($formatting_element) && !$in_stack) {
1653 unset($this->a_formatting[$fe_af_pos]);
1654 $this->a_formatting = array_merge($this->a_formatting);
1658 /* Otherwise, there is a formatting element and that
1659 * element is in the stack and is in scope. If the
1660 * element is not the current node, this is a parse
1661 * error. In any case, proceed with the algorithm as
1662 * written in the following steps. */
1663 // XERROR: implement me
1665 /* 2. Let the furthest block be the topmost node in the
1666 stack of open elements that is lower in the stack
1667 than the formatting element, and is not an element in
1668 the phrasing or formatting categories. There might
1670 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1671 $length = count($this->stack);
1673 for($s = $fe_s_pos + 1; $s < $length; $s++) {
1674 $category = $this->getElementCategory($this->stack[$s]);
1676 if($category !== self::PHRASING && $category !== self::FORMATTING) {
1677 $furthest_block = $this->stack[$s];
1682 /* 3. If there is no furthest block, then the UA must
1683 skip the subsequent steps and instead just pop all
1684 the nodes from the bottom of the stack of open
1685 elements, from the current node up to the formatting
1686 element, and remove the formatting element from the
1687 list of active formatting elements. */
1688 if(!isset($furthest_block)) {
1689 for($n = $length - 1; $n >= $fe_s_pos; $n--) {
1690 array_pop($this->stack);
1693 unset($this->a_formatting[$fe_af_pos]);
1694 $this->a_formatting = array_merge($this->a_formatting);
1698 /* 4. Let the common ancestor be the element
1699 immediately above the formatting element in the stack
1700 of open elements. */
1701 $common_ancestor = $this->stack[$fe_s_pos - 1];
1703 /* 5. Let a bookmark note the position of the
1704 formatting element in the list of active formatting
1705 elements relative to the elements on either side
1706 of it in the list. */
1707 $bookmark = $fe_af_pos;
1709 /* 6. Let node and last node be the furthest block.
1710 Follow these steps: */
1711 $node = $furthest_block;
1712 $last_node = $furthest_block;
1715 for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
1716 /* 6.1 Let node be the element immediately
1717 prior to node in the stack of open elements. */
1718 $node = $this->stack[$n];
1720 /* 6.2 If node is not in the list of active
1721 formatting elements, then remove node from
1722 the stack of open elements and then go back
1724 if(!in_array($node, $this->a_formatting, true)) {
1725 array_splice($this->stack, $n, 1);
1732 /* 6.3 Otherwise, if node is the formatting
1733 element, then go to the next step in the overall
1735 if($node === $formatting_element) {
1738 /* 6.4 Otherwise, if last node is the furthest
1739 block, then move the aforementioned bookmark to
1740 be immediately after the node in the list of
1741 active formatting elements. */
1742 } elseif($last_node === $furthest_block) {
1743 $bookmark = array_search($node, $this->a_formatting, true) + 1;
1746 /* 6.5 Create an element for the token for which
1747 * the element node was created, replace the entry
1748 * for node in the list of active formatting
1749 * elements with an entry for the new element,
1750 * replace the entry for node in the stack of open
1751 * elements with an entry for the new element, and
1752 * let node be the new element. */
1753 // we don't know what the token is anymore
1754 $clone = $node->cloneNode();
1755 $a_pos = array_search($node, $this->a_formatting, true);
1756 $s_pos = array_search($node, $this->stack, true);
1757 $this->a_formatting[$a_pos] = $clone;
1758 $this->stack[$s_pos] = $clone;
1761 /* 6.6 Insert last node into node, first removing
1762 it from its previous parent node if any. */
1763 if($last_node->parentNode !== null) {
1764 $last_node->parentNode->removeChild($last_node);
1767 $node->appendChild($last_node);
1769 /* 6.7 Let last node be node. */
1772 /* 6.8 Return to step 1 of this inner set of steps. */
1775 /* 7. If the common ancestor node is a table, tbody,
1776 * tfoot, thead, or tr element, then, foster parent
1777 * whatever last node ended up being in the previous
1778 * step, first removing it from its previous parent
1780 if ($last_node->parentNode) { // common step
1781 $last_node->parentNode->removeChild($last_node);
1783 if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1784 $this->fosterParent($last_node);
1785 /* Otherwise, append whatever last node ended up being
1786 * in the previous step to the common ancestor node,
1787 * first removing it from its previous parent node if
1790 $common_ancestor->appendChild($last_node);
1793 /* 8. Create an element for the token for which the
1794 * formatting element was created. */
1795 $clone = $formatting_element->cloneNode();
1797 /* 9. Take all of the child nodes of the furthest
1798 block and append them to the element created in the
1800 while($furthest_block->hasChildNodes()) {
1801 $child = $furthest_block->firstChild;
1802 $furthest_block->removeChild($child);
1803 $clone->appendChild($child);
1806 /* 10. Append that clone to the furthest block. */
1807 $furthest_block->appendChild($clone);
1809 /* 11. Remove the formatting element from the list
1810 of active formatting elements, and insert the new element
1811 into the list of active formatting elements at the
1812 position of the aforementioned bookmark. */
1813 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
1814 array_splice($this->a_formatting, $fe_af_pos, 1);
1816 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
1817 $af_part2 = array_slice($this->a_formatting, $bookmark);
1818 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
1820 /* 12. Remove the formatting element from the stack
1821 of open elements, and insert the new element into the stack
1822 of open elements immediately below the position of the
1823 furthest block in that stack. */
1824 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1825 array_splice($this->stack, $fe_s_pos, 1);
1827 $fb_s_pos = array_search($furthest_block, $this->stack, true);
1828 $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
1829 $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
1830 $this->stack = array_merge($s_part1, array($clone), $s_part2);
1832 /* 13. Jump back to step 1 in this series of steps. */
1833 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1837 case 'applet': case 'button': case 'marquee': case 'object':
1838 /* If the stack of open elements has an element in scope whose
1839 tag name matches the tag name of the token, then generate implied
1841 if($this->elementInScope($token['name'])) {
1842 $this->generateImpliedEndTags();
1844 /* Now, if the current node is not an element with the same
1845 tag name as the token, then this is a parse error. */
1846 // XERROR: implement logic
1848 /* Pop elements from the stack of open elements until
1849 * an element with the same tag name as the token has
1850 * been popped from the stack. */
1852 $node = array_pop($this->stack);
1853 } while ($node->tagName !== $token['name']);
1855 /* Clear the list of active formatting elements up to the
1857 $keys = array_keys($this->a_formatting, self::MARKER, true);
1858 $marker = end($keys);
1860 for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
1861 array_pop($this->a_formatting);
1870 $this->emitToken(array(
1872 'type' => HTML5_Tokenizer::STARTTAG,
1876 /* An end tag token not covered by the previous entries */
1878 for($n = count($this->stack) - 1; $n >= 0; $n--) {
1879 /* Initialise node to be the current node (the bottommost
1880 node of the stack). */
1881 $node = $this->stack[$n];
1883 /* If node has the same tag name as the end tag token,
1885 if($token['name'] === $node->tagName) {
1886 /* Generate implied end tags. */
1887 $this->generateImpliedEndTags();
1889 /* If the tag name of the end tag token does not
1890 match the tag name of the current node, this is a
1892 // XERROR: implement this
1894 /* Pop all the nodes from the current node up to
1895 node, including node, then stop these steps. */
1898 $pop = array_pop($this->stack);
1899 } while ($pop !== $node);
1903 $category = $this->getElementCategory($node);
1905 if($category !== self::FORMATTING && $category !== self::PHRASING) {
1906 /* Otherwise, if node is in neither the formatting
1907 category nor the phrasing category, then this is a
1908 parse error. Stop this algorithm. The end tag token
1910 $this->ignored = true;
1915 /* Set node to the previous entry in the stack of open elements. Loop. */
1923 case self::IN_CDATA_RCDATA:
1925 $token['type'] === HTML5_Tokenizer::CHARACTER ||
1926 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
1928 $this->insertText($token['data']);
1929 } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
1931 /* If the current node is a script element, mark the script
1932 * element as "already executed". */
1933 // probably not necessary
1934 array_pop($this->stack);
1935 $this->mode = $this->original_mode;
1936 $this->emitToken($token);
1937 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
1938 array_pop($this->stack);
1939 $this->mode = $this->original_mode;
1940 // we're ignoring all of the execution stuff
1941 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
1942 array_pop($this->stack);
1943 $this->mode = $this->original_mode;
1947 case self::IN_TABLE:
1948 $clear = array('html', 'table');
1950 /* A character token that is one of U+0009 CHARACTER TABULATION,
1951 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1953 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER &&
1954 /* If the current table is tainted, then act as described in
1955 * the "anything else" entry below. */
1956 // Note: hsivonen has a test that fails due to this line
1957 // because he wants to convince Hixie not to do taint
1958 !$this->currentTableIsTainted()) {
1959 /* Append the character to the current node. */
1960 $this->insertText($token['data']);
1962 /* A comment token */
1963 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
1964 /* Append a Comment node to the current node with the data
1965 attribute set to the data given in the comment token. */
1966 $this->insertComment($token['data']);
1968 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
1971 /* A start tag whose tag name is "caption" */
1972 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
1973 $token['name'] === 'caption') {
1974 /* Clear the stack back to a table context. */
1975 $this->clearStackToTableContext($clear);
1977 /* Insert a marker at the end of the list of active
1978 formatting elements. */
1979 $this->a_formatting[] = self::MARKER;
1981 /* Insert an HTML element for the token, then switch the
1982 insertion mode to "in caption". */
1983 $this->insertElement($token);
1984 $this->mode = self::IN_CAPTION;
1986 /* A start tag whose tag name is "colgroup" */
1987 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
1988 $token['name'] === 'colgroup') {
1989 /* Clear the stack back to a table context. */
1990 $this->clearStackToTableContext($clear);
1992 /* Insert an HTML element for the token, then switch the
1993 insertion mode to "in column group". */
1994 $this->insertElement($token);
1995 $this->mode = self::IN_COLUMN_GROUP;
1997 /* A start tag whose tag name is "col" */
1998 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
1999 $token['name'] === 'col') {
2000 $this->emitToken(array(
2001 'name' => 'colgroup',
2002 'type' => HTML5_Tokenizer::STARTTAG,
2006 $this->emitToken($token);
2008 /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2009 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2010 array('tbody', 'tfoot', 'thead'))) {
2011 /* Clear the stack back to a table context. */
2012 $this->clearStackToTableContext($clear);
2014 /* Insert an HTML element for the token, then switch the insertion
2015 mode to "in table body". */
2016 $this->insertElement($token);
2017 $this->mode = self::IN_TABLE_BODY;
2019 /* A start tag whose tag name is one of: "td", "th", "tr" */
2020 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2021 in_array($token['name'], array('td', 'th', 'tr'))) {
2022 /* Act as if a start tag token with the tag name "tbody" had been
2023 seen, then reprocess the current token. */
2024 $this->emitToken(array(
2026 'type' => HTML5_Tokenizer::STARTTAG,
2030 $this->emitToken($token);
2032 /* A start tag whose tag name is "table" */
2033 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2034 $token['name'] === 'table') {
2035 /* Parse error. Act as if an end tag token with the tag name "table"
2036 had been seen, then, if that token wasn't ignored, reprocess the
2038 $this->emitToken(array(
2040 'type' => HTML5_Tokenizer::ENDTAG
2043 if (!$this->ignored) $this->emitToken($token);
2045 /* An end tag whose tag name is "table" */
2046 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2047 $token['name'] === 'table') {
2048 /* If the stack of open elements does not have an element in table
2049 scope with the same tag name as the token, this is a parse error.
2050 Ignore the token. (fragment case) */
2051 if(!$this->elementInScope($token['name'], true)) {
2052 $this->ignored = true;
2057 $node = array_pop($this->stack);
2058 } while ($node->tagName !== 'table');
2060 /* Reset the insertion mode appropriately. */
2061 $this->resetInsertionMode();
2064 /* An end tag whose tag name is one of: "body", "caption", "col",
2065 "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2066 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2067 array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2068 'tfoot', 'th', 'thead', 'tr'))) {
2069 // Parse error. Ignore the token.
2071 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2072 ($token['name'] === 'style' || $token['name'] === 'script')) {
2073 $this->processWithRulesFor($token, self::IN_HEAD);
2075 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
2076 // assignment is intentional
2077 /* If the token does not have an attribute with the name "type", or
2078 * if it does, but that attribute's value is not an ASCII
2079 * case-insensitive match for the string "hidden", then: act as
2080 * described in the "anything else" entry below. */
2081 ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
2082 // I.e., if it's an input with the type attribute == 'hidden'
2085 $this->insertElement($token);
2086 array_pop($this->stack);
2087 } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2088 /* If the current node is not the root html element, then this is a parse error. */
2089 if (end($this->stack)->tagName !== 'html') {
2090 // Note: It can only be the current node in the fragment case.
2096 /* Parse error. Process the token as if the insertion mode was "in
2097 body", with the following exception: */
2099 $old = $this->foster_parent;
2100 $this->foster_parent = true;
2101 $this->processWithRulesFor($token, self::IN_BODY);
2102 $this->foster_parent = $old;
2106 case self::IN_CAPTION:
2107 /* An end tag whose tag name is "caption" */
2108 if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
2109 /* If the stack of open elements does not have an element in table
2110 scope with the same tag name as the token, this is a parse error.
2111 Ignore the token. (fragment case) */
2112 if(!$this->elementInScope($token['name'], true)) {
2113 $this->ignored = true;
2118 /* Generate implied end tags. */
2119 $this->generateImpliedEndTags();
2121 /* Now, if the current node is not a caption element, then this
2122 is a parse error. */
2123 // XERROR: implement
2125 /* Pop elements from this stack until a caption element has
2126 been popped from the stack. */
2128 $node = array_pop($this->stack);
2129 } while ($node->tagName !== 'caption');
2131 /* Clear the list of active formatting elements up to the last
2133 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2135 /* Switch the insertion mode to "in table". */
2136 $this->mode = self::IN_TABLE;
2139 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2140 "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2142 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2143 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2144 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2145 $token['name'] === 'table')) {
2146 /* Parse error. Act as if an end tag with the tag name "caption"
2147 had been seen, then, if that token wasn't ignored, reprocess the
2149 $this->emitToken(array(
2150 'name' => 'caption',
2151 'type' => HTML5_Tokenizer::ENDTAG
2154 if (!$this->ignored) $this->emitToken($token);
2156 /* An end tag whose tag name is one of: "body", "col", "colgroup",
2157 "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2158 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2159 array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
2161 // Parse error. Ignore the token.
2162 $this->ignored = true;
2166 /* Process the token as if the insertion mode was "in body". */
2167 $this->processWithRulesFor($token, self::IN_BODY);
2171 case self::IN_COLUMN_GROUP:
2172 /* A character token that is one of U+0009 CHARACTER TABULATION,
2173 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2175 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2176 /* Append the character to the current node. */
2177 $this->insertText($token['data']);
2179 /* A comment token */
2180 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2181 /* Append a Comment node to the current node with the data
2182 attribute set to the data given in the comment token. */
2183 $this->insertToken($token['data']);
2185 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2188 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2189 $this->processWithRulesFor($token, self::IN_BODY);
2191 /* A start tag whose tag name is "col" */
2192 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
2193 /* Insert a col element for the token. Immediately pop the current
2194 node off the stack of open elements. */
2195 $this->insertElement($token);
2196 array_pop($this->stack);
2197 // XERROR: Acknowledge the token's self-closing flag, if it is set.
2199 /* An end tag whose tag name is "colgroup" */
2200 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2201 $token['name'] === 'colgroup') {
2202 /* If the current node is the root html element, then this is a
2203 parse error, ignore the token. (fragment case) */
2204 if(end($this->stack)->tagName === 'html') {
2205 $this->ignored = true;
2207 /* Otherwise, pop the current node (which will be a colgroup
2208 element) from the stack of open elements. Switch the insertion
2209 mode to "in table". */
2211 array_pop($this->stack);
2212 $this->mode = self::IN_TABLE;
2215 /* An end tag whose tag name is "col" */
2216 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
2217 /* Parse error. Ignore the token. */
2218 $this->ignored = true;
2220 /* An end-of-file token */
2221 /* If the current node is the root html element */
2222 } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
2227 /* Act as if an end tag with the tag name "colgroup" had been seen,
2228 and then, if that token wasn't ignored, reprocess the current token. */
2229 $this->emitToken(array(
2230 'name' => 'colgroup',
2231 'type' => HTML5_Tokenizer::ENDTAG
2234 if (!$this->ignored) $this->emitToken($token);
2238 case self::IN_TABLE_BODY:
2239 $clear = array('tbody', 'tfoot', 'thead', 'html');
2241 /* A start tag whose tag name is "tr" */
2242 if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
2243 /* Clear the stack back to a table body context. */
2244 $this->clearStackToTableContext($clear);
2246 /* Insert a tr element for the token, then switch the insertion
2247 mode to "in row". */
2248 $this->insertElement($token);
2249 $this->mode = self::IN_ROW;
2251 /* A start tag whose tag name is one of: "th", "td" */
2252 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2253 ($token['name'] === 'th' || $token['name'] === 'td')) {
2254 /* Parse error. Act as if a start tag with the tag name "tr" had
2255 been seen, then reprocess the current token. */
2256 $this->emitToken(array(
2258 'type' => HTML5_Tokenizer::STARTTAG,
2262 $this->emitToken($token);
2264 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2265 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2266 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2267 /* If the stack of open elements does not have an element in table
2268 scope with the same tag name as the token, this is a parse error.
2269 Ignore the token. */
2270 if(!$this->elementInScope($token['name'], true)) {
2272 $this->ignored = true;
2276 /* Clear the stack back to a table body context. */
2277 $this->clearStackToTableContext($clear);
2279 /* Pop the current node from the stack of open elements. Switch
2280 the insertion mode to "in table". */
2281 array_pop($this->stack);
2282 $this->mode = self::IN_TABLE;
2285 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2286 "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
2287 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2288 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2289 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2290 /* If the stack of open elements does not have a tbody, thead, or
2291 tfoot element in table scope, this is a parse error. Ignore the
2292 token. (fragment case) */
2293 if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
2295 $this->ignored = true;
2299 /* Clear the stack back to a table body context. */
2300 $this->clearStackToTableContext($clear);
2302 /* Act as if an end tag with the same tag name as the current
2303 node ("tbody", "tfoot", or "thead") had been seen, then
2304 reprocess the current token. */
2305 $this->emitToken(array(
2306 'name' => end($this->stack)->tagName,
2307 'type' => HTML5_Tokenizer::ENDTAG
2310 $this->emitToken($token);
2313 /* An end tag whose tag name is one of: "body", "caption", "col",
2314 "colgroup", "html", "td", "th", "tr" */
2315 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2316 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
2317 /* Parse error. Ignore the token. */
2318 $this->ignored = true;
2322 /* Process the token as if the insertion mode was "in table". */
2323 $this->processWithRulesFor($token, self::IN_TABLE);
2328 $clear = array('tr', 'html');
2330 /* A start tag whose tag name is one of: "th", "td" */
2331 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2332 ($token['name'] === 'th' || $token['name'] === 'td')) {
2333 /* Clear the stack back to a table row context. */
2334 $this->clearStackToTableContext($clear);
2336 /* Insert an HTML element for the token, then switch the insertion
2337 mode to "in cell". */
2338 $this->insertElement($token);
2339 $this->mode = self::IN_CELL;
2341 /* Insert a marker at the end of the list of active formatting
2343 $this->a_formatting[] = self::MARKER;
2345 /* An end tag whose tag name is "tr" */
2346 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
2347 /* If the stack of open elements does not have an element in table
2348 scope with the same tag name as the token, this is a parse error.
2349 Ignore the token. (fragment case) */
2350 if(!$this->elementInScope($token['name'], true)) {
2352 $this->ignored = true;
2356 /* Clear the stack back to a table row context. */
2357 $this->clearStackToTableContext($clear);
2359 /* Pop the current node (which will be a tr element) from the
2360 stack of open elements. Switch the insertion mode to "in table
2362 array_pop($this->stack);
2363 $this->mode = self::IN_TABLE_BODY;
2366 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2367 "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
2368 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2369 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
2370 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2371 /* Act as if an end tag with the tag name "tr" had been seen, then,
2372 if that token wasn't ignored, reprocess the current token. */
2373 $this->emitToken(array(
2375 'type' => HTML5_Tokenizer::ENDTAG
2377 if (!$this->ignored) $this->emitToken($token);
2379 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2380 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2381 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2382 /* If the stack of open elements does not have an element in table
2383 scope with the same tag name as the token, this is a parse error.
2384 Ignore the token. */
2385 if(!$this->elementInScope($token['name'], true)) {
2386 $this->ignored = true;
2390 /* Otherwise, act as if an end tag with the tag name "tr" had
2391 been seen, then reprocess the current token. */
2392 $this->emitToken(array(
2394 'type' => HTML5_Tokenizer::ENDTAG
2397 $this->emitToken($token);
2400 /* An end tag whose tag name is one of: "body", "caption", "col",
2401 "colgroup", "html", "td", "th" */
2402 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2403 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
2404 /* Parse error. Ignore the token. */
2405 $this->ignored = true;
2409 /* Process the token as if the insertion mode was "in table". */
2410 $this->processWithRulesFor($token, self::IN_TABLE);
2415 /* An end tag whose tag name is one of: "td", "th" */
2416 if($token['type'] === HTML5_Tokenizer::ENDTAG &&
2417 ($token['name'] === 'td' || $token['name'] === 'th')) {
2418 /* If the stack of open elements does not have an element in table
2419 scope with the same tag name as that of the token, then this is a
2420 parse error and the token must be ignored. */
2421 if(!$this->elementInScope($token['name'], true)) {
2422 $this->ignored = true;
2426 /* Generate implied end tags, except for elements with the same
2427 tag name as the token. */
2428 $this->generateImpliedEndTags(array($token['name']));
2430 /* Now, if the current node is not an element with the same tag
2431 name as the token, then this is a parse error. */
2432 // XERROR: Implement parse error code
2434 /* Pop elements from this stack until an element with the same
2435 tag name as the token has been popped from the stack. */
2437 $node = array_pop($this->stack);
2438 } while ($node->tagName !== $token['name']);
2440 /* Clear the list of active formatting elements up to the last
2442 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2444 /* Switch the insertion mode to "in row". (The current node
2445 will be a tr element at this point.) */
2446 $this->mode = self::IN_ROW;
2449 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2450 "tbody", "td", "tfoot", "th", "thead", "tr" */
2451 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2452 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2454 /* If the stack of open elements does not have a td or th element
2455 in table scope, then this is a parse error; ignore the token.
2457 if(!$this->elementInScope(array('td', 'th'), true)) {
2459 $this->ignored = true;
2461 /* Otherwise, close the cell (see below) and reprocess the current
2465 $this->emitToken($token);
2468 /* An end tag whose tag name is one of: "body", "caption", "col",
2469 "colgroup", "html" */
2470 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2471 array('body', 'caption', 'col', 'colgroup', 'html'))) {
2472 /* Parse error. Ignore the token. */
2473 $this->ignored = true;
2475 /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
2477 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2478 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2479 /* If the stack of open elements does not have a td or th element
2480 in table scope, then this is a parse error; ignore the token.
2482 if(!$this->elementInScope(array('td', 'th'), true)) {
2484 $this->ignored = true;
2486 /* Otherwise, close the cell (see below) and reprocess the current
2490 $this->emitToken($token);
2495 /* Process the token as if the insertion mode was "in body". */
2496 $this->processWithRulesFor($token, self::IN_BODY);
2500 case self::IN_SELECT:
2501 /* Handle the token as follows: */
2503 /* A character token */
2505 $token['type'] === HTML5_Tokenizer::CHARACTER ||
2506 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2508 /* Append the token's character to the current node. */
2509 $this->insertText($token['data']);
2511 /* A comment token */
2512 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2513 /* Append a Comment node to the current node with the data
2514 attribute set to the data given in the comment token. */
2515 $this->insertComment($token['data']);
2517 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2520 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2521 $this->processWithRulesFor($token, self::INBODY);
2523 /* A start tag token whose tag name is "option" */
2524 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2525 $token['name'] === 'option') {
2526 /* If the current node is an option element, act as if an end tag
2527 with the tag name "option" had been seen. */
2528 if(end($this->stack)->tagName === 'option') {
2529 $this->emitToken(array(
2531 'type' => HTML5_Tokenizer::ENDTAG
2535 /* Insert an HTML element for the token. */
2536 $this->insertElement($token);
2538 /* A start tag token whose tag name is "optgroup" */
2539 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2540 $token['name'] === 'optgroup') {
2541 /* If the current node is an option element, act as if an end tag
2542 with the tag name "option" had been seen. */
2543 if(end($this->stack)->tagName === 'option') {
2544 $this->emitToken(array(
2546 'type' => HTML5_Tokenizer::ENDTAG
2550 /* If the current node is an optgroup element, act as if an end tag
2551 with the tag name "optgroup" had been seen. */
2552 if(end($this->stack)->tagName === 'optgroup') {
2553 $this->emitToken(array(
2554 'name' => 'optgroup',
2555 'type' => HTML5_Tokenizer::ENDTAG
2559 /* Insert an HTML element for the token. */
2560 $this->insertElement($token);
2562 /* An end tag token whose tag name is "optgroup" */
2563 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2564 $token['name'] === 'optgroup') {
2565 /* First, if the current node is an option element, and the node
2566 immediately before it in the stack of open elements is an optgroup
2567 element, then act as if an end tag with the tag name "option" had
2569 $elements_in_stack = count($this->stack);
2571 if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
2572 $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
2573 $this->emitToken(array(
2575 'type' => HTML5_Tokenizer::ENDTAG
2579 /* If the current node is an optgroup element, then pop that node
2580 from the stack of open elements. Otherwise, this is a parse error,
2581 ignore the token. */
2582 if(end($this->stack)->tagName === 'optgroup') {
2583 array_pop($this->stack);
2586 $this->ignored = true;
2589 /* An end tag token whose tag name is "option" */
2590 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2591 $token['name'] === 'option') {
2592 /* If the current node is an option element, then pop that node
2593 from the stack of open elements. Otherwise, this is a parse error,
2594 ignore the token. */
2595 if(end($this->stack)->tagName === 'option') {
2596 array_pop($this->stack);
2599 $this->ignored = true;
2602 /* An end tag whose tag name is "select" */
2603 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2604 $token['name'] === 'select') {
2605 /* If the stack of open elements does not have an element in table
2606 scope with the same tag name as the token, this is a parse error.
2607 Ignore the token. (fragment case) */
2608 if(!$this->elementInScope($token['name'], true)) {
2609 $this->ignored = true;
2614 /* Pop elements from the stack of open elements until a select
2615 element has been popped from the stack. */
2617 $node = array_pop($this->stack);
2618 } while ($node->tagName !== 'select');
2620 /* Reset the insertion mode appropriately. */
2621 $this->resetInsertionMode();
2624 /* A start tag whose tag name is "select" */
2625 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
2626 /* Parse error. Act as if the token had been an end tag with the
2627 tag name "select" instead. */
2628 $this->emitToken(array(
2630 'type' => HTML5_Tokenizer::ENDTAG
2633 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2634 ($token['name'] === 'input' || $token['name'] === 'textarea')) {
2636 $this->emitToken(array(
2638 'type' => HTML5_Tokenizer::ENDTAG
2640 $this->emitToken($token);
2642 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
2643 $this->processWithRulesFor($token, self::IN_HEAD);
2645 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2646 // XERROR: If the current node is not the root html element, then this is a parse error.
2651 /* Parse error. Ignore the token. */
2652 $this->ignored = true;
2656 case self::IN_SELECT_IN_TABLE:
2658 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2659 in_array($token['name'], array('caption', 'table', 'tbody',
2660 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2662 $this->emitToken(array(
2664 'type' => HTML5_Tokenizer::ENDTAG,
2666 $this->emitToken($token);
2668 /* An end tag whose tag name is one of: "caption", "table", "tbody",
2669 "tfoot", "thead", "tr", "td", "th" */
2670 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2671 in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2675 /* If the stack of open elements has an element in table scope with
2676 the same tag name as that of the token, then act as if an end tag
2677 with the tag name "select" had been seen, and reprocess the token.
2678 Otherwise, ignore the token. */
2679 if($this->elementInScope($token['name'], true)) {
2680 $this->emitToken(array(
2682 'type' => HTML5_Tokenizer::ENDTAG
2685 $this->emitToken($token);
2687 $this->ignored = true;
2690 $this->processWithRulesFor($token, self::IN_SELECT);
2694 case self::IN_FOREIGN_CONTENT:
2695 if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2696 $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2697 $this->insertText($token['data']);
2698 } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2699 $this->insertComment($token['data']);
2700 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2701 // XERROR: parse error
2702 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2703 $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
2704 end($this->stack)->namespaceURI === self::NS_SVG) {
2705 array_pop($this->stack);
2706 // a bunch of script running mumbo jumbo
2708 ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2710 $token['name'] !== 'mglyph' &&
2711 $token['name'] !== 'malignmark' &&
2712 end($this->stack)->namespaceURI === self::NS_MATHML &&
2713 in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2716 $token['name'] === 'svg' &&
2717 end($this->stack)->namespaceURI === self::NS_MATHML &&
2718 end($this->stack)->tagName === 'annotation-xml'
2721 end($this->stack)->namespaceURI === self::NS_SVG &&
2722 in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
2726 end($this->stack)->namespaceURI === self::NS_HTML
2728 ) || $token['type'] === HTML5_Tokenizer::ENDTAG
2730 $this->processWithRulesFor($token, $this->secondary_mode);
2731 /* If, after doing so, the insertion mode is still "in foreign
2732 * content", but there is no element in scope that has a namespace
2733 * other than the HTML namespace, switch the insertion mode to the
2734 * secondary insertion mode. */
2735 if ($this->mode === self::IN_FOREIGN_CONTENT) {
2737 // this basically duplicates elementInScope()
2738 for ($i = count($this->stack) - 1; $i >= 0; $i--) {
2739 $node = $this->stack[$i];
2740 if ($node->namespaceURI !== self::NS_HTML) {
2743 } elseif (in_array($node->tagName, array('table', 'html',
2744 'applet', 'caption', 'td', 'th', 'button', 'marquee',
2745 'object')) || ($node->tagName === 'foreignObject' &&
2746 $node->namespaceURI === self::NS_SVG)) {
2751 $this->mode = $this->secondary_mode;
2754 } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
2755 $token['type'] === HTML5_Tokenizer::STARTTAG &&
2756 (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
2757 "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2",
2758 "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
2759 "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
2760 "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
2761 "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2762 $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2763 // XERROR: parse error
2765 $node = array_pop($this->stack);
2766 } while ($node->namespaceURI !== self::NS_HTML);
2767 $this->stack[] = $node;
2768 $this->mode = $this->secondary_mode;
2769 $this->emitToken($token);
2770 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
2771 static $svg_lookup = array(
2772 'altglyph' => 'altGlyph',
2773 'altglyphdef' => 'altGlyphDef',
2774 'altglyphitem' => 'altGlyphItem',
2775 'animatecolor' => 'animateColor',
2776 'animatemotion' => 'animateMotion',
2777 'animatetransform' => 'animateTransform',
2778 'clippath' => 'clipPath',
2779 'feblend' => 'feBlend',
2780 'fecolormatrix' => 'feColorMatrix',
2781 'fecomponenttransfer' => 'feComponentTransfer',
2782 'fecomposite' => 'feComposite',
2783 'feconvolvematrix' => 'feConvolveMatrix',
2784 'fediffuselighting' => 'feDiffuseLighting',
2785 'fedisplacementmap' => 'feDisplacementMap',
2786 'fedistantlight' => 'feDistantLight',
2787 'feflood' => 'feFlood',
2788 'fefunca' => 'feFuncA',
2789 'fefuncb' => 'feFuncB',
2790 'fefuncg' => 'feFuncG',
2791 'fefuncr' => 'feFuncR',
2792 'fegaussianblur' => 'feGaussianBlur',
2793 'feimage' => 'feImage',
2794 'femerge' => 'feMerge',
2795 'femergenode' => 'feMergeNode',
2796 'femorphology' => 'feMorphology',
2797 'feoffset' => 'feOffset',
2798 'fepointlight' => 'fePointLight',
2799 'fespecularlighting' => 'feSpecularLighting',
2800 'fespotlight' => 'feSpotLight',
2801 'fetile' => 'feTile',
2802 'feturbulence' => 'feTurbulence',
2803 'foreignobject' => 'foreignObject',
2804 'glyphref' => 'glyphRef',
2805 'lineargradient' => 'linearGradient',
2806 'radialgradient' => 'radialGradient',
2807 'textpath' => 'textPath',
2809 $current = end($this->stack);
2810 if ($current->namespaceURI === self::NS_MATHML) {
2811 $token = $this->adjustMathMLAttributes($token);
2813 if ($current->namespaceURI === self::NS_SVG &&
2814 isset($svg_lookup[$token['name']])) {
2815 $token['name'] = $svg_lookup[$token['name']];
2817 if ($current->namespaceURI === self::NS_SVG) {
2818 $token = $this->adjustSVGAttributes($token);
2820 $token = $this->adjustForeignAttributes($token);
2821 $this->insertForeignElement($token, $current->namespaceURI);
2822 if (isset($token['self-closing'])) {
2823 array_pop($this->stack);
2824 // XERROR: acknowledge self-closing flag
2829 case self::AFTER_BODY:
2830 /* Handle the token as follows: */
2832 /* A character token that is one of U+0009 CHARACTER TABULATION,
2833 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2835 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2836 /* Process the token as it would be processed if the insertion mode
2838 $this->processWithRulesFor($token, self::IN_BODY);
2840 /* A comment token */
2841 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2842 /* Append a Comment node to the first element in the stack of open
2843 elements (the html element), with the data attribute set to the
2844 data given in the comment token. */
2845 $comment = $this->dom->createComment($token['data']);
2846 $this->stack[0]->appendChild($comment);
2848 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2851 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2852 $this->processWithRulesFor($token, self::IN_BODY);
2854 /* An end tag with the tag name "html" */
2855 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
2856 /* If the parser was originally created as part of the HTML
2857 * fragment parsing algorithm, this is a parse error; ignore
2858 * the token. (fragment case) */
2859 $this->ignored = true;
2860 // XERROR: implement this
2862 $this->mode = self::AFTER_AFTER_BODY;
2864 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2869 /* Parse error. Set the insertion mode to "in body" and reprocess
2871 $this->mode = self::IN_BODY;
2872 $this->emitToken($token);
2876 case self::IN_FRAMESET:
2877 /* Handle the token as follows: */
2879 /* A character token that is one of U+0009 CHARACTER TABULATION,
2880 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2881 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
2882 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2883 /* Append the character to the current node. */
2884 $this->insertText($token['data']);
2886 /* A comment token */
2887 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2888 /* Append a Comment node to the current node with the data
2889 attribute set to the data given in the comment token. */
2890 $this->insertComment($token['data']);
2892 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2895 /* A start tag with the tag name "frameset" */
2896 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2897 $token['name'] === 'frameset') {
2898 $this->insertElement($token);
2900 /* An end tag with the tag name "frameset" */
2901 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2902 $token['name'] === 'frameset') {
2903 /* If the current node is the root html element, then this is a
2904 parse error; ignore the token. (fragment case) */
2905 if(end($this->stack)->tagName === 'html') {
2906 $this->ignored = true;
2910 /* Otherwise, pop the current node from the stack of open
2912 array_pop($this->stack);
2914 /* If the parser was not originally created as part of the HTML
2915 * fragment parsing algorithm (fragment case), and the current
2916 * node is no longer a frameset element, then switch the
2917 * insertion mode to "after frameset". */
2918 $this->mode = self::AFTER_FRAMESET;
2921 /* A start tag with the tag name "frame" */
2922 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2923 $token['name'] === 'frame') {
2924 /* Insert an HTML element for the token. */
2925 $this->insertElement($token);
2927 /* Immediately pop the current node off the stack of open elements. */
2928 array_pop($this->stack);
2930 // XERROR: Acknowledge the token's self-closing flag, if it is set.
2932 /* A start tag with the tag name "noframes" */
2933 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2934 $token['name'] === 'noframes') {
2935 /* Process the token using the rules for the "in head" insertion mode. */
2936 $this->processwithRulesFor($token, self::IN_HEAD);
2938 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2939 // XERROR: If the current node is not the root html element, then this is a parse error.
2943 /* Parse error. Ignore the token. */
2944 $this->ignored = true;
2948 case self::AFTER_FRAMESET:
2949 /* Handle the token as follows: */
2951 /* A character token that is one of U+0009 CHARACTER TABULATION,
2952 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2953 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
2954 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2955 /* Append the character to the current node. */
2956 $this->insertText($token['data']);
2958 /* A comment token */
2959 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2960 /* Append a Comment node to the current node with the data
2961 attribute set to the data given in the comment token. */
2962 $this->insertComment($token['data']);
2964 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2967 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2968 $this->processWithRulesFor($token, self::IN_BODY);
2970 /* An end tag with the tag name "html" */
2971 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2972 $token['name'] === 'html') {
2973 $this->mode = self::AFTER_AFTER_FRAMESET;
2975 /* A start tag with the tag name "noframes" */
2976 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2977 $token['name'] === 'noframes') {
2978 $this->processWithRulesFor($token, self::IN_HEAD);
2980 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2985 /* Parse error. Ignore the token. */
2986 $this->ignored = true;
2990 case self::AFTER_AFTER_BODY:
2991 /* A comment token */
2992 if($token['type'] === HTML5_Tokenizer::COMMENT) {
2993 /* Append a Comment node to the Document object with the data
2994 attribute set to the data given in the comment token. */
2995 $comment = $this->dom->createComment($token['data']);
2996 $this->dom->appendChild($comment);
2998 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
2999 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3000 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3001 $this->processWithRulesFor($token, self::IN_BODY);
3003 /* An end-of-file token */
3004 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3008 $this->mode = self::IN_BODY;
3009 $this->emitToken($token);
3013 case self::AFTER_AFTER_FRAMESET:
3014 /* A comment token */
3015 if($token['type'] === HTML5_Tokenizer::COMMENT) {
3016 /* Append a Comment node to the Document object with the data
3017 attribute set to the data given in the comment token. */
3018 $comment = $this->dom->createComment($token['data']);
3019 $this->dom->appendChild($comment);
3021 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3022 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3023 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3024 $this->processWithRulesFor($token, self::IN_BODY);
3026 /* An end-of-file token */
3027 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3029 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
3030 $this->processWithRulesFor($token, self::IN_HEAD);
3036 // end funky indenting
/**
 * Creates an HTML-namespace element for a tag token and, unless told
 * otherwise, inserts it and makes it the current node.
 *
 * @param array $token  Tag token; 'name' is read, and 'attr' (a list of
 *                      arrays with 'name' and 'value') when present.
 * @param bool  $append When true the element is passed to
 *                      appendToRealParent() and pushed onto the stack of
 *                      open elements.
 * @return mixed The element returned by $this->dom->createElementNS().
 */
private function insertElement($token, $append = true) {
    $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);

    if (!empty($token['attr'])) {
        foreach ($token['attr'] as $attr) {
            // mike@macgirvin.com 2011-11-17, check attribute name for
            // validity (ignoring extenders and combiners) as illegal chars
            // in names cause everything to abort.
            // The tail is quantified with "*" so that one-character names
            // (e.g. "x") are accepted; "+" silently dropped them.
            $valid = preg_match('/^[a-zA-Z_:][-a-zA-Z0-9_:.]*$/', $attr['name']);

            // First occurrence of an attribute wins; later duplicates are ignored.
            if ($attr['name'] && !$el->hasAttribute($attr['name']) && $valid) {
                $el->setAttribute($attr['name'], $attr['value']);
            }
        }
    }

    if ($append) {
        $this->appendToRealParent($el);
        $this->stack[] = $el;
    }

    return $el;
}
/**
 * Appends character data as a text node at the current insertion point.
 *
 * While $this->ignore_lf_token is truthy a single leading line feed is
 * stripped first; nothing is inserted when no data remains.
 *
 * @param string $data Character data to insert.
 */
private function insertText($data) {
    if ($data === '') {
        return;
    }

    if ($this->ignore_lf_token && $data[0] === "\n") {
        $data = substr($data, 1);
        // substr() returns false before PHP 8.0 and '' from 8.0 onwards
        // when nothing is left; treat both as "nothing to insert" so no
        // empty text node is created.
        if ($data === false || $data === '') {
            return;
        }
    }

    $text = $this->dom->createTextNode($data);
    $this->appendToRealParent($text);
}
/**
 * Builds a comment node from the given data and inserts it at the
 * current insertion point.
 *
 * @param string $data Comment text.
 */
private function insertComment($data) {
    $this->appendToRealParent($this->dom->createComment($data));
}
/**
 * Inserts a node under the current node, or foster parents it.
 *
 * Foster parenting only happens when $this->foster_parent is set and the
 * current node is a table, tbody, tfoot, thead, or tr element; in every
 * other case the node is appended to the current node.
 *
 * @param DOMNode $node Node to insert.
 */
private function appendToRealParent($node) {
    $current = end($this->stack);
    $table_parts = array('table', 'tbody', 'tfoot', 'thead', 'tr');

    if ($this->foster_parent && in_array($current->tagName, $table_parts)) {
        $this->fosterParent($node);
        return;
    }

    $current->appendChild($node);
}
/**
 * Tells whether an element with the given tag name is in scope (or in
 * table scope) on the stack of open elements.
 *
 * @param string|array $el    Tag name, or a list of tag names of which any
 *                            one may match.
 * @param bool         $table When true only the "table scope" boundaries
 *                            (table, html) stop the search.
 * @return bool|null True on a match, false when a scope boundary is met
 *                   first; no value is returned if the whole stack is
 *                   walked without hitting either.
 */
private function elementInScope($el, $table = false) {
    if (is_array($el)) {
        foreach ($el as $candidate) {
            if ($this->elementInScope($candidate, $table)) {
                return true;
            }
        }

        return false;
    }

    // these are only valid for "in scope"
    $scope_boundaries = array('applet', 'caption', 'td',
        'th', 'button', 'marquee', 'object');

    // Walk from the current (bottommost) node towards the root.
    for ($i = count($this->stack) - 1; $i >= 0; $i--) {
        $node = $this->stack[$i];
        $tag  = $node->tagName;

        /* If node is the target node, terminate in a match state. */
        if ($tag === $el) {
            return true;
        }

        // these are the common states for "in scope" and "in table scope"
        if ($tag === 'table' || $tag === 'html') {
            return false;
        }

        if (!$table) {
            if (in_array($tag, $scope_boundaries)) {
                return false;
            }
            if ($tag === 'foreignObject' && $node->namespaceURI === self::NS_SVG) {
                return false;
            }
        }

        /* Otherwise, move to the previous entry in the stack of open
        elements and try again. */
    }
}
/**
 * Reconstructs the active formatting elements: every trailing entry of
 * the list that is neither a marker nor on the stack of open elements is
 * shallow-cloned, inserted, pushed onto the stack, and swapped into the
 * list in place of the original entry.
 *
 * @return false|null False when there is nothing to reconstruct;
 *                    otherwise no value is returned.
 */
private function reconstructActiveFormattingElements() {
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $count = count($this->a_formatting);

    if ($count === 0) {
        return false;
    }

    /* 2./3. If the last (most recently added) entry is a marker, or an
    element that is in the stack of open elements, then there is nothing
    to reconstruct; stop this algorithm. */
    $entry = end($this->a_formatting);

    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    /* 4.-6. Rewind: move $a towards the start of the list for as long as
    the entry *before* it also needs reconstructing. Looking at the
    previous entry without stepping onto it means that, when the rewind
    stops at a marker or an open element, $a already points one later
    than it (step 7) and that entry is never cloned itself. */
    $a = $count - 1;
    while ($a > 0) {
        $previous = $this->a_formatting[$a - 1];
        if ($previous === self::MARKER || in_array($previous, $this->stack, true)) {
            break;
        }
        $a--;
    }

    /* 7.-11. Advance: recreate each entry from $a up to the end of the list. */
    for (; $a < $count; $a++) {
        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $this->a_formatting[$a]->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        $this->appendToRealParent($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;
    }
}
/**
 * Clears the list of active formatting elements up to the last marker:
 * entries are removed from the end of the list until a marker has been
 * removed.
 *
 * The loop also stops when the list runs empty, so a list that contains
 * no marker cannot cause an endless loop.
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    while (count($this->a_formatting) > 0) {
        /* 1./2. Let entry be the last (most recently added) entry in the
        list of active formatting elements and remove it from the list. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
/**
 * Generates implied end tags: while the current node is a dd, dt, li, p,
 * td, th, or tr element it is popped off the stack of open elements.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 */
private function generateImpliedEndTags($exclude = array()) {
    $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    // The emptiness check keeps end() from returning false, whose tagName
    // would otherwise be read on an exhausted stack.
    while (count($this->stack) > 0
        && in_array(end($this->stack)->tagName, $elements, true)) {
        array_pop($this->stack);
    }
}
/**
 * Classifies an element by tag name using the $special, $scoping and
 * $formatting lists of this object, checked in that order.
 *
 * @param DOMElement $node Element to classify; a backtrace is printed
 *                         when this is not an object.
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING,
 *               or self::PHRASING when no list contains the name.
 */
private function getElementCategory($node) {
    if (!is_object($node)) {
        debug_print_backtrace();
    }

    $name = $node->tagName;

    if (in_array($name, $this->special)) {
        return self::SPECIAL;
    }
    if (in_array($name, $this->scoping)) {
        return self::SCOPING;
    }
    if (in_array($name, $this->formatting)) {
        return self::FORMATTING;
    }

    return self::PHRASING;
}
/**
 * Clears the stack of open elements back to a table context: elements
 * are popped until the current node's tag name is one of $elements.
 *
 * Popping also stops when the stack runs empty, so a stack that holds
 * none of the given elements cannot cause an endless loop.
 *
 * @param array $elements Tag names at which popping stops.
 */
private function clearStackToTableContext($elements) {
    while (count($this->stack) > 0
        && !in_array(end($this->stack)->tagName, $elements)) {
        array_pop($this->stack);
    }
}
/**
 * Resets the insertion mode appropriately by walking the stack of open
 * elements from the current node towards the root and switching
 * $this->mode according to the first node that matches a rule.
 *
 * @param DOMElement|null $context Context element (fragment case). When
 *                                 null, the first node of the stack is
 *                                 examined as itself rather than being
 *                                 replaced by a null context.
 */
private function resetInsertionMode($context = null) {
    /* 1. Let last be false. */
    $last = false;

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        /* 2. Let node be the last node in the stack of open elements. */
        $node = $this->stack[$n];

        /* 3. If node is the first node in the stack of open elements, then
         * set last to true and set node to the context element. (fragment
         * case) */
        if ($this->stack[0]->isSameNode($node)) {
            $last = true;
            // Only substitute when a context element actually exists;
            // otherwise the rules below would read properties of null.
            if ($context !== null) {
                $node = $context;
            }
        }

        $name = $node->tagName;

        /* 4. If node is a select element, then switch the insertion mode to
        "in select" and abort these steps. (fragment case) */
        if ($name === 'select') {
            $this->mode = self::IN_SELECT;
            break;

        /* 5. If node is a td or th element, then switch the insertion mode
        to "in cell" and abort these steps. */
        } elseif ($name === 'td' || $name === 'th') {
            $this->mode = self::IN_CELL;
            break;

        /* 6. If node is a tr element, then switch the insertion mode to
        "in row" and abort these steps. */
        } elseif ($name === 'tr') {
            $this->mode = self::IN_ROW;
            break;

        /* 7. If node is a tbody, thead, or tfoot element, then switch the
        insertion mode to "in table body" and abort these steps. */
        } elseif (in_array($name, array('tbody', 'thead', 'tfoot'))) {
            $this->mode = self::IN_TABLE_BODY;
            break;

        /* 8. If node is a caption element, then switch the insertion mode
        to "in caption" and abort these steps. */
        } elseif ($name === 'caption') {
            $this->mode = self::IN_CAPTION;
            break;

        /* 9. If node is a colgroup element, then switch the insertion mode
        to "in column group" and abort these steps. (innerHTML case) */
        } elseif ($name === 'colgroup') {
            $this->mode = self::IN_COLUMN_GROUP;
            break;

        /* 10. If node is a table element, then switch the insertion mode
        to "in table" and abort these steps. */
        } elseif ($name === 'table') {
            $this->mode = self::IN_TABLE;
            break;

        /* 11. If node is an element from the MathML namespace or the SVG
         * namespace, then switch the insertion mode to "in foreign
         * content", let the secondary insertion mode be "in body", and
         * abort these steps. */
        } elseif ($node->namespaceURI === self::NS_SVG ||
            $node->namespaceURI === self::NS_MATHML) {
            $this->mode = self::IN_FOREIGN_CONTENT;
            $this->secondary_mode = self::IN_BODY;
            break;

        /* 12. If node is a head element, then switch the insertion mode
        to "in body" ("in body"! not "in head"!) and abort these steps.
        (fragment case) */
        } elseif ($name === 'head') {
            $this->mode = self::IN_BODY;
            break;

        /* 13. If node is a body element, then switch the insertion mode to
        "in body" and abort these steps. */
        } elseif ($name === 'body') {
            $this->mode = self::IN_BODY;
            break;

        /* 14. If node is a frameset element, then switch the insertion
        mode to "in frameset" and abort these steps. (fragment case) */
        } elseif ($name === 'frameset') {
            $this->mode = self::IN_FRAMESET;
            break;

        /* 15. If node is an html element, then: if the head element
        pointer is null, switch the insertion mode to "before head",
        otherwise, switch the insertion mode to "after head". In either
        case, abort these steps. (fragment case) */
        } elseif ($name === 'html') {
            $this->mode = ($this->head_pointer === null)
                ? self::BEFORE_HEAD
                : self::AFTER_HEAD;
            break;

        /* 16. If last is true, then set the insertion mode to "in body"
        and abort these steps. (fragment case) */
        } elseif ($last) {
            $this->mode = self::IN_BODY;
            break;
        }
    }
}
/**
 * Closes the open table cell: if a td element (checked first) or a th
 * element is in table scope, an end tag token with that name is emitted.
 * At most one token is emitted.
 */
private function closeCell() {
    foreach (array('td', 'th') as $cell) {
        if (!$this->elementInScope($cell, true)) {
            continue;
        }

        /* Act as if an end tag token with that tag name had been seen. */
        $end_tag = array(
            'name' => $cell,
            'type' => HTML5_Tokenizer::ENDTAG
        );
        $this->emitToken($end_tag);

        return;
    }
}
/**
 * Processes a token "using the rules for" another insertion mode by
 * forwarding it to emitToken() together with that mode.
 *
 * Per the specification text this leaves the insertion mode unchanged
 * unless the rules of $mode themselves switch it.
 *
 * @param array $token Token to process.
 * @param mixed $mode  Insertion mode whose rules should be applied.
 * @return mixed Whatever emitToken() returns.
 */
private function processWithRulesFor($token, $mode) {
    $result = $this->emitToken($token, $mode);

    return $result;
}
/**
 * Inserts an element for the token and prepares for its CDATA content:
 * the current insertion mode is remembered in $original_mode, the mode
 * becomes IN_CDATA_RCDATA and the content model is set to CDATA.
 *
 * @param array $token Start tag token of the element.
 */
private function insertCDATAElement($token) {
    $this->insertElement($token);

    // Remember where to come back to before switching modes.
    $previous_mode       = $this->mode;
    $this->original_mode = $previous_mode;
    $this->mode          = self::IN_CDATA_RCDATA;

    $this->content_model = HTML5_Tokenizer::CDATA;
}
/**
 * Inserts an element for the token and prepares for its RCDATA content:
 * the current insertion mode is remembered in $original_mode, the mode
 * becomes IN_CDATA_RCDATA and the content model is set to RCDATA.
 *
 * @param array $token Start tag token of the element.
 */
private function insertRCDATAElement($token) {
    $this->insertElement($token);

    // Remember where to come back to before switching modes.
    $previous_mode       = $this->mode;
    $this->original_mode = $previous_mode;
    $this->mode          = self::IN_CDATA_RCDATA;

    $this->content_model = HTML5_Tokenizer::RCDATA;
}
/**
 * Looks up the value of a named attribute on a token.
 *
 * @param array  $token Token whose 'attr' list (arrays with 'name' and
 *                      'value') is searched.
 * @param string $key   Attribute name to look for.
 * @return mixed The value of the last attribute with that name, or false
 *               when the token has no 'attr' entry or no such attribute.
 */
private function getAttr($token, $key) {
    $value = false;

    if (isset($token['attr'])) {
        // No early exit: a later attribute of the same name overrides an
        // earlier one.
        foreach ($token['attr'] as $pair) {
            if ($pair['name'] === $key) {
                $value = $pair['value'];
            }
        }
    }

    return $value;
}
/**
 * Returns the current table: the last table element in the stack of open
 * elements, or the first element of the stack when there is no table
 * (fragment case).
 *
 * @return mixed The matching stack entry.
 */
private function getCurrentTable() {
    $index = count($this->stack);

    // Search from the current node towards the root.
    while (--$index >= 0) {
        $candidate = $this->stack[$index];
        if ($candidate->tagName === 'table') {
            return $candidate;
        }
    }

    return $this->stack[0];
}
/**
 * Determines the foster parent element.
 *
 * - No table element in the stack of open elements (innerHTML case): the
 *   first element in the stack.
 * - The last table element has a parent that is an element: that parent.
 * - The last table element has no parent, or its parent is not an
 *   element: the entry before the table in the stack of open elements.
 *
 * The element check is made before the parent is returned, so a
 * non-element parent is no longer handed back as the foster parent.
 *
 * @return mixed The foster parent as described above.
 */
private function getFosterParent() {
    $table = null;

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        if ($this->stack[$n]->tagName === 'table') {
            $table = $this->stack[$n];
            break;
        }
    }

    if ($table === null) {
        return $this->stack[0];
    }

    $parent = $table->parentNode;
    if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
        return $parent;
    }

    // $n still holds the table's position in the stack.
    return $this->stack[$n - 1];
}
/**
 * Foster parents a node and marks the current table as tainted.
 *
 * @param DOMNode $node Node to insert into the foster parent element.
 */
public function fosterParent($node) {
    $foster_parent = $this->getFosterParent();
    $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html

    /* When a node node is to be foster parented, the node node must be
     * inserted into the foster parent element, and the current table must
     * be marked as tainted. (Once the current table has been tainted,
     * whitespace characters are inserted into the foster parent element
     * instead of the current node.) */
    $table->tainted = true;

    /* If the foster parent element is the parent element of the last table
     * element in the stack of open elements, then node must be inserted
     * immediately before the last table element in the stack of open
     * elements in the foster parent element; otherwise, node must be
     * appended to the foster parent element. */
    $table_parent = $table->parentNode;

    // A detached table has no parent; it then falls through to the
    // append branch instead of calling a method on null.
    if ($table->tagName === 'table'
        && $table_parent !== null
        && $table_parent->isSameNode($foster_parent)) {
        $foster_parent->insertBefore($node, $table);
    } else {
        $foster_parent->appendChild($node);
    }
}
/**
 * Debugging aid: echoes the tag names of the stack of open elements,
 * first entry first, on a single line.
 */
private function printStack() {
    $tags = array();

    foreach ($this->stack as $open_element) {
        $tags[] = $open_element->tagName;
    }

    $line = implode(', ', $tags);
    echo " -> stack [" . $line . "]\n";
}
/**
 * Debugging aid: echoes the list of active formatting elements on a
 * single line, showing markers as "MARKER". Prints nothing when the
 * list is empty.
 */
private function printActiveFormattingElements() {
    if (!$this->a_formatting) {
        return;
    }

    $labels = array();
    foreach ($this->a_formatting as $entry) {
        $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
    }

    $line = implode(', ', $labels);
    echo " -> active formatting [" . $line . "]\n";
}
/**
 * Reports whether the current table carries a non-empty "tainted" flag.
 *
 * @return bool True when getCurrentTable()'s element has a truthy
 *              "tainted" property.
 */
public function currentTableIsTainted() {
    $table = $this->getCurrentTable();

    return !empty($table->tainted);
}
/**
 * Sets up the tree constructor for building a fragment.
 *
 * The fragment flag is always set. When a context tag name is given, a
 * context element is created from it, the tokenizer content model is
 * chosen to suit it, a root html element becomes the sole entry of the
 * stack of open elements, the insertion mode is reset and the form
 * element pointer is initialised.
 *
 * @param string|null $context Tag name of the context element, if any.
 */
public function setupContext($context = null) {
    $this->fragment = true;

    if (!$context) {
        return;
    }

    $context = $this->dom->createElementNS(self::NS_HTML, $context);

    /* 4.1. Set the HTML parser's tokenization stage's content model
     * flag according to the context element, as follows: */
    $content_models = array(
        'title'     => HTML5_Tokenizer::RCDATA,
        'textarea'  => HTML5_Tokenizer::RCDATA,
        'style'     => HTML5_Tokenizer::CDATA,
        'script'    => HTML5_Tokenizer::CDATA,
        'xmp'       => HTML5_Tokenizer::CDATA,
        'iframe'    => HTML5_Tokenizer::CDATA,
        'noembed'   => HTML5_Tokenizer::CDATA,
        'noframes'  => HTML5_Tokenizer::CDATA,
        // XSCRIPT: assuming scripting is enabled
        'noscript'  => HTML5_Tokenizer::CDATA,
        'plaintext' => HTML5_Tokenizer::PLAINTEXT,
    );
    // Any other context element leaves the content model untouched.
    if (isset($content_models[$context->tagName])) {
        $this->content_model = $content_models[$context->tagName];
    }

    /* 4.2. Let root be a new html element with no attributes. */
    $root = $this->dom->createElementNS(self::NS_HTML, 'html');
    $this->root = $root;

    /* 4.3 Append the element root to the Document node created above. */
    $this->dom->appendChild($root);

    /* 4.4 Set up the parser's stack of open elements so that it
     * contains just the single element root. */
    $this->stack = array($root);

    /* 4.5 Reset the parser's insertion mode appropriately. */
    $this->resetInsertionMode($context);

    /* 4.6 Set the parser's form element pointer to the nearest node
     * to the context element that is a form element (going straight up
     * the ancestor chain, and including the element itself, if it is a
     * form element), or, if there is no such form element, to null. */
    for ($node = $context; $node; $node = $node->parentNode) {
        if ($node->tagName === 'form') {
            $this->form_pointer = $node;
            break;
        }
    }
}
/**
 * Adjusts MathML attributes on a token: the lowercased attribute name
 * "definitionurl" is rewritten to "definitionURL".
 *
 * @param array $token Tag token; its 'attr' list is optional.
 * @return array The token with adjusted attribute names. A token without
 *               attributes is returned unchanged.
 */
public function adjustMathMLAttributes($token) {
    // Other methods of this class treat 'attr' as optional, so do the same
    // here rather than iterating an undefined entry.
    if (empty($token['attr'])) {
        return $token;
    }

    // Indexed writes avoid leaving a foreach reference behind.
    foreach ($token['attr'] as $i => $kp) {
        if ($kp['name'] === 'definitionurl') {
            $token['attr'][$i]['name'] = 'definitionURL';
        }
    }

    return $token;
}
/**
 * Adjusts SVG attributes on a token: lowercased attribute names are
 * rewritten to their mixed-case SVG spelling using a fixed lookup table.
 *
 * @param array $token Tag token; its 'attr' list is optional.
 * @return array The token with adjusted attribute names. A token without
 *               attributes is returned unchanged.
 */
public function adjustSVGAttributes($token) {
    static $lookup = array(
        'attributename' => 'attributeName',
        'attributetype' => 'attributeType',
        'basefrequency' => 'baseFrequency',
        'baseprofile' => 'baseProfile',
        'calcmode' => 'calcMode',
        'clippathunits' => 'clipPathUnits',
        'contentscripttype' => 'contentScriptType',
        'contentstyletype' => 'contentStyleType',
        'diffuseconstant' => 'diffuseConstant',
        'edgemode' => 'edgeMode',
        'externalresourcesrequired' => 'externalResourcesRequired',
        'filterres' => 'filterRes',
        'filterunits' => 'filterUnits',
        'glyphref' => 'glyphRef',
        'gradienttransform' => 'gradientTransform',
        'gradientunits' => 'gradientUnits',
        'kernelmatrix' => 'kernelMatrix',
        'kernelunitlength' => 'kernelUnitLength',
        'keypoints' => 'keyPoints',
        'keysplines' => 'keySplines',
        'keytimes' => 'keyTimes',
        'lengthadjust' => 'lengthAdjust',
        'limitingconeangle' => 'limitingConeAngle',
        'markerheight' => 'markerHeight',
        'markerunits' => 'markerUnits',
        'markerwidth' => 'markerWidth',
        'maskcontentunits' => 'maskContentUnits',
        'maskunits' => 'maskUnits',
        'numoctaves' => 'numOctaves',
        'pathlength' => 'pathLength',
        'patterncontentunits' => 'patternContentUnits',
        'patterntransform' => 'patternTransform',
        'patternunits' => 'patternUnits',
        'pointsatx' => 'pointsAtX',
        'pointsaty' => 'pointsAtY',
        'pointsatz' => 'pointsAtZ',
        'preservealpha' => 'preserveAlpha',
        'preserveaspectratio' => 'preserveAspectRatio',
        'primitiveunits' => 'primitiveUnits',
        'refx' => 'refX',
        'refy' => 'refY',
        'repeatcount' => 'repeatCount',
        'repeatdur' => 'repeatDur',
        'requiredextensions' => 'requiredExtensions',
        'requiredfeatures' => 'requiredFeatures',
        'specularconstant' => 'specularConstant',
        'specularexponent' => 'specularExponent',
        'spreadmethod' => 'spreadMethod',
        'startoffset' => 'startOffset',
        'stddeviation' => 'stdDeviation',
        'stitchtiles' => 'stitchTiles',
        'surfacescale' => 'surfaceScale',
        'systemlanguage' => 'systemLanguage',
        'tablevalues' => 'tableValues',
        'targetx' => 'targetX',
        'targety' => 'targetY',
        'textlength' => 'textLength',
        'viewbox' => 'viewBox',
        'viewtarget' => 'viewTarget',
        'xchannelselector' => 'xChannelSelector',
        'ychannelselector' => 'yChannelSelector',
        'zoomandpan' => 'zoomAndPan',
    );

    // Other methods of this class treat 'attr' as optional, so do the same
    // here rather than iterating an undefined entry.
    if (empty($token['attr'])) {
        return $token;
    }

    // Indexed writes avoid leaving a foreach reference behind.
    foreach ($token['attr'] as $i => $kp) {
        if (isset($lookup[$kp['name']])) {
            $token['attr'][$i]['name'] = $lookup[$kp['name']];
        }
    }

    return $token;
}
/**
 * Adjusts foreign attributes on a token: each known prefixed attribute
 * name (xlink:*, xml:*, xmlns, xmlns:xlink) is replaced by an array of
 * (prefix, local name, namespace URI).
 *
 * @param array $token Tag token; its 'attr' list is optional.
 * @return array The token with adjusted attribute names. A token without
 *               attributes is returned unchanged.
 */
public function adjustForeignAttributes($token) {
    static $lookup = array(
        'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
        'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
        'xlink:href' => array('xlink', 'href', self::NS_XLINK),
        'xlink:role' => array('xlink', 'role', self::NS_XLINK),
        'xlink:show' => array('xlink', 'show', self::NS_XLINK),
        'xlink:title' => array('xlink', 'title', self::NS_XLINK),
        'xlink:type' => array('xlink', 'type', self::NS_XLINK),
        'xml:base' => array('xml', 'base', self::NS_XML),
        'xml:lang' => array('xml', 'lang', self::NS_XML),
        'xml:space' => array('xml', 'space', self::NS_XML),
        'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
        'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
    );

    // Other methods of this class treat 'attr' as optional, so do the same
    // here rather than iterating an undefined entry.
    if (empty($token['attr'])) {
        return $token;
    }

    // Indexed writes avoid leaving a foreach reference behind.
    foreach ($token['attr'] as $i => $kp) {
        if (isset($lookup[$kp['name']])) {
            $token['attr'][$i]['name'] = $lookup[$kp['name']];
        }
    }

    return $token;
}
/**
 * Creates an element in the given namespace for a tag token, copies the
 * token's attributes onto it, inserts it and makes it the current node.
 *
 * An attribute name may be a plain string or an array whose index 1 is
 * the local name and index 2 the namespace URI. An attribute that the
 * element already reports via hasAttributeNS() is skipped.
 *
 * @param array  $token        Tag token ('name', optional 'attr').
 * @param string $namespaceURI Namespace for the new element.
 */
public function insertForeignElement($token, $namespaceURI) {
    $el = $this->dom->createElementNS($namespaceURI, $token['name']);

    $attributes = empty($token['attr']) ? array() : $token['attr'];

    foreach ($attributes as $kp) {
        $attr = $kp['name'];
        $ns   = self::NS_HTML;
        if (is_array($attr)) {
            // Read the namespace before $attr is overwritten by the local name.
            $ns   = $attr[2];
            $attr = $attr[1];
        }

        if ($el->hasAttributeNS($ns, $attr)) {
            continue;
        }

        // XSKETCHY: work around godawful libxml bug
        if ($ns === self::NS_XLINK) {
            $el->setAttribute('xlink:'.$attr, $kp['value']);
        } elseif ($ns === self::NS_HTML) {
            // Another godawful libxml bug
            $el->setAttribute($attr, $kp['value']);
        } else {
            $el->setAttributeNS($ns, $attr, $kp['value']);
        }
    }

    $this->appendToRealParent($el);
    $this->stack[] = $el;
    // XERROR: see below
    /* If the newly created element has an xmlns attribute in the XMLNS
     * namespace whose value is not exactly the same as the element's
     * namespace, that is a parse error. Similarly, if the newly created
     * element has an xmlns:xlink attribute in the XMLNS namespace whose
     * value is not the XLink Namespace, that is a parse error. */
}
/**
 * Normalizes the document and returns the parse result.
 *
 * @return mixed $this->dom when not parsing a fragment; otherwise the
 *               child nodes of $this->root when it is set, else the child
 *               nodes of $this->dom.
 */
public function save() {
    $this->dom->normalize();

    if (!$this->fragment) {
        return $this->dom;
    }

    if ($this->root) {
        return $this->root->childNodes;
    }

    return $this->dom->childNodes;
}