5 Copyright 2007 Jeroen van der Meer <http://jero.net/>
6 Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice shall be included
17 in all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 // Tags for FIX ME!!!: (in order of priority)
30 // XXX - should be fixed NAO!
31 // XERROR - with regards to parse errors
32 // XSCRIPT - with regards to scripting mode
33 // XENCODING - with regards to encoding (for reparsing tests)
35 class HTML5_TreeBuilder {
// Stack of open elements. New elements are pushed onto the end, so the
// last entry is the current node and $stack[0] is the root html element.
36 public $stack = array();
// Content model flag for the tokeniser; emitToken() sets it to
// HTML5_Tokenizer::CDATA or HTML5_Tokenizer::PLAINTEXT for certain tags.
37 public $content_model;
// Insertion mode that was active before switching to IN_CDATA_RCDATA.
40 private $original_mode;
// NOTE(review): not referenced in the visible part of this file.
41 private $secondary_mode;
43 // Whether or not normal insertion of nodes should actually foster
44 // parent (used in one case in spec)
45 private $foster_parent = false;
// List of active formatting elements; entries are either element nodes
// or the marker constant.
46 private $a_formatting = array();
// The spec's "head element pointer" and "form element pointer".
48 private $head_pointer = null;
49 private $form_pointer = null;
// The spec's "frameset-ok" flag: true means "ok", false means "not ok".
51 private $flag_frameset_ok = true;
52 private $flag_force_quirks = false;
// Set to true by emitToken() when it ignores the token it was given;
// reset to false at the start of every emitToken() call.
53 private $ignored = false;
// One of NO_QUIRKS, QUIRKS_MODE or LIMITED_QUIRKS_MODE once emitToken()
// has decided on a mode; null until then.
54 private $quirks_mode = null;
55 // This gets set to 2 when we want to ignore the next LF character, and
56 // is decremented at the beginning of each processed token (this way,
57 // code can check for (bool)$ignore_lf_token, but it phases out
// again by itself once the following token has been processed).
59 private $ignore_lf_token = 0;
// NOTE(review): presumably true when parsing an HTML fragment - confirm
// against the code that sets it (not visible here).
60 private $fragment = false;
// Tag-name lists taken from the spec. NOTE(review): presumably consulted
// when checking scope and categorising elements - the consumers are not
// visible in this part of the file.
63 private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
64 private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
65 private $special = array('address','area','article','aside','base','basefont','bgsound',
66 'blockquote','body','br','center','col','colgroup','command','dd','details','dialog','dir','div','dl',
67 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
68 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
69 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
70 'p','param','plaintext','pre','script','select','spacer','style',
71 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
73 // Tree construction modes
// These are the insertion modes stored in $this->mode and dispatched on
// by emitToken(). strConst() maps a value back to its constant name.
75 const BEFORE_HTML = 1;
76 const BEFORE_HEAD = 2;
78 const IN_HEAD_NOSCRIPT = 4;
81 const IN_CDATA_RCDATA = 7;
84 const IN_COLUMN_GROUP = 10;
85 const IN_TABLE_BODY = 11;
89 const IN_SELECT_IN_TABLE= 15;
90 const IN_FOREIGN_CONTENT= 16;
91 const AFTER_BODY = 17;
92 const IN_FRAMESET = 18;
93 const AFTER_FRAMESET = 19;
94 const AFTER_AFTER_BODY = 20;
95 const AFTER_AFTER_FRAMESET = 21;
/**
 * Converts a magic number to a readable name. Use for debugging.
 *
 * The name is found by reflecting over the constants of
 * HTML5_TreeBuilder and inverting the name => value map. Only integer
 * and string constants can be inverted, so constants with other values
 * (for example a null namespace constant) are left out instead of
 * making array_flip() emit a warning. If two constants share a value,
 * the one declared last wins, as it did with array_flip().
 *
 * @param int|string $number Constant value to look up.
 * @return string|null Name of the matching constant, or null if no
 *                     constant of this class has that value.
 */
private function strConst($number) {
    // The set of class constants cannot change at runtime, so the
    // reverse map is built once and reused by later calls.
    static $lookup = null;
    if ($lookup === null) {
        $lookup = array();
        $r = new ReflectionClass('HTML5_TreeBuilder');
        foreach ($r->getConstants() as $name => $value) {
            if (is_int($value) || is_string($value)) {
                $lookup[$value] = $name;
            }
        }
    }
    // Unknown values yield null without raising an undefined-index notice.
    return isset($lookup[$number]) ? $lookup[$number] : null;
}
109 // The different types of elements.
// emitToken() compares these against the result of getElementCategory().
112 const FORMATTING = 102;
113 const PHRASING = 103;
115 // Quirks modes in $quirks_mode
116 const NO_QUIRKS = 200;
117 const QUIRKS_MODE = 201;
118 const LIMITED_QUIRKS_MODE = 202;
120 // Marker to be placed in $a_formatting
123 // Namespaces for foreign content
// NS_HTML is deliberately null rather than a namespace URI (see the
// inline note on its declaration).
124 const NS_HTML = null; // to prevent DOM from requiring NS on everything
125 const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
126 const NS_SVG = 'http://www.w3.org/2000/svg';
127 const NS_XLINK = 'http://www.w3.org/1999/xlink';
128 const NS_XML = 'http://www.w3.org/XML/1998/namespace';
129 const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
/**
 * Prepares a fresh tree builder.
 *
 * Creates the DOMDocument that parsed nodes are appended to, configures
 * it, and puts the builder into the INITIAL insertion mode.
 */
public function __construct() {
    // Configure the document through a local handle, then store it.
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->dom = $document;
    $this->mode = self::INITIAL;
}
141 // Process tag tokens
142 public function emitToken($token, $mode = null) {
143 // XXX: ignore parse errors... why are we emitting them, again?
144 if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
145 if ($mode === null) $mode = $this->mode;
148 $backtrace = debug_backtrace();
149 if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
150 echo $this->strConst($mode);
151 if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
155 $this->printActiveFormattingElements();
156 if ($this->foster_parent) echo " -> this is a foster parent mode\n";
159 if ($this->ignore_lf_token) $this->ignore_lf_token--;
160 $this->ignored = false;
162 $token['name'] = str_replace(':', '-', $token['name']);
163 // indenting is a little wonky, this can be changed later on
168 /* A character token that is one of U+0009 CHARACTER TABULATION,
169 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
170 if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
171 /* Ignore the token. */
172 $this->ignored = true;
173 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
175 $token['name'] !== 'html' || !empty($token['public']) ||
176 !empty($token['system']) || $token !== 'about:legacy-compat'
178 /* If the DOCTYPE token's name is not a case-sensitive match
179 * for the string "html", or if the token's public identifier
180 * is not missing, or if the token's system identifier is
181 * neither missing nor a case-sensitive match for the string
182 * "about:legacy-compat", then there is a parse error (this
183 * is the DOCTYPE parse error). */
184 // DOCTYPE parse error
186 /* Append a DocumentType node to the Document node, with the name
187 * attribute set to the name given in the DOCTYPE token, or the
188 * empty string if the name was missing; the publicId attribute
189 * set to the public identifier given in the DOCTYPE token, or
190 * the empty string if the public identifier was missing; the
191 * systemId attribute set to the system identifier given in the
192 * DOCTYPE token, or the empty string if the system identifier
193 * was missing; and the other attributes specific to
194 * DocumentType objects set to null and empty lists as
195 * appropriate. Associate the DocumentType node with the
196 * Document object so that it is returned as the value of the
197 * doctype attribute of the Document object. */
198 if (!isset($token['public'])) $token['public'] = null;
199 if (!isset($token['system'])) $token['system'] = null;
200 // Yes this is hacky. I'm kind of annoyed that I can't appendChild
201 // a doctype to DOMDocument. Maybe I haven't chanted the right
203 $impl = new DOMImplementation();
204 // This call can fail for particularly pathological cases (namely,
205 // the qualifiedName parameter ($token['name']) could be missing.
206 if ($token['name']) {
207 $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
208 $this->dom->appendChild($doctype);
210 // It looks like libxml's not actually *able* to express this case.
212 $this->dom->emptyDoctype = true;
214 $public = is_null($token['public']) ? false : strtolower($token['public']);
215 $system = is_null($token['system']) ? false : strtolower($token['system']);
216 $publicStartsWithForQuirks = array(
217 "+//silmaril//dtd html pro v0r11 19970101//",
218 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
219 "-//as//dtd html 3.0 aswedit + extensions//",
220 "-//ietf//dtd html 2.0 level 1//",
221 "-//ietf//dtd html 2.0 level 2//",
222 "-//ietf//dtd html 2.0 strict level 1//",
223 "-//ietf//dtd html 2.0 strict level 2//",
224 "-//ietf//dtd html 2.0 strict//",
225 "-//ietf//dtd html 2.0//",
226 "-//ietf//dtd html 2.1e//",
227 "-//ietf//dtd html 3.0//",
228 "-//ietf//dtd html 3.2 final//",
229 "-//ietf//dtd html 3.2//",
230 "-//ietf//dtd html 3//",
231 "-//ietf//dtd html level 0//",
232 "-//ietf//dtd html level 1//",
233 "-//ietf//dtd html level 2//",
234 "-//ietf//dtd html level 3//",
235 "-//ietf//dtd html strict level 0//",
236 "-//ietf//dtd html strict level 1//",
237 "-//ietf//dtd html strict level 2//",
238 "-//ietf//dtd html strict level 3//",
239 "-//ietf//dtd html strict//",
240 "-//ietf//dtd html//",
241 "-//metrius//dtd metrius presentational//",
242 "-//microsoft//dtd internet explorer 2.0 html strict//",
243 "-//microsoft//dtd internet explorer 2.0 html//",
244 "-//microsoft//dtd internet explorer 2.0 tables//",
245 "-//microsoft//dtd internet explorer 3.0 html strict//",
246 "-//microsoft//dtd internet explorer 3.0 html//",
247 "-//microsoft//dtd internet explorer 3.0 tables//",
248 "-//netscape comm. corp.//dtd html//",
249 "-//netscape comm. corp.//dtd strict html//",
250 "-//o'reilly and associates//dtd html 2.0//",
251 "-//o'reilly and associates//dtd html extended 1.0//",
252 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
253 "-//spyglass//dtd html 2.0 extended//",
254 "-//sq//dtd html 2.0 hotmetal + extensions//",
255 "-//sun microsystems corp.//dtd hotjava html//",
256 "-//sun microsystems corp.//dtd hotjava strict html//",
257 "-//w3c//dtd html 3 1995-03-24//",
258 "-//w3c//dtd html 3.2 draft//",
259 "-//w3c//dtd html 3.2 final//",
260 "-//w3c//dtd html 3.2//",
261 "-//w3c//dtd html 3.2s draft//",
262 "-//w3c//dtd html 4.0 frameset//",
263 "-//w3c//dtd html 4.0 transitional//",
264 "-//w3c//dtd html experimental 19960712//",
265 "-//w3c//dtd html experimental 970421//",
266 "-//w3c//dtd w3 html//",
267 "-//w3o//dtd w3 html 3.0//",
268 "-//webtechs//dtd mozilla html 2.0//",
269 "-//webtechs//dtd mozilla html//",
271 $publicSetToForQuirks = array(
272 "-//w3o//dtd w3 html strict 3.0//",
273 "-/w3c/dtd html 4.0 transitional/en",
276 $publicStartsWithAndSystemForQuirks = array(
277 "-//w3c//dtd html 4.01 frameset//",
278 "-//w3c//dtd html 4.01 transitional//",
280 $publicStartsWithForLimitedQuirks = array(
281 "-//w3c//dtd xhtml 1.0 frameset//",
282 "-//w3c//dtd xhtml 1.0 transitional//",
284 $publicStartsWithAndSystemForLimitedQuirks = array(
285 "-//w3c//dtd html 4.01 frameset//",
286 "-//w3c//dtd html 4.01 transitional//",
288 // first, do easy checks
290 !empty($token['force-quirks']) ||
291 strtolower($token['name']) !== 'html'
293 $this->quirks_mode = self::QUIRKS_MODE;
297 foreach ($publicStartsWithAndSystemForQuirks as $x) {
298 if (strncmp($public, $x, strlen($x)) === 0) {
299 $this->quirks_mode = self::QUIRKS_MODE;
303 if (!is_null($this->quirks_mode)) break;
304 foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
305 if (strncmp($public, $x, strlen($x)) === 0) {
306 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
310 if (!is_null($this->quirks_mode)) break;
312 foreach ($publicSetToForQuirks as $x) {
313 if ($public === $x) {
314 $this->quirks_mode = self::QUIRKS_MODE;
318 if (!is_null($this->quirks_mode)) break;
319 foreach ($publicStartsWithForLimitedQuirks as $x) {
320 if (strncmp($public, $x, strlen($x)) === 0) {
321 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
324 if (!is_null($this->quirks_mode)) break;
325 if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
326 $this->quirks_mode = self::QUIRKS_MODE;
329 foreach ($publicStartsWithForQuirks as $x) {
330 if (strncmp($public, $x, strlen($x)) === 0) {
331 $this->quirks_mode = self::QUIRKS_MODE;
335 if (is_null($this->quirks_mode)) {
336 $this->quirks_mode = self::NO_QUIRKS;
340 $this->mode = self::BEFORE_HTML;
343 /* Switch the insertion mode to "before html", then reprocess the
345 $this->mode = self::BEFORE_HTML;
346 $this->quirks_mode = self::QUIRKS_MODE;
347 $this->emitToken($token);
351 case self::BEFORE_HTML:
353 /* A DOCTYPE token */
354 if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
355 // Parse error. Ignore the token.
356 $this->ignored = true;
358 /* A comment token */
359 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
360 /* Append a Comment node to the Document object with the data
361 attribute set to the data given in the comment token. */
362 $comment = $this->dom->createComment($token['data']);
363 $this->dom->appendChild($comment);
365 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
366 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
368 } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
369 /* Ignore the token. */
370 $this->ignored = true;
372 /* A start tag whose tag name is "html" */
373 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
374 /* Create an element for the token in the HTML namespace. Append it
375 * to the Document object. Put this element in the stack of open
377 $html = $this->insertElement($token, false);
378 $this->dom->appendChild($html);
379 $this->stack[] = $html;
381 $this->mode = self::BEFORE_HEAD;
384 /* Create an html element. Append it to the Document object. Put
385 * this element in the stack of open elements. */
386 $html = $this->dom->createElementNS(self::NS_HTML, 'html');
387 $this->dom->appendChild($html);
388 $this->stack[] = $html;
390 /* Switch the insertion mode to "before head", then reprocess the
392 $this->mode = self::BEFORE_HEAD;
393 $this->emitToken($token);
397 case self::BEFORE_HEAD:
399 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
400 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
402 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
403 /* Ignore the token. */
404 $this->ignored = true;
406 /* A comment token */
407 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
408 /* Append a Comment node to the current node with the data attribute
409 set to the data given in the comment token. */
410 $this->insertComment($token['data']);
412 /* A DOCTYPE token */
413 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
414 /* Parse error. Ignore the token */
415 $this->ignored = true;
418 /* A start tag token with the tag name "html" */
419 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
420 /* Process the token using the rules for the "in body"
422 $this->processWithRulesFor($token, self::IN_BODY);
424 /* A start tag token with the tag name "head" */
425 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
426 /* Insert an HTML element for the token. */
427 $element = $this->insertElement($token);
429 /* Set the head element pointer to this new element node. */
430 $this->head_pointer = $element;
432 /* Change the insertion mode to "in head". */
433 $this->mode = self::IN_HEAD;
435 /* An end tag whose tag name is one of: "head", "body", "html", "br" */
437 $token['type'] === HTML5_Tokenizer::ENDTAG && (
438 $token['name'] === 'head' || $token['name'] === 'body' ||
439 $token['name'] === 'html' || $token['name'] === 'br'
441 /* Act as if a start tag token with the tag name "head" and no
442 * attributes had been seen, then reprocess the current token. */
443 $this->emitToken(array(
445 'type' => HTML5_Tokenizer::STARTTAG,
448 $this->emitToken($token);
450 /* Any other end tag */
451 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
452 /* Parse error. Ignore the token. */
453 $this->ignored = true;
456 /* Act as if a start tag token with the tag name "head" and no
457 * attributes had been seen, then reprocess the current token.
458 * Note: This will result in an empty head element being
459 * generated, with the current token being reprocessed in the
460 * "after head" insertion mode. */
461 $this->emitToken(array(
463 'type' => HTML5_Tokenizer::STARTTAG,
466 $this->emitToken($token);
472 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
473 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
475 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
476 /* Insert the character into the current node. */
477 $this->insertText($token['data']);
479 /* A comment token */
480 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
481 /* Append a Comment node to the current node with the data attribute
482 set to the data given in the comment token. */
483 $this->insertComment($token['data']);
485 /* A DOCTYPE token */
486 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
487 /* Parse error. Ignore the token. */
488 $this->ignored = true;
491 /* A start tag whose tag name is "html" */
492 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
493 $token['name'] === 'html') {
494 $this->processWithRulesFor($token, self::IN_BODY);
496 /* A start tag whose tag name is one of: "base", "command", "link" */
497 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
498 ($token['name'] === 'base' || $token['name'] === 'command' ||
499 $token['name'] === 'link')) {
500 /* Insert an HTML element for the token. Immediately pop the
501 * current node off the stack of open elements. */
502 $this->insertElement($token);
503 array_pop($this->stack);
505 // YYY: Acknowledge the token's self-closing flag, if it is set.
507 /* A start tag whose tag name is "meta" */
508 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
509 /* Insert an HTML element for the token. Immediately pop the
510 * current node off the stack of open elements. */
511 $this->insertElement($token);
512 array_pop($this->stack);
514 // XERROR: Acknowledge the token's self-closing flag, if it is set.
516 // XENCODING: If the element has a charset attribute, and its value is a
517 // supported encoding, and the confidence is currently tentative,
518 // then change the encoding to the encoding given by the value of
519 // the charset attribute.
521 // Otherwise, if the element has a content attribute, and applying
522 // the algorithm for extracting an encoding from a Content-Type to
523 // its value returns a supported encoding encoding, and the
524 // confidence is currently tentative, then change the encoding to
525 // the encoding encoding.
527 /* A start tag with the tag name "title" */
528 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
529 $this->insertRCDATAElement($token);
531 /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
532 * A start tag whose tag name is one of: "noframes", "style" */
533 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
534 ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
535 // XSCRIPT: Scripting flag not respected
536 $this->insertCDATAElement($token);
538 // XSCRIPT: Scripting flag disable not implemented
540 /* A start tag with the tag name "script" */
541 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
542 /* 1. Create an element for the token in the HTML namespace. */
543 $node = $this->insertElement($token, false);
545 /* 2. Mark the element as being "parser-inserted" */
548 /* 3. If the parser was originally created for the HTML
549 * fragment parsing algorithm, then mark the script element as
550 * "already executed". (fragment case) */
553 /* 4. Append the new element to the current node and push it onto
554 * the stack of open elements. */
555 end($this->stack)->appendChild($node);
556 $this->stack[] = $node;
557 // I guess we could squash these together
559 /* 6. Let the original insertion mode be the current insertion mode. */
560 $this->original_mode = $this->mode;
561 /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
562 $this->mode = self::IN_CDATA_RCDATA;
563 /* 5. Switch the tokeniser's content model flag to the CDATA state. */
564 $this->content_model = HTML5_Tokenizer::CDATA;
566 /* An end tag with the tag name "head" */
567 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
568 /* Pop the current node (which will be the head element) off the stack of open elements. */
569 array_pop($this->stack);
571 /* Change the insertion mode to "after head". */
572 $this->mode = self::AFTER_HEAD;
574 // Slight logic inversion here to minimize duplication
575 /* A start tag with the tag name "head". */
576 /* An end tag whose tag name is not one of: "body", "html", "br" */
577 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
578 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
579 $token['name'] !== 'body' && $token['name'] !== 'br')) {
580 // Parse error. Ignore the token.
581 $this->ignored = true;
585 /* Act as if an end tag token with the tag name "head" had been
586 * seen, and reprocess the current token. */
587 $this->emitToken(array(
589 'type' => HTML5_Tokenizer::ENDTAG
592 /* Then, reprocess the current token. */
593 $this->emitToken($token);
597 case self::IN_HEAD_NOSCRIPT:
598 if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
600 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
601 $this->processWithRulesFor($token, self::IN_BODY);
602 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
603 /* Pop the current node (which will be a noscript element) from the
604 * stack of open elements; the new current node will be a head
606 array_pop($this->stack);
607 $this->mode = self::IN_HEAD;
609 ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
610 ($token['type'] === HTML5_Tokenizer::COMMENT) ||
611 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
612 $token['name'] === 'link' || $token['name'] === 'meta' ||
613 $token['name'] === 'noframes' || $token['name'] === 'style'))) {
614 $this->processWithRulesFor($token, self::IN_HEAD);
617 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
618 $token['name'] === 'head' || $token['name'] === 'noscript')) ||
619 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
620 $token['name'] !== 'br')) {
624 $this->emitToken(array(
625 'type' => HTML5_Tokenizer::ENDTAG,
626 'name' => 'noscript',
628 $this->emitToken($token);
632 case self::AFTER_HEAD:
633 /* Handle the token as follows: */
635 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
636 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
638 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
639 /* Append the character to the current node. */
640 $this->insertText($token['data']);
642 /* A comment token */
643 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
644 /* Append a Comment node to the current node with the data attribute
645 set to the data given in the comment token. */
646 $this->insertComment($token['data']);
648 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
651 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
652 $this->processWithRulesFor($token, self::IN_BODY);
654 /* A start tag token with the tag name "body" */
655 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
656 $this->insertElement($token);
658 /* Set the frameset-ok flag to "not ok". */
659 $this->flag_frameset_ok = false;
661 /* Change the insertion mode to "in body". */
662 $this->mode = self::IN_BODY;
664 /* A start tag token with the tag name "frameset" */
665 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
666 /* Insert a frameset element for the token. */
667 $this->insertElement($token);
669 /* Change the insertion mode to "in frameset". */
670 $this->mode = self::IN_FRAMESET;
672 /* A start tag token whose tag name is one of: "base", "link", "meta",
673 "script", "style", "title" */
674 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
675 array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
677 /* Push the node pointed to by the head element pointer onto the
678 * stack of open elements. */
679 $this->stack[] = $this->head_pointer;
680 $this->processWithRulesFor($token, self::IN_HEAD);
681 array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
683 // inversion of specification
685 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
686 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
687 $token['name'] !== 'body' && $token['name'] !== 'html' &&
688 $token['name'] !== 'br')) {
693 $this->emitToken(array(
695 'type' => HTML5_Tokenizer::STARTTAG,
698 $this->flag_frameset_ok = true;
699 $this->emitToken($token);
704 /* Handle the token as follows: */
706 switch($token['type']) {
707 /* A character token */
708 case HTML5_Tokenizer::CHARACTER:
709 case HTML5_Tokenizer::SPACECHARACTER:
710 /* Reconstruct the active formatting elements, if any. */
711 $this->reconstructActiveFormattingElements();
713 /* Append the token's character to the current node. */
714 $this->insertText($token['data']);
716 /* If the token is not one of U+0009 CHARACTER TABULATION,
717 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
718 * SPACE, then set the frameset-ok flag to "not ok". */
719 // i.e., if any of the characters is not whitespace
720 if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
721 $this->flag_frameset_ok = false;
725 /* A comment token */
726 case HTML5_Tokenizer::COMMENT:
727 /* Append a Comment node to the current node with the data
728 attribute set to the data given in the comment token. */
729 $this->insertComment($token['data']);
732 case HTML5_Tokenizer::DOCTYPE:
736 case HTML5_Tokenizer::STARTTAG:
737 switch($token['name']) {
740 /* For each attribute on the token, check to see if the
741 * attribute is already present on the top element of the
742 * stack of open elements. If it is not, add the attribute
743 * and its corresponding value to that element. */
744 foreach($token['attr'] as $attr) {
745 if(!$this->stack[0]->hasAttribute($attr['name'])) {
746 $this->stack[0]->setAttribute($attr['name'], $attr['value']);
751 case 'base': case 'command': case 'link': case 'meta': case 'noframes':
752 case 'script': case 'style': case 'title':
753 /* Process the token as if the insertion mode had been "in
755 $this->processWithRulesFor($token, self::IN_HEAD);
758 /* A start tag token with the tag name "body" */
760 /* Parse error. If the second element on the stack of open
761 elements is not a body element, or, if the stack of open
762 elements has only one node on it, then ignore the token.
764 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
765 $this->ignored = true;
768 /* Otherwise, for each attribute on the token, check to see
769 if the attribute is already present on the body element (the
770 second element) on the stack of open elements. If it is not,
771 add the attribute and its corresponding value to that
774 foreach($token['attr'] as $attr) {
775 if(!$this->stack[1]->hasAttribute($attr['name'])) {
776 $this->stack[1]->setAttribute($attr['name'], $attr['value']);
784 /* If the second element on the stack of open elements is
785 * not a body element, or, if the stack of open elements
786 * has only one node on it, then ignore the token.
788 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
789 $this->ignored = true;
791 } elseif (!$this->flag_frameset_ok) {
792 $this->ignored = true;
795 /* 1. Remove the second element on the stack of open
796 * elements from its parent node, if it has one. */
797 if($this->stack[1]->parentNode) {
798 $this->stack[1]->parentNode->removeChild($this->stack[1]);
801 /* 2. Pop all the nodes from the bottom of the stack of
802 * open elements, from the current node up to the root
804 array_splice($this->stack, 1);
806 $this->insertElement($token);
807 $this->mode = self::IN_FRAMESET;
811 // in spec, there is a diversion here
813 case 'address': case 'article': case 'aside': case 'blockquote':
814 case 'center': case 'datagrid': case 'details': case 'dialog': case 'dir':
815 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
816 case 'header': case 'hgroup': case 'menu': case 'nav':
817 case 'ol': case 'p': case 'section': case 'ul':
818 /* If the stack of open elements has a p element in scope,
819 then act as if an end tag with the tag name p had been
821 if($this->elementInScope('p')) {
822 $this->emitToken(array(
824 'type' => HTML5_Tokenizer::ENDTAG
828 /* Insert an HTML element for the token. */
829 $this->insertElement($token);
832 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
834 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
835 /* If the stack of open elements has a p element in scope,
836 then act as if an end tag with the tag name p had been seen. */
837 if($this->elementInScope('p')) {
838 $this->emitToken(array(
840 'type' => HTML5_Tokenizer::ENDTAG
844 /* If the current node is an element whose tag name is one
845 * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
846 * parse error; pop the current node off the stack of open
848 $peek = array_pop($this->stack);
849 if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
852 $this->stack[] = $peek;
855 /* Insert an HTML element for the token. */
856 $this->insertElement($token);
859 case 'pre': case 'listing':
860 /* If the stack of open elements has a p element in scope,
861 then act as if an end tag with the tag name p had been seen. */
862 if($this->elementInScope('p')) {
863 $this->emitToken(array(
865 'type' => HTML5_Tokenizer::ENDTAG
868 $this->insertElement($token);
869 /* If the next token is a U+000A LINE FEED (LF) character
870 * token, then ignore that token and move on to the next
871 * one. (Newlines at the start of pre blocks are ignored as
872 * an authoring convenience.) */
873 $this->ignore_lf_token = 2;
874 $this->flag_frameset_ok = false;
877 /* A start tag whose tag name is "form" */
879 /* If the form element pointer is not null, ignore the
880 token with a parse error. */
881 if($this->form_pointer !== null) {
882 $this->ignored = true;
887 /* If the stack of open elements has a p element in
888 scope, then act as if an end tag with the tag name p
890 if($this->elementInScope('p')) {
891 $this->emitToken(array(
893 'type' => HTML5_Tokenizer::ENDTAG
897 /* Insert an HTML element for the token, and set the
898 form element pointer to point to the element created. */
899 $element = $this->insertElement($token);
900 $this->form_pointer = $element;
904 // condensed specification
905 case 'li': case 'dd': case 'dt':
906 /* 1. Set the frameset-ok flag to "not ok". */
907 $this->flag_frameset_ok = false;
909 $stack_length = count($this->stack) - 1;
910 for($n = $stack_length; 0 <= $n; $n--) {
911 /* 2. Initialise node to be the current node (the
912 bottommost node of the stack). */
914 $node = $this->stack[$n];
915 $cat = $this->getElementCategory($node);
918 /* 3. If node is an li element, then act as if an end
919 * tag with the tag name "li" had been seen, then jump
920 * to the last step. */
921 // for case 'dd': case 'dt':
922 /* If node is a dd or dt element, then act as if an end
923 * tag with the same tag name as node had been seen, then
924 * jump to the last step. */
925 if(($token['name'] === 'li' && $node->tagName === 'li') ||
926 ($token['name'] !== 'li' && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { // limited conditional
927 $this->emitToken(array(
928 'type' => HTML5_Tokenizer::ENDTAG,
929 'name' => $node->tagName,
934 /* 4. If node is not in the formatting category, and is
935 not in the phrasing category, and is not an address,
936 div or p element, then stop this algorithm. */
937 if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
938 $node->tagName !== 'address' && $node->tagName !== 'div' &&
939 $node->tagName !== 'p') {
943 /* 5. Otherwise, set node to the previous entry in the
944 * stack of open elements and return to step 2. */
947 /* 6. This is the last step. */
949 /* If the stack of open elements has a p element in scope,
950 then act as if an end tag with the tag name p had been
952 if($this->elementInScope('p')) {
953 $this->emitToken(array(
955 'type' => HTML5_Tokenizer::ENDTAG
959 /* Finally, insert an HTML element with the same tag
960 name as the token's. */
961 $this->insertElement($token);
964 /* A start tag token whose tag name is "plaintext" */
966 /* If the stack of open elements has a p element in scope,
967 then act as if an end tag with the tag name p had been
969 if($this->elementInScope('p')) {
970 $this->emitToken(array(
972 'type' => HTML5_Tokenizer::ENDTAG
976 /* Insert an HTML element for the token. */
977 $this->insertElement($token);
979 $this->content_model = HTML5_Tokenizer::PLAINTEXT;
984 /* A start tag whose tag name is "a" */
986 /* If the list of active formatting elements contains
987 an element whose tag name is "a" between the end of the
988 list and the last marker on the list (or the start of
989 the list if there is no marker on the list), then this
990 is a parse error; act as if an end tag with the tag name
991 "a" had been seen, then remove that element from the list
992 of active formatting elements and the stack of open
993 elements if the end tag didn't already remove it (it
994 might not have if the element is not in table scope). */
995 $leng = count($this->a_formatting);
997 for($n = $leng - 1; $n >= 0; $n--) {
998 if($this->a_formatting[$n] === self::MARKER) {
1001 } elseif($this->a_formatting[$n]->tagName === 'a') {
1002 $a = $this->a_formatting[$n];
1003 $this->emitToken(array(
1005 'type' => HTML5_Tokenizer::ENDTAG
1007 if (in_array($a, $this->a_formatting)) {
1008 $a_i = array_search($a, $this->a_formatting, true);
1009 if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
1011 if (in_array($a, $this->stack)) {
1012 $a_i = array_search($a, $this->stack, true);
1013 if ($a_i !== false) array_splice($this->stack, $a_i, 1);
1019 /* Reconstruct the active formatting elements, if any. */
1020 $this->reconstructActiveFormattingElements();
1022 /* Insert an HTML element for the token. */
1023 $el = $this->insertElement($token);
1025 /* Add that element to the list of active formatting
1027 $this->a_formatting[] = $el;
1030 case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1031 case 's': case 'small': case 'strike':
1032 case 'strong': case 'tt': case 'u':
1033 /* Reconstruct the active formatting elements, if any. */
1034 $this->reconstructActiveFormattingElements();
1036 /* Insert an HTML element for the token. */
1037 $el = $this->insertElement($token);
1039 /* Add that element to the list of active formatting
1041 $this->a_formatting[] = $el;
1045 /* Reconstruct the active formatting elements, if any. */
1046 $this->reconstructActiveFormattingElements();
1048 /* If the stack of open elements has a nobr element in
1049 * scope, then this is a parse error; act as if an end tag
1050 * with the tag name "nobr" had been seen, then once again
1051 * reconstruct the active formatting elements, if any. */
1052 if ($this->elementInScope('nobr')) {
1053 $this->emitToken(array(
1055 'type' => HTML5_Tokenizer::ENDTAG,
1057 $this->reconstructActiveFormattingElements();
1060 /* Insert an HTML element for the token. */
1061 $el = $this->insertElement($token);
1063 /* Add that element to the list of active formatting
1065 $this->a_formatting[] = $el;
1068 // another diversion
1070 /* A start tag token whose tag name is "button" */
1072 /* If the stack of open elements has a button element in scope,
1073 then this is a parse error; act as if an end tag with the tag
1074 name "button" had been seen, then reprocess the token. (We don't
1075 do that. Unnecessary.) (I hope you're right! -- ezyang) */
1076 if($this->elementInScope('button')) {
1077 $this->emitToken(array(
1079 'type' => HTML5_Tokenizer::ENDTAG
1083 /* Reconstruct the active formatting elements, if any. */
1084 $this->reconstructActiveFormattingElements();
1086 /* Insert an HTML element for the token. */
1087 $this->insertElement($token);
1089 /* Insert a marker at the end of the list of active
1090 formatting elements. */
1091 $this->a_formatting[] = self::MARKER;
1093 $this->flag_frameset_ok = false;
1096 case 'applet': case 'marquee': case 'object':
1097 /* Reconstruct the active formatting elements, if any. */
1098 $this->reconstructActiveFormattingElements();
1100 /* Insert an HTML element for the token. */
1101 $this->insertElement($token);
1103 /* Insert a marker at the end of the list of active
1104 formatting elements. */
1105 $this->a_formatting[] = self::MARKER;
1107 $this->flag_frameset_ok = false;
1112 /* A start tag whose tag name is "table" */
1114 /* If the stack of open elements has a p element in scope,
1115 then act as if an end tag with the tag name p had been seen. */
1116 if($this->quirks_mode !== self::QUIRKS_MODE &&
1117 $this->elementInScope('p')) {
1118 $this->emitToken(array(
1120 'type' => HTML5_Tokenizer::ENDTAG
1124 /* Insert an HTML element for the token. */
1125 $this->insertElement($token);
1127 $this->flag_frameset_ok = false;
1129 /* Change the insertion mode to "in table". */
1130 $this->mode = self::IN_TABLE;
1133 /* A start tag whose tag name is one of: "area", "basefont",
1134 "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1135 case 'area': case 'basefont': case 'bgsound': case 'br':
1136 case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1138 /* Reconstruct the active formatting elements, if any. */
1139 $this->reconstructActiveFormattingElements();
1141 /* Insert an HTML element for the token. */
1142 $this->insertElement($token);
1144 /* Immediately pop the current node off the stack of open elements. */
1145 array_pop($this->stack);
1147 // YYY: Acknowledge the token's self-closing flag, if it is set.
1149 $this->flag_frameset_ok = false;
1152 case 'param': case 'source':
1153 /* Insert an HTML element for the token. */
1154 $this->insertElement($token);
1156 /* Immediately pop the current node off the stack of open elements. */
1157 array_pop($this->stack);
1159 // YYY: Acknowledge the token's self-closing flag, if it is set.
1162 /* A start tag whose tag name is "hr" */
1164 /* If the stack of open elements has a p element in scope,
1165 then act as if an end tag with the tag name p had been seen. */
1166 if($this->elementInScope('p')) {
1167 $this->emitToken(array(
1169 'type' => HTML5_Tokenizer::ENDTAG
1173 /* Insert an HTML element for the token. */
1174 $this->insertElement($token);
1176 /* Immediately pop the current node off the stack of open elements. */
1177 array_pop($this->stack);
1179 // YYY: Acknowledge the token's self-closing flag, if it is set.
1181 $this->flag_frameset_ok = false;
1184 /* A start tag whose tag name is "image" */
1186 /* Parse error. Change the token's tag name to "img" and
1187 reprocess it. (Don't ask.) */
1188 $token['name'] = 'img';
1189 $this->emitToken($token);
1192 /* A start tag whose tag name is "isindex" */
1196 /* If the form element pointer is not null,
1197 then ignore the token. */
1198 if($this->form_pointer === null) {
1199 /* Act as if a start tag token with the tag name "form" had
1201 /* If the token has an attribute called "action", set
1202 * the action attribute on the resulting form
1203 * element to the value of the "action" attribute of
1206 $action = $this->getAttr($token, 'action');
1207 if ($action !== false) {
1208 $attr[] = array('name' => 'action', 'value' => $action);
1210 $this->emitToken(array(
1212 'type' => HTML5_Tokenizer::STARTTAG,
1216 /* Act as if a start tag token with the tag name "hr" had
1218 $this->emitToken(array(
1220 'type' => HTML5_Tokenizer::STARTTAG,
1224 /* Act as if a start tag token with the tag name "p" had
1226 $this->emitToken(array(
1228 'type' => HTML5_Tokenizer::STARTTAG,
1232 /* Act as if a start tag token with the tag name "label"
1234 $this->emitToken(array(
1236 'type' => HTML5_Tokenizer::STARTTAG,
1240 /* Act as if a stream of character tokens had been seen. */
1241 $prompt = $this->getAttr($token, 'prompt');
1242 if ($prompt === false) {
1243 $prompt = 'This is a searchable index. '.
1244 'Insert your search keywords here: ';
1246 $this->emitToken(array(
1248 'type' => HTML5_Tokenizer::CHARACTER,
1251 /* Act as if a start tag token with the tag name "input"
1252 had been seen, with all the attributes from the "isindex"
1253 token, except with the "name" attribute set to the value
1254 "isindex" (ignoring any explicit "name" attribute). */
1256 foreach ($token['attr'] as $keypair) {
1257 if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1258 $keypair['name'] === 'prompt') continue;
1261 $attr[] = array('name' => 'name', 'value' => 'isindex');
1263 $this->emitToken(array(
1265 'type' => HTML5_Tokenizer::STARTTAG,
1269 /* Act as if an end tag token with the tag name "label"
1271 $this->emitToken(array(
1273 'type' => HTML5_Tokenizer::ENDTAG
1276 /* Act as if an end tag token with the tag name "p" had
1278 $this->emitToken(array(
1280 'type' => HTML5_Tokenizer::ENDTAG
1283 /* Act as if a start tag token with the tag name "hr" had
1285 $this->emitToken(array(
1287 'type' => HTML5_Tokenizer::STARTTAG
1290 /* Act as if an end tag token with the tag name "form" had
1292 $this->emitToken(array(
1294 'type' => HTML5_Tokenizer::ENDTAG
1297 $this->ignored = true;
1301 /* A start tag whose tag name is "textarea" */
1303 $this->insertElement($token);
1305 /* If the next token is a U+000A LINE FEED (LF)
1306 * character token, then ignore that token and move on to
1307 * the next one. (Newlines at the start of textarea
1308 * elements are ignored as an authoring convenience.)
1309 * need flag, see also <pre> */
1310 $this->ignore_lf_token = 2;
1312 $this->original_mode = $this->mode;
1313 $this->flag_frameset_ok = false;
1314 $this->mode = self::IN_CDATA_RCDATA;
1316 /* Switch the tokeniser's content model flag to the
1318 $this->content_model = HTML5_Tokenizer::RCDATA;
1321 /* A start tag token whose tag name is "xmp" */
1323 /* Reconstruct the active formatting elements, if any. */
1324 $this->reconstructActiveFormattingElements();
1326 $this->flag_frameset_ok = false;
1328 $this->insertCDATAElement($token);
1332 $this->flag_frameset_ok = false;
1333 $this->insertCDATAElement($token);
1336 case 'noembed': case 'noscript':
1337 // XSCRIPT: should check scripting flag
1338 $this->insertCDATAElement($token);
1341 /* A start tag whose tag name is "select" */
1343 /* Reconstruct the active formatting elements, if any. */
1344 $this->reconstructActiveFormattingElements();
1346 /* Insert an HTML element for the token. */
1347 $this->insertElement($token);
1349 $this->flag_frameset_ok = false;
1351 /* If the insertion mode is one of "in table", "in caption",
1352 * "in column group", "in table body", "in row", or "in
1353 * cell", then switch the insertion mode to "in select in
1354 * table". Otherwise, switch the insertion mode to "in
1357 $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
1358 $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
1359 $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
1361 $this->mode = self::IN_SELECT_IN_TABLE;
1363 $this->mode = self::IN_SELECT;
1367 case 'option': case 'optgroup':
1368 if ($this->elementInScope('option')) {
1369 $this->emitToken(array(
1371 'type' => HTML5_Tokenizer::ENDTAG,
1374 $this->reconstructActiveFormattingElements();
1375 $this->insertElement($token);
1378 case 'rp': case 'rt':
1379 /* If the stack of open elements has a ruby element in scope, then generate
1380 * implied end tags. If the current node is not then a ruby element, this is
1381 * a parse error; pop all the nodes from the current node up to the node
1382 * immediately before the bottommost ruby element on the stack of open elements.
1384 if ($this->elementInScope('ruby')) {
1385 $this->generateImpliedEndTags();
1392 $peek = array_pop($this->stack);
1393 } while ($peek->tagName !== 'ruby');
1394 $this->stack[] = $peek; // we popped one too many
1395 $this->insertElement($token);
1401 $this->reconstructActiveFormattingElements();
1402 $token = $this->adjustMathMLAttributes($token);
1403 $token = $this->adjustForeignAttributes($token);
1404 $this->insertForeignElement($token, self::NS_MATHML);
1405 if (isset($token['self-closing'])) {
1406 // XERROR: acknowledge the token's self-closing flag
1407 array_pop($this->stack);
1409 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1410 $this->secondary_mode = $this->mode;
1411 $this->mode = self::IN_FOREIGN_CONTENT;
1416 $this->reconstructActiveFormattingElements();
1417 $token = $this->adjustSVGAttributes($token);
1418 $token = $this->adjustForeignAttributes($token);
1419 $this->insertForeignElement($token, self::NS_SVG);
1420 if (isset($token['self-closing'])) {
1421 // XERROR: acknowledge the token's self-closing flag
1422 array_pop($this->stack);
1424 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1425 $this->secondary_mode = $this->mode;
1426 $this->mode = self::IN_FOREIGN_CONTENT;
1430 case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1431 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1435 /* A start tag token not covered by the previous entries */
1437 /* Reconstruct the active formatting elements, if any. */
1438 $this->reconstructActiveFormattingElements();
1440 $this->insertElement($token);
1441 /* This element will be a phrasing element. */
1446 case HTML5_Tokenizer::ENDTAG:
1447 switch($token['name']) {
1448 /* An end tag with the tag name "body" */
1450 /* If the second element in the stack of open elements is
1451 not a body element, this is a parse error. Ignore the token.
1453 if(count($this->stack) < 2 || $this->stack[1]->tagName !== 'body') {
1454 $this->ignored = true;
1456 /* Otherwise, if there is a node in the stack of open
1457 * elements that is not either a dd element, a dt
1458 * element, an li element, an optgroup element, an
1459 * option element, a p element, an rp element, an rt
1460 * element, a tbody element, a td element, a tfoot
1461 * element, a th element, a thead element, a tr element,
1462 * the body element, or the html element, then this is a
1465 // XERROR: implement this check for parse error
1468 /* Change the insertion mode to "after body". */
1469 $this->mode = self::AFTER_BODY;
1472 /* An end tag with the tag name "html" */
1474 /* Act as if an end tag with tag name "body" had been seen,
1475 then, if that token wasn't ignored, reprocess the current
1477 $this->emitToken(array(
1479 'type' => HTML5_Tokenizer::ENDTAG
1482 if (!$this->ignored) $this->emitToken($token);
1485 case 'address': case 'article': case 'aside': case 'blockquote':
1486 case 'center': case 'datagrid': case 'details': case 'dir':
1487 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
1488 case 'header': case 'hgroup': case 'listing': case 'menu':
1489 case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
1490 /* If the stack of open elements has an element in scope
1491 with the same tag name as that of the token, then generate
1492 implied end tags. */
1493 if($this->elementInScope($token['name'])) {
1494 $this->generateImpliedEndTags();
1496 /* Now, if the current node is not an element with
1497 the same tag name as that of the token, then this
1498 is a parse error. */
1499 // XERROR: implement parse error logic
1501 /* If the stack of open elements has an element in
1502 scope with the same tag name as that of the token,
1503 then pop elements from this stack until an element
1504 with that tag name has been popped from the stack. */
1506 $node = array_pop($this->stack);
1507 } while ($node->tagName !== $token['name']);
1513 /* An end tag whose tag name is "form" */
1515 /* Let node be the element that the form element pointer is set to. */
1516 $node = $this->form_pointer;
1517 /* Set the form element pointer to null. */
1518 $this->form_pointer = null;
1519 /* If node is null or the stack of open elements does not
1520 * have node in scope, then this is a parse error; ignore the token. */
1521 if ($node === null || !in_array($node, $this->stack)) {
1523 $this->ignored = true;
1525 /* 1. Generate implied end tags. */
1526 $this->generateImpliedEndTags();
1527 /* 2. If the current node is not node, then this is a parse error. */
1528 if (end($this->stack) !== $node) {
1531 /* 3. Remove node from the stack of open elements. */
1532 array_splice($this->stack, array_search($node, $this->stack, true), 1);
1537 /* An end tag whose tag name is "p" */
1539 /* If the stack of open elements has a p element in scope,
1540 then generate implied end tags, except for p elements. */
1541 if($this->elementInScope('p')) {
1542 /* Generate implied end tags, except for elements with
1543 * the same tag name as the token. */
1544 $this->generateImpliedEndTags(array('p'));
1546 /* If the current node is not a p element, then this is
1548 // XERROR: implement
1550 /* Pop elements from the stack of open elements until
1551 * an element with the same tag name as the token has
1552 * been popped from the stack. */
1554 $node = array_pop($this->stack);
1555 } while ($node->tagName !== 'p');
1559 $this->emitToken(array(
1561 'type' => HTML5_Tokenizer::STARTTAG,
1563 $this->emitToken($token);
1567 /* An end tag whose tag name is "dd", "dt", or "li" */
1568 case 'dd': case 'dt': case 'li':
1569 if($this->elementInScope($token['name'])) {
1570 $this->generateImpliedEndTags(array($token['name']));
1572 /* If the current node is not an element with the same
1573 tag name as the token, then this is a parse error. */
1574 // XERROR: implement parse error
1576 /* Pop elements from the stack of open elements until
1577 * an element with the same tag name as the token has
1578 * been popped from the stack. */
1580 $node = array_pop($this->stack);
1581 } while ($node->tagName !== $token['name']);
1588 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1590 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1591 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1593 /* If the stack of open elements has in scope an element whose
1594 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1595 generate implied end tags. */
1596 if($this->elementInScope($elements)) {
1597 $this->generateImpliedEndTags();
1599 /* Now, if the current node is not an element with the same
1600 tag name as that of the token, then this is a parse error. */
1601 // XERROR: implement parse error
1603 /* If the stack of open elements has in scope an element
1604 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1605 "h6", then pop elements from the stack until an element
1606 with one of those tag names has been popped from the stack. */
1608 $node = array_pop($this->stack);
1609 } while (!in_array($node->tagName, $elements));
1615 /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
1616 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1617 case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1618 case 'i': case 'nobr': case 's': case 'small': case 'strike':
1619 case 'strong': case 'tt': case 'u':
1620 // XERROR: generally speaking this needs parse error logic
1621 /* 1. Let the formatting element be the last element in
1622 the list of active formatting elements that:
1623 * is between the end of the list and the last scope
1624 marker in the list, if any, or the start of the list
1626 * has the same tag name as the token.
1629 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
1630 if($this->a_formatting[$a] === self::MARKER) {
1633 } elseif($this->a_formatting[$a]->tagName === $token['name']) {
1634 $formatting_element = $this->a_formatting[$a];
1635 $in_stack = in_array($formatting_element, $this->stack, true);
1641 /* If there is no such node, or, if that node is
1642 also in the stack of open elements but the element
1643 is not in scope, then this is a parse error. Abort
1644 these steps. The token is ignored. */
1645 if(!isset($formatting_element) || ($in_stack &&
1646 !$this->elementInScope($token['name']))) {
1647 $this->ignored = true;
1650 /* Otherwise, if there is such a node, but that node
1651 is not in the stack of open elements, then this is a
1652 parse error; remove the element from the list, and
1653 abort these steps. */
1654 } elseif(isset($formatting_element) && !$in_stack) {
1655 unset($this->a_formatting[$fe_af_pos]);
1656 $this->a_formatting = array_merge($this->a_formatting);
1660 /* Otherwise, there is a formatting element and that
1661 * element is in the stack and is in scope. If the
1662 * element is not the current node, this is a parse
1663 * error. In any case, proceed with the algorithm as
1664 * written in the following steps. */
1665 // XERROR: implement me
1667 /* 2. Let the furthest block be the topmost node in the
1668 stack of open elements that is lower in the stack
1669 than the formatting element, and is not an element in
1670 the phrasing or formatting categories. There might
1672 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1673 $length = count($this->stack);
1675 for($s = $fe_s_pos + 1; $s < $length; $s++) {
1676 $category = $this->getElementCategory($this->stack[$s]);
1678 if($category !== self::PHRASING && $category !== self::FORMATTING) {
1679 $furthest_block = $this->stack[$s];
1684 /* 3. If there is no furthest block, then the UA must
1685 skip the subsequent steps and instead just pop all
1686 the nodes from the bottom of the stack of open
1687 elements, from the current node up to the formatting
1688 element, and remove the formatting element from the
1689 list of active formatting elements. */
1690 if(!isset($furthest_block)) {
1691 for($n = $length - 1; $n >= $fe_s_pos; $n--) {
1692 array_pop($this->stack);
1695 unset($this->a_formatting[$fe_af_pos]);
1696 $this->a_formatting = array_merge($this->a_formatting);
1700 /* 4. Let the common ancestor be the element
1701 immediately above the formatting element in the stack
1702 of open elements. */
1703 $common_ancestor = $this->stack[$fe_s_pos - 1];
1705 /* 5. Let a bookmark note the position of the
1706 formatting element in the list of active formatting
1707 elements relative to the elements on either side
1708 of it in the list. */
1709 $bookmark = $fe_af_pos;
1711 /* 6. Let node and last node be the furthest block.
1712 Follow these steps: */
1713 $node = $furthest_block;
1714 $last_node = $furthest_block;
1717 for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
1718 /* 6.1 Let node be the element immediately
1719 prior to node in the stack of open elements. */
1720 $node = $this->stack[$n];
1722 /* 6.2 If node is not in the list of active
1723 formatting elements, then remove node from
1724 the stack of open elements and then go back
1726 if(!in_array($node, $this->a_formatting, true)) {
1727 array_splice($this->stack, $n, 1);
1734 /* 6.3 Otherwise, if node is the formatting
1735 element, then go to the next step in the overall
1737 if($node === $formatting_element) {
1740 /* 6.4 Otherwise, if last node is the furthest
1741 block, then move the aforementioned bookmark to
1742 be immediately after the node in the list of
1743 active formatting elements. */
1744 } elseif($last_node === $furthest_block) {
1745 $bookmark = array_search($node, $this->a_formatting, true) + 1;
1748 /* 6.5 Create an element for the token for which
1749 * the element node was created, replace the entry
1750 * for node in the list of active formatting
1751 * elements with an entry for the new element,
1752 * replace the entry for node in the stack of open
1753 * elements with an entry for the new element, and
1754 * let node be the new element. */
1755 // we don't know what the token is anymore
1756 $clone = $node->cloneNode();
1757 $a_pos = array_search($node, $this->a_formatting, true);
1758 $s_pos = array_search($node, $this->stack, true);
1759 $this->a_formatting[$a_pos] = $clone;
1760 $this->stack[$s_pos] = $clone;
1763 /* 6.6 Insert last node into node, first removing
1764 it from its previous parent node if any. */
1765 if($last_node->parentNode !== null) {
1766 $last_node->parentNode->removeChild($last_node);
1769 $node->appendChild($last_node);
1771 /* 6.7 Let last node be node. */
1774 /* 6.8 Return to step 1 of this inner set of steps. */
1777 /* 7. If the common ancestor node is a table, tbody,
1778 * tfoot, thead, or tr element, then, foster parent
1779 * whatever last node ended up being in the previous
1780 * step, first removing it from its previous parent
1782 if ($last_node->parentNode) { // common step
1783 $last_node->parentNode->removeChild($last_node);
1785 if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1786 $this->fosterParent($last_node);
1787 /* Otherwise, append whatever last node ended up being
1788 * in the previous step to the common ancestor node,
1789 * first removing it from its previous parent node if
1792 $common_ancestor->appendChild($last_node);
1795 /* 8. Create an element for the token for which the
1796 * formatting element was created. */
1797 $clone = $formatting_element->cloneNode();
1799 /* 9. Take all of the child nodes of the furthest
1800 block and append them to the element created in the
1802 while($furthest_block->hasChildNodes()) {
1803 $child = $furthest_block->firstChild;
1804 $furthest_block->removeChild($child);
1805 $clone->appendChild($child);
1808 /* 10. Append that clone to the furthest block. */
1809 $furthest_block->appendChild($clone);
1811 /* 11. Remove the formatting element from the list
1812 of active formatting elements, and insert the new element
1813 into the list of active formatting elements at the
1814 position of the aforementioned bookmark. */
1815 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
1816 array_splice($this->a_formatting, $fe_af_pos, 1);
1818 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
1819 $af_part2 = array_slice($this->a_formatting, $bookmark);
1820 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
1822 /* 12. Remove the formatting element from the stack
1823 of open elements, and insert the new element into the stack
1824 of open elements immediately below the position of the
1825 furthest block in that stack. */
1826 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1827 array_splice($this->stack, $fe_s_pos, 1);
1829 $fb_s_pos = array_search($furthest_block, $this->stack, true);
1830 $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
1831 $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
1832 $this->stack = array_merge($s_part1, array($clone), $s_part2);
1834 /* 13. Jump back to step 1 in this series of steps. */
1835 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1839 case 'applet': case 'button': case 'marquee': case 'object':
1840 /* If the stack of open elements has an element in scope whose
1841 tag name matches the tag name of the token, then generate implied
1843 if($this->elementInScope($token['name'])) {
1844 $this->generateImpliedEndTags();
1846 /* Now, if the current node is not an element with the same
1847 tag name as the token, then this is a parse error. */
1848 // XERROR: implement logic
1850 /* Pop elements from the stack of open elements until
1851 * an element with the same tag name as the token has
1852 * been popped from the stack. */
1854 $node = array_pop($this->stack);
1855 } while ($node->tagName !== $token['name']);
1857 /* Clear the list of active formatting elements up to the
1859 $keys = array_keys($this->a_formatting, self::MARKER, true);
1860 $marker = end($keys);
1862 for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
1863 array_pop($this->a_formatting);
1872 $this->emitToken(array(
1874 'type' => HTML5_Tokenizer::STARTTAG,
1878 /* An end tag token not covered by the previous entries */
1880 for($n = count($this->stack) - 1; $n >= 0; $n--) {
1881 /* Initialise node to be the current node (the bottommost
1882 node of the stack). */
1883 $node = $this->stack[$n];
1885 /* If node has the same tag name as the end tag token,
1887 if($token['name'] === $node->tagName) {
1888 /* Generate implied end tags. */
1889 $this->generateImpliedEndTags();
1891 /* If the tag name of the end tag token does not
1892 match the tag name of the current node, this is a
1894 // XERROR: implement this
1896 /* Pop all the nodes from the current node up to
1897 node, including node, then stop these steps. */
1900 $pop = array_pop($this->stack);
1901 } while ($pop !== $node);
1905 $category = $this->getElementCategory($node);
1907 if($category !== self::FORMATTING && $category !== self::PHRASING) {
1908 /* Otherwise, if node is in neither the formatting
1909 category nor the phrasing category, then this is a
1910 parse error. Stop this algorithm. The end tag token
1912 $this->ignored = true;
1917 /* Set node to the previous entry in the stack of open elements. Loop. */
1925 case self::IN_CDATA_RCDATA:
1927 $token['type'] === HTML5_Tokenizer::CHARACTER ||
1928 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
1930 $this->insertText($token['data']);
1931 } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
1933 /* If the current node is a script element, mark the script
1934 * element as "already executed". */
1935 // probably not necessary
1936 array_pop($this->stack);
1937 $this->mode = $this->original_mode;
1938 $this->emitToken($token);
1939 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
1940 array_pop($this->stack);
1941 $this->mode = $this->original_mode;
1942 // we're ignoring all of the execution stuff
1943 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
1944 array_pop($this->stack);
1945 $this->mode = $this->original_mode;
1949 case self::IN_TABLE:
1950 $clear = array('html', 'table');
1952 /* A character token that is one of U+0009 CHARACTER TABULATION,
1953 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1955 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER &&
1956 /* If the current table is tainted, then act as described in
1957 * the "anything else" entry below. */
1958 // Note: hsivonen has a test that fails due to this line
1959 // because he wants to convince Hixie not to do taint
1960 !$this->currentTableIsTainted()) {
1961 /* Append the character to the current node. */
1962 $this->insertText($token['data']);
1964 /* A comment token */
1965 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
1966 /* Append a Comment node to the current node with the data
1967 attribute set to the data given in the comment token. */
1968 $this->insertComment($token['data']);
1970 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
1973 /* A start tag whose tag name is "caption" */
1974 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
1975 $token['name'] === 'caption') {
1976 /* Clear the stack back to a table context. */
1977 $this->clearStackToTableContext($clear);
1979 /* Insert a marker at the end of the list of active
1980 formatting elements. */
1981 $this->a_formatting[] = self::MARKER;
1983 /* Insert an HTML element for the token, then switch the
1984 insertion mode to "in caption". */
1985 $this->insertElement($token);
1986 $this->mode = self::IN_CAPTION;
1988 /* A start tag whose tag name is "colgroup" */
1989 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
1990 $token['name'] === 'colgroup') {
1991 /* Clear the stack back to a table context. */
1992 $this->clearStackToTableContext($clear);
1994 /* Insert an HTML element for the token, then switch the
1995 insertion mode to "in column group". */
1996 $this->insertElement($token);
1997 $this->mode = self::IN_COLUMN_GROUP;
1999 /* A start tag whose tag name is "col" */
2000 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2001 $token['name'] === 'col') {
2002 $this->emitToken(array(
2003 'name' => 'colgroup',
2004 'type' => HTML5_Tokenizer::STARTTAG,
2008 $this->emitToken($token);
2010 /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2011 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2012 array('tbody', 'tfoot', 'thead'))) {
2013 /* Clear the stack back to a table context. */
2014 $this->clearStackToTableContext($clear);
2016 /* Insert an HTML element for the token, then switch the insertion
2017 mode to "in table body". */
2018 $this->insertElement($token);
2019 $this->mode = self::IN_TABLE_BODY;
2021 /* A start tag whose tag name is one of: "td", "th", "tr" */
2022 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2023 in_array($token['name'], array('td', 'th', 'tr'))) {
2024 /* Act as if a start tag token with the tag name "tbody" had been
2025 seen, then reprocess the current token. */
2026 $this->emitToken(array(
2028 'type' => HTML5_Tokenizer::STARTTAG,
2032 $this->emitToken($token);
2034 /* A start tag whose tag name is "table" */
2035 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2036 $token['name'] === 'table') {
2037 /* Parse error. Act as if an end tag token with the tag name "table"
2038 had been seen, then, if that token wasn't ignored, reprocess the
2040 $this->emitToken(array(
2042 'type' => HTML5_Tokenizer::ENDTAG
2045 if (!$this->ignored) $this->emitToken($token);
2047 /* An end tag whose tag name is "table" */
2048 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2049 $token['name'] === 'table') {
2050 /* If the stack of open elements does not have an element in table
2051 scope with the same tag name as the token, this is a parse error.
2052 Ignore the token. (fragment case) */
2053 if(!$this->elementInScope($token['name'], true)) {
2054 $this->ignored = true;
2059 $node = array_pop($this->stack);
2060 } while ($node->tagName !== 'table');
2062 /* Reset the insertion mode appropriately. */
2063 $this->resetInsertionMode();
2066 /* An end tag whose tag name is one of: "body", "caption", "col",
2067 "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2068 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2069 array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2070 'tfoot', 'th', 'thead', 'tr'))) {
2071 // Parse error. Ignore the token.
2073 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2074 ($token['name'] === 'style' || $token['name'] === 'script')) {
2075 $this->processWithRulesFor($token, self::IN_HEAD);
2077 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
2078 // assignment is intentional
2079 /* If the token does not have an attribute with the name "type", or
2080 * if it does, but that attribute's value is not an ASCII
2081 * case-insensitive match for the string "hidden", then: act as
2082 * described in the "anything else" entry below. */
2083 ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
2084 // I.e., if it's an input with the type attribute == 'hidden'
2087 $this->insertElement($token);
2088 array_pop($this->stack);
2089 } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2090 /* If the current node is not the root html element, then this is a parse error. */
2091 if (end($this->stack)->tagName !== 'html') {
2092 // Note: It can only be the current node in the fragment case.
2098 /* Parse error. Process the token as if the insertion mode was "in
2099 body", with the following exception: */
2101 $old = $this->foster_parent;
2102 $this->foster_parent = true;
2103 $this->processWithRulesFor($token, self::IN_BODY);
2104 $this->foster_parent = $old;
2108 case self::IN_CAPTION:
2109 /* An end tag whose tag name is "caption" */
2110 if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
2111 /* If the stack of open elements does not have an element in table
2112 scope with the same tag name as the token, this is a parse error.
2113 Ignore the token. (fragment case) */
2114 if(!$this->elementInScope($token['name'], true)) {
2115 $this->ignored = true;
2120 /* Generate implied end tags. */
2121 $this->generateImpliedEndTags();
2123 /* Now, if the current node is not a caption element, then this
2124 is a parse error. */
2125 // XERROR: implement
2127 /* Pop elements from this stack until a caption element has
2128 been popped from the stack. */
2130 $node = array_pop($this->stack);
2131 } while ($node->tagName !== 'caption');
2133 /* Clear the list of active formatting elements up to the last
2135 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2137 /* Switch the insertion mode to "in table". */
2138 $this->mode = self::IN_TABLE;
2141 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2142 "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2144 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2145 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2146 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2147 $token['name'] === 'table')) {
2148 /* Parse error. Act as if an end tag with the tag name "caption"
2149 had been seen, then, if that token wasn't ignored, reprocess the
2151 $this->emitToken(array(
2152 'name' => 'caption',
2153 'type' => HTML5_Tokenizer::ENDTAG
2156 if (!$this->ignored) $this->emitToken($token);
2158 /* An end tag whose tag name is one of: "body", "col", "colgroup",
2159 "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2160 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2161 array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
2163 // Parse error. Ignore the token.
2164 $this->ignored = true;
2168 /* Process the token as if the insertion mode was "in body". */
2169 $this->processWithRulesFor($token, self::IN_BODY);
2173 case self::IN_COLUMN_GROUP:
2174 /* A character token that is one of U+0009 CHARACTER TABULATION,
2175 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2177 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2178 /* Append the character to the current node. */
2179 $this->insertText($token['data']);
2181 /* A comment token */
2182 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2183 /* Append a Comment node to the current node with the data
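2184 attribute set to the data given in the comment token. XXX: the call below is insertToken(); the equivalent branches of the other insertion modes call insertComment() -- confirm insertToken() exists. */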
2185 $this->insertToken($token['data']);
2187 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2190 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2191 $this->processWithRulesFor($token, self::IN_BODY);
2193 /* A start tag whose tag name is "col" */
2194 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
2195 /* Insert a col element for the token. Immediately pop the current
2196 node off the stack of open elements. */
2197 $this->insertElement($token);
2198 array_pop($this->stack);
2199 // XERROR: Acknowledge the token's self-closing flag, if it is set.
2201 /* An end tag whose tag name is "colgroup" */
2202 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2203 $token['name'] === 'colgroup') {
2204 /* If the current node is the root html element, then this is a
2205 parse error, ignore the token. (fragment case) */
2206 if(end($this->stack)->tagName === 'html') {
2207 $this->ignored = true;
2209 /* Otherwise, pop the current node (which will be a colgroup
2210 element) from the stack of open elements. Switch the insertion
2211 mode to "in table". */
2213 array_pop($this->stack);
2214 $this->mode = self::IN_TABLE;
2217 /* An end tag whose tag name is "col" */
2218 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
2219 /* Parse error. Ignore the token. */
2220 $this->ignored = true;
2222 /* An end-of-file token */
2223 /* If the current node is the root html element */
2224 } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
2229 /* Act as if an end tag with the tag name "colgroup" had been seen,
2230 and then, if that token wasn't ignored, reprocess the current token. */
2231 $this->emitToken(array(
2232 'name' => 'colgroup',
2233 'type' => HTML5_Tokenizer::ENDTAG
2236 if (!$this->ignored) $this->emitToken($token);
2240 case self::IN_TABLE_BODY:
2241 $clear = array('tbody', 'tfoot', 'thead', 'html');
2243 /* A start tag whose tag name is "tr" */
2244 if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
2245 /* Clear the stack back to a table body context. */
2246 $this->clearStackToTableContext($clear);
2248 /* Insert a tr element for the token, then switch the insertion
2249 mode to "in row". */
2250 $this->insertElement($token);
2251 $this->mode = self::IN_ROW;
2253 /* A start tag whose tag name is one of: "th", "td" */
2254 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2255 ($token['name'] === 'th' || $token['name'] === 'td')) {
2256 /* Parse error. Act as if a start tag with the tag name "tr" had
2257 been seen, then reprocess the current token. */
2258 $this->emitToken(array(
2260 'type' => HTML5_Tokenizer::STARTTAG,
2264 $this->emitToken($token);
2266 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2267 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2268 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2269 /* If the stack of open elements does not have an element in table
2270 scope with the same tag name as the token, this is a parse error.
2271 Ignore the token. */
2272 if(!$this->elementInScope($token['name'], true)) {
2274 $this->ignored = true;
2278 /* Clear the stack back to a table body context. */
2279 $this->clearStackToTableContext($clear);
2281 /* Pop the current node from the stack of open elements. Switch
2282 the insertion mode to "in table". */
2283 array_pop($this->stack);
2284 $this->mode = self::IN_TABLE;
2287 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2288 "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
2289 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2290 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2291 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2292 /* If the stack of open elements does not have a tbody, thead, or
2293 tfoot element in table scope, this is a parse error. Ignore the
2294 token. (fragment case) */
2295 if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
2297 $this->ignored = true;
2301 /* Clear the stack back to a table body context. */
2302 $this->clearStackToTableContext($clear);
2304 /* Act as if an end tag with the same tag name as the current
2305 node ("tbody", "tfoot", or "thead") had been seen, then
2306 reprocess the current token. */
2307 $this->emitToken(array(
2308 'name' => end($this->stack)->tagName,
2309 'type' => HTML5_Tokenizer::ENDTAG
2312 $this->emitToken($token);
2315 /* An end tag whose tag name is one of: "body", "caption", "col",
2316 "colgroup", "html", "td", "th", "tr" */
2317 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2318 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
2319 /* Parse error. Ignore the token. */
2320 $this->ignored = true;
2324 /* Process the token as if the insertion mode was "in table". */
2325 $this->processWithRulesFor($token, self::IN_TABLE);
2330 $clear = array('tr', 'html');
2332 /* A start tag whose tag name is one of: "th", "td" */
2333 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2334 ($token['name'] === 'th' || $token['name'] === 'td')) {
2335 /* Clear the stack back to a table row context. */
2336 $this->clearStackToTableContext($clear);
2338 /* Insert an HTML element for the token, then switch the insertion
2339 mode to "in cell". */
2340 $this->insertElement($token);
2341 $this->mode = self::IN_CELL;
2343 /* Insert a marker at the end of the list of active formatting
2345 $this->a_formatting[] = self::MARKER;
2347 /* An end tag whose tag name is "tr" */
2348 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
2349 /* If the stack of open elements does not have an element in table
2350 scope with the same tag name as the token, this is a parse error.
2351 Ignore the token. (fragment case) */
2352 if(!$this->elementInScope($token['name'], true)) {
2354 $this->ignored = true;
2358 /* Clear the stack back to a table row context. */
2359 $this->clearStackToTableContext($clear);
2361 /* Pop the current node (which will be a tr element) from the
2362 stack of open elements. Switch the insertion mode to "in table
2364 array_pop($this->stack);
2365 $this->mode = self::IN_TABLE_BODY;
2368 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2369 "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
2370 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2371 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
2372 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2373 /* Act as if an end tag with the tag name "tr" had been seen, then,
2374 if that token wasn't ignored, reprocess the current token. */
2375 $this->emitToken(array(
2377 'type' => HTML5_Tokenizer::ENDTAG
2379 if (!$this->ignored) $this->emitToken($token);
2381 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2382 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2383 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2384 /* If the stack of open elements does not have an element in table
2385 scope with the same tag name as the token, this is a parse error.
2386 Ignore the token. */
2387 if(!$this->elementInScope($token['name'], true)) {
2388 $this->ignored = true;
2392 /* Otherwise, act as if an end tag with the tag name "tr" had
2393 been seen, then reprocess the current token. */
2394 $this->emitToken(array(
2396 'type' => HTML5_Tokenizer::ENDTAG
2399 $this->emitToken($token);
2402 /* An end tag whose tag name is one of: "body", "caption", "col",
2403 "colgroup", "html", "td", "th" */
2404 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2405 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
2406 /* Parse error. Ignore the token. */
2407 $this->ignored = true;
2411 /* Process the token as if the insertion mode was "in table". */
2412 $this->processWithRulesFor($token, self::IN_TABLE);
2417 /* An end tag whose tag name is one of: "td", "th" */
2418 if($token['type'] === HTML5_Tokenizer::ENDTAG &&
2419 ($token['name'] === 'td' || $token['name'] === 'th')) {
2420 /* If the stack of open elements does not have an element in table
2421 scope with the same tag name as that of the token, then this is a
2422 parse error and the token must be ignored. */
2423 if(!$this->elementInScope($token['name'], true)) {
2424 $this->ignored = true;
2428 /* Generate implied end tags, except for elements with the same
2429 tag name as the token. */
2430 $this->generateImpliedEndTags(array($token['name']));
2432 /* Now, if the current node is not an element with the same tag
2433 name as the token, then this is a parse error. */
2434 // XERROR: Implement parse error code
2436 /* Pop elements from this stack until an element with the same
2437 tag name as the token has been popped from the stack. */
2439 $node = array_pop($this->stack);
2440 } while ($node->tagName !== $token['name']);
2442 /* Clear the list of active formatting elements up to the last
2444 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2446 /* Switch the insertion mode to "in row". (The current node
2447 will be a tr element at this point.) */
2448 $this->mode = self::IN_ROW;
2451 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2452 "tbody", "td", "tfoot", "th", "thead", "tr" */
2453 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2454 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2456 /* If the stack of open elements does not have a td or th element
2457 in table scope, then this is a parse error; ignore the token.
2459 if(!$this->elementInScope(array('td', 'th'), true)) {
2461 $this->ignored = true;
2463 /* Otherwise, close the cell (see below) and reprocess the current
2467 $this->emitToken($token);
2470 /* An end tag whose tag name is one of: "body", "caption", "col",
2471 "colgroup", "html" */
2472 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2473 array('body', 'caption', 'col', 'colgroup', 'html'))) {
2474 /* Parse error. Ignore the token. */
2475 $this->ignored = true;
2477 /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
2479 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2480 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2481 /* If the stack of open elements does not have a td or th element
2482 in table scope, then this is a parse error; ignore the token.
2484 if(!$this->elementInScope(array('td', 'th'), true)) {
2486 $this->ignored = true;
2488 /* Otherwise, close the cell (see below) and reprocess the current
2492 $this->emitToken($token);
2497 /* Process the token as if the insertion mode was "in body". */
2498 $this->processWithRulesFor($token, self::IN_BODY);
2502 case self::IN_SELECT:
2503 /* Handle the token as follows: */
2505 /* A character token */
2507 $token['type'] === HTML5_Tokenizer::CHARACTER ||
2508 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2510 /* Append the token's character to the current node. */
2511 $this->insertText($token['data']);
2513 /* A comment token */
2514 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2515 /* Append a Comment node to the current node with the data
2516 attribute set to the data given in the comment token. */
2517 $this->insertComment($token['data']);
2519 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2522 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2523 $this->processWithRulesFor($token, self::INBODY);
2525 /* A start tag token whose tag name is "option" */
2526 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2527 $token['name'] === 'option') {
2528 /* If the current node is an option element, act as if an end tag
2529 with the tag name "option" had been seen. */
2530 if(end($this->stack)->tagName === 'option') {
2531 $this->emitToken(array(
2533 'type' => HTML5_Tokenizer::ENDTAG
2537 /* Insert an HTML element for the token. */
2538 $this->insertElement($token);
2540 /* A start tag token whose tag name is "optgroup" */
2541 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2542 $token['name'] === 'optgroup') {
2543 /* If the current node is an option element, act as if an end tag
2544 with the tag name "option" had been seen. */
2545 if(end($this->stack)->tagName === 'option') {
2546 $this->emitToken(array(
2548 'type' => HTML5_Tokenizer::ENDTAG
2552 /* If the current node is an optgroup element, act as if an end tag
2553 with the tag name "optgroup" had been seen. */
2554 if(end($this->stack)->tagName === 'optgroup') {
2555 $this->emitToken(array(
2556 'name' => 'optgroup',
2557 'type' => HTML5_Tokenizer::ENDTAG
2561 /* Insert an HTML element for the token. */
2562 $this->insertElement($token);
2564 /* An end tag token whose tag name is "optgroup" */
2565 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2566 $token['name'] === 'optgroup') {
2567 /* First, if the current node is an option element, and the node
2568 immediately before it in the stack of open elements is an optgroup
2569 element, then act as if an end tag with the tag name "option" had
2571 $elements_in_stack = count($this->stack);
2573 if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
2574 $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
2575 $this->emitToken(array(
2577 'type' => HTML5_Tokenizer::ENDTAG
2581 /* If the current node is an optgroup element, then pop that node
2582 from the stack of open elements. Otherwise, this is a parse error,
2583 ignore the token. */
2584 if(end($this->stack)->tagName === 'optgroup') {
2585 array_pop($this->stack);
2588 $this->ignored = true;
2591 /* An end tag token whose tag name is "option" */
2592 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2593 $token['name'] === 'option') {
2594 /* If the current node is an option element, then pop that node
2595 from the stack of open elements. Otherwise, this is a parse error,
2596 ignore the token. */
2597 if(end($this->stack)->tagName === 'option') {
2598 array_pop($this->stack);
2601 $this->ignored = true;
2604 /* An end tag whose tag name is "select" */
2605 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2606 $token['name'] === 'select') {
2607 /* If the stack of open elements does not have an element in table
2608 scope with the same tag name as the token, this is a parse error.
2609 Ignore the token. (fragment case) */
2610 if(!$this->elementInScope($token['name'], true)) {
2611 $this->ignored = true;
2616 /* Pop elements from the stack of open elements until a select
2617 element has been popped from the stack. */
2619 $node = array_pop($this->stack);
2620 } while ($node->tagName !== 'select');
2622 /* Reset the insertion mode appropriately. */
2623 $this->resetInsertionMode();
2626 /* A start tag whose tag name is "select" */
2627 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
2628 /* Parse error. Act as if the token had been an end tag with the
2629 tag name "select" instead. */
2630 $this->emitToken(array(
2632 'type' => HTML5_Tokenizer::ENDTAG
2635 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2636 ($token['name'] === 'input' || $token['name'] === 'textarea')) {
2638 $this->emitToken(array(
2640 'type' => HTML5_Tokenizer::ENDTAG
2642 $this->emitToken($token);
2644 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
2645 $this->processWithRulesFor($token, self::IN_HEAD);
2647 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2648 // XERROR: If the current node is not the root html element, then this is a parse error.
2653 /* Parse error. Ignore the token. */
2654 $this->ignored = true;
2658 case self::IN_SELECT_IN_TABLE:
2660 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2661 in_array($token['name'], array('caption', 'table', 'tbody',
2662 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2664 $this->emitToken(array(
2666 'type' => HTML5_Tokenizer::ENDTAG,
2668 $this->emitToken($token);
2670 /* An end tag whose tag name is one of: "caption", "table", "tbody",
2671 "tfoot", "thead", "tr", "td", "th" */
2672 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2673 in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2677 /* If the stack of open elements has an element in table scope with
2678 the same tag name as that of the token, then act as if an end tag
2679 with the tag name "select" had been seen, and reprocess the token.
2680 Otherwise, ignore the token. */
2681 if($this->elementInScope($token['name'], true)) {
2682 $this->emitToken(array(
2684 'type' => HTML5_Tokenizer::ENDTAG
2687 $this->emitToken($token);
2689 $this->ignored = true;
2692 $this->processWithRulesFor($token, self::IN_SELECT);
2696 case self::IN_FOREIGN_CONTENT:
2697 if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2698 $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2699 $this->insertText($token['data']);
2700 } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2701 $this->insertComment($token['data']);
2702 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2703 // XERROR: parse error
2704 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2705 $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
2706 end($this->stack)->namespaceURI === self::NS_SVG) {
2707 array_pop($this->stack);
2708 // a bunch of script running mumbo jumbo
2710 ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2712 $token['name'] !== 'mglyph' &&
2713 $token['name'] !== 'malignmark' &&
2714 end($this->stack)->namespaceURI === self::NS_MATHML &&
2715 in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2718 $token['name'] === 'svg' &&
2719 end($this->stack)->namespaceURI === self::NS_MATHML &&
2720 end($this->stack)->tagName === 'annotation-xml'
2723 end($this->stack)->namespaceURI === self::NS_SVG &&
2724 in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
2728 end($this->stack)->namespaceURI === self::NS_HTML
2730 ) || $token['type'] === HTML5_Tokenizer::ENDTAG
2732 $this->processWithRulesFor($token, $this->secondary_mode);
2733 /* If, after doing so, the insertion mode is still "in foreign
2734 * content", but there is no element in scope that has a namespace
2735 * other than the HTML namespace, switch the insertion mode to the
2736 * secondary insertion mode. */
2737 if ($this->mode === self::IN_FOREIGN_CONTENT) {
2739 // this basically duplicates elementInScope()
2740 for ($i = count($this->stack) - 1; $i >= 0; $i--) {
2741 $node = $this->stack[$i];
2742 if ($node->namespaceURI !== self::NS_HTML) {
2745 } elseif (in_array($node->tagName, array('table', 'html',
2746 'applet', 'caption', 'td', 'th', 'button', 'marquee',
2747 'object')) || ($node->tagName === 'foreignObject' &&
2748 $node->namespaceURI === self::NS_SVG)) {
2753 $this->mode = $this->secondary_mode;
2756 } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
2757 $token['type'] === HTML5_Tokenizer::STARTTAG &&
2758 (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
2759 "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2",
2760 "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
2761 "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
2762 "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
2763 "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2764 $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2765 // XERROR: parse error
2767 $node = array_pop($this->stack);
2768 } while ($node->namespaceURI !== self::NS_HTML);
2769 $this->stack[] = $node;
2770 $this->mode = $this->secondary_mode;
2771 $this->emitToken($token);
2772 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
2773 static $svg_lookup = array(
2774 'altglyph' => 'altGlyph',
2775 'altglyphdef' => 'altGlyphDef',
2776 'altglyphitem' => 'altGlyphItem',
2777 'animatecolor' => 'animateColor',
2778 'animatemotion' => 'animateMotion',
2779 'animatetransform' => 'animateTransform',
2780 'clippath' => 'clipPath',
2781 'feblend' => 'feBlend',
2782 'fecolormatrix' => 'feColorMatrix',
2783 'fecomponenttransfer' => 'feComponentTransfer',
2784 'fecomposite' => 'feComposite',
2785 'feconvolvematrix' => 'feConvolveMatrix',
2786 'fediffuselighting' => 'feDiffuseLighting',
2787 'fedisplacementmap' => 'feDisplacementMap',
2788 'fedistantlight' => 'feDistantLight',
2789 'feflood' => 'feFlood',
2790 'fefunca' => 'feFuncA',
2791 'fefuncb' => 'feFuncB',
2792 'fefuncg' => 'feFuncG',
2793 'fefuncr' => 'feFuncR',
2794 'fegaussianblur' => 'feGaussianBlur',
2795 'feimage' => 'feImage',
2796 'femerge' => 'feMerge',
2797 'femergenode' => 'feMergeNode',
2798 'femorphology' => 'feMorphology',
2799 'feoffset' => 'feOffset',
2800 'fepointlight' => 'fePointLight',
2801 'fespecularlighting' => 'feSpecularLighting',
2802 'fespotlight' => 'feSpotLight',
2803 'fetile' => 'feTile',
2804 'feturbulence' => 'feTurbulence',
2805 'foreignobject' => 'foreignObject',
2806 'glyphref' => 'glyphRef',
2807 'lineargradient' => 'linearGradient',
2808 'radialgradient' => 'radialGradient',
2809 'textpath' => 'textPath',
2811 $current = end($this->stack);
2812 if ($current->namespaceURI === self::NS_MATHML) {
2813 $token = $this->adjustMathMLAttributes($token);
2815 if ($current->namespaceURI === self::NS_SVG &&
2816 isset($svg_lookup[$token['name']])) {
2817 $token['name'] = $svg_lookup[$token['name']];
2819 if ($current->namespaceURI === self::NS_SVG) {
2820 $token = $this->adjustSVGAttributes($token);
2822 $token = $this->adjustForeignAttributes($token);
2823 $this->insertForeignElement($token, $current->namespaceURI);
2824 if (isset($token['self-closing'])) {
2825 array_pop($this->stack);
2826 // XERROR: acknowledge self-closing flag
2831 case self::AFTER_BODY:
2832 /* Handle the token as follows: */
2834 /* A character token that is one of U+0009 CHARACTER TABULATION,
2835 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2837 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2838 /* Process the token as it would be processed if the insertion mode
2840 $this->processWithRulesFor($token, self::IN_BODY);
2842 /* A comment token */
2843 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2844 /* Append a Comment node to the first element in the stack of open
2845 elements (the html element), with the data attribute set to the
2846 data given in the comment token. */
2847 $comment = $this->dom->createComment($token['data']);
2848 $this->stack[0]->appendChild($comment);
2850 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2853 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2854 $this->processWithRulesFor($token, self::IN_BODY);
2856 /* An end tag with the tag name "html" */
2857 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
2858 /* If the parser was originally created as part of the HTML
2859 * fragment parsing algorithm, this is a parse error; ignore
2860 * the token. (fragment case) */
2861 $this->ignored = true;
2862 // XERROR: implement this
2864 $this->mode = self::AFTER_AFTER_BODY;
2866 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2871 /* Parse error. Set the insertion mode to "in body" and reprocess
2873 $this->mode = self::IN_BODY;
2874 $this->emitToken($token);
2878 case self::IN_FRAMESET:
2879 /* Handle the token as follows: */
2881 /* A character token that is one of U+0009 CHARACTER TABULATION,
2882 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2883 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
2884 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2885 /* Append the character to the current node. */
2886 $this->insertText($token['data']);
2888 /* A comment token */
2889 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2890 /* Append a Comment node to the current node with the data
2891 attribute set to the data given in the comment token. */
2892 $this->insertComment($token['data']);
2894 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2897 /* A start tag with the tag name "frameset" */
2898 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2899 $token['name'] === 'frameset') {
2900 $this->insertElement($token);
2902 /* An end tag with the tag name "frameset" */
2903 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2904 $token['name'] === 'frameset') {
2905 /* If the current node is the root html element, then this is a
2906 parse error; ignore the token. (fragment case) */
2907 if(end($this->stack)->tagName === 'html') {
2908 $this->ignored = true;
2912 /* Otherwise, pop the current node from the stack of open
2914 array_pop($this->stack);
2916 /* If the parser was not originally created as part of the HTML
2917 * fragment parsing algorithm (fragment case), and the current
2918 * node is no longer a frameset element, then switch the
2919 * insertion mode to "after frameset". */
2920 $this->mode = self::AFTER_FRAMESET;
2923 /* A start tag with the tag name "frame" */
2924 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2925 $token['name'] === 'frame') {
2926 /* Insert an HTML element for the token. */
2927 $this->insertElement($token);
2929 /* Immediately pop the current node off the stack of open elements. */
2930 array_pop($this->stack);
2932 // XERROR: Acknowledge the token's self-closing flag, if it is set.
2934 /* A start tag with the tag name "noframes" */
2935 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2936 $token['name'] === 'noframes') {
2937 /* Process the token using the rules for the "in head" insertion mode. */
2938 $this->processwithRulesFor($token, self::IN_HEAD);
2940 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2941 // XERROR: If the current node is not the root html element, then this is a parse error.
2945 /* Parse error. Ignore the token. */
2946 $this->ignored = true;
2950 case self::AFTER_FRAMESET:
2951 /* Handle the token as follows: */
2953 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2954 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2955 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
2956 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2957 /* Append the character to the current node. */
2958 $this->insertText($token['data']);
2960 /* A comment token */
2961 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2962 /* Append a Comment node to the current node with the data
2963 attribute set to the data given in the comment token. */
2964 $this->insertComment($token['data']);
2966 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2969 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2970 $this->processWithRulesFor($token, self::IN_BODY);
2972 /* An end tag with the tag name "html" */
2973 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2974 $token['name'] === 'html') {
2975 $this->mode = self::AFTER_AFTER_FRAMESET;
2977 /* A start tag with the tag name "noframes" */
2978 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2979 $token['name'] === 'noframes') {
2980 $this->processWithRulesFor($token, self::IN_HEAD);
2982 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2987 /* Parse error. Ignore the token. */
2988 $this->ignored = true;
2992 case self::AFTER_AFTER_BODY:
2993 /* A comment token */
2994 if($token['type'] === HTML5_Tokenizer::COMMENT) {
2995 /* Append a Comment node to the Document object with the data
2996 attribute set to the data given in the comment token. */
2997 $comment = $this->dom->createComment($token['data']);
2998 $this->dom->appendChild($comment);
3000 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3001 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3002 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3003 $this->processWithRulesFor($token, self::IN_BODY);
3005 /* An end-of-file token */
3006 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3010 $this->mode = self::IN_BODY;
3011 $this->emitToken($token);
3015 case self::AFTER_AFTER_FRAMESET:
3016 /* A comment token */
3017 if($token['type'] === HTML5_Tokenizer::COMMENT) {
3018 /* Append a Comment node to the Document object with the data
3019 attribute set to the data given in the comment token. */
3020 $comment = $this->dom->createComment($token['data']);
3021 $this->dom->appendChild($comment);
3023 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3024 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3025 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3026 $this->processWithRulesFor($token, self::IN_BODY);
3028 /* An end-of-file token */
3029 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3031 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
3032 $this->processWithRulesFor($token, self::IN_HEAD);
3038 // end funky indenting
/**
 * Creates an HTML-namespace element for a tag token, copies the token's
 * attributes onto it and, unless told otherwise, inserts it into the tree.
 *
 * @param array $token  Token with a 'name' key and an optional 'attr' list
 *                      of array('name' => ..., 'value' => ...) pairs.
 * @param bool  $append When true the element is appended through
 *                      appendToRealParent() and pushed onto the stack of
 *                      open elements.
 * @return DOMElement|false The new element, or false when the DOM did not
 *                          produce an element for the token name.
 */
private function insertElement($token, $append = true) {
    $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
    if (!$el) {
        logger('insertElement(): ignoring invalid token='.$token['name']);
        return false;
    }

    if (!empty($token['attr'])) {
        foreach ($token['attr'] as $attr) {
            // mike@macgirvin.com 2011-11-17, check attribute name for
            // validity (ignoring extenders and combiners) as illegal chars
            // in names causes everything to abort.
            // The tail of the pattern is `*` rather than `+` so that valid
            // one-character names are no longer rejected. An empty name can
            // never match, so no separate emptiness test is needed.
            if (!preg_match('/^[a-zA-Z\_\:][\-a-zA-Z0-9\_\:\.]*$/', $attr['name'])) {
                continue;
            }

            // The first occurrence of an attribute wins; later duplicates
            // are ignored.
            if (!$el->hasAttribute($attr['name'])) {
                $el->setAttribute($attr['name'], $attr['value']);
            }
        }
    }

    if ($append) {
        $this->appendToRealParent($el);
        $this->stack[] = $el;
    }

    return $el;
}
/**
 * Appends a text node holding $data at the current insertion point.
 *
 * When the ignore_lf_token flag is set and $data starts with a line feed,
 * that first character is dropped; if nothing remains, no node is created.
 *
 * @param string $data Character data to insert.
 * @return void
 */
private function insertText($data) {
    if ($data === '') {
        return;
    }

    if ($this->ignore_lf_token && $data[0] === "\n") {
        $data = substr($data, 1);
        // substr() yields false before PHP 8 and '' from PHP 8 on when the
        // string held nothing but the line feed; treat both as "no text".
        if ($data === false || $data === '') {
            return;
        }
    }

    $text = $this->dom->createTextNode($data);
    $this->appendToRealParent($text);
}
/**
 * Builds a comment node from $data and appends it at the current insertion
 * point (foster parenting applies, see appendToRealParent()).
 *
 * @param string $data Comment text.
 * @return void
 */
private function insertComment($data) {
    $this->appendToRealParent($this->dom->createComment($data));
}
/**
 * Appends $node to the current node, or hands it to fosterParent() when
 * foster parenting is switched on and the current node is a table, tbody,
 * tfoot, thead or tr element.
 *
 * @param DOMNode $node Node to insert.
 * @return void
 */
private function appendToRealParent($node) {
    $current = end($this->stack);

    // Redirection only happens while the foster_parent flag is raised.
    if ($this->foster_parent) {
        $table_parts = array('table', 'tbody', 'tfoot', 'thead', 'tr');
        if (in_array($current->tagName, $table_parts)) {
            $this->fosterParent($node);
            return;
        }
    }

    $current->appendChild($node);
}
/**
 * Tells whether an element with the given tag name is "in scope" (or
 * "in table scope") on the stack of open elements.
 *
 * @param string|array $el    Tag name, or a list of tag names of which any
 *                            one may match.
 * @param bool         $table True to use the "in table scope" rules, where
 *                            only table and html terminate the search.
 * @return bool True when a matching element is found before a scope
 *              boundary; false otherwise (including an exhausted stack,
 *              which previously fell through with an implicit null).
 */
private function elementInScope($el, $table = false) {
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $table)) {
                return true;
            }
        }
        return false;
    }

    // Walk from the current (bottommost) node towards the root.
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];

        /* If node is the target node, terminate in a match state. */
        if ($node->tagName === $el) {
            return true;
        }

        // Boundaries shared by "in scope" and "in table scope".
        if ($node->tagName === 'table' || $node->tagName === 'html') {
            return false;
        }

        // Boundaries that only count for plain "in scope".
        if (!$table) {
            $is_boundary = in_array(
                $node->tagName,
                array('applet', 'caption', 'td', 'th', 'button', 'marquee', 'object'),
                true
            );
            $is_svg_foreign_object = $node->tagName === 'foreignObject'
                && $node->namespaceURI === self::NS_SVG;

            if ($is_boundary || $is_svg_foreign_object) {
                return false;
            }
        }
    }

    // Nothing matched and no boundary was hit (e.g. empty stack).
    return false;
}
/**
 * Reconstructs the active formatting elements: every entry after the last
 * marker / last still-open element in the list of active formatting
 * elements is shallow-cloned, inserted at the current insertion point,
 * pushed onto the stack of open elements and substituted into the list.
 *
 * Fix over the previous implementation: when the backwards scan stops at a
 * marker or at an element that is still open (step 6), the algorithm must
 * first advance to the following entry (step 7) before cloning. Previously
 * that advance was skipped, so the marker / open element itself was cloned.
 *
 * @return false|null false when there is nothing to reconstruct, otherwise
 *                    no value.
 */
private function reconstructActiveFormattingElements() {
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $total = count($this->a_formatting);
    if ($total === 0) {
        return false;
    }

    /* 2./3. Let entry be the last (most recently added) entry. If it is a
    marker, or an element that is in the stack of open elements, there is
    nothing to reconstruct; stop this algorithm. */
    $entry = end($this->a_formatting);
    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    $a = $total - 1;
    // Whether step 7 (move one entry later) has to run before cloning.
    $advance = false;

    /* 4. If there are no entries before entry in the list, jump to step 8
    (the loop simply ends with $advance still false). */
    while ($a > 0) {
        /* 5. Let entry be the entry one earlier than entry in the list. */
        $a--;
        $entry = $this->a_formatting[$a];

        /* 6. If entry is neither a marker nor an element that is also in
        the stack of open elements, go to step 4; otherwise continue with
        step 7. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            $advance = true;
            break;
        }
    }

    while (true) {
        /* 7. Let entry be the element one later than entry in the list. */
        if ($advance) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $entry->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        $this->appendToRealParent($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;

        /* 11. If the entry for clone in the list of active formatting
        elements is not the last entry in the list, return to step 7. */
        if (end($this->a_formatting) === $clone) {
            break;
        }
        $advance = true;
    }
}
/**
 * Clears the list of active formatting elements up to the last marker:
 * entries are removed from the end of the list until a marker has been
 * removed.
 *
 * The loop also stops once the list is empty, so a list without any marker
 * no longer causes an endless loop.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    while (!empty($this->a_formatting)) {
        /* 1./2. Let entry be the last (most recently added) entry in the
        list of active formatting elements and remove it from the list. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
/**
 * Generates implied end tags: while the current node is a dd, dt, li, p,
 * td, th or tr element (minus anything listed in $exclude) it is popped
 * off the stack of open elements.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags($exclude = array()) {
    $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    // end() returns false on an empty stack; stop instead of dereferencing it.
    while (($current = end($this->stack)) && in_array($current->tagName, $elements, true)) {
        array_pop($this->stack);
    }
}
/**
 * Classifies an element by its tag name.
 *
 * The special, scoping and formatting name lists are consulted in that
 * order; anything not listed is phrasing.
 *
 * @param DOMElement $node Element to classify. A backtrace is printed when
 *                         something other than an object is passed.
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING or
 *               self::PHRASING.
 */
private function getElementCategory($node) {
    if (!is_object($node)) {
        debug_print_backtrace();
    }

    $tag = $node->tagName;

    // Ordered (name list, category) pairs; the first list containing the
    // tag name decides.
    $candidates = array(
        array($this->special, self::SPECIAL),
        array($this->scoping, self::SCOPING),
        array($this->formatting, self::FORMATTING),
    );

    foreach ($candidates as $candidate) {
        if (in_array($tag, $candidate[0])) {
            return $candidate[1];
        }
    }

    return self::PHRASING;
}
/**
 * Clears the stack of open elements back to a table context: elements are
 * popped until the current node's tag name is one of $elements.
 *
 * @param array $elements Tag names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements) {
    while (!in_array(end($this->stack)->tagName, $elements)) {
        array_pop($this->stack);
    }
}
/**
 * Resets the insertion mode appropriately by walking the stack of open
 * elements from the current node towards the root and picking the mode
 * that matches the first recognised element.
 *
 * @param DOMElement|null $context Context element of the fragment case. It
 *                                 stands in for the root of the stack. When
 *                                 it is null and the root is reached, the
 *                                 mode becomes "in body" directly instead
 *                                 of reading properties from null.
 * @return void
 */
private function resetInsertionMode($context = null) {
    /* 1. Let last be false. */
    $last = false;

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        /* 2. Let node be the last node in the stack of open elements. */
        $node = $this->stack[$n];

        /* 3. If node is the first node in the stack of open elements, then
         * set last to true and set node to the context element. (fragment
         * case) */
        if ($this->stack[0]->isSameNode($node)) {
            $last = true;
            $node = $context;

            // Without a context element none of the checks below can
            // match, so step 16 applies straight away.
            if ($node === null) {
                $this->mode = self::IN_BODY;
                return;
            }
        }

        $tag = $node->tagName;

        /* 4. select: "in select". (fragment case) */
        if ($tag === 'select') {
            $this->mode = self::IN_SELECT;
            return;
        }

        /* 5. td or th: "in cell". */
        if ($tag === 'td' || $tag === 'th') {
            $this->mode = self::IN_CELL;
            return;
        }

        /* 6. tr: "in row". */
        if ($tag === 'tr') {
            $this->mode = self::IN_ROW;
            return;
        }

        /* 7. tbody, thead or tfoot: "in table body". */
        if (in_array($tag, array('tbody', 'thead', 'tfoot'), true)) {
            $this->mode = self::IN_TABLE_BODY;
            return;
        }

        /* 8. caption: "in caption". */
        if ($tag === 'caption') {
            $this->mode = self::IN_CAPTION;
            return;
        }

        /* 9. colgroup: "in column group". (innerHTML case) */
        if ($tag === 'colgroup') {
            $this->mode = self::IN_COLUMN_GROUP;
            return;
        }

        /* 10. table: "in table". */
        if ($tag === 'table') {
            $this->mode = self::IN_TABLE;
            return;
        }

        /* 11. An element from the MathML or SVG namespace: "in foreign
         * content", with "in body" as the secondary insertion mode. */
        if ($node->namespaceURI === self::NS_SVG ||
            $node->namespaceURI === self::NS_MATHML) {
            $this->mode = self::IN_FOREIGN_CONTENT;
            $this->secondary_mode = self::IN_BODY;
            return;
        }

        /* 12. head: "in body" ("in body"! not "in head"!). (fragment case)
         * 13. body: "in body". */
        if ($tag === 'head' || $tag === 'body') {
            $this->mode = self::IN_BODY;
            return;
        }

        /* 14. frameset: "in frameset". (fragment case) */
        if ($tag === 'frameset') {
            $this->mode = self::IN_FRAMESET;
            return;
        }

        /* 15. html: "before head" when the head element pointer is null,
         * otherwise "after head". (fragment case) */
        if ($tag === 'html') {
            $this->mode = ($this->head_pointer === null)
                ? self::BEFORE_HEAD
                : self::AFTER_HEAD;
            return;
        }

        /* 16. If last is true, then set the insertion mode to "in body"
         * and abort these steps. (fragment case) */
        if ($last) {
            $this->mode = self::IN_BODY;
            return;
        }

        /* Otherwise continue with the node before node in the stack. */
    }
}
/**
 * Closes the currently open table cell: if a td or th element is in table
 * scope, an end tag token with that tag name is emitted. td is tried first
 * and only one end tag is emitted.
 *
 * @return void
 */
private function closeCell() {
    $cell = null;
    if ($this->elementInScope('td', true)) {
        $cell = 'td';
    } elseif ($this->elementInScope('th', true)) {
        $cell = 'th';
    }

    if ($cell === null) {
        return;
    }

    $end_tag = array(
        'name' => $cell,
        'type' => HTML5_Tokenizer::ENDTAG
    );
    $this->emitToken($end_tag);
}
/**
 * Processes $token "using the rules for" insertion mode $mode: the token
 * is dispatched through emitToken() with $mode passed explicitly, so the
 * parser's own insertion mode stays unchanged unless those rules switch it.
 *
 * @param array $token Token to process.
 * @param mixed $mode  Insertion-mode constant whose rules should be used.
 * @return mixed Whatever emitToken() returns.
 */
private function processWithRulesFor($token, $mode) {
    $result = $this->emitToken($token, $mode);
    return $result;
}
/**
 * Inserts the element for $token and switches the parser to CDATA
 * handling: the current insertion mode is saved in original_mode, the
 * mode becomes IN_CDATA_RCDATA and the content model becomes CDATA.
 *
 * @param array $token Start tag token.
 * @return void
 */
private function insertCDATAElement($token) {
    $this->insertElement($token);

    // Save the mode to come back to before replacing it.
    $previous_mode = $this->mode;
    $this->original_mode = $previous_mode;
    $this->mode = self::IN_CDATA_RCDATA;

    $this->content_model = HTML5_Tokenizer::CDATA;
}
/**
 * Inserts the element for $token and switches the parser to RCDATA
 * handling: the current insertion mode is saved in original_mode, the
 * mode becomes IN_CDATA_RCDATA and the content model becomes RCDATA.
 *
 * @param array $token Start tag token.
 * @return void
 */
private function insertRCDATAElement($token) {
    $this->insertElement($token);

    // Save the mode to come back to before replacing it.
    $previous_mode = $this->mode;
    $this->original_mode = $previous_mode;
    $this->mode = self::IN_CDATA_RCDATA;

    $this->content_model = HTML5_Tokenizer::RCDATA;
}
/**
 * Looks up the value of attribute $key on a token.
 *
 * @param array  $token Token whose 'attr' list is searched.
 * @param string $key   Attribute name to look for.
 * @return mixed The value of the last attribute with that name, or false
 *               when the token has no 'attr' entry or no such attribute.
 */
private function getAttr($token, $key) {
    if (!isset($token['attr'])) {
        return false;
    }

    $found = false;
    foreach ($token['attr'] as $pair) {
        if ($pair['name'] !== $key) {
            continue;
        }
        // Keep scanning: a later attribute of the same name overrides.
        $found = $pair['value'];
    }

    return $found;
}
/**
 * Returns the current table: the last table element in the stack of open
 * elements, or the first element of the stack (the html element) when no
 * table is open (fragment case).
 *
 * @return DOMElement
 */
private function getCurrentTable() {
    $index = count($this->stack);

    // Scan from the current node towards the root.
    while (--$index >= 0) {
        $candidate = $this->stack[$index];
        if ($candidate->tagName === 'table') {
            return $candidate;
        }
    }

    return $this->stack[0];
}
/**
 * Determines the foster parent element.
 *
 * - No table element in the stack of open elements (innerHTML case): the
 *   first element in the stack (the html element).
 * - The last table element has a parent node: that parent node.
 * - The last table element has no parent: the element before it in the
 *   stack of open elements.
 *
 * @return DOMNode
 */
private function getFosterParent() {
    $table = null;
    $position = -1;

    // Find the last table element in the stack of open elements.
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        if ($this->stack[$n]->tagName === 'table') {
            $table = $this->stack[$n];
            $position = $n;
            break;
        }
    }

    if ($table === null) {
        return $this->stack[0];
    }

    if ($table->parentNode !== null) {
        return $table->parentNode;
    }

    // Parentless table: fall back to the entry just before it.
    return $this->stack[$position - 1];
}
/**
 * Foster parents $node: inserts it into the foster parent element and
 * marks the current table as tainted.
 *
 * @param DOMNode $node Node to insert.
 * @return void
 */
public function fosterParent($node) {
    $foster_parent = $this->getFosterParent();
    $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html

    /* When a node node is to be foster parented, the node node must be
     * inserted into the foster parent element, and the current table must
     * be marked as tainted. (Once the current table has been tainted,
     * whitespace characters are inserted into the foster parent element
     * instead of the current node.) */
    $table->tainted = true;

    /* If the foster parent element is the parent element of the last table
     * element in the stack of open elements, then node must be inserted
     * immediately before the last table element in the stack of open
     * elements in the foster parent element; otherwise, node must be
     * appended to the foster parent element. */
    $table_parent = $table->parentNode;
    // A table without a parent cannot be the insertion reference; checking
    // for null first avoids calling isSameNode() on null.
    $insert_before_table = $table->tagName === 'table'
        && $table_parent !== null
        && $table_parent->isSameNode($foster_parent);

    if ($insert_before_table) {
        $foster_parent->insertBefore($node, $table);
    } else {
        $foster_parent->appendChild($node);
    }
}
/**
 * For debugging, prints the tag names on the stack of open elements.
 */
private function printStack() {
    $names = array();
    foreach ($this->stack as $element) {
        array_push($names, $element->tagName);
    }

    $listing = implode(', ', $names);
    echo " -> stack [", $listing, "]\n";
}
/**
 * For debugging, prints the list of active formatting elements (markers
 * are shown as MARKER). Prints nothing when the list is empty.
 */
private function printActiveFormattingElements() {
    if (!$this->a_formatting) {
        return;
    }

    $names = array();
    foreach ($this->a_formatting as $node) {
        $names[] = ($node === self::MARKER) ? 'MARKER' : $node->tagName;
    }

    $listing = implode(', ', $names);
    echo " -> active formatting [", $listing, "]\n";
}
/**
 * Reports whether the current table (see getCurrentTable()) carries a
 * non-empty "tainted" property, which fosterParent() sets.
 *
 * @return bool
 */
public function currentTableIsTainted() {
    $table = $this->getCurrentTable();
    return !empty($table->tainted);
}
/**
 * Sets up the tree constructor for building a fragment.
 *
 * @param string|null $context Tag name of the context element. When it is
 *                             falsy only the fragment flag is raised.
 */
public function setupContext($context = null) {
    $this->fragment = true;
    if (!$context) {
        return;
    }

    $context = $this->dom->createElementNS(self::NS_HTML, $context);

    /* 4.1. Set the HTML parser's tokenization stage's content model
     * flag according to the context element, as follows: */
    switch ($context->tagName) {
        case 'title':
        case 'textarea':
            $this->content_model = HTML5_Tokenizer::RCDATA;
            break;

        case 'style':
        case 'script':
        case 'xmp':
        case 'iframe':
        case 'noembed':
        case 'noframes':
        // XSCRIPT: assuming scripting is enabled
        case 'noscript':
            $this->content_model = HTML5_Tokenizer::CDATA;
            break;

        case 'plaintext':
            $this->content_model = HTML5_Tokenizer::PLAINTEXT;
            break;
    }

    /* 4.2. Let root be a new html element with no attributes. */
    $this->root = $this->dom->createElementNS(self::NS_HTML, 'html');

    /* 4.3 Append the element root to the Document node created above. */
    $this->dom->appendChild($this->root);

    /* 4.4 Set up the parser's stack of open elements so that it
     * contains just the single element root. */
    $this->stack = array($this->root);

    /* 4.5 Reset the parser's insertion mode appropriately. */
    $this->resetInsertionMode($context);

    /* 4.6 Set the parser's form element pointer to the nearest node
     * to the context element that is a form element (going straight up
     * the ancestor chain, and including the element itself, if it is a
     * form element), or, if there is no such form element, to null. */
    $ancestor = $context;
    do {
        if ($ancestor->tagName === 'form') {
            $this->form_pointer = $ancestor;
            break;
        }
    } while ($ancestor = $ancestor->parentNode);
}
/**
 * Adjusts MathML attributes on a start tag token: the lowercased attribute
 * name "definitionurl" is restored to "definitionURL".
 *
 * @param array $token Start tag token; its 'attr' list may be missing or
 *                     empty, in which case the token is returned untouched.
 * @return array The (possibly modified) token.
 */
public function adjustMathMLAttributes($token) {
    if (empty($token['attr'])) {
        return $token;
    }

    foreach ($token['attr'] as &$kp) {
        if ($kp['name'] === 'definitionurl') {
            $kp['name'] = 'definitionURL';
        }
    }
    // Break the reference so later writes to $kp cannot touch the token.
    unset($kp);

    return $token;
}
/**
 * Adjusts SVG attributes on a start tag token: attribute names that the
 * tokenizer lowercased are restored to their mixed-case SVG spelling.
 *
 * @param array $token Start tag token; its 'attr' list may be missing or
 *                     empty, in which case the token is returned untouched.
 * @return array The (possibly modified) token.
 */
public function adjustSVGAttributes($token) {
    // Lowercased name => canonical SVG attribute name.
    static $lookup = array(
        'attributename' => 'attributeName',
        'attributetype' => 'attributeType',
        'basefrequency' => 'baseFrequency',
        'baseprofile' => 'baseProfile',
        'calcmode' => 'calcMode',
        'clippathunits' => 'clipPathUnits',
        'contentscripttype' => 'contentScriptType',
        'contentstyletype' => 'contentStyleType',
        'diffuseconstant' => 'diffuseConstant',
        'edgemode' => 'edgeMode',
        'externalresourcesrequired' => 'externalResourcesRequired',
        'filterres' => 'filterRes',
        'filterunits' => 'filterUnits',
        'glyphref' => 'glyphRef',
        'gradienttransform' => 'gradientTransform',
        'gradientunits' => 'gradientUnits',
        'kernelmatrix' => 'kernelMatrix',
        'kernelunitlength' => 'kernelUnitLength',
        'keypoints' => 'keyPoints',
        'keysplines' => 'keySplines',
        'keytimes' => 'keyTimes',
        'lengthadjust' => 'lengthAdjust',
        'limitingconeangle' => 'limitingConeAngle',
        'markerheight' => 'markerHeight',
        'markerunits' => 'markerUnits',
        'markerwidth' => 'markerWidth',
        'maskcontentunits' => 'maskContentUnits',
        'maskunits' => 'maskUnits',
        'numoctaves' => 'numOctaves',
        'pathlength' => 'pathLength',
        'patterncontentunits' => 'patternContentUnits',
        'patterntransform' => 'patternTransform',
        'patternunits' => 'patternUnits',
        'pointsatx' => 'pointsAtX',
        'pointsaty' => 'pointsAtY',
        'pointsatz' => 'pointsAtZ',
        'preservealpha' => 'preserveAlpha',
        'preserveaspectratio' => 'preserveAspectRatio',
        'primitiveunits' => 'primitiveUnits',
        'refx' => 'refX',
        'refy' => 'refY',
        'repeatcount' => 'repeatCount',
        'repeatdur' => 'repeatDur',
        'requiredextensions' => 'requiredExtensions',
        'requiredfeatures' => 'requiredFeatures',
        'specularconstant' => 'specularConstant',
        'specularexponent' => 'specularExponent',
        'spreadmethod' => 'spreadMethod',
        'startoffset' => 'startOffset',
        'stddeviation' => 'stdDeviation',
        'stitchtiles' => 'stitchTiles',
        'surfacescale' => 'surfaceScale',
        'systemlanguage' => 'systemLanguage',
        'tablevalues' => 'tableValues',
        'targetx' => 'targetX',
        'targety' => 'targetY',
        'textlength' => 'textLength',
        'viewbox' => 'viewBox',
        'viewtarget' => 'viewTarget',
        'xchannelselector' => 'xChannelSelector',
        'ychannelselector' => 'yChannelSelector',
        'zoomandpan' => 'zoomAndPan',
    );

    if (empty($token['attr'])) {
        return $token;
    }

    foreach ($token['attr'] as &$kp) {
        if (isset($lookup[$kp['name']])) {
            $kp['name'] = $lookup[$kp['name']];
        }
    }
    // Break the reference so later writes to $kp cannot touch the token.
    unset($kp);

    return $token;
}
/**
 * Adjusts foreign attributes on a start tag token: recognised prefixed
 * names (xlink:*, xml:*, xmlns, xmlns:xlink) have their 'name' replaced by
 * an array(prefix, local name, namespace URI) triple, which is the form
 * insertForeignElement() understands.
 *
 * @param array $token Start tag token; its 'attr' list may be missing or
 *                     empty, in which case the token is returned untouched.
 * @return array The (possibly modified) token.
 */
public function adjustForeignAttributes($token) {
    // Attribute name => array(prefix, local name, namespace URI).
    static $lookup = array(
        'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
        'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
        'xlink:href' => array('xlink', 'href', self::NS_XLINK),
        'xlink:role' => array('xlink', 'role', self::NS_XLINK),
        'xlink:show' => array('xlink', 'show', self::NS_XLINK),
        'xlink:title' => array('xlink', 'title', self::NS_XLINK),
        'xlink:type' => array('xlink', 'type', self::NS_XLINK),
        'xml:base' => array('xml', 'base', self::NS_XML),
        'xml:lang' => array('xml', 'lang', self::NS_XML),
        'xml:space' => array('xml', 'space', self::NS_XML),
        'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
        'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
    );

    if (empty($token['attr'])) {
        return $token;
    }

    foreach ($token['attr'] as &$kp) {
        if (isset($lookup[$kp['name']])) {
            $kp['name'] = $lookup[$kp['name']];
        }
    }
    // Break the reference so later writes to $kp cannot touch the token.
    unset($kp);

    return $token;
}
/**
 * Creates an element in a foreign (e.g. SVG or MathML) namespace for a
 * start tag token, copies its attributes, inserts the element at the
 * current insertion point and pushes it onto the stack of open elements.
 *
 * Attribute names may be plain strings or the array(prefix, local name,
 * namespace URI) triples produced by adjustForeignAttributes().
 *
 * @param array  $token        Start tag token.
 * @param string $namespaceURI Namespace to create the element in.
 * @return void
 */
public function insertForeignElement($token, $namespaceURI) {
    $el = $this->dom->createElementNS($namespaceURI, $token['name']);

    $pairs = empty($token['attr']) ? array() : $token['attr'];
    foreach ($pairs as $kp) {
        $name = $kp['name'];
        $ns = self::NS_HTML;
        if (is_array($name)) {
            $ns = $name[2];
            $name = $name[1];
        }

        // The first occurrence of an attribute wins.
        if ($el->hasAttributeNS($ns, $name)) {
            continue;
        }

        if ($ns === self::NS_XLINK) {
            // XSKETCHY: work around godawful libxml bug
            $el->setAttribute('xlink:'.$name, $kp['value']);
        } elseif ($ns === self::NS_HTML) {
            // Another godawful libxml bug
            $el->setAttribute($name, $kp['value']);
        } else {
            $el->setAttributeNS($ns, $name, $kp['value']);
        }
    }

    $this->appendToRealParent($el);
    $this->stack[] = $el;

    // XERROR: see below
    /* If the newly created element has an xmlns attribute in the XMLNS
     * namespace whose value is not exactly the same as the element's
     * namespace, that is a parse error. Similarly, if the newly created
     * element has an xmlns:xlink attribute in the XMLNS namespace whose
     * value is not the XLink Namespace, that is a parse error. */
}
/**
 * Normalizes the DOM and returns the parse result.
 *
 * @return DOMDocument|DOMNodeList The document itself for a full parse;
 *         for a fragment parse the child nodes of the fragment root when
 *         one was set up, otherwise the child nodes of the document.
 */
public function save() {
    $this->dom->normalize();

    if (!$this->fragment) {
        return $this->dom;
    }

    $container = $this->root ? $this->root : $this->dom;
    return $container->childNodes;
}