"Fossies" - the Fresh Open Source Software Archive

Member "4.6.1/vendor/HTMLPurifier/standalone/HTMLPurifier/Lexer/PH5P.php" (8 Apr 2021, 182583 Bytes) of package /linux/www/studip-4.6.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) PHP syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 <?php
    2 
    3 /**
    4  * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
    5  * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.
    6  *
    7  * @note
    8  *    Recent changes to PHP's DOM extension have resulted in some fatal
    9  *    error conditions with the original version of PH5P. Pending changes,
   10  *    this lexer will punt to DirectLex if DOM throws an exception.
   11  */
   12 
   13 class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
   14 {
   15     /**
   16      * @param string $html
   17      * @param HTMLPurifier_Config $config
   18      * @param HTMLPurifier_Context $context
   19      * @return HTMLPurifier_Token[]
   20      */
   21     public function tokenizeHTML($html, $config, $context)
   22     {
   23         $new_html = $this->normalize($html, $config, $context);
   24         $new_html = $this->wrapHTML($new_html, $config, $context, false /* no div */);
   25         try {
   26             $parser = new HTML5($new_html);
   27             $doc = $parser->save();
   28         } catch (DOMException $e) {
   29             // Uh oh, it failed. Punt to DirectLex.
   30             $lexer = new HTMLPurifier_Lexer_DirectLex();
   31             $context->register('PH5PError', $e); // save the error, so we can detect it
   32             return $lexer->tokenizeHTML($html, $config, $context); // use original HTML
   33         }
   34         $tokens = array();
   35         $this->tokenizeDOM(
   36             $doc->getElementsByTagName('html')->item(0)-> // <html>
   37                   getElementsByTagName('body')->item(0) //   <body>
   38             ,
   39             $tokens, $config
   40         );
   41         return $tokens;
   42     }
   43 }
   44 
   45 /*
   46 
   47 Copyright 2007 Jeroen van der Meer <http://jero.net/>
   48 
   49 Permission is hereby granted, free of charge, to any person obtaining a
   50 copy of this software and associated documentation files (the
   51 "Software"), to deal in the Software without restriction, including
   52 without limitation the rights to use, copy, modify, merge, publish,
   53 distribute, sublicense, and/or sell copies of the Software, and to
   54 permit persons to whom the Software is furnished to do so, subject to
   55 the following conditions:
   56 
   57 The above copyright notice and this permission notice shall be included
   58 in all copies or substantial portions of the Software.
   59 
   60 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   61 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   62 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   63 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   64 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   65 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   66 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   67 
   68 */
   69 
   70 class HTML5
   71 {
   72     private $data;
   73     private $char;
   74     private $EOF;
   75     private $state;
   76     private $tree;
   77     private $token;
   78     private $content_model;
   79     private $escape = false;
   80     private $entities = array(
   81         'AElig;',
   82         'AElig',
   83         'AMP;',
   84         'AMP',
   85         'Aacute;',
   86         'Aacute',
   87         'Acirc;',
   88         'Acirc',
   89         'Agrave;',
   90         'Agrave',
   91         'Alpha;',
   92         'Aring;',
   93         'Aring',
   94         'Atilde;',
   95         'Atilde',
   96         'Auml;',
   97         'Auml',
   98         'Beta;',
   99         'COPY;',
  100         'COPY',
  101         'Ccedil;',
  102         'Ccedil',
  103         'Chi;',
  104         'Dagger;',
  105         'Delta;',
  106         'ETH;',
  107         'ETH',
  108         'Eacute;',
  109         'Eacute',
  110         'Ecirc;',
  111         'Ecirc',
  112         'Egrave;',
  113         'Egrave',
  114         'Epsilon;',
  115         'Eta;',
  116         'Euml;',
  117         'Euml',
  118         'GT;',
  119         'GT',
  120         'Gamma;',
  121         'Iacute;',
  122         'Iacute',
  123         'Icirc;',
  124         'Icirc',
  125         'Igrave;',
  126         'Igrave',
  127         'Iota;',
  128         'Iuml;',
  129         'Iuml',
  130         'Kappa;',
  131         'LT;',
  132         'LT',
  133         'Lambda;',
  134         'Mu;',
  135         'Ntilde;',
  136         'Ntilde',
  137         'Nu;',
  138         'OElig;',
  139         'Oacute;',
  140         'Oacute',
  141         'Ocirc;',
  142         'Ocirc',
  143         'Ograve;',
  144         'Ograve',
  145         'Omega;',
  146         'Omicron;',
  147         'Oslash;',
  148         'Oslash',
  149         'Otilde;',
  150         'Otilde',
  151         'Ouml;',
  152         'Ouml',
  153         'Phi;',
  154         'Pi;',
  155         'Prime;',
  156         'Psi;',
  157         'QUOT;',
  158         'QUOT',
  159         'REG;',
  160         'REG',
  161         'Rho;',
  162         'Scaron;',
  163         'Sigma;',
  164         'THORN;',
  165         'THORN',
  166         'TRADE;',
  167         'Tau;',
  168         'Theta;',
  169         'Uacute;',
  170         'Uacute',
  171         'Ucirc;',
  172         'Ucirc',
  173         'Ugrave;',
  174         'Ugrave',
  175         'Upsilon;',
  176         'Uuml;',
  177         'Uuml',
  178         'Xi;',
  179         'Yacute;',
  180         'Yacute',
  181         'Yuml;',
  182         'Zeta;',
  183         'aacute;',
  184         'aacute',
  185         'acirc;',
  186         'acirc',
  187         'acute;',
  188         'acute',
  189         'aelig;',
  190         'aelig',
  191         'agrave;',
  192         'agrave',
  193         'alefsym;',
  194         'alpha;',
  195         'amp;',
  196         'amp',
  197         'and;',
  198         'ang;',
  199         'apos;',
  200         'aring;',
  201         'aring',
  202         'asymp;',
  203         'atilde;',
  204         'atilde',
  205         'auml;',
  206         'auml',
  207         'bdquo;',
  208         'beta;',
  209         'brvbar;',
  210         'brvbar',
  211         'bull;',
  212         'cap;',
  213         'ccedil;',
  214         'ccedil',
  215         'cedil;',
  216         'cedil',
  217         'cent;',
  218         'cent',
  219         'chi;',
  220         'circ;',
  221         'clubs;',
  222         'cong;',
  223         'copy;',
  224         'copy',
  225         'crarr;',
  226         'cup;',
  227         'curren;',
  228         'curren',
  229         'dArr;',
  230         'dagger;',
  231         'darr;',
  232         'deg;',
  233         'deg',
  234         'delta;',
  235         'diams;',
  236         'divide;',
  237         'divide',
  238         'eacute;',
  239         'eacute',
  240         'ecirc;',
  241         'ecirc',
  242         'egrave;',
  243         'egrave',
  244         'empty;',
  245         'emsp;',
  246         'ensp;',
  247         'epsilon;',
  248         'equiv;',
  249         'eta;',
  250         'eth;',
  251         'eth',
  252         'euml;',
  253         'euml',
  254         'euro;',
  255         'exist;',
  256         'fnof;',
  257         'forall;',
  258         'frac12;',
  259         'frac12',
  260         'frac14;',
  261         'frac14',
  262         'frac34;',
  263         'frac34',
  264         'frasl;',
  265         'gamma;',
  266         'ge;',
  267         'gt;',
  268         'gt',
  269         'hArr;',
  270         'harr;',
  271         'hearts;',
  272         'hellip;',
  273         'iacute;',
  274         'iacute',
  275         'icirc;',
  276         'icirc',
  277         'iexcl;',
  278         'iexcl',
  279         'igrave;',
  280         'igrave',
  281         'image;',
  282         'infin;',
  283         'int;',
  284         'iota;',
  285         'iquest;',
  286         'iquest',
  287         'isin;',
  288         'iuml;',
  289         'iuml',
  290         'kappa;',
  291         'lArr;',
  292         'lambda;',
  293         'lang;',
  294         'laquo;',
  295         'laquo',
  296         'larr;',
  297         'lceil;',
  298         'ldquo;',
  299         'le;',
  300         'lfloor;',
  301         'lowast;',
  302         'loz;',
  303         'lrm;',
  304         'lsaquo;',
  305         'lsquo;',
  306         'lt;',
  307         'lt',
  308         'macr;',
  309         'macr',
  310         'mdash;',
  311         'micro;',
  312         'micro',
  313         'middot;',
  314         'middot',
  315         'minus;',
  316         'mu;',
  317         'nabla;',
  318         'nbsp;',
  319         'nbsp',
  320         'ndash;',
  321         'ne;',
  322         'ni;',
  323         'not;',
  324         'not',
  325         'notin;',
  326         'nsub;',
  327         'ntilde;',
  328         'ntilde',
  329         'nu;',
  330         'oacute;',
  331         'oacute',
  332         'ocirc;',
  333         'ocirc',
  334         'oelig;',
  335         'ograve;',
  336         'ograve',
  337         'oline;',
  338         'omega;',
  339         'omicron;',
  340         'oplus;',
  341         'or;',
  342         'ordf;',
  343         'ordf',
  344         'ordm;',
  345         'ordm',
  346         'oslash;',
  347         'oslash',
  348         'otilde;',
  349         'otilde',
  350         'otimes;',
  351         'ouml;',
  352         'ouml',
  353         'para;',
  354         'para',
  355         'part;',
  356         'permil;',
  357         'perp;',
  358         'phi;',
  359         'pi;',
  360         'piv;',
  361         'plusmn;',
  362         'plusmn',
  363         'pound;',
  364         'pound',
  365         'prime;',
  366         'prod;',
  367         'prop;',
  368         'psi;',
  369         'quot;',
  370         'quot',
  371         'rArr;',
  372         'radic;',
  373         'rang;',
  374         'raquo;',
  375         'raquo',
  376         'rarr;',
  377         'rceil;',
  378         'rdquo;',
  379         'real;',
  380         'reg;',
  381         'reg',
  382         'rfloor;',
  383         'rho;',
  384         'rlm;',
  385         'rsaquo;',
  386         'rsquo;',
  387         'sbquo;',
  388         'scaron;',
  389         'sdot;',
  390         'sect;',
  391         'sect',
  392         'shy;',
  393         'shy',
  394         'sigma;',
  395         'sigmaf;',
  396         'sim;',
  397         'spades;',
  398         'sub;',
  399         'sube;',
  400         'sum;',
  401         'sup1;',
  402         'sup1',
  403         'sup2;',
  404         'sup2',
  405         'sup3;',
  406         'sup3',
  407         'sup;',
  408         'supe;',
  409         'szlig;',
  410         'szlig',
  411         'tau;',
  412         'there4;',
  413         'theta;',
  414         'thetasym;',
  415         'thinsp;',
  416         'thorn;',
  417         'thorn',
  418         'tilde;',
  419         'times;',
  420         'times',
  421         'trade;',
  422         'uArr;',
  423         'uacute;',
  424         'uacute',
  425         'uarr;',
  426         'ucirc;',
  427         'ucirc',
  428         'ugrave;',
  429         'ugrave',
  430         'uml;',
  431         'uml',
  432         'upsih;',
  433         'upsilon;',
  434         'uuml;',
  435         'uuml',
  436         'weierp;',
  437         'xi;',
  438         'yacute;',
  439         'yacute',
  440         'yen;',
  441         'yen',
  442         'yuml;',
  443         'yuml',
  444         'zeta;',
  445         'zwj;',
  446         'zwnj;'
  447     );
  448 
  449     const PCDATA = 0;
  450     const RCDATA = 1;
  451     const CDATA = 2;
  452     const PLAINTEXT = 3;
  453 
  454     const DOCTYPE = 0;
  455     const STARTTAG = 1;
  456     const ENDTAG = 2;
  457     const COMMENT = 3;
  458     const CHARACTR = 4;
  459     const EOF = 5;
  460 
  461     public function __construct($data)
  462     {
  463         $this->data = $data;
  464         $this->char = -1;
  465         $this->EOF = strlen($data);
  466         $this->tree = new HTML5TreeConstructer;
  467         $this->content_model = self::PCDATA;
  468 
  469         $this->state = 'data';
  470 
  471         while ($this->state !== null) {
  472             $this->{$this->state . 'State'}();
  473         }
  474     }
  475 
  476     public function save()
  477     {
  478         return $this->tree->save();
  479     }
  480 
  481     private function char()
  482     {
  483         return ($this->char < $this->EOF)
  484             ? $this->data[$this->char]
  485             : false;
  486     }
  487 
  488     private function character($s, $l = 0)
  489     {
  490         if ($s + $l < $this->EOF) {
  491             if ($l === 0) {
  492                 return $this->data[$s];
  493             } else {
  494                 return substr($this->data, $s, $l);
  495             }
  496         }
  497     }
  498 
  499     private function characters($char_class, $start)
  500     {
  501         return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start));
  502     }
  503 
  504     private function dataState()
  505     {
  506         // Consume the next input character
  507         $this->char++;
  508         $char = $this->char();
  509 
  510         if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
  511             /* U+0026 AMPERSAND (&)
  512             When the content model flag is set to one of the PCDATA or RCDATA
  513             states: switch to the entity data state. Otherwise: treat it as per
  514             the "anything else"    entry below. */
  515             $this->state = 'entityData';
  516 
  517         } elseif ($char === '-') {
  518             /* If the content model flag is set to either the RCDATA state or
  519             the CDATA state, and the escape flag is false, and there are at
  520             least three characters before this one in the input stream, and the
  521             last four characters in the input stream, including this one, are
  522             U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
  523             and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
  524             if (($this->content_model === self::RCDATA || $this->content_model ===
  525                     self::CDATA) && $this->escape === false &&
  526                 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--'
  527             ) {
  528                 $this->escape = true;
  529             }
  530 
  531             /* In any case, emit the input character as a character token. Stay
  532             in the data state. */
  533             $this->emitToken(
  534                 array(
  535                     'type' => self::CHARACTR,
  536                     'data' => $char
  537                 )
  538             );
  539 
  540             /* U+003C LESS-THAN SIGN (<) */
  541         } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
  542                 (($this->content_model === self::RCDATA ||
  543                         $this->content_model === self::CDATA) && $this->escape === false))
  544         ) {
  545             /* When the content model flag is set to the PCDATA state: switch
  546             to the tag open state.
  547 
  548             When the content model flag is set to either the RCDATA state or
  549             the CDATA state and the escape flag is false: switch to the tag
  550             open state.
  551 
  552             Otherwise: treat it as per the "anything else" entry below. */
  553             $this->state = 'tagOpen';
  554 
  555             /* U+003E GREATER-THAN SIGN (>) */
  556         } elseif ($char === '>') {
  557             /* If the content model flag is set to either the RCDATA state or
  558             the CDATA state, and the escape flag is true, and the last three
  559             characters in the input stream including this one are U+002D
  560             HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
  561             set the escape flag to false. */
  562             if (($this->content_model === self::RCDATA ||
  563                     $this->content_model === self::CDATA) && $this->escape === true &&
  564                 $this->character($this->char, 3) === '-->'
  565             ) {
  566                 $this->escape = false;
  567             }
  568 
  569             /* In any case, emit the input character as a character token.
  570             Stay in the data state. */
  571             $this->emitToken(
  572                 array(
  573                     'type' => self::CHARACTR,
  574                     'data' => $char
  575                 )
  576             );
  577 
  578         } elseif ($this->char === $this->EOF) {
  579             /* EOF
  580             Emit an end-of-file token. */
  581             $this->EOF();
  582 
  583         } elseif ($this->content_model === self::PLAINTEXT) {
  584             /* When the content model flag is set to the PLAINTEXT state
  585             THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
  586             the text and emit it as a character token. */
  587             $this->emitToken(
  588                 array(
  589                     'type' => self::CHARACTR,
  590                     'data' => substr($this->data, $this->char)
  591                 )
  592             );
  593 
  594             $this->EOF();
  595 
  596         } else {
  597             /* Anything else
  598             THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that
  599             otherwise would also be treated as a character token and emit it
  600             as a single character token. Stay in the data state. */
  601             $len = strcspn($this->data, '<&', $this->char);
  602             $char = substr($this->data, $this->char, $len);
  603             $this->char += $len - 1;
  604 
  605             $this->emitToken(
  606                 array(
  607                     'type' => self::CHARACTR,
  608                     'data' => $char
  609                 )
  610             );
  611 
  612             $this->state = 'data';
  613         }
  614     }
  615 
  616     private function entityDataState()
  617     {
  618         // Attempt to consume an entity.
  619         $entity = $this->entity();
  620 
  621         // If nothing is returned, emit a U+0026 AMPERSAND character token.
  622         // Otherwise, emit the character token that was returned.
  623         $char = (!$entity) ? '&' : $entity;
  624         $this->emitToken(
  625             array(
  626                 'type' => self::CHARACTR,
  627                 'data' => $char
  628             )
  629         );
  630 
  631         // Finally, switch to the data state.
  632         $this->state = 'data';
  633     }
  634 
  635     private function tagOpenState()
  636     {
  637         switch ($this->content_model) {
  638             case self::RCDATA:
  639             case self::CDATA:
  640                 /* If the next input character is a U+002F SOLIDUS (/) character,
  641                 consume it and switch to the close tag open state. If the next
  642                 input character is not a U+002F SOLIDUS (/) character, emit a
  643                 U+003C LESS-THAN SIGN character token and switch to the data
  644                 state to process the next input character. */
  645                 if ($this->character($this->char + 1) === '/') {
  646                     $this->char++;
  647                     $this->state = 'closeTagOpen';
  648 
  649                 } else {
  650                     $this->emitToken(
  651                         array(
  652                             'type' => self::CHARACTR,
  653                             'data' => '<'
  654                         )
  655                     );
  656 
  657                     $this->state = 'data';
  658                 }
  659                 break;
  660 
  661             case self::PCDATA:
  662                 // If the content model flag is set to the PCDATA state
  663                 // Consume the next input character:
  664                 $this->char++;
  665                 $char = $this->char();
  666 
  667                 if ($char === '!') {
  668                     /* U+0021 EXCLAMATION MARK (!)
  669                     Switch to the markup declaration open state. */
  670                     $this->state = 'markupDeclarationOpen';
  671 
  672                 } elseif ($char === '/') {
  673                     /* U+002F SOLIDUS (/)
  674                     Switch to the close tag open state. */
  675                     $this->state = 'closeTagOpen';
  676 
  677                 } elseif (preg_match('/^[A-Za-z]$/', $char)) {
  678                     /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
  679                     Create a new start tag token, set its tag name to the lowercase
  680                     version of the input character (add 0x0020 to the character's code
  681                     point), then switch to the tag name state. (Don't emit the token
  682                     yet; further details will be filled in before it is emitted.) */
  683                     $this->token = array(
  684                         'name' => strtolower($char),
  685                         'type' => self::STARTTAG,
  686                         'attr' => array()
  687                     );
  688 
  689                     $this->state = 'tagName';
  690 
  691                 } elseif ($char === '>') {
  692                     /* U+003E GREATER-THAN SIGN (>)
  693                     Parse error. Emit a U+003C LESS-THAN SIGN character token and a
  694                     U+003E GREATER-THAN SIGN character token. Switch to the data state. */
  695                     $this->emitToken(
  696                         array(
  697                             'type' => self::CHARACTR,
  698                             'data' => '<>'
  699                         )
  700                     );
  701 
  702                     $this->state = 'data';
  703 
  704                 } elseif ($char === '?') {
  705                     /* U+003F QUESTION MARK (?)
  706                     Parse error. Switch to the bogus comment state. */
  707                     $this->state = 'bogusComment';
  708 
  709                 } else {
  710                     /* Anything else
  711                     Parse error. Emit a U+003C LESS-THAN SIGN character token and
  712                     reconsume the current input character in the data state. */
  713                     $this->emitToken(
  714                         array(
  715                             'type' => self::CHARACTR,
  716                             'data' => '<'
  717                         )
  718                     );
  719 
  720                     $this->char--;
  721                     $this->state = 'data';
  722                 }
  723                 break;
  724         }
  725     }
  726 
  727     private function closeTagOpenState()
  728     {
  729         $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
  730         $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
  731 
  732         if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
  733             (!$the_same || ($the_same && (!preg_match(
  734                             '/[\t\n\x0b\x0c >\/]/',
  735                             $this->character($this->char + 1 + strlen($next_node))
  736                         ) || $this->EOF === $this->char)))
  737         ) {
  738             /* If the content model flag is set to the RCDATA or CDATA states then
  739             examine the next few characters. If they do not match the tag name of
  740             the last start tag token emitted (case insensitively), or if they do but
  741             they are not immediately followed by one of the following characters:
  742                 * U+0009 CHARACTER TABULATION
  743                 * U+000A LINE FEED (LF)
  744                 * U+000B LINE TABULATION
  745                 * U+000C FORM FEED (FF)
  746                 * U+0020 SPACE
  747                 * U+003E GREATER-THAN SIGN (>)
  748                 * U+002F SOLIDUS (/)
  749                 * EOF
  750             ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
  751             token, a U+002F SOLIDUS character token, and switch to the data state
  752             to process the next input character. */
  753             $this->emitToken(
  754                 array(
  755                     'type' => self::CHARACTR,
  756                     'data' => '</'
  757                 )
  758             );
  759 
  760             $this->state = 'data';
  761 
  762         } else {
  763             /* Otherwise, if the content model flag is set to the PCDATA state,
  764             or if the next few characters do match that tag name, consume the
  765             next input character: */
  766             $this->char++;
  767             $char = $this->char();
  768 
  769             if (preg_match('/^[A-Za-z]$/', $char)) {
  770                 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
  771                 Create a new end tag token, set its tag name to the lowercase version
  772                 of the input character (add 0x0020 to the character's code point), then
  773                 switch to the tag name state. (Don't emit the token yet; further details
  774                 will be filled in before it is emitted.) */
  775                 $this->token = array(
  776                     'name' => strtolower($char),
  777                     'type' => self::ENDTAG
  778                 );
  779 
  780                 $this->state = 'tagName';
  781 
  782             } elseif ($char === '>') {
  783                 /* U+003E GREATER-THAN SIGN (>)
  784                 Parse error. Switch to the data state. */
  785                 $this->state = 'data';
  786 
  787             } elseif ($this->char === $this->EOF) {
  788                 /* EOF
  789                 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
  790                 SOLIDUS character token. Reconsume the EOF character in the data state. */
  791                 $this->emitToken(
  792                     array(
  793                         'type' => self::CHARACTR,
  794                         'data' => '</'
  795                     )
  796                 );
  797 
  798                 $this->char--;
  799                 $this->state = 'data';
  800 
  801             } else {
  802                 /* Parse error. Switch to the bogus comment state. */
  803                 $this->state = 'bogusComment';
  804             }
  805         }
  806     }
  807 
  808     private function tagNameState()
  809     {
  810         // Consume the next input character:
  811         $this->char++;
  812         $char = $this->character($this->char);
  813 
  814         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
  815             /* U+0009 CHARACTER TABULATION
  816             U+000A LINE FEED (LF)
  817             U+000B LINE TABULATION
  818             U+000C FORM FEED (FF)
  819             U+0020 SPACE
  820             Switch to the before attribute name state. */
  821             $this->state = 'beforeAttributeName';
  822 
  823         } elseif ($char === '>') {
  824             /* U+003E GREATER-THAN SIGN (>)
  825             Emit the current tag token. Switch to the data state. */
  826             $this->emitToken($this->token);
  827             $this->state = 'data';
  828 
  829         } elseif ($this->char === $this->EOF) {
  830             /* EOF
  831             Parse error. Emit the current tag token. Reconsume the EOF
  832             character in the data state. */
  833             $this->emitToken($this->token);
  834 
  835             $this->char--;
  836             $this->state = 'data';
  837 
  838         } elseif ($char === '/') {
  839             /* U+002F SOLIDUS (/)
  840             Parse error unless this is a permitted slash. Switch to the before
  841             attribute name state. */
  842             $this->state = 'beforeAttributeName';
  843 
  844         } else {
  845             /* Anything else
  846             Append the current input character to the current tag token's tag name.
  847             Stay in the tag name state. */
  848             $this->token['name'] .= strtolower($char);
  849             $this->state = 'tagName';
  850         }
  851     }
  852 
  853     private function beforeAttributeNameState()
  854     {
  855         // Consume the next input character:
  856         $this->char++;
  857         $char = $this->character($this->char);
  858 
  859         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
  860             /* U+0009 CHARACTER TABULATION
  861             U+000A LINE FEED (LF)
  862             U+000B LINE TABULATION
  863             U+000C FORM FEED (FF)
  864             U+0020 SPACE
  865             Stay in the before attribute name state. */
  866             $this->state = 'beforeAttributeName';
  867 
  868         } elseif ($char === '>') {
  869             /* U+003E GREATER-THAN SIGN (>)
  870             Emit the current tag token. Switch to the data state. */
  871             $this->emitToken($this->token);
  872             $this->state = 'data';
  873 
  874         } elseif ($char === '/') {
  875             /* U+002F SOLIDUS (/)
  876             Parse error unless this is a permitted slash. Stay in the before
  877             attribute name state. */
  878             $this->state = 'beforeAttributeName';
  879 
  880         } elseif ($this->char === $this->EOF) {
  881             /* EOF
  882             Parse error. Emit the current tag token. Reconsume the EOF
  883             character in the data state. */
  884             $this->emitToken($this->token);
  885 
  886             $this->char--;
  887             $this->state = 'data';
  888 
  889         } else {
  890             /* Anything else
  891             Start a new attribute in the current tag token. Set that attribute's
  892             name to the current input character, and its value to the empty string.
  893             Switch to the attribute name state. */
  894             $this->token['attr'][] = array(
  895                 'name' => strtolower($char),
  896                 'value' => null
  897             );
  898 
  899             $this->state = 'attributeName';
  900         }
  901     }
  902 
  903     private function attributeNameState()
  904     {
  905         // Consume the next input character:
  906         $this->char++;
  907         $char = $this->character($this->char);
  908 
  909         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
  910             /* U+0009 CHARACTER TABULATION
  911             U+000A LINE FEED (LF)
  912             U+000B LINE TABULATION
  913             U+000C FORM FEED (FF)
  914             U+0020 SPACE
  915             Stay in the before attribute name state. */
  916             $this->state = 'afterAttributeName';
  917 
  918         } elseif ($char === '=') {
  919             /* U+003D EQUALS SIGN (=)
  920             Switch to the before attribute value state. */
  921             $this->state = 'beforeAttributeValue';
  922 
  923         } elseif ($char === '>') {
  924             /* U+003E GREATER-THAN SIGN (>)
  925             Emit the current tag token. Switch to the data state. */
  926             $this->emitToken($this->token);
  927             $this->state = 'data';
  928 
  929         } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
  930             /* U+002F SOLIDUS (/)
  931             Parse error unless this is a permitted slash. Switch to the before
  932             attribute name state. */
  933             $this->state = 'beforeAttributeName';
  934 
  935         } elseif ($this->char === $this->EOF) {
  936             /* EOF
  937             Parse error. Emit the current tag token. Reconsume the EOF
  938             character in the data state. */
  939             $this->emitToken($this->token);
  940 
  941             $this->char--;
  942             $this->state = 'data';
  943 
  944         } else {
  945             /* Anything else
  946             Append the current input character to the current attribute's name.
  947             Stay in the attribute name state. */
  948             $last = count($this->token['attr']) - 1;
  949             $this->token['attr'][$last]['name'] .= strtolower($char);
  950 
  951             $this->state = 'attributeName';
  952         }
  953     }
  954 
  955     private function afterAttributeNameState()
  956     {
  957         // Consume the next input character:
  958         $this->char++;
  959         $char = $this->character($this->char);
  960 
  961         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
  962             /* U+0009 CHARACTER TABULATION
  963             U+000A LINE FEED (LF)
  964             U+000B LINE TABULATION
  965             U+000C FORM FEED (FF)
  966             U+0020 SPACE
  967             Stay in the after attribute name state. */
  968             $this->state = 'afterAttributeName';
  969 
  970         } elseif ($char === '=') {
  971             /* U+003D EQUALS SIGN (=)
  972             Switch to the before attribute value state. */
  973             $this->state = 'beforeAttributeValue';
  974 
  975         } elseif ($char === '>') {
  976             /* U+003E GREATER-THAN SIGN (>)
  977             Emit the current tag token. Switch to the data state. */
  978             $this->emitToken($this->token);
  979             $this->state = 'data';
  980 
  981         } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
  982             /* U+002F SOLIDUS (/)
  983             Parse error unless this is a permitted slash. Switch to the
  984             before attribute name state. */
  985             $this->state = 'beforeAttributeName';
  986 
  987         } elseif ($this->char === $this->EOF) {
  988             /* EOF
  989             Parse error. Emit the current tag token. Reconsume the EOF
  990             character in the data state. */
  991             $this->emitToken($this->token);
  992 
  993             $this->char--;
  994             $this->state = 'data';
  995 
  996         } else {
  997             /* Anything else
  998             Start a new attribute in the current tag token. Set that attribute's
  999             name to the current input character, and its value to the empty string.
 1000             Switch to the attribute name state. */
 1001             $this->token['attr'][] = array(
 1002                 'name' => strtolower($char),
 1003                 'value' => null
 1004             );
 1005 
 1006             $this->state = 'attributeName';
 1007         }
 1008     }
 1009 
 1010     private function beforeAttributeValueState()
 1011     {
 1012         // Consume the next input character:
 1013         $this->char++;
 1014         $char = $this->character($this->char);
 1015 
 1016         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 1017             /* U+0009 CHARACTER TABULATION
 1018             U+000A LINE FEED (LF)
 1019             U+000B LINE TABULATION
 1020             U+000C FORM FEED (FF)
 1021             U+0020 SPACE
 1022             Stay in the before attribute value state. */
 1023             $this->state = 'beforeAttributeValue';
 1024 
 1025         } elseif ($char === '"') {
 1026             /* U+0022 QUOTATION MARK (")
 1027             Switch to the attribute value (double-quoted) state. */
 1028             $this->state = 'attributeValueDoubleQuoted';
 1029 
 1030         } elseif ($char === '&') {
 1031             /* U+0026 AMPERSAND (&)
 1032             Switch to the attribute value (unquoted) state and reconsume
 1033             this input character. */
 1034             $this->char--;
 1035             $this->state = 'attributeValueUnquoted';
 1036 
 1037         } elseif ($char === '\'') {
 1038             /* U+0027 APOSTROPHE (')
 1039             Switch to the attribute value (single-quoted) state. */
 1040             $this->state = 'attributeValueSingleQuoted';
 1041 
 1042         } elseif ($char === '>') {
 1043             /* U+003E GREATER-THAN SIGN (>)
 1044             Emit the current tag token. Switch to the data state. */
 1045             $this->emitToken($this->token);
 1046             $this->state = 'data';
 1047 
 1048         } else {
 1049             /* Anything else
 1050             Append the current input character to the current attribute's value.
 1051             Switch to the attribute value (unquoted) state. */
 1052             $last = count($this->token['attr']) - 1;
 1053             $this->token['attr'][$last]['value'] .= $char;
 1054 
 1055             $this->state = 'attributeValueUnquoted';
 1056         }
 1057     }
 1058 
 1059     private function attributeValueDoubleQuotedState()
 1060     {
 1061         // Consume the next input character:
 1062         $this->char++;
 1063         $char = $this->character($this->char);
 1064 
 1065         if ($char === '"') {
 1066             /* U+0022 QUOTATION MARK (")
 1067             Switch to the before attribute name state. */
 1068             $this->state = 'beforeAttributeName';
 1069 
 1070         } elseif ($char === '&') {
 1071             /* U+0026 AMPERSAND (&)
 1072             Switch to the entity in attribute value state. */
 1073             $this->entityInAttributeValueState('double');
 1074 
 1075         } elseif ($this->char === $this->EOF) {
 1076             /* EOF
 1077             Parse error. Emit the current tag token. Reconsume the character
 1078             in the data state. */
 1079             $this->emitToken($this->token);
 1080 
 1081             $this->char--;
 1082             $this->state = 'data';
 1083 
 1084         } else {
 1085             /* Anything else
 1086             Append the current input character to the current attribute's value.
 1087             Stay in the attribute value (double-quoted) state. */
 1088             $last = count($this->token['attr']) - 1;
 1089             $this->token['attr'][$last]['value'] .= $char;
 1090 
 1091             $this->state = 'attributeValueDoubleQuoted';
 1092         }
 1093     }
 1094 
 1095     private function attributeValueSingleQuotedState()
 1096     {
 1097         // Consume the next input character:
 1098         $this->char++;
 1099         $char = $this->character($this->char);
 1100 
 1101         if ($char === '\'') {
 1102             /* U+0022 QUOTATION MARK (')
 1103             Switch to the before attribute name state. */
 1104             $this->state = 'beforeAttributeName';
 1105 
 1106         } elseif ($char === '&') {
 1107             /* U+0026 AMPERSAND (&)
 1108             Switch to the entity in attribute value state. */
 1109             $this->entityInAttributeValueState('single');
 1110 
 1111         } elseif ($this->char === $this->EOF) {
 1112             /* EOF
 1113             Parse error. Emit the current tag token. Reconsume the character
 1114             in the data state. */
 1115             $this->emitToken($this->token);
 1116 
 1117             $this->char--;
 1118             $this->state = 'data';
 1119 
 1120         } else {
 1121             /* Anything else
 1122             Append the current input character to the current attribute's value.
 1123             Stay in the attribute value (single-quoted) state. */
 1124             $last = count($this->token['attr']) - 1;
 1125             $this->token['attr'][$last]['value'] .= $char;
 1126 
 1127             $this->state = 'attributeValueSingleQuoted';
 1128         }
 1129     }
 1130 
 1131     private function attributeValueUnquotedState()
 1132     {
 1133         // Consume the next input character:
 1134         $this->char++;
 1135         $char = $this->character($this->char);
 1136 
 1137         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 1138             /* U+0009 CHARACTER TABULATION
 1139             U+000A LINE FEED (LF)
 1140             U+000B LINE TABULATION
 1141             U+000C FORM FEED (FF)
 1142             U+0020 SPACE
 1143             Switch to the before attribute name state. */
 1144             $this->state = 'beforeAttributeName';
 1145 
 1146         } elseif ($char === '&') {
 1147             /* U+0026 AMPERSAND (&)
 1148             Switch to the entity in attribute value state. */
 1149             $this->entityInAttributeValueState();
 1150 
 1151         } elseif ($char === '>') {
 1152             /* U+003E GREATER-THAN SIGN (>)
 1153             Emit the current tag token. Switch to the data state. */
 1154             $this->emitToken($this->token);
 1155             $this->state = 'data';
 1156 
 1157         } else {
 1158             /* Anything else
 1159             Append the current input character to the current attribute's value.
 1160             Stay in the attribute value (unquoted) state. */
 1161             $last = count($this->token['attr']) - 1;
 1162             $this->token['attr'][$last]['value'] .= $char;
 1163 
 1164             $this->state = 'attributeValueUnquoted';
 1165         }
 1166     }
 1167 
 1168     private function entityInAttributeValueState()
 1169     {
 1170         // Attempt to consume an entity.
 1171         $entity = $this->entity();
 1172 
 1173         // If nothing is returned, append a U+0026 AMPERSAND character to the
 1174         // current attribute's value. Otherwise, emit the character token that
 1175         // was returned.
 1176         $char = (!$entity)
 1177             ? '&'
 1178             : $entity;
 1179 
 1180         $last = count($this->token['attr']) - 1;
 1181         $this->token['attr'][$last]['value'] .= $char;
 1182     }
 1183 
 1184     private function bogusCommentState()
 1185     {
 1186         /* Consume every character up to the first U+003E GREATER-THAN SIGN
 1187         character (>) or the end of the file (EOF), whichever comes first. Emit
 1188         a comment token whose data is the concatenation of all the characters
 1189         starting from and including the character that caused the state machine
 1190         to switch into the bogus comment state, up to and including the last
 1191         consumed character before the U+003E character, if any, or up to the
 1192         end of the file otherwise. (If the comment was started by the end of
 1193         the file (EOF), the token is empty.) */
 1194         $data = $this->characters('^>', $this->char);
 1195         $this->emitToken(
 1196             array(
 1197                 'data' => $data,
 1198                 'type' => self::COMMENT
 1199             )
 1200         );
 1201 
 1202         $this->char += strlen($data);
 1203 
 1204         /* Switch to the data state. */
 1205         $this->state = 'data';
 1206 
 1207         /* If the end of the file was reached, reconsume the EOF character. */
 1208         if ($this->char === $this->EOF) {
 1209             $this->char = $this->EOF - 1;
 1210         }
 1211     }
 1212 
 1213     private function markupDeclarationOpenState()
 1214     {
 1215         /* If the next two characters are both U+002D HYPHEN-MINUS (-)
 1216         characters, consume those two characters, create a comment token whose
 1217         data is the empty string, and switch to the comment state. */
 1218         if ($this->character($this->char + 1, 2) === '--') {
 1219             $this->char += 2;
 1220             $this->state = 'comment';
 1221             $this->token = array(
 1222                 'data' => null,
 1223                 'type' => self::COMMENT
 1224             );
 1225 
 1226             /* Otherwise if the next seven chacacters are a case-insensitive match
 1227             for the word "DOCTYPE", then consume those characters and switch to the
 1228             DOCTYPE state. */
 1229         } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
 1230             $this->char += 7;
 1231             $this->state = 'doctype';
 1232 
 1233             /* Otherwise, is is a parse error. Switch to the bogus comment state.
 1234             The next character that is consumed, if any, is the first character
 1235             that will be in the comment. */
 1236         } else {
 1237             $this->char++;
 1238             $this->state = 'bogusComment';
 1239         }
 1240     }
 1241 
 1242     private function commentState()
 1243     {
 1244         /* Consume the next input character: */
 1245         $this->char++;
 1246         $char = $this->char();
 1247 
 1248         /* U+002D HYPHEN-MINUS (-) */
 1249         if ($char === '-') {
 1250             /* Switch to the comment dash state  */
 1251             $this->state = 'commentDash';
 1252 
 1253             /* EOF */
 1254         } elseif ($this->char === $this->EOF) {
 1255             /* Parse error. Emit the comment token. Reconsume the EOF character
 1256             in the data state. */
 1257             $this->emitToken($this->token);
 1258             $this->char--;
 1259             $this->state = 'data';
 1260 
 1261             /* Anything else */
 1262         } else {
 1263             /* Append the input character to the comment token's data. Stay in
 1264             the comment state. */
 1265             $this->token['data'] .= $char;
 1266         }
 1267     }
 1268 
 1269     private function commentDashState()
 1270     {
 1271         /* Consume the next input character: */
 1272         $this->char++;
 1273         $char = $this->char();
 1274 
 1275         /* U+002D HYPHEN-MINUS (-) */
 1276         if ($char === '-') {
 1277             /* Switch to the comment end state  */
 1278             $this->state = 'commentEnd';
 1279 
 1280             /* EOF */
 1281         } elseif ($this->char === $this->EOF) {
 1282             /* Parse error. Emit the comment token. Reconsume the EOF character
 1283             in the data state. */
 1284             $this->emitToken($this->token);
 1285             $this->char--;
 1286             $this->state = 'data';
 1287 
 1288             /* Anything else */
 1289         } else {
 1290             /* Append a U+002D HYPHEN-MINUS (-) character and the input
 1291             character to the comment token's data. Switch to the comment state. */
 1292             $this->token['data'] .= '-' . $char;
 1293             $this->state = 'comment';
 1294         }
 1295     }
 1296 
 1297     private function commentEndState()
 1298     {
 1299         /* Consume the next input character: */
 1300         $this->char++;
 1301         $char = $this->char();
 1302 
 1303         if ($char === '>') {
 1304             $this->emitToken($this->token);
 1305             $this->state = 'data';
 1306 
 1307         } elseif ($char === '-') {
 1308             $this->token['data'] .= '-';
 1309 
 1310         } elseif ($this->char === $this->EOF) {
 1311             $this->emitToken($this->token);
 1312             $this->char--;
 1313             $this->state = 'data';
 1314 
 1315         } else {
 1316             $this->token['data'] .= '--' . $char;
 1317             $this->state = 'comment';
 1318         }
 1319     }
 1320 
 1321     private function doctypeState()
 1322     {
 1323         /* Consume the next input character: */
 1324         $this->char++;
 1325         $char = $this->char();
 1326 
 1327         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 1328             $this->state = 'beforeDoctypeName';
 1329 
 1330         } else {
 1331             $this->char--;
 1332             $this->state = 'beforeDoctypeName';
 1333         }
 1334     }
 1335 
 1336     private function beforeDoctypeNameState()
 1337     {
 1338         /* Consume the next input character: */
 1339         $this->char++;
 1340         $char = $this->char();
 1341 
 1342         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 1343             // Stay in the before DOCTYPE name state.
 1344 
 1345         } elseif (preg_match('/^[a-z]$/', $char)) {
 1346             $this->token = array(
 1347                 'name' => strtoupper($char),
 1348                 'type' => self::DOCTYPE,
 1349                 'error' => true
 1350             );
 1351 
 1352             $this->state = 'doctypeName';
 1353 
 1354         } elseif ($char === '>') {
 1355             $this->emitToken(
 1356                 array(
 1357                     'name' => null,
 1358                     'type' => self::DOCTYPE,
 1359                     'error' => true
 1360                 )
 1361             );
 1362 
 1363             $this->state = 'data';
 1364 
 1365         } elseif ($this->char === $this->EOF) {
 1366             $this->emitToken(
 1367                 array(
 1368                     'name' => null,
 1369                     'type' => self::DOCTYPE,
 1370                     'error' => true
 1371                 )
 1372             );
 1373 
 1374             $this->char--;
 1375             $this->state = 'data';
 1376 
 1377         } else {
 1378             $this->token = array(
 1379                 'name' => $char,
 1380                 'type' => self::DOCTYPE,
 1381                 'error' => true
 1382             );
 1383 
 1384             $this->state = 'doctypeName';
 1385         }
 1386     }
 1387 
 1388     private function doctypeNameState()
 1389     {
 1390         /* Consume the next input character: */
 1391         $this->char++;
 1392         $char = $this->char();
 1393 
 1394         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 1395             $this->state = 'AfterDoctypeName';
 1396 
 1397         } elseif ($char === '>') {
 1398             $this->emitToken($this->token);
 1399             $this->state = 'data';
 1400 
 1401         } elseif (preg_match('/^[a-z]$/', $char)) {
 1402             $this->token['name'] .= strtoupper($char);
 1403 
 1404         } elseif ($this->char === $this->EOF) {
 1405             $this->emitToken($this->token);
 1406             $this->char--;
 1407             $this->state = 'data';
 1408 
 1409         } else {
 1410             $this->token['name'] .= $char;
 1411         }
 1412 
 1413         $this->token['error'] = ($this->token['name'] === 'HTML')
 1414             ? false
 1415             : true;
 1416     }
 1417 
 1418     private function afterDoctypeNameState()
 1419     {
 1420         /* Consume the next input character: */
 1421         $this->char++;
 1422         $char = $this->char();
 1423 
 1424         if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
 1425             // Stay in the DOCTYPE name state.
 1426 
 1427         } elseif ($char === '>') {
 1428             $this->emitToken($this->token);
 1429             $this->state = 'data';
 1430 
 1431         } elseif ($this->char === $this->EOF) {
 1432             $this->emitToken($this->token);
 1433             $this->char--;
 1434             $this->state = 'data';
 1435 
 1436         } else {
 1437             $this->token['error'] = true;
 1438             $this->state = 'bogusDoctype';
 1439         }
 1440     }
 1441 
 1442     private function bogusDoctypeState()
 1443     {
 1444         /* Consume the next input character: */
 1445         $this->char++;
 1446         $char = $this->char();
 1447 
 1448         if ($char === '>') {
 1449             $this->emitToken($this->token);
 1450             $this->state = 'data';
 1451 
 1452         } elseif ($this->char === $this->EOF) {
 1453             $this->emitToken($this->token);
 1454             $this->char--;
 1455             $this->state = 'data';
 1456 
 1457         } else {
 1458             // Stay in the bogus DOCTYPE state.
 1459         }
 1460     }
 1461 
 1462     private function entity()
 1463     {
 1464         $start = $this->char;
 1465 
 1466         // This section defines how to consume an entity. This definition is
 1467         // used when parsing entities in text and in attributes.
 1468 
 1469         // The behaviour depends on the identity of the next character (the
 1470         // one immediately after the U+0026 AMPERSAND character):
 1471 
 1472         switch ($this->character($this->char + 1)) {
 1473             // U+0023 NUMBER SIGN (#)
 1474             case '#':
 1475 
 1476                 // The behaviour further depends on the character after the
 1477                 // U+0023 NUMBER SIGN:
 1478                 switch ($this->character($this->char + 1)) {
 1479                     // U+0078 LATIN SMALL LETTER X
 1480                     // U+0058 LATIN CAPITAL LETTER X
 1481                     case 'x':
 1482                     case 'X':
 1483                         // Follow the steps below, but using the range of
 1484                         // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
 1485                         // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
 1486                         // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
 1487                         // A, through to U+0046 LATIN CAPITAL LETTER F (in other
 1488                         // words, 0-9, A-F, a-f).
 1489                         $char = 1;
 1490                         $char_class = '0-9A-Fa-f';
 1491                         break;
 1492 
 1493                     // Anything else
 1494                     default:
 1495                         // Follow the steps below, but using the range of
 1496                         // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
 1497                         // NINE (i.e. just 0-9).
 1498                         $char = 0;
 1499                         $char_class = '0-9';
 1500                         break;
 1501                 }
 1502 
 1503                 // Consume as many characters as match the range of characters
 1504                 // given above.
 1505                 $this->char++;
 1506                 $e_name = $this->characters($char_class, $this->char + $char + 1);
 1507                 $entity = $this->character($start, $this->char);
 1508                 $cond = strlen($e_name) > 0;
 1509 
 1510                 // The rest of the parsing happens bellow.
 1511                 break;
 1512 
 1513             // Anything else
 1514             default:
 1515                 // Consume the maximum number of characters possible, with the
 1516                 // consumed characters case-sensitively matching one of the
 1517                 // identifiers in the first column of the entities table.
 1518 
 1519                 $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
 1520                 $len = strlen($e_name);
 1521 
 1522                 for ($c = 1; $c <= $len; $c++) {
 1523                     $id = substr($e_name, 0, $c);
 1524                     $this->char++;
 1525 
 1526                     if (in_array($id, $this->entities)) {
 1527                         if ($e_name[$c - 1] !== ';') {
 1528                             if ($c < $len && $e_name[$c] == ';') {
 1529                                 $this->char++; // consume extra semicolon
 1530                             }
 1531                         }
 1532                         $entity = $id;
 1533                         break;
 1534                     }
 1535                 }
 1536 
 1537                 $cond = isset($entity);
 1538                 // The rest of the parsing happens bellow.
 1539                 break;
 1540         }
 1541 
 1542         if (!$cond) {
 1543             // If no match can be made, then this is a parse error. No
 1544             // characters are consumed, and nothing is returned.
 1545             $this->char = $start;
 1546             return false;
 1547         }
 1548 
 1549         // Return a character token for the character corresponding to the
 1550         // entity name (as given by the second column of the entities table).
 1551         return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
 1552     }
 1553 
 1554     private function emitToken($token)
 1555     {
 1556         $emit = $this->tree->emitToken($token);
 1557 
 1558         if (is_int($emit)) {
 1559             $this->content_model = $emit;
 1560 
 1561         } elseif ($token['type'] === self::ENDTAG) {
 1562             $this->content_model = self::PCDATA;
 1563         }
 1564     }
 1565 
 1566     private function EOF()
 1567     {
 1568         $this->state = null;
 1569         $this->tree->emitToken(
 1570             array(
 1571                 'type' => self::EOF
 1572             )
 1573         );
 1574     }
 1575 }
 1576 
 1577 class HTML5TreeConstructer
 1578 {
 1579     public $stack = array();
 1580 
 1581     private $phase;
 1582     private $mode;
 1583     private $dom;
 1584     private $foster_parent = null;
 1585     private $a_formatting = array();
 1586 
 1587     private $head_pointer = null;
 1588     private $form_pointer = null;
 1589 
 1590     private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th');
 1591     private $formatting = array(
 1592         'a',
 1593         'b',
 1594         'big',
 1595         'em',
 1596         'font',
 1597         'i',
 1598         'nobr',
 1599         's',
 1600         'small',
 1601         'strike',
 1602         'strong',
 1603         'tt',
 1604         'u'
 1605     );
 1606     private $special = array(
 1607         'address',
 1608         'area',
 1609         'base',
 1610         'basefont',
 1611         'bgsound',
 1612         'blockquote',
 1613         'body',
 1614         'br',
 1615         'center',
 1616         'col',
 1617         'colgroup',
 1618         'dd',
 1619         'dir',
 1620         'div',
 1621         'dl',
 1622         'dt',
 1623         'embed',
 1624         'fieldset',
 1625         'form',
 1626         'frame',
 1627         'frameset',
 1628         'h1',
 1629         'h2',
 1630         'h3',
 1631         'h4',
 1632         'h5',
 1633         'h6',
 1634         'head',
 1635         'hr',
 1636         'iframe',
 1637         'image',
 1638         'img',
 1639         'input',
 1640         'isindex',
 1641         'li',
 1642         'link',
 1643         'listing',
 1644         'menu',
 1645         'meta',
 1646         'noembed',
 1647         'noframes',
 1648         'noscript',
 1649         'ol',
 1650         'optgroup',
 1651         'option',
 1652         'p',
 1653         'param',
 1654         'plaintext',
 1655         'pre',
 1656         'script',
 1657         'select',
 1658         'spacer',
 1659         'style',
 1660         'tbody',
 1661         'textarea',
 1662         'tfoot',
 1663         'thead',
 1664         'title',
 1665         'tr',
 1666         'ul',
 1667         'wbr'
 1668     );
 1669 
 1670     // The different phases.
 1671     const INIT_PHASE = 0;
 1672     const ROOT_PHASE = 1;
 1673     const MAIN_PHASE = 2;
 1674     const END_PHASE = 3;
 1675 
 1676     // The different insertion modes for the main phase.
 1677     const BEFOR_HEAD = 0;
 1678     const IN_HEAD = 1;
 1679     const AFTER_HEAD = 2;
 1680     const IN_BODY = 3;
 1681     const IN_TABLE = 4;
 1682     const IN_CAPTION = 5;
 1683     const IN_CGROUP = 6;
 1684     const IN_TBODY = 7;
 1685     const IN_ROW = 8;
 1686     const IN_CELL = 9;
 1687     const IN_SELECT = 10;
 1688     const AFTER_BODY = 11;
 1689     const IN_FRAME = 12;
 1690     const AFTR_FRAME = 13;
 1691 
 1692     // The different types of elements.
 1693     const SPECIAL = 0;
 1694     const SCOPING = 1;
 1695     const FORMATTING = 2;
 1696     const PHRASING = 3;
 1697 
 1698     const MARKER = 0;
 1699 
 1700     public function __construct()
 1701     {
 1702         $this->phase = self::INIT_PHASE;
 1703         $this->mode = self::BEFOR_HEAD;
 1704         $this->dom = new DOMDocument;
 1705 
 1706         $this->dom->encoding = 'UTF-8';
 1707         $this->dom->preserveWhiteSpace = true;
 1708         $this->dom->substituteEntities = true;
 1709         $this->dom->strictErrorChecking = false;
 1710     }
 1711 
 1712     // Process tag tokens
 1713     public function emitToken($token)
 1714     {
 1715         switch ($this->phase) {
 1716             case self::INIT_PHASE:
 1717                 return $this->initPhase($token);
 1718                 break;
 1719             case self::ROOT_PHASE:
 1720                 return $this->rootElementPhase($token);
 1721                 break;
 1722             case self::MAIN_PHASE:
 1723                 return $this->mainPhase($token);
 1724                 break;
 1725             case self::END_PHASE :
 1726                 return $this->trailingEndPhase($token);
 1727                 break;
 1728         }
 1729     }
 1730 
 1731     private function initPhase($token)
 1732     {
 1733         /* Initially, the tree construction stage must handle each token
 1734         emitted from the tokenisation stage as follows: */
 1735 
 1736         /* A DOCTYPE token that is marked as being in error
 1737         A comment token
 1738         A start tag token
 1739         An end tag token
 1740         A character token that is not one of one of U+0009 CHARACTER TABULATION,
 1741             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 1742             or U+0020 SPACE
 1743         An end-of-file token */
 1744         if ((isset($token['error']) && $token['error']) ||
 1745             $token['type'] === HTML5::COMMENT ||
 1746             $token['type'] === HTML5::STARTTAG ||
 1747             $token['type'] === HTML5::ENDTAG ||
 1748             $token['type'] === HTML5::EOF ||
 1749             ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
 1750                 !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))
 1751         ) {
 1752             /* This specification does not define how to handle this case. In
 1753             particular, user agents may ignore the entirety of this specification
 1754             altogether for such documents, and instead invoke special parse modes
 1755             with a greater emphasis on backwards compatibility. */
 1756 
 1757             $this->phase = self::ROOT_PHASE;
 1758             return $this->rootElementPhase($token);
 1759 
 1760             /* A DOCTYPE token marked as being correct */
 1761         } elseif (isset($token['error']) && !$token['error']) {
 1762             /* Append a DocumentType node to the Document  node, with the name
 1763             attribute set to the name given in the DOCTYPE token (which will be
 1764             "HTML"), and the other attributes specific to DocumentType objects
 1765             set to null, empty lists, or the empty string as appropriate. */
 1766             $doctype = new DOMDocumentType(null, null, 'HTML');
 1767 
 1768             /* Then, switch to the root element phase of the tree construction
 1769             stage. */
 1770             $this->phase = self::ROOT_PHASE;
 1771 
 1772             /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 1773             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 1774             or U+0020 SPACE */
 1775         } elseif (isset($token['data']) && preg_match(
 1776                 '/^[\t\n\x0b\x0c ]+$/',
 1777                 $token['data']
 1778             )
 1779         ) {
 1780             /* Append that character  to the Document node. */
 1781             $text = $this->dom->createTextNode($token['data']);
 1782             $this->dom->appendChild($text);
 1783         }
 1784     }
 1785 
 1786     private function rootElementPhase($token)
 1787     {
 1788         /* After the initial phase, as each token is emitted from the tokenisation
 1789         stage, it must be processed as described in this section. */
 1790 
 1791         /* A DOCTYPE token */
 1792         if ($token['type'] === HTML5::DOCTYPE) {
 1793             // Parse error. Ignore the token.
 1794 
 1795             /* A comment token */
 1796         } elseif ($token['type'] === HTML5::COMMENT) {
 1797             /* Append a Comment node to the Document object with the data
 1798             attribute set to the data given in the comment token. */
 1799             $comment = $this->dom->createComment($token['data']);
 1800             $this->dom->appendChild($comment);
 1801 
 1802             /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 1803             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 1804             or U+0020 SPACE */
 1805         } elseif ($token['type'] === HTML5::CHARACTR &&
 1806             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 1807         ) {
 1808             /* Append that character  to the Document node. */
 1809             $text = $this->dom->createTextNode($token['data']);
 1810             $this->dom->appendChild($text);
 1811 
 1812             /* A character token that is not one of U+0009 CHARACTER TABULATION,
 1813                 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
 1814                 (FF), or U+0020 SPACE
 1815             A start tag token
 1816             An end tag token
 1817             An end-of-file token */
 1818         } elseif (($token['type'] === HTML5::CHARACTR &&
 1819                 !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
 1820             $token['type'] === HTML5::STARTTAG ||
 1821             $token['type'] === HTML5::ENDTAG ||
 1822             $token['type'] === HTML5::EOF
 1823         ) {
 1824             /* Create an HTMLElement node with the tag name html, in the HTML
 1825             namespace. Append it to the Document object. Switch to the main
 1826             phase and reprocess the current token. */
 1827             $html = $this->dom->createElement('html');
 1828             $this->dom->appendChild($html);
 1829             $this->stack[] = $html;
 1830 
 1831             $this->phase = self::MAIN_PHASE;
 1832             return $this->mainPhase($token);
 1833         }
 1834     }
 1835 
 1836     private function mainPhase($token)
 1837     {
 1838         /* Tokens in the main phase must be handled as follows: */
 1839 
 1840         /* A DOCTYPE token */
 1841         if ($token['type'] === HTML5::DOCTYPE) {
 1842             // Parse error. Ignore the token.
 1843 
 1844             /* A start tag token with the tag name "html" */
 1845         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
 1846             /* If this start tag token was not the first start tag token, then
 1847             it is a parse error. */
 1848 
 1849             /* For each attribute on the token, check to see if the attribute
 1850             is already present on the top element of the stack of open elements.
 1851             If it is not, add the attribute and its corresponding value to that
 1852             element. */
 1853             foreach ($token['attr'] as $attr) {
 1854                 if (!$this->stack[0]->hasAttribute($attr['name'])) {
 1855                     $this->stack[0]->setAttribute($attr['name'], $attr['value']);
 1856                 }
 1857             }
 1858 
 1859             /* An end-of-file token */
 1860         } elseif ($token['type'] === HTML5::EOF) {
 1861             /* Generate implied end tags. */
 1862             $this->generateImpliedEndTags();
 1863 
 1864             /* Anything else. */
 1865         } else {
 1866             /* Depends on the insertion mode: */
 1867             switch ($this->mode) {
 1868                 case self::BEFOR_HEAD:
 1869                     return $this->beforeHead($token);
 1870                     break;
 1871                 case self::IN_HEAD:
 1872                     return $this->inHead($token);
 1873                     break;
 1874                 case self::AFTER_HEAD:
 1875                     return $this->afterHead($token);
 1876                     break;
 1877                 case self::IN_BODY:
 1878                     return $this->inBody($token);
 1879                     break;
 1880                 case self::IN_TABLE:
 1881                     return $this->inTable($token);
 1882                     break;
 1883                 case self::IN_CAPTION:
 1884                     return $this->inCaption($token);
 1885                     break;
 1886                 case self::IN_CGROUP:
 1887                     return $this->inColumnGroup($token);
 1888                     break;
 1889                 case self::IN_TBODY:
 1890                     return $this->inTableBody($token);
 1891                     break;
 1892                 case self::IN_ROW:
 1893                     return $this->inRow($token);
 1894                     break;
 1895                 case self::IN_CELL:
 1896                     return $this->inCell($token);
 1897                     break;
 1898                 case self::IN_SELECT:
 1899                     return $this->inSelect($token);
 1900                     break;
 1901                 case self::AFTER_BODY:
 1902                     return $this->afterBody($token);
 1903                     break;
 1904                 case self::IN_FRAME:
 1905                     return $this->inFrameset($token);
 1906                     break;
 1907                 case self::AFTR_FRAME:
 1908                     return $this->afterFrameset($token);
 1909                     break;
 1910                 case self::END_PHASE:
 1911                     return $this->trailingEndPhase($token);
 1912                     break;
 1913             }
 1914         }
 1915     }
 1916 
 1917     private function beforeHead($token)
 1918     {
 1919         /* Handle the token as follows: */
 1920 
 1921         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 1922         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 1923         or U+0020 SPACE */
 1924         if ($token['type'] === HTML5::CHARACTR &&
 1925             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 1926         ) {
 1927             /* Append the character to the current node. */
 1928             $this->insertText($token['data']);
 1929 
 1930             /* A comment token */
 1931         } elseif ($token['type'] === HTML5::COMMENT) {
 1932             /* Append a Comment node to the current node with the data attribute
 1933             set to the data given in the comment token. */
 1934             $this->insertComment($token['data']);
 1935 
 1936             /* A start tag token with the tag name "head" */
 1937         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
 1938             /* Create an element for the token, append the new element to the
 1939             current node and push it onto the stack of open elements. */
 1940             $element = $this->insertElement($token);
 1941 
 1942             /* Set the head element pointer to this new element node. */
 1943             $this->head_pointer = $element;
 1944 
 1945             /* Change the insertion mode to "in head". */
 1946             $this->mode = self::IN_HEAD;
 1947 
 1948             /* A start tag token whose tag name is one of: "base", "link", "meta",
 1949             "script", "style", "title". Or an end tag with the tag name "html".
 1950             Or a character token that is not one of U+0009 CHARACTER TABULATION,
 1951             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 1952             or U+0020 SPACE. Or any other start tag token */
 1953         } elseif ($token['type'] === HTML5::STARTTAG ||
 1954             ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
 1955             ($token['type'] === HTML5::CHARACTR && !preg_match(
 1956                     '/^[\t\n\x0b\x0c ]$/',
 1957                     $token['data']
 1958                 ))
 1959         ) {
 1960             /* Act as if a start tag token with the tag name "head" and no
 1961             attributes had been seen, then reprocess the current token. */
 1962             $this->beforeHead(
 1963                 array(
 1964                     'name' => 'head',
 1965                     'type' => HTML5::STARTTAG,
 1966                     'attr' => array()
 1967                 )
 1968             );
 1969 
 1970             return $this->inHead($token);
 1971 
 1972             /* Any other end tag */
 1973         } elseif ($token['type'] === HTML5::ENDTAG) {
 1974             /* Parse error. Ignore the token. */
 1975         }
 1976     }
 1977 
 1978     private function inHead($token)
 1979     {
 1980         /* Handle the token as follows: */
 1981 
 1982         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 1983         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 1984         or U+0020 SPACE.
 1985 
 1986         THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
 1987         or script element, append the character to the current node regardless
 1988         of its content. */
 1989         if (($token['type'] === HTML5::CHARACTR &&
 1990                 preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
 1991                 $token['type'] === HTML5::CHARACTR && in_array(
 1992                     end($this->stack)->nodeName,
 1993                     array('title', 'style', 'script')
 1994                 ))
 1995         ) {
 1996             /* Append the character to the current node. */
 1997             $this->insertText($token['data']);
 1998 
 1999             /* A comment token */
 2000         } elseif ($token['type'] === HTML5::COMMENT) {
 2001             /* Append a Comment node to the current node with the data attribute
 2002             set to the data given in the comment token. */
 2003             $this->insertComment($token['data']);
 2004 
 2005         } elseif ($token['type'] === HTML5::ENDTAG &&
 2006             in_array($token['name'], array('title', 'style', 'script'))
 2007         ) {
 2008             array_pop($this->stack);
 2009             return HTML5::PCDATA;
 2010 
 2011             /* A start tag with the tag name "title" */
 2012         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
 2013             /* Create an element for the token and append the new element to the
 2014             node pointed to by the head element pointer, or, if that is null
 2015             (innerHTML case), to the current node. */
 2016             if ($this->head_pointer !== null) {
 2017                 $element = $this->insertElement($token, false);
 2018                 $this->head_pointer->appendChild($element);
 2019 
 2020             } else {
 2021                 $element = $this->insertElement($token);
 2022             }
 2023 
 2024             /* Switch the tokeniser's content model flag  to the RCDATA state. */
 2025             return HTML5::RCDATA;
 2026 
 2027             /* A start tag with the tag name "style" */
 2028         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
 2029             /* Create an element for the token and append the new element to the
 2030             node pointed to by the head element pointer, or, if that is null
 2031             (innerHTML case), to the current node. */
 2032             if ($this->head_pointer !== null) {
 2033                 $element = $this->insertElement($token, false);
 2034                 $this->head_pointer->appendChild($element);
 2035 
 2036             } else {
 2037                 $this->insertElement($token);
 2038             }
 2039 
 2040             /* Switch the tokeniser's content model flag  to the CDATA state. */
 2041             return HTML5::CDATA;
 2042 
 2043             /* A start tag with the tag name "script" */
 2044         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
 2045             /* Create an element for the token. */
 2046             $element = $this->insertElement($token, false);
 2047             $this->head_pointer->appendChild($element);
 2048 
 2049             /* Switch the tokeniser's content model flag  to the CDATA state. */
 2050             return HTML5::CDATA;
 2051 
 2052             /* A start tag with the tag name "base", "link", or "meta" */
 2053         } elseif ($token['type'] === HTML5::STARTTAG && in_array(
 2054                 $token['name'],
 2055                 array('base', 'link', 'meta')
 2056             )
 2057         ) {
 2058             /* Create an element for the token and append the new element to the
 2059             node pointed to by the head element pointer, or, if that is null
 2060             (innerHTML case), to the current node. */
 2061             if ($this->head_pointer !== null) {
 2062                 $element = $this->insertElement($token, false);
 2063                 $this->head_pointer->appendChild($element);
 2064                 array_pop($this->stack);
 2065 
 2066             } else {
 2067                 $this->insertElement($token);
 2068             }
 2069 
 2070             /* An end tag with the tag name "head" */
 2071         } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
 2072             /* If the current node is a head element, pop the current node off
 2073             the stack of open elements. */
 2074             if ($this->head_pointer->isSameNode(end($this->stack))) {
 2075                 array_pop($this->stack);
 2076 
 2077                 /* Otherwise, this is a parse error. */
 2078             } else {
 2079                 // k
 2080             }
 2081 
 2082             /* Change the insertion mode to "after head". */
 2083             $this->mode = self::AFTER_HEAD;
 2084 
 2085             /* A start tag with the tag name "head" or an end tag except "html". */
 2086         } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
 2087             ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')
 2088         ) {
 2089             // Parse error. Ignore the token.
 2090 
 2091             /* Anything else */
 2092         } else {
 2093             /* If the current node is a head element, act as if an end tag
 2094             token with the tag name "head" had been seen. */
 2095             if ($this->head_pointer->isSameNode(end($this->stack))) {
 2096                 $this->inHead(
 2097                     array(
 2098                         'name' => 'head',
 2099                         'type' => HTML5::ENDTAG
 2100                     )
 2101                 );
 2102 
 2103                 /* Otherwise, change the insertion mode to "after head". */
 2104             } else {
 2105                 $this->mode = self::AFTER_HEAD;
 2106             }
 2107 
 2108             /* Then, reprocess the current token. */
 2109             return $this->afterHead($token);
 2110         }
 2111     }
 2112 
 2113     private function afterHead($token)
 2114     {
 2115         /* Handle the token as follows: */
 2116 
 2117         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 2118         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 2119         or U+0020 SPACE */
 2120         if ($token['type'] === HTML5::CHARACTR &&
 2121             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 2122         ) {
 2123             /* Append the character to the current node. */
 2124             $this->insertText($token['data']);
 2125 
 2126             /* A comment token */
 2127         } elseif ($token['type'] === HTML5::COMMENT) {
 2128             /* Append a Comment node to the current node with the data attribute
 2129             set to the data given in the comment token. */
 2130             $this->insertComment($token['data']);
 2131 
 2132             /* A start tag token with the tag name "body" */
 2133         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
 2134             /* Insert a body element for the token. */
 2135             $this->insertElement($token);
 2136 
 2137             /* Change the insertion mode to "in body". */
 2138             $this->mode = self::IN_BODY;
 2139 
 2140             /* A start tag token with the tag name "frameset" */
 2141         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
 2142             /* Insert a frameset element for the token. */
 2143             $this->insertElement($token);
 2144 
 2145             /* Change the insertion mode to "in frameset". */
 2146             $this->mode = self::IN_FRAME;
 2147 
 2148             /* A start tag token whose tag name is one of: "base", "link", "meta",
 2149             "script", "style", "title" */
 2150         } elseif ($token['type'] === HTML5::STARTTAG && in_array(
 2151                 $token['name'],
 2152                 array('base', 'link', 'meta', 'script', 'style', 'title')
 2153             )
 2154         ) {
 2155             /* Parse error. Switch the insertion mode back to "in head" and
 2156             reprocess the token. */
 2157             $this->mode = self::IN_HEAD;
 2158             return $this->inHead($token);
 2159 
 2160             /* Anything else */
 2161         } else {
 2162             /* Act as if a start tag token with the tag name "body" and no
 2163             attributes had been seen, and then reprocess the current token. */
 2164             $this->afterHead(
 2165                 array(
 2166                     'name' => 'body',
 2167                     'type' => HTML5::STARTTAG,
 2168                     'attr' => array()
 2169                 )
 2170             );
 2171 
 2172             return $this->inBody($token);
 2173         }
 2174     }
 2175 
 2176     private function inBody($token)
 2177     {
 2178         /* Handle the token as follows: */
 2179 
 2180         switch ($token['type']) {
 2181             /* A character token */
 2182             case HTML5::CHARACTR:
 2183                 /* Reconstruct the active formatting elements, if any. */
 2184                 $this->reconstructActiveFormattingElements();
 2185 
 2186                 /* Append the token's character to the current node. */
 2187                 $this->insertText($token['data']);
 2188                 break;
 2189 
 2190             /* A comment token */
 2191             case HTML5::COMMENT:
 2192                 /* Append a Comment node to the current node with the data
 2193                 attribute set to the data given in the comment token. */
 2194                 $this->insertComment($token['data']);
 2195                 break;
 2196 
 2197             case HTML5::STARTTAG:
 2198                 switch ($token['name']) {
 2199                     /* A start tag token whose tag name is one of: "script",
 2200                     "style" */
 2201                     case 'script':
 2202                     case 'style':
 2203                         /* Process the token as if the insertion mode had been "in
 2204                         head". */
 2205                         return $this->inHead($token);
 2206                         break;
 2207 
 2208                     /* A start tag token whose tag name is one of: "base", "link",
 2209                     "meta", "title" */
 2210                     case 'base':
 2211                     case 'link':
 2212                     case 'meta':
 2213                     case 'title':
 2214                         /* Parse error. Process the token as if the insertion mode
 2215                         had    been "in head". */
 2216                         return $this->inHead($token);
 2217                         break;
 2218 
 2219                     /* A start tag token with the tag name "body" */
 2220                     case 'body':
 2221                         /* Parse error. If the second element on the stack of open
 2222                         elements is not a body element, or, if the stack of open
 2223                         elements has only one node on it, then ignore the token.
 2224                         (innerHTML case) */
 2225                         if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
 2226                             // Ignore
 2227 
 2228                             /* Otherwise, for each attribute on the token, check to see
 2229                             if the attribute is already present on the body element (the
 2230                             second element)    on the stack of open elements. If it is not,
 2231                             add the attribute and its corresponding value to that
 2232                             element. */
 2233                         } else {
 2234                             foreach ($token['attr'] as $attr) {
 2235                                 if (!$this->stack[1]->hasAttribute($attr['name'])) {
 2236                                     $this->stack[1]->setAttribute($attr['name'], $attr['value']);
 2237                                 }
 2238                             }
 2239                         }
 2240                         break;
 2241 
 2242                     /* A start tag whose tag name is one of: "address",
 2243                     "blockquote", "center", "dir", "div", "dl", "fieldset",
 2244                     "listing", "menu", "ol", "p", "ul" */
 2245                     case 'address':
 2246                     case 'blockquote':
 2247                     case 'center':
 2248                     case 'dir':
 2249                     case 'div':
 2250                     case 'dl':
 2251                     case 'fieldset':
 2252                     case 'listing':
 2253                     case 'menu':
 2254                     case 'ol':
 2255                     case 'p':
 2256                     case 'ul':
 2257                         /* If the stack of open elements has a p element in scope,
 2258                         then act as if an end tag with the tag name p had been
 2259                         seen. */
 2260                         if ($this->elementInScope('p')) {
 2261                             $this->emitToken(
 2262                                 array(
 2263                                     'name' => 'p',
 2264                                     'type' => HTML5::ENDTAG
 2265                                 )
 2266                             );
 2267                         }
 2268 
 2269                         /* Insert an HTML element for the token. */
 2270                         $this->insertElement($token);
 2271                         break;
 2272 
 2273                     /* A start tag whose tag name is "form" */
 2274                     case 'form':
 2275                         /* If the form element pointer is not null, ignore the
 2276                         token with a parse error. */
 2277                         if ($this->form_pointer !== null) {
 2278                             // Ignore.
 2279 
 2280                             /* Otherwise: */
 2281                         } else {
 2282                             /* If the stack of open elements has a p element in
 2283                             scope, then act as if an end tag with the tag name p
 2284                             had been seen. */
 2285                             if ($this->elementInScope('p')) {
 2286                                 $this->emitToken(
 2287                                     array(
 2288                                         'name' => 'p',
 2289                                         'type' => HTML5::ENDTAG
 2290                                     )
 2291                                 );
 2292                             }
 2293 
 2294                             /* Insert an HTML element for the token, and set the
 2295                             form element pointer to point to the element created. */
 2296                             $element = $this->insertElement($token);
 2297                             $this->form_pointer = $element;
 2298                         }
 2299                         break;
 2300 
 2301                     /* A start tag whose tag name is "li", "dd" or "dt" */
 2302                     case 'li':
 2303                     case 'dd':
 2304                     case 'dt':
 2305                         /* If the stack of open elements has a p  element in scope,
 2306                         then act as if an end tag with the tag name p had been
 2307                         seen. */
 2308                         if ($this->elementInScope('p')) {
 2309                             $this->emitToken(
 2310                                 array(
 2311                                     'name' => 'p',
 2312                                     'type' => HTML5::ENDTAG
 2313                                 )
 2314                             );
 2315                         }
 2316 
 2317                         $stack_length = count($this->stack) - 1;
 2318 
 2319                         for ($n = $stack_length; 0 <= $n; $n--) {
 2320                             /* 1. Initialise node to be the current node (the
 2321                             bottommost node of the stack). */
 2322                             $stop = false;
 2323                             $node = $this->stack[$n];
 2324                             $cat = $this->getElementCategory($node->tagName);
 2325 
 2326                             /* 2. If node is an li, dd or dt element, then pop all
 2327                             the    nodes from the current node up to node, including
 2328                             node, then stop this algorithm. */
 2329                             if ($token['name'] === $node->tagName || ($token['name'] !== 'li'
 2330                                     && ($node->tagName === 'dd' || $node->tagName === 'dt'))
 2331                             ) {
 2332                                 for ($x = $stack_length; $x >= $n; $x--) {
 2333                                     array_pop($this->stack);
 2334                                 }
 2335 
 2336                                 break;
 2337                             }
 2338 
 2339                             /* 3. If node is not in the formatting category, and is
 2340                             not    in the phrasing category, and is not an address or
 2341                             div element, then stop this algorithm. */
 2342                             if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
 2343                                 $node->tagName !== 'address' && $node->tagName !== 'div'
 2344                             ) {
 2345                                 break;
 2346                             }
 2347                         }
 2348 
 2349                         /* Finally, insert an HTML element with the same tag
 2350                         name as the    token's. */
 2351                         $this->insertElement($token);
 2352                         break;
 2353 
 2354                     /* A start tag token whose tag name is "plaintext" */
 2355                     case 'plaintext':
 2356                         /* If the stack of open elements has a p  element in scope,
 2357                         then act as if an end tag with the tag name p had been
 2358                         seen. */
 2359                         if ($this->elementInScope('p')) {
 2360                             $this->emitToken(
 2361                                 array(
 2362                                     'name' => 'p',
 2363                                     'type' => HTML5::ENDTAG
 2364                                 )
 2365                             );
 2366                         }
 2367 
 2368                         /* Insert an HTML element for the token. */
 2369                         $this->insertElement($token);
 2370 
 2371                         return HTML5::PLAINTEXT;
 2372                         break;
 2373 
 2374                     /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
 2375                     "h5", "h6" */
 2376                     case 'h1':
 2377                     case 'h2':
 2378                     case 'h3':
 2379                     case 'h4':
 2380                     case 'h5':
 2381                     case 'h6':
 2382                         /* If the stack of open elements has a p  element in scope,
 2383                         then act as if an end tag with the tag name p had been seen. */
 2384                         if ($this->elementInScope('p')) {
 2385                             $this->emitToken(
 2386                                 array(
 2387                                     'name' => 'p',
 2388                                     'type' => HTML5::ENDTAG
 2389                                 )
 2390                             );
 2391                         }
 2392 
 2393                         /* If the stack of open elements has in scope an element whose
 2394                         tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
 2395                         this is a parse error; pop elements from the stack until an
 2396                         element with one of those tag names has been popped from the
 2397                         stack. */
 2398                         while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
 2399                             array_pop($this->stack);
 2400                         }
 2401 
 2402                         /* Insert an HTML element for the token. */
 2403                         $this->insertElement($token);
 2404                         break;
 2405 
 2406                     /* A start tag whose tag name is "a" */
 2407                     case 'a':
 2408                         /* If the list of active formatting elements contains
 2409                         an element whose tag name is "a" between the end of the
 2410                         list and the last marker on the list (or the start of
 2411                         the list if there is no marker on the list), then this
 2412                         is a parse error; act as if an end tag with the tag name
 2413                         "a" had been seen, then remove that element from the list
 2414                         of active formatting elements and the stack of open
 2415                         elements if the end tag didn't already remove it (it
 2416                         might not have if the element is not in table scope). */
 2417                         $leng = count($this->a_formatting);
 2418 
 2419                         for ($n = $leng - 1; $n >= 0; $n--) {
 2420                             if ($this->a_formatting[$n] === self::MARKER) {
 2421                                 break;
 2422 
 2423                             } elseif ($this->a_formatting[$n]->nodeName === 'a') {
 2424                                 $this->emitToken(
 2425                                     array(
 2426                                         'name' => 'a',
 2427                                         'type' => HTML5::ENDTAG
 2428                                     )
 2429                                 );
 2430                                 break;
 2431                             }
 2432                         }
 2433 
 2434                         /* Reconstruct the active formatting elements, if any. */
 2435                         $this->reconstructActiveFormattingElements();
 2436 
 2437                         /* Insert an HTML element for the token. */
 2438                         $el = $this->insertElement($token);
 2439 
 2440                         /* Add that element to the list of active formatting
 2441                         elements. */
 2442                         $this->a_formatting[] = $el;
 2443                         break;
 2444 
 2445                     /* A start tag whose tag name is one of: "b", "big", "em", "font",
 2446                     "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
 2447                     case 'b':
 2448                     case 'big':
 2449                     case 'em':
 2450                     case 'font':
 2451                     case 'i':
 2452                     case 'nobr':
 2453                     case 's':
 2454                     case 'small':
 2455                     case 'strike':
 2456                     case 'strong':
 2457                     case 'tt':
 2458                     case 'u':
 2459                         /* Reconstruct the active formatting elements, if any. */
 2460                         $this->reconstructActiveFormattingElements();
 2461 
 2462                         /* Insert an HTML element for the token. */
 2463                         $el = $this->insertElement($token);
 2464 
 2465                         /* Add that element to the list of active formatting
 2466                         elements. */
 2467                         $this->a_formatting[] = $el;
 2468                         break;
 2469 
 2470                     /* A start tag token whose tag name is "button" */
 2471                     case 'button':
 2472                         /* If the stack of open elements has a button element in scope,
 2473                         then this is a parse error; act as if an end tag with the tag
 2474                         name "button" had been seen, then reprocess the token. (We don't
 2475                         do that. Unnecessary.) */
 2476                         if ($this->elementInScope('button')) {
 2477                             $this->inBody(
 2478                                 array(
 2479                                     'name' => 'button',
 2480                                     'type' => HTML5::ENDTAG
 2481                                 )
 2482                             );
 2483                         }
 2484 
 2485                         /* Reconstruct the active formatting elements, if any. */
 2486                         $this->reconstructActiveFormattingElements();
 2487 
 2488                         /* Insert an HTML element for the token. */
 2489                         $this->insertElement($token);
 2490 
 2491                         /* Insert a marker at the end of the list of active
 2492                         formatting elements. */
 2493                         $this->a_formatting[] = self::MARKER;
 2494                         break;
 2495 
 2496                     /* A start tag token whose tag name is one of: "marquee", "object" */
 2497                     case 'marquee':
 2498                     case 'object':
 2499                         /* Reconstruct the active formatting elements, if any. */
 2500                         $this->reconstructActiveFormattingElements();
 2501 
 2502                         /* Insert an HTML element for the token. */
 2503                         $this->insertElement($token);
 2504 
 2505                         /* Insert a marker at the end of the list of active
 2506                         formatting elements. */
 2507                         $this->a_formatting[] = self::MARKER;
 2508                         break;
 2509 
 2510                     /* A start tag token whose tag name is "xmp" */
 2511                     case 'xmp':
 2512                         /* Reconstruct the active formatting elements, if any. */
 2513                         $this->reconstructActiveFormattingElements();
 2514 
 2515                         /* Insert an HTML element for the token. */
 2516                         $this->insertElement($token);
 2517 
 2518                         /* Switch the content model flag to the CDATA state. */
 2519                         return HTML5::CDATA;
 2520                         break;
 2521 
 2522                     /* A start tag whose tag name is "table" */
 2523                     case 'table':
 2524                         /* If the stack of open elements has a p element in scope,
 2525                         then act as if an end tag with the tag name p had been seen. */
 2526                         if ($this->elementInScope('p')) {
 2527                             $this->emitToken(
 2528                                 array(
 2529                                     'name' => 'p',
 2530                                     'type' => HTML5::ENDTAG
 2531                                 )
 2532                             );
 2533                         }
 2534 
 2535                         /* Insert an HTML element for the token. */
 2536                         $this->insertElement($token);
 2537 
 2538                         /* Change the insertion mode to "in table". */
 2539                         $this->mode = self::IN_TABLE;
 2540                         break;
 2541 
 2542                     /* A start tag whose tag name is one of: "area", "basefont",
 2543                     "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
 2544                     case 'area':
 2545                     case 'basefont':
 2546                     case 'bgsound':
 2547                     case 'br':
 2548                     case 'embed':
 2549                     case 'img':
 2550                     case 'param':
 2551                     case 'spacer':
 2552                     case 'wbr':
 2553                         /* Reconstruct the active formatting elements, if any. */
 2554                         $this->reconstructActiveFormattingElements();
 2555 
 2556                         /* Insert an HTML element for the token. */
 2557                         $this->insertElement($token);
 2558 
 2559                         /* Immediately pop the current node off the stack of open elements. */
 2560                         array_pop($this->stack);
 2561                         break;
 2562 
 2563                     /* A start tag whose tag name is "hr" */
 2564                     case 'hr':
 2565                         /* If the stack of open elements has a p element in scope,
 2566                         then act as if an end tag with the tag name p had been seen. */
 2567                         if ($this->elementInScope('p')) {
 2568                             $this->emitToken(
 2569                                 array(
 2570                                     'name' => 'p',
 2571                                     'type' => HTML5::ENDTAG
 2572                                 )
 2573                             );
 2574                         }
 2575 
 2576                         /* Insert an HTML element for the token. */
 2577                         $this->insertElement($token);
 2578 
 2579                         /* Immediately pop the current node off the stack of open elements. */
 2580                         array_pop($this->stack);
 2581                         break;
 2582 
 2583                     /* A start tag whose tag name is "image" */
 2584                     case 'image':
 2585                         /* Parse error. Change the token's tag name to "img" and
 2586                         reprocess it. (Don't ask.) */
 2587                         $token['name'] = 'img';
 2588                         return $this->inBody($token);
 2589                         break;
 2590 
 2591                     /* A start tag whose tag name is "input" */
 2592                     case 'input':
 2593                         /* Reconstruct the active formatting elements, if any. */
 2594                         $this->reconstructActiveFormattingElements();
 2595 
 2596                         /* Insert an input element for the token. */
 2597                         $element = $this->insertElement($token, false);
 2598 
 2599                         /* If the form element pointer is not null, then associate the
 2600                         input element with the form element pointed to by the form
 2601                         element pointer. */
 2602                         $this->form_pointer !== null
 2603                             ? $this->form_pointer->appendChild($element)
 2604                             : end($this->stack)->appendChild($element);
 2605 
 2606                         /* Pop that input element off the stack of open elements. */
 2607                         array_pop($this->stack);
 2608                         break;
 2609 
 2610                     /* A start tag whose tag name is "isindex" */
 2611                     case 'isindex':
 2612                         /* Parse error. */
 2613                         // w/e
 2614 
 2615                         /* If the form element pointer is not null,
 2616                         then ignore the token. */
 2617                         if ($this->form_pointer === null) {
 2618                             /* Act as if a start tag token with the tag name "form" had
 2619                             been seen. NOTE(review): the token built below is named 'body', not 'form' -- verify. */
 2620                             $this->inBody(
 2621                                 array(
 2622                                     'name' => 'body',
 2623                                     'type' => HTML5::STARTTAG,
 2624                                     'attr' => array()
 2625                                 )
 2626                             );
 2627 
 2628                             /* Act as if a start tag token with the tag name "hr" had
 2629                             been seen. */
 2630                             $this->inBody(
 2631                                 array(
 2632                                     'name' => 'hr',
 2633                                     'type' => HTML5::STARTTAG,
 2634                                     'attr' => array()
 2635                                 )
 2636                             );
 2637 
 2638                             /* Act as if a start tag token with the tag name "p" had
 2639                             been seen. */
 2640                             $this->inBody(
 2641                                 array(
 2642                                     'name' => 'p',
 2643                                     'type' => HTML5::STARTTAG,
 2644                                     'attr' => array()
 2645                                 )
 2646                             );
 2647 
 2648                             /* Act as if a start tag token with the tag name "label"
 2649                             had been seen. */
 2650                             $this->inBody(
 2651                                 array(
 2652                                     'name' => 'label',
 2653                                     'type' => HTML5::STARTTAG,
 2654                                     'attr' => array()
 2655                                 )
 2656                             );
 2657 
 2658                             /* Act as if a stream of character tokens had been seen. */
 2659                             $this->insertText(
 2660                                 'This is a searchable index. ' .
 2661                                 'Insert your search keywords here: '
 2662                             );
 2663 
 2664                             /* Act as if a start tag token with the tag name "input"
 2665                             had been seen, with all the attributes from the "isindex"
 2666                             token, except with the "name" attribute set to the value
 2667                             "isindex" (ignoring any explicit "name" attribute). */
 2668                             $attr = $token['attr'];
 2669                             $attr[] = array('name' => 'name', 'value' => 'isindex');
 2670 
 2671                             $this->inBody(
 2672                                 array(
 2673                                     'name' => 'input',
 2674                                     'type' => HTML5::STARTTAG,
 2675                                     'attr' => $attr
 2676                                 )
 2677                             );
 2678 
 2679                             /* Act as if a stream of character tokens had been seen
 2680                             (see below for what they should say). */
 2681                             $this->insertText(
 2682                                 'This is a searchable index. ' .
 2683                                 'Insert your search keywords here: '
 2684                             );
 2685 
 2686                             /* Act as if an end tag token with the tag name "label"
 2687                             had been seen. */
 2688                             $this->inBody(
 2689                                 array(
 2690                                     'name' => 'label',
 2691                                     'type' => HTML5::ENDTAG
 2692                                 )
 2693                             );
 2694 
 2695                             /* Act as if an end tag token with the tag name "p" had
 2696                             been seen. */
 2697                             $this->inBody(
 2698                                 array(
 2699                                     'name' => 'p',
 2700                                     'type' => HTML5::ENDTAG
 2701                                 )
 2702                             );
 2703 
 2704                             /* Act as if a start tag token with the tag name "hr" had
 2705                             been seen. NOTE(review): the token built below has type HTML5::ENDTAG, not STARTTAG -- verify. */
 2706                             $this->inBody(
 2707                                 array(
 2708                                     'name' => 'hr',
 2709                                     'type' => HTML5::ENDTAG
 2710                                 )
 2711                             );
 2712 
 2713                             /* Act as if an end tag token with the tag name "form" had
 2714                             been seen. */
 2715                             $this->inBody(
 2716                                 array(
 2717                                     'name' => 'form',
 2718                                     'type' => HTML5::ENDTAG
 2719                                 )
 2720                             );
 2721                         }
 2722                         break;
 2723 
 2724                     /* A start tag whose tag name is "textarea" */
 2725                     case 'textarea':
 2726                         $this->insertElement($token);
 2727 
 2728                         /* Switch the tokeniser's content model flag to the
 2729                         RCDATA state. */
 2730                         return HTML5::RCDATA;
 2731                         break;
 2732 
 2733                     /* A start tag whose tag name is one of: "iframe", "noembed",
 2734                     "noframes" */
 2735                     case 'iframe':
 2736                     case 'noembed':
 2737                     case 'noframes':
 2738                         $this->insertElement($token);
 2739 
 2740                         /* Switch the tokeniser's content model flag to the CDATA state. */
 2741                         return HTML5::CDATA;
 2742                         break;
 2743 
 2744                     /* A start tag whose tag name is "select" */
 2745                     case 'select':
 2746                         /* Reconstruct the active formatting elements, if any. */
 2747                         $this->reconstructActiveFormattingElements();
 2748 
 2749                         /* Insert an HTML element for the token. */
 2750                         $this->insertElement($token);
 2751 
 2752                         /* Change the insertion mode to "in select". */
 2753                         $this->mode = self::IN_SELECT;
 2754                         break;
 2755 
 2756                     /* A start or end tag whose tag name is one of: "caption", "col",
 2757                     "colgroup", "frame", "frameset", "head", "option", "optgroup",
 2758                     "tbody", "td", "tfoot", "th", "thead", "tr". */
 2759                     case 'caption':
 2760                     case 'col':
 2761                     case 'colgroup':
 2762                     case 'frame':
 2763                     case 'frameset':
 2764                     case 'head':
 2765                     case 'option':
 2766                     case 'optgroup':
 2767                     case 'tbody':
 2768                     case 'td':
 2769                     case 'tfoot':
 2770                     case 'th':
 2771                     case 'thead':
 2772                     case 'tr':
 2773                         // Parse error. Ignore the token.
 2774                         break;
 2775 
 2776                     /* A start or end tag whose tag name is one of: "event-source",
 2777                     "section", "nav", "article", "aside", "header", "footer",
 2778                     "datagrid", "command" */
 2779                     case 'event-source':
 2780                     case 'section':
 2781                     case 'nav':
 2782                     case 'article':
 2783                     case 'aside':
 2784                     case 'header':
 2785                     case 'footer':
 2786                     case 'datagrid':
 2787                     case 'command':
 2788                         // Work in progress!
 2789                         break;
 2790 
 2791                     /* A start tag token not covered by the previous entries */
 2792                     default:
 2793                         /* Reconstruct the active formatting elements, if any. */
 2794                         $this->reconstructActiveFormattingElements();
 2795 
 2796                         $this->insertElement($token, true, true);
 2797                         break;
 2798                 }
 2799                 break;
 2800 
 2801             case HTML5::ENDTAG:
 2802                 switch ($token['name']) {
 2803                     /* An end tag with the tag name "body" */
 2804                     case 'body':
 2805                         /* If the second element in the stack of open elements is
 2806                         not a body element, this is a parse error. Ignore the token.
 2807                         (innerHTML case) */
 2808                         if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
 2809                             // Ignore.
 2810 
 2811                             /* If the current node is not the body element, then this
 2812                             is a parse error. */
 2813                         } elseif (end($this->stack)->nodeName !== 'body') {
 2814                             // Parse error.
 2815                         }
 2816 
 2817                         /* Change the insertion mode to "after body". */
 2818                         $this->mode = self::AFTER_BODY;
 2819                         break;
 2820 
 2821                     /* An end tag with the tag name "html" */
 2822                     case 'html':
 2823                         /* Act as if an end tag with tag name "body" had been seen,
 2824                         then, if that token wasn't ignored, reprocess the current
 2825                         token. */
 2826                         $this->inBody(
 2827                             array(
 2828                                 'name' => 'body',
 2829                                 'type' => HTML5::ENDTAG
 2830                             )
 2831                         );
 2832 
 2833                         return $this->afterBody($token);
 2834                         break;
 2835 
 2836                     /* An end tag whose tag name is one of: "address", "blockquote",
 2837                     "center", "dir", "div", "dl", "fieldset", "listing", "menu",
 2838                     "ol", "pre", "ul" */
 2839                     case 'address':
 2840                     case 'blockquote':
 2841                     case 'center':
 2842                     case 'dir':
 2843                     case 'div':
 2844                     case 'dl':
 2845                     case 'fieldset':
 2846                     case 'listing':
 2847                     case 'menu':
 2848                     case 'ol':
 2849                     case 'pre':
 2850                     case 'ul':
 2851                         /* If the stack of open elements has an element in scope
 2852                         with the same tag name as that of the token, then generate
 2853                         implied end tags. */
 2854                         if ($this->elementInScope($token['name'])) {
 2855                             $this->generateImpliedEndTags();
 2856 
 2857                             /* Now, if the current node is not an element with
 2858                             the same tag name as that of the token, then this
 2859                             is a parse error. */
 2860                             // w/e
 2861 
 2862                             /* If the stack of open elements has an element in
 2863                             scope with the same tag name as that of the token,
 2864                             then pop elements from this stack until an element
 2865                             with that tag name has been popped from the stack. */
 2866                             for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 2867                                 if ($this->stack[$n]->nodeName === $token['name']) {
 2868                                     $n = -1;
 2869                                 }
 2870 
 2871                                 array_pop($this->stack);
 2872                             }
 2873                         }
 2874                         break;
 2875 
 2876                     /* An end tag whose tag name is "form" */
 2877                     case 'form':
 2878                         /* If the stack of open elements has an element in scope
 2879                         with the same tag name as that of the token, then generate
 2880                         implied end tags. */
 2881                         if ($this->elementInScope($token['name'])) {
 2882                             $this->generateImpliedEndTags();
 2883 
 2884                         }
 2885 
 2886                         if (end($this->stack)->nodeName !== $token['name']) {
 2887                             /* Now, if the current node is not an element with the
 2888                             same tag name as that of the token, then this is a parse
 2889                             error. */
 2890                             // w/e
 2891 
 2892                         } else {
 2893                             /* Otherwise, if the current node is an element with
 2894                             the same tag name as that of the token pop that element
 2895                             from the stack. */
 2896                             array_pop($this->stack);
 2897                         }
 2898 
 2899                         /* In any case, set the form element pointer to null. */
 2900                         $this->form_pointer = null;
 2901                         break;
 2902 
 2903                     /* An end tag whose tag name is "p" */
 2904                     case 'p':
 2905                         /* If the stack of open elements has a p element in scope,
 2906                         then generate implied end tags, except for p elements. */
 2907                         if ($this->elementInScope('p')) {
 2908                             $this->generateImpliedEndTags(array('p'));
 2909 
 2910                             /* If the current node is not a p element, then this is
 2911                             a parse error. */
 2912                             // k
 2913 
 2914                             /* If the stack of open elements has a p element in
 2915                             scope, then pop elements from this stack until the stack
 2916                             no longer has a p element in scope. */
 2917                             for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 2918                                 if ($this->elementInScope('p')) {
 2919                                     array_pop($this->stack);
 2920 
 2921                                 } else {
 2922                                     break;
 2923                                 }
 2924                             }
 2925                         }
 2926                         break;
 2927 
 2928                     /* An end tag whose tag name is "dd", "dt", or "li" */
 2929                     case 'dd':
 2930                     case 'dt':
 2931                     case 'li':
 2932                         /* If the stack of open elements has an element in scope
 2933                         whose tag name matches the tag name of the token, then
 2934                         generate implied end tags, except for elements with the
 2935                         same tag name as the token. */
 2936                         if ($this->elementInScope($token['name'])) {
 2937                             $this->generateImpliedEndTags(array($token['name']));
 2938 
 2939                             /* If the current node is not an element with the same
 2940                             tag name as the token, then this is a parse error. */
 2941                             // w/e
 2942 
 2943                             /* If the stack of open elements has an element in scope
 2944                             whose tag name matches the tag name of the token, then
 2945                             pop elements from this stack until an element with that
 2946                             tag name has been popped from the stack. */
 2947                             for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 2948                                 if ($this->stack[$n]->nodeName === $token['name']) {
 2949                                     $n = -1;
 2950                                 }
 2951 
 2952                                 array_pop($this->stack);
 2953                             }
 2954                         }
 2955                         break;
 2956 
 2957                     /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
 2958                     "h5", "h6" */
 2959                     case 'h1':
 2960                     case 'h2':
 2961                     case 'h3':
 2962                     case 'h4':
 2963                     case 'h5':
 2964                     case 'h6':
 2965                         $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
 2966 
 2967                         /* If the stack of open elements has in scope an element whose
 2968                         tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
 2969                         generate implied end tags. */
 2970                         if ($this->elementInScope($elements)) {
 2971                             $this->generateImpliedEndTags();
 2972 
 2973                             /* Now, if the current node is not an element with the same
 2974                             tag name as that of the token, then this is a parse error. */
 2975                             // w/e
 2976 
 2977                             /* If the stack of open elements has in scope an element
 2978                             whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
 2979                             "h6", then pop elements from the stack until an element
 2980                             with one of those tag names has been popped from the stack. */
 2981                             while ($this->elementInScope($elements)) {
 2982                                 array_pop($this->stack);
 2983                             }
 2984                         }
 2985                         break;
 2986 
 2987                     /* An end tag whose tag name is one of: "a", "b", "big", "em",
 2988                     "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
 2989                     case 'a':
 2990                     case 'b':
 2991                     case 'big':
 2992                     case 'em':
 2993                     case 'font':
 2994                     case 'i':
 2995                     case 'nobr':
 2996                     case 's':
 2997                     case 'small':
 2998                     case 'strike':
 2999                     case 'strong':
 3000                     case 'tt':
 3001                     case 'u':
 3002                         /* 1. Let the formatting element be the last element in
 3003                         the list of active formatting elements that:
 3004                             * is between the end of the list and the last scope
 3005                             marker in the list, if any, or the start of the list
 3006                             otherwise, and
 3007                             * has the same tag name as the token.
 3008                         */
 3009                         while (true) {
 3010                             for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
 3011                                 if ($this->a_formatting[$a] === self::MARKER) {
 3012                                     break;
 3013 
 3014                                 } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
 3015                                     $formatting_element = $this->a_formatting[$a];
 3016                                     $in_stack = in_array($formatting_element, $this->stack, true);
 3017                                     $fe_af_pos = $a;
 3018                                     break;
 3019                                 }
 3020                             }
 3021 
 3022                             /* If there is no such node, or, if that node is
 3023                             also in the stack of open elements but the element
 3024                             is not in scope, then this is a parse error. Abort
 3025                             these steps. The token is ignored. */
 3026                             if (!isset($formatting_element) || ($in_stack &&
 3027                                     !$this->elementInScope($token['name']))
 3028                             ) {
 3029                                 break;
 3030 
 3031                                 /* Otherwise, if there is such a node, but that node
 3032                                 is not in the stack of open elements, then this is a
 3033                                 parse error; remove the element from the list, and
 3034                                 abort these steps. */
 3035                             } elseif (isset($formatting_element) && !$in_stack) {
 3036                                 unset($this->a_formatting[$fe_af_pos]);
 3037                                 $this->a_formatting = array_merge($this->a_formatting);
 3038                                 break;
 3039                             }
 3040 
 3041                             /* 2. Let the furthest block be the topmost node in the
 3042                             stack of open elements that is lower in the stack
 3043                             than the formatting element, and is not an element in
 3044                             the phrasing or formatting categories. There might
 3045                             not be one. */
 3046                             $fe_s_pos = array_search($formatting_element, $this->stack, true);
 3047                             $length = count($this->stack);
 3048 
 3049                             for ($s = $fe_s_pos + 1; $s < $length; $s++) {
 3050                                 $category = $this->getElementCategory($this->stack[$s]->nodeName);
 3051 
 3052                                 if ($category !== self::PHRASING && $category !== self::FORMATTING) {
 3053                                     $furthest_block = $this->stack[$s];
 3054                                 }
 3055                             }
 3056 
 3057                             /* 3. If there is no furthest block, then the UA must
 3058                             skip the subsequent steps and instead just pop all
 3059                             the nodes from the bottom of the stack of open
 3060                             elements, from the current node up to the formatting
 3061                             element, and remove the formatting element from the
 3062                             list of active formatting elements. */
 3063                             if (!isset($furthest_block)) {
 3064                                 for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
 3065                                     array_pop($this->stack);
 3066                                 }
 3067 
 3068                                 unset($this->a_formatting[$fe_af_pos]);
 3069                                 $this->a_formatting = array_merge($this->a_formatting);
 3070                                 break;
 3071                             }
 3072 
 3073                             /* 4. Let the common ancestor be the element
 3074                             immediately above the formatting element in the stack
 3075                             of open elements. */
 3076                             $common_ancestor = $this->stack[$fe_s_pos - 1];
 3077 
 3078                             /* 5. If the furthest block has a parent node, then
 3079                             remove the furthest block from its parent node. */
 3080                             if ($furthest_block->parentNode !== null) {
 3081                                 $furthest_block->parentNode->removeChild($furthest_block);
 3082                             }
 3083 
 3084                             /* 6. Let a bookmark note the position of the
 3085                             formatting element in the list of active formatting
 3086                             elements relative to the elements on either side
 3087                             of it in the list. */
 3088                             $bookmark = $fe_af_pos;
 3089 
 3090                             /* 7. Let node and last node  be the furthest block.
 3091                             Follow these steps: */
 3092                             $node = $furthest_block;
 3093                             $last_node = $furthest_block;
 3094 
 3095                             while (true) {
 3096                                 for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
 3097                                     /* 7.1 Let node be the element immediately
 3098                                     prior to node in the stack of open elements. */
 3099                                     $node = $this->stack[$n];
 3100 
 3101                                     /* 7.2 If node is not in the list of active
 3102                                     formatting elements, then remove node from
 3103                                     the stack of open elements and then go back
 3104                                     to step 1. */
 3105                                     if (!in_array($node, $this->a_formatting, true)) {
 3106                                         unset($this->stack[$n]);
 3107                                         $this->stack = array_merge($this->stack);
 3108 
 3109                                     } else {
 3110                                         break;
 3111                                     }
 3112                                 }
 3113 
 3114                                 /* 7.3 Otherwise, if node is the formatting
 3115                                 element, then go to the next step in the overall
 3116                                 algorithm. */
 3117                                 if ($node === $formatting_element) {
 3118                                     break;
 3119 
 3120                                     /* 7.4 Otherwise, if last node is the furthest
 3121                                     block, then move the aforementioned bookmark to
 3122                                     be immediately after the node in the list of
 3123                                     active formatting elements. */
 3124                                 } elseif ($last_node === $furthest_block) {
 3125                                     $bookmark = array_search($node, $this->a_formatting, true) + 1;
 3126                                 }
 3127 
 3128                                 /* 7.5 If node has any children, perform a
 3129                                 shallow clone of node, replace the entry for
 3130                                 node in the list of active formatting elements
 3131                                 with an entry for the clone, replace the entry
 3132                                 for node in the stack of open elements with an
 3133                                 entry for the clone, and let node be the clone. */
 3134                                 if ($node->hasChildNodes()) {
 3135                                     $clone = $node->cloneNode();
 3136                                     $s_pos = array_search($node, $this->stack, true);
 3137                                     $a_pos = array_search($node, $this->a_formatting, true);
 3138 
 3139                                     $this->stack[$s_pos] = $clone;
 3140                                     $this->a_formatting[$a_pos] = $clone;
 3141                                     $node = $clone;
 3142                                 }
 3143 
 3144                                 /* 7.6 Insert last node into node, first removing
 3145                                 it from its previous parent node if any. */
 3146                                 if ($last_node->parentNode !== null) {
 3147                                     $last_node->parentNode->removeChild($last_node);
 3148                                 }
 3149 
 3150                                 $node->appendChild($last_node);
 3151 
 3152                                 /* 7.7 Let last node be node. */
 3153                                 $last_node = $node;
 3154                             }
 3155 
 3156                             /* 8. Insert whatever last node ended up being in
 3157                             the previous step into the common ancestor node,
 3158                             first removing it from its previous parent node if
 3159                             any. */
 3160                             if ($last_node->parentNode !== null) {
 3161                                 $last_node->parentNode->removeChild($last_node);
 3162                             }
 3163 
 3164                             $common_ancestor->appendChild($last_node);
 3165 
 3166                             /* 9. Perform a shallow clone of the formatting
 3167                             element. */
 3168                             $clone = $formatting_element->cloneNode();
 3169 
 3170                             /* 10. Take all of the child nodes of the furthest
 3171                             block and append them to the clone created in the
 3172                             last step. */
 3173                             while ($furthest_block->hasChildNodes()) {
 3174                                 $child = $furthest_block->firstChild;
 3175                                 $furthest_block->removeChild($child);
 3176                                 $clone->appendChild($child);
 3177                             }
 3178 
 3179                             /* 11. Append that clone to the furthest block. */
 3180                             $furthest_block->appendChild($clone);
 3181 
 3182                             /* 12. Remove the formatting element from the list
 3183                             of active formatting elements, and insert the clone
 3184                             into the list of active formatting elements at the
 3185                             position of the aforementioned bookmark. */
 3186                             $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
 3187                             unset($this->a_formatting[$fe_af_pos]);
 3188                             $this->a_formatting = array_merge($this->a_formatting);
 3189 
 3190                             $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
 3191                             $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
 3192                             $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
 3193 
 3194                             /* 13. Remove the formatting element from the stack
 3195                             of open elements, and insert the clone into the stack
 3196                             of open elements immediately after (i.e. in a more
 3197                             deeply nested position than) the position of the
 3198                             furthest block in that stack. */
 3199                             $fe_s_pos = array_search($formatting_element, $this->stack, true);
 3200                             $fb_s_pos = array_search($furthest_block, $this->stack, true);
 3201                             unset($this->stack[$fe_s_pos]);
 3202 
 3203                             $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
 3204                             $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
 3205                             $this->stack = array_merge($s_part1, array($clone), $s_part2);
 3206 
 3207                             /* 14. Jump back to step 1 in this series of steps. */
 3208                             unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
 3209                         }
 3210                         break;
 3211 
 3212                     /* An end tag token whose tag name is one of: "button",
 3213                     "marquee", "object" */
 3214                     case 'button':
 3215                     case 'marquee':
 3216                     case 'object':
 3217                         /* If the stack of open elements has an element in scope whose
 3218                         tag name matches the tag name of the token, then generate implied
 3219                         tags. */
 3220                         if ($this->elementInScope($token['name'])) {
 3221                             $this->generateImpliedEndTags();
 3222 
 3223                             /* Now, if the current node is not an element with the same
 3224                             tag name as the token, then this is a parse error. */
 3225                             // k
 3226 
 3227                             /* Now, if the stack of open elements has an element in scope
 3228                             whose tag name matches the tag name of the token, then pop
 3229                             elements from the stack until that element has been popped from
 3230                             the stack, and clear the list of active formatting elements up
 3231                             to the last marker. */
 3232                             for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 3233                                 if ($this->stack[$n]->nodeName === $token['name']) {
 3234                                     $n = -1;
 3235                                 }
 3236 
 3237                                 array_pop($this->stack);
 3238                             }
 3239 
 3240                             $marker = end(array_keys($this->a_formatting, self::MARKER, true));
 3241 
 3242                             for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
 3243                                 array_pop($this->a_formatting);
 3244                             }
 3245                         }
 3246                         break;
 3247 
 3248                     /* Or an end tag whose tag name is one of: "area", "basefont",
 3249                     "bgsound", "br", "embed", "hr", "iframe", "image", "img",
 3250                     "input", "isindex", "noembed", "noframes", "param", "select",
 3251                     "spacer", "table", "textarea", "wbr" */
 3252                     case 'area':
 3253                     case 'basefont':
 3254                     case 'bgsound':
 3255                     case 'br':
 3256                     case 'embed':
 3257                     case 'hr':
 3258                     case 'iframe':
 3259                     case 'image':
 3260                     case 'img':
 3261                     case 'input':
 3262                     case 'isindex':
 3263                     case 'noembed':
 3264                     case 'noframes':
 3265                     case 'param':
 3266                     case 'select':
 3267                     case 'spacer':
 3268                     case 'table':
 3269                     case 'textarea':
 3270                     case 'wbr':
 3271                         // Parse error. Ignore the token.
 3272                         break;
 3273 
 3274                     /* An end tag token not covered by the previous entries */
 3275                     default:
 3276                         for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 3277                             /* Initialise node to be the current node (the bottommost
 3278                             node of the stack). */
 3279                             $node = end($this->stack);
 3280 
 3281                             /* If node has the same tag name as the end tag token,
 3282                             then: */
 3283                             if ($token['name'] === $node->nodeName) {
 3284                                 /* Generate implied end tags. */
 3285                                 $this->generateImpliedEndTags();
 3286 
 3287                                 /* If the tag name of the end tag token does not
 3288                                 match the tag name of the current node, this is a
 3289                                 parse error. */
 3290                                 // k
 3291 
 3292                                 /* Pop all the nodes from the current node up to
 3293                                 node, including node, then stop this algorithm. */
 3294                                 for ($x = count($this->stack) - $n; $x >= $n; $x--) {
 3295                                     array_pop($this->stack);
 3296                                 }
 3297 
 3298                             } else {
 3299                                 $category = $this->getElementCategory($node);
 3300 
 3301                                 if ($category !== self::SPECIAL && $category !== self::SCOPING) {
 3302                                     /* Otherwise, if node is in neither the formatting
 3303                                     category nor the phrasing category, then this is a
 3304                                     parse error. Stop this algorithm. The end tag token
 3305                                     is ignored. */
 3306                                     return false;
 3307                                 }
 3308                             }
 3309                         }
 3310                         break;
 3311                 }
 3312                 break;
 3313         }
 3314     }
 3315 
 3316     private function inTable($token)
 3317     {
 3318         $clear = array('html', 'table');
 3319 
 3320         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 3321         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 3322         or U+0020 SPACE */
 3323         if ($token['type'] === HTML5::CHARACTR &&
 3324             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 3325         ) {
 3326             /* Append the character to the current node. */
 3327             $text = $this->dom->createTextNode($token['data']);
 3328             end($this->stack)->appendChild($text);
 3329 
 3330             /* A comment token */
 3331         } elseif ($token['type'] === HTML5::COMMENT) {
 3332             /* Append a Comment node to the current node with the data
 3333             attribute set to the data given in the comment token. */
 3334             $comment = $this->dom->createComment($token['data']);
 3335             end($this->stack)->appendChild($comment);
 3336 
 3337             /* A start tag whose tag name is "caption" */
 3338         } elseif ($token['type'] === HTML5::STARTTAG &&
 3339             $token['name'] === 'caption'
 3340         ) {
 3341             /* Clear the stack back to a table context. */
 3342             $this->clearStackToTableContext($clear);
 3343 
 3344             /* Insert a marker at the end of the list of active
 3345             formatting elements. */
 3346             $this->a_formatting[] = self::MARKER;
 3347 
 3348             /* Insert an HTML element for the token, then switch the
 3349             insertion mode to "in caption". */
 3350             $this->insertElement($token);
 3351             $this->mode = self::IN_CAPTION;
 3352 
 3353             /* A start tag whose tag name is "colgroup" */
 3354         } elseif ($token['type'] === HTML5::STARTTAG &&
 3355             $token['name'] === 'colgroup'
 3356         ) {
 3357             /* Clear the stack back to a table context. */
 3358             $this->clearStackToTableContext($clear);
 3359 
 3360             /* Insert an HTML element for the token, then switch the
 3361             insertion mode to "in column group". */
 3362             $this->insertElement($token);
 3363             $this->mode = self::IN_CGROUP;
 3364 
 3365             /* A start tag whose tag name is "col" */
 3366         } elseif ($token['type'] === HTML5::STARTTAG &&
 3367             $token['name'] === 'col'
 3368         ) {
 3369             $this->inTable(
 3370                 array(
 3371                     'name' => 'colgroup',
 3372                     'type' => HTML5::STARTTAG,
 3373                     'attr' => array()
 3374                 )
 3375             );
 3376 
 3377             $this->inColumnGroup($token);
 3378 
 3379             /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
 3380         } elseif ($token['type'] === HTML5::STARTTAG && in_array(
 3381                 $token['name'],
 3382                 array('tbody', 'tfoot', 'thead')
 3383             )
 3384         ) {
 3385             /* Clear the stack back to a table context. */
 3386             $this->clearStackToTableContext($clear);
 3387 
 3388             /* Insert an HTML element for the token, then switch the insertion
 3389             mode to "in table body". */
 3390             $this->insertElement($token);
 3391             $this->mode = self::IN_TBODY;
 3392 
 3393             /* A start tag whose tag name is one of: "td", "th", "tr" */
 3394         } elseif ($token['type'] === HTML5::STARTTAG &&
 3395             in_array($token['name'], array('td', 'th', 'tr'))
 3396         ) {
 3397             /* Act as if a start tag token with the tag name "tbody" had been
 3398             seen, then reprocess the current token. */
 3399             $this->inTable(
 3400                 array(
 3401                     'name' => 'tbody',
 3402                     'type' => HTML5::STARTTAG,
 3403                     'attr' => array()
 3404                 )
 3405             );
 3406 
 3407             return $this->inTableBody($token);
 3408 
 3409             /* A start tag whose tag name is "table" */
 3410         } elseif ($token['type'] === HTML5::STARTTAG &&
 3411             $token['name'] === 'table'
 3412         ) {
 3413             /* Parse error. Act as if an end tag token with the tag name "table"
 3414             had been seen, then, if that token wasn't ignored, reprocess the
 3415             current token. */
 3416             $this->inTable(
 3417                 array(
 3418                     'name' => 'table',
 3419                     'type' => HTML5::ENDTAG
 3420                 )
 3421             );
 3422 
 3423             return $this->mainPhase($token);
 3424 
 3425             /* An end tag whose tag name is "table" */
 3426         } elseif ($token['type'] === HTML5::ENDTAG &&
 3427             $token['name'] === 'table'
 3428         ) {
 3429             /* If the stack of open elements does not have an element in table
 3430             scope with the same tag name as the token, this is a parse error.
 3431             Ignore the token. (innerHTML case) */
 3432             if (!$this->elementInScope($token['name'], true)) {
 3433                 return false;
 3434 
 3435                 /* Otherwise: */
 3436             } else {
 3437                 /* Generate implied end tags. */
 3438                 $this->generateImpliedEndTags();
 3439 
 3440                 /* Now, if the current node is not a table element, then this
 3441                 is a parse error. */
 3442                 // w/e
 3443 
 3444                 /* Pop elements from this stack until a table element has been
 3445                 popped from the stack. */
 3446                 while (true) {
 3447                     $current = end($this->stack)->nodeName;
 3448                     array_pop($this->stack);
 3449 
 3450                     if ($current === 'table') {
 3451                         break;
 3452                     }
 3453                 }
 3454 
 3455                 /* Reset the insertion mode appropriately. */
 3456                 $this->resetInsertionMode();
 3457             }
 3458 
 3459             /* An end tag whose tag name is one of: "body", "caption", "col",
 3460             "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
 3461         } elseif ($token['type'] === HTML5::ENDTAG && in_array(
 3462                 $token['name'],
 3463                 array(
 3464                     'body',
 3465                     'caption',
 3466                     'col',
 3467                     'colgroup',
 3468                     'html',
 3469                     'tbody',
 3470                     'td',
 3471                     'tfoot',
 3472                     'th',
 3473                     'thead',
 3474                     'tr'
 3475                 )
 3476             )
 3477         ) {
 3478             // Parse error. Ignore the token.
 3479 
 3480             /* Anything else */
 3481         } else {
 3482             /* Parse error. Process the token as if the insertion mode was "in
 3483             body", with the following exception: */
 3484 
 3485             /* If the current node is a table, tbody, tfoot, thead, or tr
 3486             element, then, whenever a node would be inserted into the current
 3487             node, it must instead be inserted into the foster parent element. */
 3488             if (in_array(
 3489                 end($this->stack)->nodeName,
 3490                 array('table', 'tbody', 'tfoot', 'thead', 'tr')
 3491             )
 3492             ) {
 3493                 /* The foster parent element is the parent element of the last
 3494                 table element in the stack of open elements, if there is a
 3495                 table element and it has such a parent element. If there is no
 3496                 table element in the stack of open elements (innerHTML case),
 3497                 then the foster parent element is the first element in the
 3498                 stack of open elements (the html  element). Otherwise, if there
 3499                 is a table element in the stack of open elements, but the last
 3500                 table element in the stack of open elements has no parent, or
 3501                 its parent node is not an element, then the foster parent
 3502                 element is the element before the last table element in the
 3503                 stack of open elements. */
 3504                 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 3505                     if ($this->stack[$n]->nodeName === 'table') {
 3506                         $table = $this->stack[$n];
 3507                         break;
 3508                     }
 3509                 }
 3510 
 3511                 if (isset($table) && $table->parentNode !== null) {
 3512                     $this->foster_parent = $table->parentNode;
 3513 
 3514                 } elseif (!isset($table)) {
 3515                     $this->foster_parent = $this->stack[0];
 3516 
 3517                 } elseif (isset($table) && ($table->parentNode === null ||
 3518                         $table->parentNode->nodeType !== XML_ELEMENT_NODE)
 3519                 ) {
 3520                     $this->foster_parent = $this->stack[$n - 1];
 3521                 }
 3522             }
 3523 
 3524             $this->inBody($token);
 3525         }
 3526     }
 3527 
 3528     private function inCaption($token)
 3529     {
 3530         /* An end tag whose tag name is "caption" */
 3531         if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
 3532             /* If the stack of open elements does not have an element in table
 3533             scope with the same tag name as the token, this is a parse error.
 3534             Ignore the token. (innerHTML case) */
 3535             if (!$this->elementInScope($token['name'], true)) {
 3536                 // Ignore
 3537 
 3538                 /* Otherwise: */
 3539             } else {
 3540                 /* Generate implied end tags. */
 3541                 $this->generateImpliedEndTags();
 3542 
 3543                 /* Now, if the current node is not a caption element, then this
 3544                 is a parse error. */
 3545                 // w/e
 3546 
 3547                 /* Pop elements from this stack until a caption element has
 3548                 been popped from the stack. */
 3549                 while (true) {
 3550                     $node = end($this->stack)->nodeName;
 3551                     array_pop($this->stack);
 3552 
 3553                     if ($node === 'caption') {
 3554                         break;
 3555                     }
 3556                 }
 3557 
 3558                 /* Clear the list of active formatting elements up to the last
 3559                 marker. */
 3560                 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
 3561 
 3562                 /* Switch the insertion mode to "in table". */
 3563                 $this->mode = self::IN_TABLE;
 3564             }
 3565 
 3566             /* A start tag whose tag name is one of: "caption", "col", "colgroup",
 3567             "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
 3568             name is "table" */
 3569         } elseif (($token['type'] === HTML5::STARTTAG && in_array(
 3570                     $token['name'],
 3571                     array(
 3572                         'caption',
 3573                         'col',
 3574                         'colgroup',
 3575                         'tbody',
 3576                         'td',
 3577                         'tfoot',
 3578                         'th',
 3579                         'thead',
 3580                         'tr'
 3581                     )
 3582                 )) || ($token['type'] === HTML5::ENDTAG &&
 3583                 $token['name'] === 'table')
 3584         ) {
 3585             /* Parse error. Act as if an end tag with the tag name "caption"
 3586             had been seen, then, if that token wasn't ignored, reprocess the
 3587             current token. */
 3588             $this->inCaption(
 3589                 array(
 3590                     'name' => 'caption',
 3591                     'type' => HTML5::ENDTAG
 3592                 )
 3593             );
 3594 
 3595             return $this->inTable($token);
 3596 
 3597             /* An end tag whose tag name is one of: "body", "col", "colgroup",
 3598             "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
 3599         } elseif ($token['type'] === HTML5::ENDTAG && in_array(
 3600                 $token['name'],
 3601                 array(
 3602                     'body',
 3603                     'col',
 3604                     'colgroup',
 3605                     'html',
 3606                     'tbody',
 3607                     'tfoot',
 3608                     'th',
 3609                     'thead',
 3610                     'tr'
 3611                 )
 3612             )
 3613         ) {
 3614             // Parse error. Ignore the token.
 3615 
 3616             /* Anything else */
 3617         } else {
 3618             /* Process the token as if the insertion mode was "in body". */
 3619             $this->inBody($token);
 3620         }
 3621     }
 3622 
 3623     private function inColumnGroup($token)
 3624     {
 3625         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 3626         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 3627         or U+0020 SPACE */
 3628         if ($token['type'] === HTML5::CHARACTR &&
 3629             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 3630         ) {
 3631             /* Append the character to the current node. */
 3632             $text = $this->dom->createTextNode($token['data']);
 3633             end($this->stack)->appendChild($text);
 3634 
 3635             /* A comment token */
 3636         } elseif ($token['type'] === HTML5::COMMENT) {
 3637             /* Append a Comment node to the current node with the data
 3638             attribute set to the data given in the comment token. */
 3639             $comment = $this->dom->createComment($token['data']);
 3640             end($this->stack)->appendChild($comment);
 3641 
 3642             /* A start tag whose tag name is "col" */
 3643         } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
 3644             /* Insert a col element for the token. Immediately pop the current
 3645             node off the stack of open elements. */
 3646             $this->insertElement($token);
 3647             array_pop($this->stack);
 3648 
 3649             /* An end tag whose tag name is "colgroup" */
 3650         } elseif ($token['type'] === HTML5::ENDTAG &&
 3651             $token['name'] === 'colgroup'
 3652         ) {
 3653             /* If the current node is the root html element, then this is a
 3654             parse error, ignore the token. (innerHTML case) */
 3655             if (end($this->stack)->nodeName === 'html') {
 3656                 // Ignore
 3657 
 3658                 /* Otherwise, pop the current node (which will be a colgroup
 3659                 element) from the stack of open elements. Switch the insertion
 3660                 mode to "in table". */
 3661             } else {
 3662                 array_pop($this->stack);
 3663                 $this->mode = self::IN_TABLE;
 3664             }
 3665 
 3666             /* An end tag whose tag name is "col" */
 3667         } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
 3668             /* Parse error. Ignore the token. */
 3669 
 3670             /* Anything else */
 3671         } else {
 3672             /* Act as if an end tag with the tag name "colgroup" had been seen,
 3673             and then, if that token wasn't ignored, reprocess the current token. */
 3674             $this->inColumnGroup(
 3675                 array(
 3676                     'name' => 'colgroup',
 3677                     'type' => HTML5::ENDTAG
 3678                 )
 3679             );
 3680 
 3681             return $this->inTable($token);
 3682         }
 3683     }
 3684 
 3685     private function inTableBody($token)
 3686     {
 3687         $clear = array('tbody', 'tfoot', 'thead', 'html');
 3688 
 3689         /* A start tag whose tag name is "tr" */
 3690         if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
 3691             /* Clear the stack back to a table body context. */
 3692             $this->clearStackToTableContext($clear);
 3693 
 3694             /* Insert a tr element for the token, then switch the insertion
 3695             mode to "in row". */
 3696             $this->insertElement($token);
 3697             $this->mode = self::IN_ROW;
 3698 
 3699             /* A start tag whose tag name is one of: "th", "td" */
 3700         } elseif ($token['type'] === HTML5::STARTTAG &&
 3701             ($token['name'] === 'th' || $token['name'] === 'td')
 3702         ) {
 3703             /* Parse error. Act as if a start tag with the tag name "tr" had
 3704             been seen, then reprocess the current token. */
 3705             $this->inTableBody(
 3706                 array(
 3707                     'name' => 'tr',
 3708                     'type' => HTML5::STARTTAG,
 3709                     'attr' => array()
 3710                 )
 3711             );
 3712 
 3713             return $this->inRow($token);
 3714 
 3715             /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
 3716         } elseif ($token['type'] === HTML5::ENDTAG &&
 3717             in_array($token['name'], array('tbody', 'tfoot', 'thead'))
 3718         ) {
 3719             /* If the stack of open elements does not have an element in table
 3720             scope with the same tag name as the token, this is a parse error.
 3721             Ignore the token. */
 3722             if (!$this->elementInScope($token['name'], true)) {
 3723                 // Ignore
 3724 
 3725                 /* Otherwise: */
 3726             } else {
 3727                 /* Clear the stack back to a table body context. */
 3728                 $this->clearStackToTableContext($clear);
 3729 
 3730                 /* Pop the current node from the stack of open elements. Switch
 3731                 the insertion mode to "in table". */
 3732                 array_pop($this->stack);
 3733                 $this->mode = self::IN_TABLE;
 3734             }
 3735 
 3736             /* A start tag whose tag name is one of: "caption", "col", "colgroup",
 3737             "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
 3738         } elseif (($token['type'] === HTML5::STARTTAG && in_array(
 3739                     $token['name'],
 3740                     array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead')
 3741                 )) ||
 3742             ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')
 3743         ) {
 3744             /* If the stack of open elements does not have a tbody, thead, or
 3745             tfoot element in table scope, this is a parse error. Ignore the
 3746             token. (innerHTML case) */
 3747             if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
 3748                 // Ignore.
 3749 
 3750                 /* Otherwise: */
 3751             } else {
 3752                 /* Clear the stack back to a table body context. */
 3753                 $this->clearStackToTableContext($clear);
 3754 
 3755                 /* Act as if an end tag with the same tag name as the current
 3756                 node ("tbody", "tfoot", or "thead") had been seen, then
 3757                 reprocess the current token. */
 3758                 $this->inTableBody(
 3759                     array(
 3760                         'name' => end($this->stack)->nodeName,
 3761                         'type' => HTML5::ENDTAG
 3762                     )
 3763                 );
 3764 
 3765                 return $this->mainPhase($token);
 3766             }
 3767 
 3768             /* An end tag whose tag name is one of: "body", "caption", "col",
 3769             "colgroup", "html", "td", "th", "tr" */
 3770         } elseif ($token['type'] === HTML5::ENDTAG && in_array(
 3771                 $token['name'],
 3772                 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
 3773             )
 3774         ) {
 3775             /* Parse error. Ignore the token. */
 3776 
 3777             /* Anything else */
 3778         } else {
 3779             /* Process the token as if the insertion mode was "in table". */
 3780             $this->inTable($token);
 3781         }
 3782     }
 3783 
 3784     private function inRow($token)
 3785     {
 3786         $clear = array('tr', 'html');
 3787 
 3788         /* A start tag whose tag name is one of: "th", "td" */
 3789         if ($token['type'] === HTML5::STARTTAG &&
 3790             ($token['name'] === 'th' || $token['name'] === 'td')
 3791         ) {
 3792             /* Clear the stack back to a table row context. */
 3793             $this->clearStackToTableContext($clear);
 3794 
 3795             /* Insert an HTML element for the token, then switch the insertion
 3796             mode to "in cell". */
 3797             $this->insertElement($token);
 3798             $this->mode = self::IN_CELL;
 3799 
 3800             /* Insert a marker at the end of the list of active formatting
 3801             elements. */
 3802             $this->a_formatting[] = self::MARKER;
 3803 
 3804             /* An end tag whose tag name is "tr" */
 3805         } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
 3806             /* If the stack of open elements does not have an element in table
 3807             scope with the same tag name as the token, this is a parse error.
 3808             Ignore the token. (innerHTML case) */
 3809             if (!$this->elementInScope($token['name'], true)) {
 3810                 // Ignore.
 3811 
 3812                 /* Otherwise: */
 3813             } else {
 3814                 /* Clear the stack back to a table row context. */
 3815                 $this->clearStackToTableContext($clear);
 3816 
 3817                 /* Pop the current node (which will be a tr element) from the
 3818                 stack of open elements. Switch the insertion mode to "in table
 3819                 body". */
 3820                 array_pop($this->stack);
 3821                 $this->mode = self::IN_TBODY;
 3822             }
 3823 
 3824             /* A start tag whose tag name is one of: "caption", "col", "colgroup",
 3825             "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
 3826         } elseif ($token['type'] === HTML5::STARTTAG && in_array(
 3827                 $token['name'],
 3828                 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr')
 3829             )
 3830         ) {
 3831             /* Act as if an end tag with the tag name "tr" had been seen, then,
 3832             if that token wasn't ignored, reprocess the current token. */
 3833             $this->inRow(
 3834                 array(
 3835                     'name' => 'tr',
 3836                     'type' => HTML5::ENDTAG
 3837                 )
 3838             );
 3839 
 3840             return $this->inCell($token);
 3841 
 3842             /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
 3843         } elseif ($token['type'] === HTML5::ENDTAG &&
 3844             in_array($token['name'], array('tbody', 'tfoot', 'thead'))
 3845         ) {
 3846             /* If the stack of open elements does not have an element in table
 3847             scope with the same tag name as the token, this is a parse error.
 3848             Ignore the token. */
 3849             if (!$this->elementInScope($token['name'], true)) {
 3850                 // Ignore.
 3851 
 3852                 /* Otherwise: */
 3853             } else {
 3854                 /* Otherwise, act as if an end tag with the tag name "tr" had
 3855                 been seen, then reprocess the current token. */
 3856                 $this->inRow(
 3857                     array(
 3858                         'name' => 'tr',
 3859                         'type' => HTML5::ENDTAG
 3860                     )
 3861                 );
 3862 
 3863                 return $this->inCell($token);
 3864             }
 3865 
 3866             /* An end tag whose tag name is one of: "body", "caption", "col",
 3867             "colgroup", "html", "td", "th" */
 3868         } elseif ($token['type'] === HTML5::ENDTAG && in_array(
 3869                 $token['name'],
 3870                 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
 3871             )
 3872         ) {
 3873             /* Parse error. Ignore the token. */
 3874 
 3875             /* Anything else */
 3876         } else {
 3877             /* Process the token as if the insertion mode was "in table". */
 3878             $this->inTable($token);
 3879         }
 3880     }
 3881 
 3882     private function inCell($token)
 3883     {
 3884         /* An end tag whose tag name is one of: "td", "th" */
 3885         if ($token['type'] === HTML5::ENDTAG &&
 3886             ($token['name'] === 'td' || $token['name'] === 'th')
 3887         ) {
 3888             /* If the stack of open elements does not have an element in table
 3889             scope with the same tag name as that of the token, then this is a
 3890             parse error and the token must be ignored. */
 3891             if (!$this->elementInScope($token['name'], true)) {
 3892                 // Ignore.
 3893 
 3894                 /* Otherwise: */
 3895             } else {
 3896                 /* Generate implied end tags, except for elements with the same
 3897                 tag name as the token. */
 3898                 $this->generateImpliedEndTags(array($token['name']));
 3899 
 3900                 /* Now, if the current node is not an element with the same tag
 3901                 name as the token, then this is a parse error. */
 3902                 // k
 3903 
 3904                 /* Pop elements from this stack until an element with the same
 3905                 tag name as the token has been popped from the stack. */
 3906                 while (true) {
 3907                     $node = end($this->stack)->nodeName;
 3908                     array_pop($this->stack);
 3909 
 3910                     if ($node === $token['name']) {
 3911                         break;
 3912                     }
 3913                 }
 3914 
 3915                 /* Clear the list of active formatting elements up to the last
 3916                 marker. */
 3917                 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
 3918 
 3919                 /* Switch the insertion mode to "in row". (The current node
 3920                 will be a tr element at this point.) */
 3921                 $this->mode = self::IN_ROW;
 3922             }
 3923 
 3924             /* A start tag whose tag name is one of: "caption", "col", "colgroup",
 3925             "tbody", "td", "tfoot", "th", "thead", "tr" */
 3926         } elseif ($token['type'] === HTML5::STARTTAG && in_array(
 3927                 $token['name'],
 3928                 array(
 3929                     'caption',
 3930                     'col',
 3931                     'colgroup',
 3932                     'tbody',
 3933                     'td',
 3934                     'tfoot',
 3935                     'th',
 3936                     'thead',
 3937                     'tr'
 3938                 )
 3939             )
 3940         ) {
 3941             /* If the stack of open elements does not have a td or th element
 3942             in table scope, then this is a parse error; ignore the token.
 3943             (innerHTML case) */
 3944             if (!$this->elementInScope(array('td', 'th'), true)) {
 3945                 // Ignore.
 3946 
 3947                 /* Otherwise, close the cell (see below) and reprocess the current
 3948                 token. */
 3949             } else {
 3950                 $this->closeCell();
 3951                 return $this->inRow($token);
 3952             }
 3953 
 3954             /* A start tag whose tag name is one of: "caption", "col", "colgroup",
 3955             "tbody", "td", "tfoot", "th", "thead", "tr" */
 3956         } elseif ($token['type'] === HTML5::STARTTAG && in_array(
 3957                 $token['name'],
 3958                 array(
 3959                     'caption',
 3960                     'col',
 3961                     'colgroup',
 3962                     'tbody',
 3963                     'td',
 3964                     'tfoot',
 3965                     'th',
 3966                     'thead',
 3967                     'tr'
 3968                 )
 3969             )
 3970         ) {
 3971             /* If the stack of open elements does not have a td or th element
 3972             in table scope, then this is a parse error; ignore the token.
 3973             (innerHTML case) */
 3974             if (!$this->elementInScope(array('td', 'th'), true)) {
 3975                 // Ignore.
 3976 
 3977                 /* Otherwise, close the cell (see below) and reprocess the current
 3978                 token. */
 3979             } else {
 3980                 $this->closeCell();
 3981                 return $this->inRow($token);
 3982             }
 3983 
 3984             /* An end tag whose tag name is one of: "body", "caption", "col",
 3985             "colgroup", "html" */
 3986         } elseif ($token['type'] === HTML5::ENDTAG && in_array(
 3987                 $token['name'],
 3988                 array('body', 'caption', 'col', 'colgroup', 'html')
 3989             )
 3990         ) {
 3991             /* Parse error. Ignore the token. */
 3992 
 3993             /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
 3994             "thead", "tr" */
 3995         } elseif ($token['type'] === HTML5::ENDTAG && in_array(
 3996                 $token['name'],
 3997                 array('table', 'tbody', 'tfoot', 'thead', 'tr')
 3998             )
 3999         ) {
 4000             /* If the stack of open elements does not have an element in table
 4001             scope with the same tag name as that of the token (which can only
 4002             happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
 4003             then this is a parse error and the token must be ignored. */
 4004             if (!$this->elementInScope($token['name'], true)) {
 4005                 // Ignore.
 4006 
 4007                 /* Otherwise, close the cell (see below) and reprocess the current
 4008                 token. */
 4009             } else {
 4010                 $this->closeCell();
 4011                 return $this->inRow($token);
 4012             }
 4013 
 4014             /* Anything else */
 4015         } else {
 4016             /* Process the token as if the insertion mode was "in body". */
 4017             $this->inBody($token);
 4018         }
 4019     }
 4020 
 4021     private function inSelect($token)
 4022     {
 4023         /* Handle the token as follows: */
 4024 
 4025         /* A character token */
 4026         if ($token['type'] === HTML5::CHARACTR) {
 4027             /* Append the token's character to the current node. */
 4028             $this->insertText($token['data']);
 4029 
 4030             /* A comment token */
 4031         } elseif ($token['type'] === HTML5::COMMENT) {
 4032             /* Append a Comment node to the current node with the data
 4033             attribute set to the data given in the comment token. */
 4034             $this->insertComment($token['data']);
 4035 
 4036             /* A start tag token whose tag name is "option" */
 4037         } elseif ($token['type'] === HTML5::STARTTAG &&
 4038             $token['name'] === 'option'
 4039         ) {
 4040             /* If the current node is an option element, act as if an end tag
 4041             with the tag name "option" had been seen. */
 4042             if (end($this->stack)->nodeName === 'option') {
 4043                 $this->inSelect(
 4044                     array(
 4045                         'name' => 'option',
 4046                         'type' => HTML5::ENDTAG
 4047                     )
 4048                 );
 4049             }
 4050 
 4051             /* Insert an HTML element for the token. */
 4052             $this->insertElement($token);
 4053 
 4054             /* A start tag token whose tag name is "optgroup" */
 4055         } elseif ($token['type'] === HTML5::STARTTAG &&
 4056             $token['name'] === 'optgroup'
 4057         ) {
 4058             /* If the current node is an option element, act as if an end tag
 4059             with the tag name "option" had been seen. */
 4060             if (end($this->stack)->nodeName === 'option') {
 4061                 $this->inSelect(
 4062                     array(
 4063                         'name' => 'option',
 4064                         'type' => HTML5::ENDTAG
 4065                     )
 4066                 );
 4067             }
 4068 
 4069             /* If the current node is an optgroup element, act as if an end tag
 4070             with the tag name "optgroup" had been seen. */
 4071             if (end($this->stack)->nodeName === 'optgroup') {
 4072                 $this->inSelect(
 4073                     array(
 4074                         'name' => 'optgroup',
 4075                         'type' => HTML5::ENDTAG
 4076                     )
 4077                 );
 4078             }
 4079 
 4080             /* Insert an HTML element for the token. */
 4081             $this->insertElement($token);
 4082 
 4083             /* An end tag token whose tag name is "optgroup" */
 4084         } elseif ($token['type'] === HTML5::ENDTAG &&
 4085             $token['name'] === 'optgroup'
 4086         ) {
 4087             /* First, if the current node is an option element, and the node
 4088             immediately before it in the stack of open elements is an optgroup
 4089             element, then act as if an end tag with the tag name "option" had
 4090             been seen. */
 4091             $elements_in_stack = count($this->stack);
 4092 
 4093             if ($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
 4094                 $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup'
 4095             ) {
 4096                 $this->inSelect(
 4097                     array(
 4098                         'name' => 'option',
 4099                         'type' => HTML5::ENDTAG
 4100                     )
 4101                 );
 4102             }
 4103 
 4104             /* If the current node is an optgroup element, then pop that node
 4105             from the stack of open elements. Otherwise, this is a parse error,
 4106             ignore the token. */
 4107             if ($this->stack[$elements_in_stack - 1] === 'optgroup') {
 4108                 array_pop($this->stack);
 4109             }
 4110 
 4111             /* An end tag token whose tag name is "option" */
 4112         } elseif ($token['type'] === HTML5::ENDTAG &&
 4113             $token['name'] === 'option'
 4114         ) {
 4115             /* If the current node is an option element, then pop that node
 4116             from the stack of open elements. Otherwise, this is a parse error,
 4117             ignore the token. */
 4118             if (end($this->stack)->nodeName === 'option') {
 4119                 array_pop($this->stack);
 4120             }
 4121 
 4122             /* An end tag whose tag name is "select" */
 4123         } elseif ($token['type'] === HTML5::ENDTAG &&
 4124             $token['name'] === 'select'
 4125         ) {
 4126             /* If the stack of open elements does not have an element in table
 4127             scope with the same tag name as the token, this is a parse error.
 4128             Ignore the token. (innerHTML case) */
 4129             if (!$this->elementInScope($token['name'], true)) {
 4130                 // w/e
 4131 
 4132                 /* Otherwise: */
 4133             } else {
 4134                 /* Pop elements from the stack of open elements until a select
 4135                 element has been popped from the stack. */
 4136                 while (true) {
 4137                     $current = end($this->stack)->nodeName;
 4138                     array_pop($this->stack);
 4139 
 4140                     if ($current === 'select') {
 4141                         break;
 4142                     }
 4143                 }
 4144 
 4145                 /* Reset the insertion mode appropriately. */
 4146                 $this->resetInsertionMode();
 4147             }
 4148 
 4149             /* A start tag whose tag name is "select" */
 4150         } elseif ($token['name'] === 'select' &&
 4151             $token['type'] === HTML5::STARTTAG
 4152         ) {
 4153             /* Parse error. Act as if the token had been an end tag with the
 4154             tag name "select" instead. */
 4155             $this->inSelect(
 4156                 array(
 4157                     'name' => 'select',
 4158                     'type' => HTML5::ENDTAG
 4159                 )
 4160             );
 4161 
 4162             /* An end tag whose tag name is one of: "caption", "table", "tbody",
 4163             "tfoot", "thead", "tr", "td", "th" */
 4164         } elseif (in_array(
 4165                 $token['name'],
 4166                 array(
 4167                     'caption',
 4168                     'table',
 4169                     'tbody',
 4170                     'tfoot',
 4171                     'thead',
 4172                     'tr',
 4173                     'td',
 4174                     'th'
 4175                 )
 4176             ) && $token['type'] === HTML5::ENDTAG
 4177         ) {
 4178             /* Parse error. */
 4179             // w/e
 4180 
 4181             /* If the stack of open elements has an element in table scope with
 4182             the same tag name as that of the token, then act as if an end tag
 4183             with the tag name "select" had been seen, and reprocess the token.
 4184             Otherwise, ignore the token. */
 4185             if ($this->elementInScope($token['name'], true)) {
 4186                 $this->inSelect(
 4187                     array(
 4188                         'name' => 'select',
 4189                         'type' => HTML5::ENDTAG
 4190                     )
 4191                 );
 4192 
 4193                 $this->mainPhase($token);
 4194             }
 4195 
 4196             /* Anything else */
 4197         } else {
 4198             /* Parse error. Ignore the token. */
 4199         }
 4200     }
 4201 
 4202     private function afterBody($token)
 4203     {
 4204         /* Handle the token as follows: */
 4205 
 4206         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 4207         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 4208         or U+0020 SPACE */
 4209         if ($token['type'] === HTML5::CHARACTR &&
 4210             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 4211         ) {
 4212             /* Process the token as it would be processed if the insertion mode
 4213             was "in body". */
 4214             $this->inBody($token);
 4215 
 4216             /* A comment token */
 4217         } elseif ($token['type'] === HTML5::COMMENT) {
 4218             /* Append a Comment node to the first element in the stack of open
 4219             elements (the html element), with the data attribute set to the
 4220             data given in the comment token. */
 4221             $comment = $this->dom->createComment($token['data']);
 4222             $this->stack[0]->appendChild($comment);
 4223 
 4224             /* An end tag with the tag name "html" */
 4225         } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
 4226             /* If the parser was originally created in order to handle the
 4227             setting of an element's innerHTML attribute, this is a parse error;
 4228             ignore the token. (The element will be an html element in this
 4229             case.) (innerHTML case) */
 4230 
 4231             /* Otherwise, switch to the trailing end phase. */
 4232             $this->phase = self::END_PHASE;
 4233 
 4234             /* Anything else */
 4235         } else {
 4236             /* Parse error. Set the insertion mode to "in body" and reprocess
 4237             the token. */
 4238             $this->mode = self::IN_BODY;
 4239             return $this->inBody($token);
 4240         }
 4241     }
 4242 
 4243     private function inFrameset($token)
 4244     {
 4245         /* Handle the token as follows: */
 4246 
 4247         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 4248         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 4249         U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
 4250         if ($token['type'] === HTML5::CHARACTR &&
 4251             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 4252         ) {
 4253             /* Append the character to the current node. */
 4254             $this->insertText($token['data']);
 4255 
 4256             /* A comment token */
 4257         } elseif ($token['type'] === HTML5::COMMENT) {
 4258             /* Append a Comment node to the current node with the data
 4259             attribute set to the data given in the comment token. */
 4260             $this->insertComment($token['data']);
 4261 
 4262             /* A start tag with the tag name "frameset" */
 4263         } elseif ($token['name'] === 'frameset' &&
 4264             $token['type'] === HTML5::STARTTAG
 4265         ) {
 4266             $this->insertElement($token);
 4267 
 4268             /* An end tag with the tag name "frameset" */
 4269         } elseif ($token['name'] === 'frameset' &&
 4270             $token['type'] === HTML5::ENDTAG
 4271         ) {
 4272             /* If the current node is the root html element, then this is a
 4273             parse error; ignore the token. (innerHTML case) */
 4274             if (end($this->stack)->nodeName === 'html') {
 4275                 // Ignore
 4276 
 4277             } else {
 4278                 /* Otherwise, pop the current node from the stack of open
 4279                 elements. */
 4280                 array_pop($this->stack);
 4281 
 4282                 /* If the parser was not originally created in order to handle
 4283                 the setting of an element's innerHTML attribute (innerHTML case),
 4284                 and the current node is no longer a frameset element, then change
 4285                 the insertion mode to "after frameset". */
 4286                 $this->mode = self::AFTR_FRAME;
 4287             }
 4288 
 4289             /* A start tag with the tag name "frame" */
 4290         } elseif ($token['name'] === 'frame' &&
 4291             $token['type'] === HTML5::STARTTAG
 4292         ) {
 4293             /* Insert an HTML element for the token. */
 4294             $this->insertElement($token);
 4295 
 4296             /* Immediately pop the current node off the stack of open elements. */
 4297             array_pop($this->stack);
 4298 
 4299             /* A start tag with the tag name "noframes" */
 4300         } elseif ($token['name'] === 'noframes' &&
 4301             $token['type'] === HTML5::STARTTAG
 4302         ) {
 4303             /* Process the token as if the insertion mode had been "in body". */
 4304             $this->inBody($token);
 4305 
 4306             /* Anything else */
 4307         } else {
 4308             /* Parse error. Ignore the token. */
 4309         }
 4310     }
 4311 
 4312     private function afterFrameset($token)
 4313     {
 4314         /* Handle the token as follows: */
 4315 
 4316         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 4317         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 4318         U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
 4319         if ($token['type'] === HTML5::CHARACTR &&
 4320             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 4321         ) {
 4322             /* Append the character to the current node. */
 4323             $this->insertText($token['data']);
 4324 
 4325             /* A comment token */
 4326         } elseif ($token['type'] === HTML5::COMMENT) {
 4327             /* Append a Comment node to the current node with the data
 4328             attribute set to the data given in the comment token. */
 4329             $this->insertComment($token['data']);
 4330 
 4331             /* An end tag with the tag name "html" */
 4332         } elseif ($token['name'] === 'html' &&
 4333             $token['type'] === HTML5::ENDTAG
 4334         ) {
 4335             /* Switch to the trailing end phase. */
 4336             $this->phase = self::END_PHASE;
 4337 
 4338             /* A start tag with the tag name "noframes" */
 4339         } elseif ($token['name'] === 'noframes' &&
 4340             $token['type'] === HTML5::STARTTAG
 4341         ) {
 4342             /* Process the token as if the insertion mode had been "in body". */
 4343             $this->inBody($token);
 4344 
 4345             /* Anything else */
 4346         } else {
 4347             /* Parse error. Ignore the token. */
 4348         }
 4349     }
 4350 
 4351     private function trailingEndPhase($token)
 4352     {
 4353         /* After the main phase, as each token is emitted from the tokenisation
 4354         stage, it must be processed as described in this section. */
 4355 
 4356         /* A DOCTYPE token */
 4357         if ($token['type'] === HTML5::DOCTYPE) {
 4358             // Parse error. Ignore the token.
 4359 
 4360             /* A comment token */
 4361         } elseif ($token['type'] === HTML5::COMMENT) {
 4362             /* Append a Comment node to the Document object with the data
 4363             attribute set to the data given in the comment token. */
 4364             $comment = $this->dom->createComment($token['data']);
 4365             $this->dom->appendChild($comment);
 4366 
 4367             /* A character token that is one of one of U+0009 CHARACTER TABULATION,
 4368             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 4369             or U+0020 SPACE */
 4370         } elseif ($token['type'] === HTML5::CHARACTR &&
 4371             preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
 4372         ) {
 4373             /* Process the token as it would be processed in the main phase. */
 4374             $this->mainPhase($token);
 4375 
 4376             /* A character token that is not one of U+0009 CHARACTER TABULATION,
 4377             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
 4378             or U+0020 SPACE. Or a start tag token. Or an end tag token. */
 4379         } elseif (($token['type'] === HTML5::CHARACTR &&
 4380                 preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
 4381             $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG
 4382         ) {
 4383             /* Parse error. Switch back to the main phase and reprocess the
 4384             token. */
 4385             $this->phase = self::MAIN_PHASE;
 4386             return $this->mainPhase($token);
 4387 
 4388             /* An end-of-file token */
 4389         } elseif ($token['type'] === HTML5::EOF) {
 4390             /* OMG DONE!! */
 4391         }
 4392     }
 4393 
 4394     private function insertElement($token, $append = true, $check = false)
 4395     {
 4396         // Proprietary workaround for libxml2's limitations with tag names
 4397         if ($check) {
 4398             // Slightly modified HTML5 tag-name modification,
 4399             // removing anything that's not an ASCII letter, digit, or hyphen
 4400             $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
 4401             // Remove leading hyphens and numbers
 4402             $token['name'] = ltrim($token['name'], '-0..9');
 4403             // In theory, this should ever be needed, but just in case
 4404             if ($token['name'] === '') {
 4405                 $token['name'] = 'span';
 4406             } // arbitrary generic choice
 4407         }
 4408 
 4409         $el = $this->dom->createElement($token['name']);
 4410 
 4411         foreach ($token['attr'] as $attr) {
 4412             if (!$el->hasAttribute($attr['name'])) {
 4413                 $el->setAttribute($attr['name'], $attr['value']);
 4414             }
 4415         }
 4416 
 4417         $this->appendToRealParent($el);
 4418         $this->stack[] = $el;
 4419 
 4420         return $el;
 4421     }
 4422 
 4423     private function insertText($data)
 4424     {
 4425         $text = $this->dom->createTextNode($data);
 4426         $this->appendToRealParent($text);
 4427     }
 4428 
 4429     private function insertComment($data)
 4430     {
 4431         $comment = $this->dom->createComment($data);
 4432         $this->appendToRealParent($comment);
 4433     }
 4434 
 4435     private function appendToRealParent($node)
 4436     {
 4437         if ($this->foster_parent === null) {
 4438             end($this->stack)->appendChild($node);
 4439 
 4440         } elseif ($this->foster_parent !== null) {
 4441             /* If the foster parent element is the parent element of the
 4442             last table element in the stack of open elements, then the new
 4443             node must be inserted immediately before the last table element
 4444             in the stack of open elements in the foster parent element;
 4445             otherwise, the new node must be appended to the foster parent
 4446             element. */
 4447             for ($n = count($this->stack) - 1; $n >= 0; $n--) {
 4448                 if ($this->stack[$n]->nodeName === 'table' &&
 4449                     $this->stack[$n]->parentNode !== null
 4450                 ) {
 4451                     $table = $this->stack[$n];
 4452                     break;
 4453                 }
 4454             }
 4455 
 4456             if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) {
 4457                 $this->foster_parent->insertBefore($node, $table);
 4458             } else {
 4459                 $this->foster_parent->appendChild($node);
 4460             }
 4461 
 4462             $this->foster_parent = null;
 4463         }
 4464     }
 4465 
 4466     private function elementInScope($el, $table = false)
 4467     {
 4468         if (is_array($el)) {
 4469             foreach ($el as $element) {
 4470                 if ($this->elementInScope($element, $table)) {
 4471                     return true;
 4472                 }
 4473             }
 4474 
 4475             return false;
 4476         }
 4477 
 4478         $leng = count($this->stack);
 4479 
 4480         for ($n = 0; $n < $leng; $n++) {
 4481             /* 1. Initialise node to be the current node (the bottommost node of
 4482             the stack). */
 4483             $node = $this->stack[$leng - 1 - $n];
 4484 
 4485             if ($node->tagName === $el) {
 4486                 /* 2. If node is the target node, terminate in a match state. */
 4487                 return true;
 4488 
 4489             } elseif ($node->tagName === 'table') {
 4490                 /* 3. Otherwise, if node is a table element, terminate in a failure
 4491                 state. */
 4492                 return false;
 4493 
 4494             } elseif ($table === true && in_array(
 4495                     $node->tagName,
 4496                     array(
 4497                         'caption',
 4498                         'td',
 4499                         'th',
 4500                         'button',
 4501                         'marquee',
 4502                         'object'
 4503                     )
 4504                 )
 4505             ) {
 4506                 /* 4. Otherwise, if the algorithm is the "has an element in scope"
 4507                 variant (rather than the "has an element in table scope" variant),
 4508                 and node is one of the following, terminate in a failure state. */
 4509                 return false;
 4510 
 4511             } elseif ($node === $node->ownerDocument->documentElement) {
 4512                 /* 5. Otherwise, if node is an html element (root element), terminate
 4513                 in a failure state. (This can only happen if the node is the topmost
 4514                 node of the    stack of open elements, and prevents the next step from
 4515                 being invoked if there are no more elements in the stack.) */
 4516                 return false;
 4517             }
 4518 
 4519             /* Otherwise, set node to the previous entry in the stack of open
 4520             elements and return to step 2. (This will never fail, since the loop
 4521             will always terminate in the previous step if the top of the stack
 4522             is reached.) */
 4523         }
 4524     }
 4525 
 4526     private function reconstructActiveFormattingElements()
 4527     {
 4528         /* 1. If there are no entries in the list of active formatting elements,
 4529         then there is nothing to reconstruct; stop this algorithm. */
 4530         $formatting_elements = count($this->a_formatting);
 4531 
 4532         if ($formatting_elements === 0) {
 4533             return false;
 4534         }
 4535 
 4536         /* 3. Let entry be the last (most recently added) element in the list
 4537         of active formatting elements. */
 4538         $entry = end($this->a_formatting);
 4539 
 4540         /* 2. If the last (most recently added) entry in the list of active
 4541         formatting elements is a marker, or if it is an element that is in the
 4542         stack of open elements, then there is nothing to reconstruct; stop this
 4543         algorithm. */
 4544         if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
 4545             return false;
 4546         }
 4547 
 4548         for ($a = $formatting_elements - 1; $a >= 0; true) {
 4549             /* 4. If there are no entries before entry in the list of active
 4550             formatting elements, then jump to step 8. */
 4551             if ($a === 0) {
 4552                 $step_seven = false;
 4553                 break;
 4554             }
 4555 
 4556             /* 5. Let entry be the entry one earlier than entry in the list of
 4557             active formatting elements. */
 4558             $a--;
 4559             $entry = $this->a_formatting[$a];
 4560 
 4561             /* 6. If entry is neither a marker nor an element that is also in
 4562             thetack of open elements, go to step 4. */
 4563             if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
 4564                 break;
 4565             }
 4566         }
 4567 
 4568         while (true) {
 4569             /* 7. Let entry be the element one later than entry in the list of
 4570             active formatting elements. */
 4571             if (isset($step_seven) && $step_seven === true) {
 4572                 $a++;
 4573                 $entry = $this->a_formatting[$a];
 4574             }
 4575 
 4576             /* 8. Perform a shallow clone of the element entry to obtain clone. */
 4577             $clone = $entry->cloneNode();
 4578 
 4579             /* 9. Append clone to the current node and push it onto the stack
 4580             of open elements  so that it is the new current node. */
 4581             end($this->stack)->appendChild($clone);
 4582             $this->stack[] = $clone;
 4583 
 4584             /* 10. Replace the entry for entry in the list with an entry for
 4585             clone. */
 4586             $this->a_formatting[$a] = $clone;
 4587 
 4588             /* 11. If the entry for clone in the list of active formatting
 4589             elements is not the last entry in the list, return to step 7. */
 4590             if (end($this->a_formatting) !== $clone) {
 4591                 $step_seven = true;
 4592             } else {
 4593                 break;
 4594             }
 4595         }
 4596     }
 4597 
 4598     private function clearTheActiveFormattingElementsUpToTheLastMarker()
 4599     {
 4600         /* When the steps below require the UA to clear the list of active
 4601         formatting elements up to the last marker, the UA must perform the
 4602         following steps: */
 4603 
 4604         while (true) {
 4605             /* 1. Let entry be the last (most recently added) entry in the list
 4606             of active formatting elements. */
 4607             $entry = end($this->a_formatting);
 4608 
 4609             /* 2. Remove entry from the list of active formatting elements. */
 4610             array_pop($this->a_formatting);
 4611 
 4612             /* 3. If entry was a marker, then stop the algorithm at this point.
 4613             The list has been cleared up to the last marker. */
 4614             if ($entry === self::MARKER) {
 4615                 break;
 4616             }
 4617         }
 4618     }
 4619 
 4620     private function generateImpliedEndTags($exclude = array())
 4621     {
 4622         /* When the steps below require the UA to generate implied end tags,
 4623         then, if the current node is a dd element, a dt element, an li element,
 4624         a p element, a td element, a th  element, or a tr element, the UA must
 4625         act as if an end tag with the respective tag name had been seen and
 4626         then generate implied end tags again. */
 4627         $node = end($this->stack);
 4628         $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
 4629 
 4630         while (in_array(end($this->stack)->nodeName, $elements)) {
 4631             array_pop($this->stack);
 4632         }
 4633     }
 4634 
 4635     private function getElementCategory($node)
 4636     {
 4637         $name = $node->tagName;
 4638         if (in_array($name, $this->special)) {
 4639             return self::SPECIAL;
 4640         } elseif (in_array($name, $this->scoping)) {
 4641             return self::SCOPING;
 4642         } elseif (in_array($name, $this->formatting)) {
 4643             return self::FORMATTING;
 4644         } else {
 4645             return self::PHRASING;
 4646         }
 4647     }
 4648 
 4649     private function clearStackToTableContext($elements)
 4650     {
 4651         /* When the steps above require the UA to clear the stack back to a
 4652         table context, it means that the UA must, while the current node is not
 4653         a table element or an html element, pop elements from the stack of open
 4654         elements. If this causes any elements to be popped from the stack, then
 4655         this is a parse error. */
 4656         while (true) {
 4657             $node = end($this->stack)->nodeName;
 4658 
 4659             if (in_array($node, $elements)) {
 4660                 break;
 4661             } else {
 4662                 array_pop($this->stack);
 4663             }
 4664         }
 4665     }
 4666 
 4667     private function resetInsertionMode()
 4668     {
 4669         /* 1. Let last be false. */
 4670         $last = false;
 4671         $leng = count($this->stack);
 4672 
 4673         for ($n = $leng - 1; $n >= 0; $n--) {
 4674             /* 2. Let node be the last node in the stack of open elements. */
 4675             $node = $this->stack[$n];
 4676 
 4677             /* 3. If node is the first node in the stack of open elements, then
 4678             set last to true. If the element whose innerHTML  attribute is being
 4679             set is neither a td  element nor a th element, then set node to the
 4680             element whose innerHTML  attribute is being set. (innerHTML  case) */
 4681             if ($this->stack[0]->isSameNode($node)) {
 4682                 $last = true;
 4683             }
 4684 
 4685             /* 4. If node is a select element, then switch the insertion mode to
 4686             "in select" and abort these steps. (innerHTML case) */
 4687             if ($node->nodeName === 'select') {
 4688                 $this->mode = self::IN_SELECT;
 4689                 break;
 4690 
 4691                 /* 5. If node is a td or th element, then switch the insertion mode
 4692                 to "in cell" and abort these steps. */
 4693             } elseif ($node->nodeName === 'td' || $node->nodeName === 'th') {
 4694                 $this->mode = self::IN_CELL;
 4695                 break;
 4696 
 4697                 /* 6. If node is a tr element, then switch the insertion mode to
 4698                 "in    row" and abort these steps. */
 4699             } elseif ($node->nodeName === 'tr') {
 4700                 $this->mode = self::IN_ROW;
 4701                 break;
 4702 
 4703                 /* 7. If node is a tbody, thead, or tfoot element, then switch the
 4704                 insertion mode to "in table body" and abort these steps. */
 4705             } elseif (in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
 4706                 $this->mode = self::IN_TBODY;
 4707                 break;
 4708 
 4709                 /* 8. If node is a caption element, then switch the insertion mode
 4710                 to "in caption" and abort these steps. */
 4711             } elseif ($node->nodeName === 'caption') {
 4712                 $this->mode = self::IN_CAPTION;
 4713                 break;
 4714 
 4715                 /* 9. If node is a colgroup element, then switch the insertion mode
 4716                 to "in column group" and abort these steps. (innerHTML case) */
 4717             } elseif ($node->nodeName === 'colgroup') {
 4718                 $this->mode = self::IN_CGROUP;
 4719                 break;
 4720 
 4721                 /* 10. If node is a table element, then switch the insertion mode
 4722                 to "in table" and abort these steps. */
 4723             } elseif ($node->nodeName === 'table') {
 4724                 $this->mode = self::IN_TABLE;
 4725                 break;
 4726 
 4727                 /* 11. If node is a head element, then switch the insertion mode
 4728                 to "in body" ("in body"! not "in head"!) and abort these steps.
 4729                 (innerHTML case) */
 4730             } elseif ($node->nodeName === 'head') {
 4731                 $this->mode = self::IN_BODY;
 4732                 break;
 4733 
 4734                 /* 12. If node is a body element, then switch the insertion mode to
 4735                 "in body" and abort these steps. */
 4736             } elseif ($node->nodeName === 'body') {
 4737                 $this->mode = self::IN_BODY;
 4738                 break;
 4739 
 4740                 /* 13. If node is a frameset element, then switch the insertion
 4741                 mode to "in frameset" and abort these steps. (innerHTML case) */
 4742             } elseif ($node->nodeName === 'frameset') {
 4743                 $this->mode = self::IN_FRAME;
 4744                 break;
 4745 
 4746                 /* 14. If node is an html element, then: if the head element
 4747                 pointer is null, switch the insertion mode to "before head",
 4748                 otherwise, switch the insertion mode to "after head". In either
 4749                 case, abort these steps. (innerHTML case) */
 4750             } elseif ($node->nodeName === 'html') {
 4751                 $this->mode = ($this->head_pointer === null)
 4752                     ? self::BEFOR_HEAD
 4753                     : self::AFTER_HEAD;
 4754 
 4755                 break;
 4756 
 4757                 /* 15. If last is true, then set the insertion mode to "in body"
 4758                 and    abort these steps. (innerHTML case) */
 4759             } elseif ($last) {
 4760                 $this->mode = self::IN_BODY;
 4761                 break;
 4762             }
 4763         }
 4764     }
 4765 
 4766     private function closeCell()
 4767     {
 4768         /* If the stack of open elements has a td or th element in table scope,
 4769         then act as if an end tag token with that tag name had been seen. */
 4770         foreach (array('td', 'th') as $cell) {
 4771             if ($this->elementInScope($cell, true)) {
 4772                 $this->inCell(
 4773                     array(
 4774                         'name' => $cell,
 4775                         'type' => HTML5::ENDTAG
 4776                     )
 4777                 );
 4778 
 4779                 break;
 4780             }
 4781         }
 4782     }
 4783 
 4784     public function save()
 4785     {
 4786         return $this->dom;
 4787     }
 4788 }