"Fossies" - the Fresh Open Source Software Archive

Member "tidy-html5-5.8.0/src/parser.c" (16 Jul 2021, 148143 Bytes) of package /linux/www/tidy-html5-5.8.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "parser.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.7.28_vs_5.8.0.

    1 /* parser.c -- HTML Parser
    2 
    3   (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
    4   See tidy.h for the copyright notice.
    5 
    6 */
    7 
    8 #include "tidy-int.h"
    9 #include "lexer.h"
   10 #include "parser.h"
   11 #include "message.h"
   12 #include "clean.h"
   13 #include "tags.h"
   14 #include "tmbstr.h"
   15 #include "sprtf.h"
   16 
   17 /*
   18   Issue #72 - Need to know to avoid error-reporting - no warning only if --show-body-only yes
   19   Issue #132 - likewise avoid warning if showing body only
   20  */
   21 #define showingBodyOnly(doc) (cfgAutoBool(doc,TidyBodyOnly) == TidyYesState) ? yes : no
   22 
   23 
   24 Bool TY_(CheckNodeIntegrity)(Node *node)
   25 {
   26 #ifndef NO_NODE_INTEGRITY_CHECK
   27     Node *child;
   28 
   29     if (node->prev)
   30     {
   31         if (node->prev->next != node)
   32             return no;
   33     }
   34 
   35     if (node->next)
   36     {
   37         if (node->next == node || node->next->prev != node)
   38             return no;
   39     }
   40 
   41     if (node->parent)
   42     {
   43         if (node->prev == NULL && node->parent->content != node)
   44             return no;
   45 
   46         if (node->next == NULL && node->parent->last != node)
   47             return no;
   48     }
   49 
   50     for (child = node->content; child; child = child->next)
   51         if ( child->parent != node || !TY_(CheckNodeIntegrity)(child) )
   52             return no;
   53 
   54 #endif
   55     return yes;
   56 }
   57 
   58 /*
   59  used to determine how attributes
   60  without values should be printed
   61  this was introduced to deal with
   62  user defined tags e.g. ColdFusion
   63 */
   64 Bool TY_(IsNewNode)(Node *node)
   65 {
   66     if (node && node->tag)
   67     {
   68         return (node->tag->model & CM_NEW);
   69     }
   70     return yes;
   71 }
   72 
   73 void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool unexpected)
   74 {
   75     const Dict* tag = TY_(LookupTagDef)(tid);
   76     Node* tmp = TY_(InferredTag)(doc, tag->id);
   77 
   78     if (obsolete)
   79         TY_(Report)(doc, node, tmp, OBSOLETE_ELEMENT);
   80     else if (unexpected)
   81         TY_(Report)(doc, node, tmp, REPLACING_UNEX_ELEMENT);
   82     else
   83         TY_(Report)(doc, node, tmp, REPLACING_ELEMENT);
   84 
   85     TidyDocFree(doc, tmp->element);
   86     TidyDocFree(doc, tmp);
   87 
   88     node->was = node->tag;
   89     node->tag = tag;
   90     node->type = StartTag;
   91     node->implicit = yes;
   92     TidyDocFree(doc, node->element);
   93     node->element = TY_(tmbstrdup)(doc->allocator, tag->name);
   94 }
   95 
   96 /* extract a node and its children from a markup tree */
   97 Node *TY_(RemoveNode)(Node *node)
   98 {
   99     if (node->prev)
  100         node->prev->next = node->next;
  101 
  102     if (node->next)
  103         node->next->prev = node->prev;
  104 
  105     if (node->parent)
  106     {
  107         if (node->parent->content == node)
  108             node->parent->content = node->next;
  109 
  110         if (node->parent->last == node)
  111             node->parent->last = node->prev;
  112     }
  113 
  114     node->parent = node->prev = node->next = NULL;
  115     return node;
  116 }
  117 
  118 /* remove node from markup tree and discard it */
  119 Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element )
  120 {
  121     Node *next = NULL;
  122 
  123     if (element)
  124     {
  125         next = element->next;
  126         TY_(RemoveNode)(element);
  127         TY_(FreeNode)( doc, element);
  128     }
  129 
  130     return next;
  131 }
  132 
  133 /*
  134  insert "node" into markup tree as the firt element
  135  of content of "element"
  136 */
  137 void TY_(InsertNodeAtStart)(Node *element, Node *node)
  138 {
  139     node->parent = element;
  140 
  141     if (element->content == NULL)
  142         element->last = node;
  143     else
  144         element->content->prev = node;
  145 
  146     node->next = element->content;
  147     node->prev = NULL;
  148     element->content = node;
  149 }
  150 
  151 /*
  152  insert "node" into markup tree as the last element
  153  of content of "element"
  154 */
  155 void TY_(InsertNodeAtEnd)(Node *element, Node *node)
  156 {
  157     node->parent = element;
  158     node->prev = element->last;
  159 
  160     if (element->last != NULL)
  161         element->last->next = node;
  162     else
  163         element->content = node;
  164 
  165     element->last = node;
  166 }
  167 
  168 /*
  169  insert "node" into markup tree in place of "element"
  170  which is moved to become the child of the node
  171 */
  172 static void InsertNodeAsParent(Node *element, Node *node)
  173 {
  174     node->content = element;
  175     node->last = element;
  176     node->parent = element->parent;
  177     element->parent = node;
  178 
  179     if (node->parent->content == element)
  180         node->parent->content = node;
  181 
  182     if (node->parent->last == element)
  183         node->parent->last = node;
  184 
  185     node->prev = element->prev;
  186     element->prev = NULL;
  187 
  188     if (node->prev)
  189         node->prev->next = node;
  190 
  191     node->next = element->next;
  192     element->next = NULL;
  193 
  194     if (node->next)
  195         node->next->prev = node;
  196 }
  197 
  198 /* insert "node" into markup tree before "element" */
  199 void TY_(InsertNodeBeforeElement)(Node *element, Node *node)
  200 {
  201     Node *parent;
  202 
  203     parent = element->parent;
  204     node->parent = parent;
  205     node->next = element;
  206     node->prev = element->prev;
  207     element->prev = node;
  208 
  209     if (node->prev)
  210         node->prev->next = node;
  211 
  212     if (parent->content == element)
  213         parent->content = node;
  214 }
  215 
  216 /* insert "node" into markup tree after "element" */
  217 void TY_(InsertNodeAfterElement)(Node *element, Node *node)
  218 {
  219     Node *parent;
  220 
  221     parent = element->parent;
  222     node->parent = parent;
  223 
  224     /* AQ - 13 Jan 2000 fix for parent == NULL */
  225     if (parent != NULL && parent->last == element)
  226         parent->last = node;
  227     else
  228     {
  229         node->next = element->next;
  230         /* AQ - 13 Jan 2000 fix for node->next == NULL */
  231         if (node->next != NULL)
  232             node->next->prev = node;
  233     }
  234 
  235     element->next = node;
  236     node->prev = element;
  237 }
  238 
  239 static Bool CanPrune( TidyDocImpl* doc, Node *element )
  240 {
  241     if ( !cfgBool(doc, TidyDropEmptyElems) )
  242         return no;
  243 
  244     if ( TY_(nodeIsText)(element) )
  245         return yes;
  246 
  247     if ( element->content )
  248         return no;
  249 
  250     if ( element->tag == NULL )
  251         return no;
  252 
  253     if ( element->tag->model & CM_BLOCK && element->attributes != NULL )
  254         return no;
  255 
  256     if ( nodeIsA(element) && element->attributes != NULL )
  257         return no;
  258 
  259     if ( nodeIsP(element) && !cfgBool(doc, TidyDropEmptyParas) )
  260         return no;
  261 
  262     if ( element->tag->model & CM_ROW )
  263         return no;
  264 
  265     if ( element->tag->model & CM_EMPTY )
  266         return no;
  267 
  268     if ( nodeIsAPPLET(element) )
  269         return no;
  270 
  271     if ( nodeIsOBJECT(element) )
  272         return no;
  273 
  274     if ( nodeIsSCRIPT(element) && attrGetSRC(element) )
  275         return no;
  276 
  277     if ( nodeIsTITLE(element) )
  278         return no;
  279 
  280     /* #433359 - fix by Randy Waki 12 Mar 01 */
  281     if ( nodeIsIFRAME(element) )
  282         return no;
  283 
  284     /* fix for bug 770297 */
  285     if (nodeIsTEXTAREA(element))
  286         return no;
  287 
  288     /* fix for ISSUE #7 https://github.com/w3c/tidy-html5/issues/7 */
  289     if (nodeIsCANVAS(element))
  290         return no;
  291     
  292     if (nodeIsPROGRESS(element))
  293         return no;
  294 
  295     if ( attrGetID(element) || attrGetNAME(element) )
  296         return no;
  297 
  298     /* fix for bug 695408; a better fix would look for unknown and    */
  299     /* known proprietary attributes that make the element significant */
  300     if (attrGetDATAFLD(element))
  301         return no;
  302 
  303     /* fix for bug 723772, don't trim new-...-tags */
  304     if (element->tag->id == TidyTag_UNKNOWN)
  305         return no;
  306 
  307     if (nodeIsBODY(element))
  308         return no;
  309 
  310     if (nodeIsCOLGROUP(element))
  311         return no;
  312 
  313     /* HTML5 - do NOT drop empty option if it has attributes */
  314     if ( nodeIsOPTION(element) && element->attributes != NULL )
  315         return no;
  316 
  317     /* fix for #103 - don't drop empty dd tags lest document not validate */
  318     if (nodeIsDD(element))
  319         return no;
  320 
  321     return yes;
  322 }
  323 
  324 /* return next element */
  325 Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element )
  326 {
  327     if ( CanPrune(doc, element) )
  328     {
  329         if (element->type != TextNode)
  330         {
  331             doc->footnotes |= FN_TRIM_EMPTY_ELEMENT;
  332             TY_(Report)(doc, element, NULL, TRIM_EMPTY_ELEMENT);
  333         }
  334         
  335         return TY_(DiscardElement)(doc, element);
  336     }
  337     return element->next;
  338 }
  339 
  340 Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node)
  341 {
  342     Node* next;
  343 
  344     while (node)
  345     {
  346         next = node->next;
  347 
  348         if (node->content)
  349             TY_(DropEmptyElements)(doc, node->content);
  350 
  351         if (!TY_(nodeIsElement)(node) &&
  352             !(TY_(nodeIsText)(node) && !(node->start < node->end)))
  353         {
  354             node = next;
  355             continue;
  356         }
  357 
  358         next = TY_(TrimEmptyElement)(doc, node);
  359         node = next;
  360     }
  361 
  362     return node;
  363 }
  364 
  365 /* 
  366   errors in positioning of form start or end tags
  367   generally require human intervention to fix
  368   Issue #166 - repeated <main> element also uses this flag
  369   to indicate duplicates, discarded
  370 */
  371 static void BadForm( TidyDocImpl* doc )
  372 {
  373     doc->badForm |= flg_BadForm;
  374     /* doc->errors++; */
  375 }
  376 
  377 /*
  378   This maps 
  379        <em>hello </em><strong>world</strong>
  380   to
  381        <em>hello</em> <strong>world</strong>
  382 
  383   If last child of element is a text node
  384   then trim trailing white space character
  385   moving it to after element's end tag.
  386 */
  387 static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last )
  388 {
  389     Lexer* lexer = doc->lexer;
  390     byte c;
  391 
  392     if (TY_(nodeIsText)(last))
  393     {
  394         if (last->end > last->start)
  395         {
  396             c = (byte) lexer->lexbuf[ last->end - 1 ];
  397 
  398             if ( c == ' ' )
  399             {
  400                 last->end -= 1;
  401                 if ( (element->tag->model & CM_INLINE) &&
  402                      !(element->tag->model & CM_FIELD) )
  403                     lexer->insertspace = yes;
  404             }
  405         }
  406     }
  407 }
  408 
  409 /* Only true for text nodes. */
  410 Bool TY_(IsBlank)(Lexer *lexer, Node *node)
  411 {
  412     Bool isBlank = TY_(nodeIsText)(node);
  413     if ( isBlank )
  414         isBlank = ( node->end == node->start ||       /* Zero length */
  415                     ( node->end == node->start+1      /* or one blank. */
  416                       && lexer->lexbuf[node->start] == ' ' ) );
  417     return isBlank;
  418 }
  419 
  420 /*
  421   This maps 
  422        <p>hello<em> world</em>
  423   to
  424        <p>hello <em>world</em>
  425 
  426   Trims initial space, by moving it before the
  427   start tag, or if this element is the first in
  428   parent's content, then by discarding the space
  429 */
  430 static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text )
  431 {
  432     Lexer* lexer = doc->lexer;
  433     Node *prev, *node;
  434 
  435     if ( TY_(nodeIsText)(text) && 
  436          lexer->lexbuf[text->start] == ' ' && 
  437          text->start < text->end )
  438     {
  439         if ( (element->tag->model & CM_INLINE) &&
  440              !(element->tag->model & CM_FIELD) )
  441         {
  442             prev = element->prev;
  443 
  444             if (TY_(nodeIsText)(prev))
  445             {
  446                 if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ')
  447                     lexer->lexbuf[(prev->end)++] = ' ';
  448 
  449                 ++(element->start);
  450             }
  451             else /* create new node */
  452             {
  453                 node = TY_(NewNode)(lexer->allocator, lexer);
  454                 node->start = (element->start)++;
  455                 node->end = element->start;
  456                 lexer->lexbuf[node->start] = ' ';
  457                 TY_(InsertNodeBeforeElement)(element ,node);
  458                 DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n",
  459                     (element->element ? element->element : "unknown")));
  460             }
  461         }
  462 
  463         /* discard the space in current node */
  464         ++(text->start);
  465     }
  466 }
  467 
  468 static Bool IsPreDescendant(Node* node)
  469 {
  470     Node *parent = node->parent;
  471 
  472     while (parent)
  473     {
  474         if (parent->tag && parent->tag->parser == TY_(ParsePre))
  475             return yes;
  476 
  477         parent = parent->parent;
  478     }
  479 
  480     return no;
  481 }
  482 
  483 static Bool CleanTrailingWhitespace(TidyDocImpl* doc, Node* node)
  484 {
  485     Node* next;
  486 
  487     if (!TY_(nodeIsText)(node))
  488         return no;
  489 
  490     if (node->parent->type == DocTypeTag)
  491         return no;
  492 
  493     if (IsPreDescendant(node))
  494         return no;
  495 
  496     if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript))
  497         return no;
  498 
  499     next = node->next;
  500 
  501     /* <p>... </p> */
  502     if (!next && !TY_(nodeHasCM)(node->parent, CM_INLINE))
  503         return yes;
  504 
  505     /* <div><small>... </small><h3>...</h3></div> */
  506     if (!next && node->parent->next && !TY_(nodeHasCM)(node->parent->next, CM_INLINE))
  507         return yes;
  508 
  509     if (!next)
  510         return no;
  511 
  512     if (nodeIsBR(next))
  513         return yes;
  514 
  515     if (TY_(nodeHasCM)(next, CM_INLINE))
  516         return no;
  517 
  518     /* <a href='/'>...</a> <p>...</p> */
  519     if (next->type == StartTag)
  520         return yes;
  521 
  522     /* <strong>...</strong> <hr /> */
  523     if (next->type == StartEndTag)
  524         return yes;
  525 
  526     /* evil adjacent text nodes, Tidy should not generate these :-( */
  527     if (TY_(nodeIsText)(next) && next->start < next->end
  528         && TY_(IsWhite)(doc->lexer->lexbuf[next->start]))
  529         return yes;
  530 
  531     return no;
  532 }
  533 
  534 static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node)
  535 {
  536     if (!TY_(nodeIsText)(node))
  537         return no;
  538 
  539     if (node->parent->type == DocTypeTag)
  540         return no;
  541 
  542     if (IsPreDescendant(node))
  543         return no;
  544 
  545     if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript))
  546         return no;
  547 
  548     /* <p>...<br> <em>...</em>...</p> */
  549     if (nodeIsBR(node->prev))
  550         return yes;
  551 
  552     /* <p> ...</p> */
  553     if (node->prev == NULL && !TY_(nodeHasCM)(node->parent, CM_INLINE))
  554         return yes;
  555 
  556     /* <h4>...</h4> <em>...</em> */
  557     if (node->prev && !TY_(nodeHasCM)(node->prev, CM_INLINE) &&
  558         TY_(nodeIsElement)(node->prev))
  559         return yes;
  560 
  561     /* <p><span> ...</span></p> */
  562     if (!node->prev && !node->parent->prev && !TY_(nodeHasCM)(node->parent->parent, CM_INLINE))
  563         return yes;
  564 
  565     return no;
  566 }
  567 
  568 static void CleanSpaces(TidyDocImpl* doc, Node* node)
  569 {
  570     Node* next;
  571 
  572     while (node)
  573     {
  574         next = node->next;
  575 
  576         if (TY_(nodeIsText)(node) && CleanLeadingWhitespace(doc, node))
  577             while (node->start < node->end && TY_(IsWhite)(doc->lexer->lexbuf[node->start]))
  578                 ++(node->start);
  579 
  580         if (TY_(nodeIsText)(node) && CleanTrailingWhitespace(doc, node))
  581             while (node->end > node->start && TY_(IsWhite)(doc->lexer->lexbuf[node->end - 1]))
  582                 --(node->end);
  583 
  584         if (TY_(nodeIsText)(node) && !(node->start < node->end))
  585         {
  586             TY_(RemoveNode)(node);
  587             TY_(FreeNode)(doc, node);
  588             node = next;
  589 
  590             continue;
  591         }
  592 
  593         if (node->content)
  594             CleanSpaces(doc, node->content);
  595 
  596         node = next;
  597     }
  598 }
  599 
  600 /* 
  601   Move initial and trailing space out.
  602   This routine maps:
  603 
  604        hello<em> world</em>
  605   to
  606        hello <em>world</em>
  607   and
  608        <em>hello </em><strong>world</strong>
  609   to
  610        <em>hello</em> <strong>world</strong>
  611 */
  612 static void TrimSpaces( TidyDocImpl* doc, Node *element)
  613 {
  614     Node* text = element->content;
  615 
  616     if (nodeIsPRE(element) || IsPreDescendant(element))
  617         return;
  618 
  619     if (TY_(nodeIsText)(text))
  620         TrimInitialSpace(doc, element, text);
  621 
  622     text = element->last;
  623 
  624     if (TY_(nodeIsText)(text))
  625         TrimTrailingSpace(doc, element, text);
  626 }
  627 
  628 static Bool DescendantOf( Node *element, TidyTagId tid )
  629 {
  630     Node *parent;
  631     for ( parent = element->parent;
  632           parent != NULL;
  633           parent = parent->parent )
  634     {
  635         if ( TagIsId(parent, tid) )
  636             return yes;
  637     }
  638     return no;
  639 }
  640 
  641 static Bool InsertMisc(Node *element, Node *node)
  642 {
  643     if (node->type == CommentTag ||
  644         node->type == ProcInsTag ||
  645         node->type == CDATATag ||
  646         node->type == SectionTag ||
  647         node->type == AspTag ||
  648         node->type == JsteTag ||
  649         node->type == PhpTag )
  650     {
  651         TY_(InsertNodeAtEnd)(element, node);
  652         return yes;
  653     }
  654 
  655     if ( node->type == XmlDecl )
  656     {
  657         Node* root = element;
  658         while ( root && root->parent )
  659             root = root->parent;
  660         if ( root && !(root->content && root->content->type == XmlDecl))
  661         {
  662           TY_(InsertNodeAtStart)( root, node );
  663           return yes;
  664         }
  665     }
  666 
  667     /* Declared empty tags seem to be slipping through
  668     ** the cracks.  This is an experiment to figure out
  669     ** a decent place to pick them up.
  670     */
  671     if ( node->tag &&
  672          TY_(nodeIsElement)(node) &&
  673          TY_(nodeCMIsEmpty)(node) && TagId(node) == TidyTag_UNKNOWN &&
  674          (node->tag->versions & VERS_PROPRIETARY) != 0 )
  675     {
  676         TY_(InsertNodeAtEnd)(element, node);
  677         return yes;
  678     }
  679 
  680     return no;
  681 }
  682 
  683 
  684 static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode )
  685 {
  686     Lexer* lexer = doc->lexer;
  687 
  688     if (node->tag == NULL) /* [i_a]2 prevent crash for active content (php, asp) docs */
  689         return;
  690 
  691     /*
  692        Fix by GLP 2000-12-21.  Need to reset insertspace if this 
  693        is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
  694     */
  695     if (node->tag->model & CM_EMPTY)
  696     {
  697         lexer->waswhite = no;
  698         if (node->tag->parser == NULL)
  699             return;
  700     }
  701     else if (!(node->tag->model & CM_INLINE))
  702         lexer->insertspace = no;
  703 
  704     if (node->tag->parser == NULL)
  705         return;
  706 
  707     if (node->type == StartEndTag)
  708         return;
  709 
  710     lexer->parent = node; /* [i_a]2 added this - not sure why - CHECKME: */
  711 
  712     (*node->tag->parser)( doc, node, mode );
  713 }
  714 
  715 /*
  716  the doctype has been found after other tags,
  717  and needs moving to before the html element
  718 */
  719 static void InsertDocType( TidyDocImpl* doc, Node *element, Node *doctype )
  720 {
  721     Node* existing = TY_(FindDocType)( doc );
  722     if ( existing )
  723     {
  724         TY_(Report)(doc, element, doctype, DISCARDING_UNEXPECTED );
  725         TY_(FreeNode)( doc, doctype );
  726     }
  727     else
  728     {
  729         TY_(Report)(doc, element, doctype, DOCTYPE_AFTER_TAGS );
  730         while ( !nodeIsHTML(element) )
  731             element = element->parent;
  732         TY_(InsertNodeBeforeElement)( element, doctype );
  733     }
  734 }
  735 
  736 /*
  737  move node to the head, where element is used as starting
  738  point in hunt for head. normally called during parsing
  739 */
  740 static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node )
  741 {
  742     Node *head;
  743 
  744     TY_(RemoveNode)( node );  /* make sure that node is isolated */
  745 
  746     if ( TY_(nodeIsElement)(node) )
  747     {
  748         TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN );
  749 
  750         head = TY_(FindHEAD)(doc);
  751         assert(head != NULL);
  752 
  753         TY_(InsertNodeAtEnd)(head, node);
  754 
  755         if ( node->tag->parser )
  756             ParseTag( doc, node, IgnoreWhitespace );
  757     }
  758     else
  759     {
  760         TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
  761         TY_(FreeNode)( doc, node );
  762     }
  763 }
  764 
  765 /* moves given node to end of body element */
  766 static void MoveNodeToBody( TidyDocImpl* doc, Node* node )
  767 {
  768     Node* body = TY_(FindBody)( doc );
  769     if ( body )
  770     {
  771         TY_(RemoveNode)( node );
  772         TY_(InsertNodeAtEnd)( body, node );
  773     }
  774 }
  775 
  776 static void AddClassNoIndent( TidyDocImpl* doc, Node *node )
  777 {
  778     ctmbstr sprop =
  779         "padding-left: 2ex; margin-left: 0ex"
  780         "; margin-top: 0ex; margin-bottom: 0ex";
  781     if ( !cfgBool(doc, TidyDecorateInferredUL) )
  782         return;
  783     if ( cfgBool(doc, TidyMakeClean) )
  784         TY_(AddStyleAsClass)( doc, node, sprop );
  785     else
  786         TY_(AddStyleProperty)( doc, node, sprop );
  787 }
  788 
  789 /*
  790    element is node created by the lexer
  791    upon seeing the start tag, or by the
  792    parser when the start tag is inferred
  793 */
  794 void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
  795 {
  796 #if defined(ENABLE_DEBUG_LOG)
  797     static int in_parse_block = 0;
  798     static int parse_block_cnt = 0;
  799 #endif
  800     Lexer* lexer = doc->lexer;
  801     Node *node;
  802     Bool checkstack = yes;
  803     uint istackbase = 0;
  804 #if defined(ENABLE_DEBUG_LOG)
  805     in_parse_block++;
  806     parse_block_cnt++;
  807     SPRTF("Entering ParseBlock %d... %d %s\n",in_parse_block,parse_block_cnt,
  808         ((element && element->element) ? element->element : ""));
  809 #endif
  810 
  811     if ( element->tag->model & CM_EMPTY ) {
  812 #if defined(ENABLE_DEBUG_LOG)
  813         in_parse_block--;
  814         SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block);
  815 #endif
  816         return;
  817     }
  818 
  819     if ( nodeIsFORM(element) && 
  820          DescendantOf(element, TidyTag_FORM) )
  821         TY_(Report)(doc, element, NULL, ILLEGAL_NESTING );
  822 
  823     /*
  824      InlineDup() asks the lexer to insert inline emphasis tags
  825      currently pushed on the istack, but take care to avoid
  826      propagating inline emphasis inside OBJECT or APPLET.
  827      For these elements a fresh inline stack context is created
  828      and disposed of upon reaching the end of the element.
  829      They thus behave like table cells in this respect.
  830     */
  831     if (element->tag->model & CM_OBJECT)
  832     {
  833         istackbase = lexer->istackbase;
  834         lexer->istackbase = lexer->istacksize;
  835     }
  836 
  837     if (!(element->tag->model & CM_MIXED))
  838         TY_(InlineDup)( doc, NULL );
  839 
  840     /*\
  841      *  Issue #212 - If it is likely that it may be necessary
  842      *  to move a leading space into a text node before this
  843      *  element, then keep the mode MixedContent to keep any
  844      *  leading space
  845     \*/
  846     if ( !(element->tag->model & CM_INLINE) ||
  847           (element->tag->model & CM_FIELD ) )
  848     {
  849         mode = IgnoreWhitespace;
  850     }
  851     else if (mode == IgnoreWhitespace)
  852     {
  853         /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace'
  854            when such a leading space may need to be inserted before this element to 
  855            preverve the browser view */
  856         mode = MixedContent;
  857     }
  858 
  859     while ((node = TY_(GetToken)(doc, mode /*MixedContent*/)) != NULL)
  860     {
  861         /* end tag for this element */
  862         if (node->type == EndTag && node->tag &&
  863             (node->tag == element->tag || element->was == node->tag))
  864         {
  865             TY_(FreeNode)( doc, node );
  866 
  867             if (element->tag->model & CM_OBJECT)
  868             {
  869                 /* pop inline stack */
  870                 while (lexer->istacksize > lexer->istackbase)
  871                     TY_(PopInline)( doc, NULL );
  872                 lexer->istackbase = istackbase;
  873             }
  874 
  875             element->closed = yes;
  876             TrimSpaces( doc, element );
  877 #if defined(ENABLE_DEBUG_LOG)
  878             in_parse_block--;
  879             SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
  880 #endif
  881             return;
  882         }
  883 
  884         if ( nodeIsHTML(node) || nodeIsHEAD(node) || nodeIsBODY(node) )
  885         {
  886             if ( TY_(nodeIsElement)(node) )
  887                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
  888             TY_(FreeNode)( doc, node );
  889             continue;
  890         }
  891 
  892 
  893         if (node->type == EndTag)
  894         {
  895             if (node->tag == NULL)
  896             {
  897                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
  898                 TY_(FreeNode)( doc, node );
  899                 continue;
  900             }
  901             else if ( nodeIsBR(node) )
  902                 node->type = StartTag;
  903             else if ( nodeIsP(node) )
  904             {
  905                 /* Cannot have a block inside a paragraph, so no checking
  906                    for an ancestor is necessary -- but we _can_ have
  907                    paragraphs inside a block, so change it to an implicit
  908                    empty paragraph, to be dealt with according to the user's
  909                    options
  910                 */
  911                 node->type = StartEndTag;
  912                 node->implicit = yes;
  913             }
  914             else if (DescendantOf( element, node->tag->id ))
  915             {
  916                 /* 
  917                   if this is the end tag for an ancestor element
  918                   then infer end tag for this element
  919                 */
  920                 TY_(UngetToken)( doc );
  921                 break;
  922             }
  923             else
  924             {
  925                 /* special case </tr> etc. for stuff moved in front of table */
  926                 if ( lexer->exiled
  927                      && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) )
  928                 {
  929                     TY_(UngetToken)( doc );
  930                     TrimSpaces( doc, element );
  931 #if defined(ENABLE_DEBUG_LOG)
  932                     in_parse_block--;
  933                     SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
  934 #endif
  935                     return;
  936                 }
  937             }
  938         }
  939 
  940         /* mixed content model permits text */
  941         if (TY_(nodeIsText)(node))
  942         {
  943             if ( checkstack )
  944             {
  945                 checkstack = no;
  946                 if (!(element->tag->model & CM_MIXED))
  947                 {
  948                     if ( TY_(InlineDup)(doc, node) > 0 )
  949                         continue;
  950                 }
  951             }
  952 
  953             TY_(InsertNodeAtEnd)(element, node);
  954             mode = MixedContent;
  955 
  956             /*
  957               HTML4 strict doesn't allow mixed content for
  958               elements with %block; as their content model
  959             */
  960             /*
  961               But only body, map, blockquote, form and
  962               noscript have content model %block;
  963             */
  964             if ( nodeIsBODY(element)       ||
  965                  nodeIsMAP(element)        ||
  966                  nodeIsBLOCKQUOTE(element) ||
  967                  nodeIsFORM(element)       ||
  968                  nodeIsNOSCRIPT(element) )
  969                 TY_(ConstrainVersion)( doc, ~VERS_HTML40_STRICT );
  970             continue;
  971         }
  972 
  973         if ( InsertMisc(element, node) )
  974             continue;
  975 
  976         /* allow PARAM elements? */
  977         if ( nodeIsPARAM(node) )
  978         {
  979             if ( TY_(nodeHasCM)(element, CM_PARAM) && TY_(nodeIsElement)(node) )
  980             {
  981                 TY_(InsertNodeAtEnd)(element, node);
  982                 continue;
  983             }
  984 
  985             /* otherwise discard it */
  986             TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
  987             TY_(FreeNode)( doc, node );
  988             continue;
  989         }
  990 
  991         /* allow AREA elements? */
  992         if ( nodeIsAREA(node) )
  993         {
  994             if ( nodeIsMAP(element) && TY_(nodeIsElement)(node) )
  995             {
  996                 TY_(InsertNodeAtEnd)(element, node);
  997                 continue;
  998             }
  999 
 1000             /* otherwise discard it */
 1001             TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1002             TY_(FreeNode)( doc, node );
 1003             continue;
 1004         }
 1005 
 1006         /* ignore unknown start/end tags */
 1007         if ( node->tag == NULL )
 1008         {
 1009             TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1010             TY_(FreeNode)( doc, node );
 1011             continue;
 1012         }
 1013 
 1014         /*
 1015           Allow CM_INLINE elements here.
 1016 
 1017           Allow CM_BLOCK elements here unless
 1018           lexer->excludeBlocks is yes.
 1019 
 1020           LI and DD are special cased.
 1021 
 1022           Otherwise infer end tag for this element.
 1023         */
 1024 
 1025         if ( !TY_(nodeHasCM)(node, CM_INLINE) )
 1026         {
 1027             if ( !TY_(nodeIsElement)(node) )
 1028             {
 1029                 if ( nodeIsFORM(node) )
 1030                     BadForm( doc );
 1031 
 1032                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1033                 TY_(FreeNode)( doc, node );
 1034                 continue;
 1035             }
 1036 
 1037             /* #427671 - Fix by Randy Waki - 10 Aug 00 */
 1038             /*
 1039              If an LI contains an illegal FRAME, FRAMESET, OPTGROUP, or OPTION
 1040              start tag, discard the start tag and let the subsequent content get
 1041              parsed as content of the enclosing LI.  This seems to mimic IE and
 1042              Netscape, and avoids an infinite loop: without this check,
 1043              ParseBlock (which is parsing the LI's content) and ParseList (which
 1044              is parsing the LI's parent's content) repeatedly defer to each
 1045              other to parse the illegal start tag, each time inferring a missing
 1046              </li> or <li> respectively.
 1047 
 1048              NOTE: This check is a bit fragile.  It specifically checks for the
 1049              four tags that happen to weave their way through the current series
 1050              of tests performed by ParseBlock and ParseList to trigger the
 1051              infinite loop.
 1052             */
 1053             if ( nodeIsLI(element) )
 1054             {
 1055                 if ( nodeIsFRAME(node)    ||
 1056                      nodeIsFRAMESET(node) ||
 1057                      nodeIsOPTGROUP(node) ||
 1058                      nodeIsOPTION(node) )
 1059                 {
 1060                     TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1061                     TY_(FreeNode)( doc, node );  /* DSR - 27Apr02 avoid memory leak */
 1062                     continue;
 1063                 }
 1064             }
 1065 
 1066             if ( nodeIsTD(element) || nodeIsTH(element) )
 1067             {
 1068                 /* if parent is a table cell, avoid inferring the end of the cell */
 1069 
 1070                 if ( TY_(nodeHasCM)(node, CM_HEAD) )
 1071                 {
 1072                     MoveToHead( doc, element, node );
 1073                     continue;
 1074                 }
 1075 
 1076                 if ( TY_(nodeHasCM)(node, CM_LIST) )
 1077                 {
 1078                     TY_(UngetToken)( doc );
 1079                     node = TY_(InferredTag)(doc, TidyTag_UL);
 1080                     AddClassNoIndent(doc, node);
 1081                     lexer->excludeBlocks = yes;
 1082                 }
 1083                 else if ( TY_(nodeHasCM)(node, CM_DEFLIST) )
 1084                 {
 1085                     TY_(UngetToken)( doc );
 1086                     node = TY_(InferredTag)(doc, TidyTag_DL);
 1087                     lexer->excludeBlocks = yes;
 1088                 }
 1089 
 1090                 /* infer end of current table cell */
 1091                 if ( !TY_(nodeHasCM)(node, CM_BLOCK) )
 1092                 {
 1093                     TY_(UngetToken)( doc );
 1094                     TrimSpaces( doc, element );
 1095 #if defined(ENABLE_DEBUG_LOG)
 1096                     in_parse_block--;
 1097                     SPRTF("Exit ParseBlock 3 %d...\n",in_parse_block);
 1098 #endif
 1099                     return;
 1100                 }
 1101             }
 1102             else if ( TY_(nodeHasCM)(node, CM_BLOCK) )
 1103             {
 1104                 if ( lexer->excludeBlocks )
 1105                 {
 1106                     if ( !TY_(nodeHasCM)(element, CM_OPT) )
 1107                         TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
 1108 
 1109                     TY_(UngetToken)( doc );
 1110 
 1111                     if ( TY_(nodeHasCM)(element, CM_OBJECT) )
 1112                         lexer->istackbase = istackbase;
 1113 
 1114                     TrimSpaces( doc, element );
 1115 #if defined(ENABLE_DEBUG_LOG)
 1116                     in_parse_block--;
 1117                     SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block);
 1118 #endif
 1119                     return;
 1120                 }
 1121             }
 1122             else /* things like list items */
 1123             {
 1124                 if (node->tag->model & CM_HEAD)
 1125                 {
 1126                     MoveToHead( doc, element, node );
 1127                     continue;
 1128                 }
 1129 
 1130                 /*
 1131                  special case where a form start tag
 1132                  occurs in a tr and is followed by td or th
 1133                 */
 1134 
 1135                 if ( nodeIsFORM(element) &&
 1136                      nodeIsTD(element->parent) &&
 1137                      element->parent->implicit )
 1138                 {
 1139                     if ( nodeIsTD(node) )
 1140                     {
 1141                         TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1142                         TY_(FreeNode)( doc, node );
 1143                         continue;
 1144                     }
 1145 
 1146                     if ( nodeIsTH(node) )
 1147                     {
 1148                         TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1149                         TY_(FreeNode)( doc, node );
 1150                         node = element->parent;
 1151                         TidyDocFree(doc, node->element);
 1152                         node->element = TY_(tmbstrdup)(doc->allocator, "th");
 1153                         node->tag = TY_(LookupTagDef)( TidyTag_TH );
 1154                         continue;
 1155                     }
 1156                 }
 1157 
 1158                 if ( !TY_(nodeHasCM)(element, CM_OPT) && !element->implicit )
 1159                     TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
 1160                 
 1161                 /* #521, warn on missing optional end-tags if not omitting them. */
 1162                 if ( cfgBool( doc, TidyOmitOptionalTags ) == no && TY_(nodeHasCM)(element, CM_OPT) )
 1163                     TY_(Report)(doc, element, node, MISSING_ENDTAG_OPTIONAL );
 1164 
 1165 
 1166                 TY_(UngetToken)( doc );
 1167 
 1168                 if ( TY_(nodeHasCM)(node, CM_LIST) )
 1169                 {
 1170                     if ( element->parent && element->parent->tag &&
 1171                          element->parent->tag->parser == TY_(ParseList) )
 1172                     {
 1173                         TrimSpaces( doc, element );
 1174 #if defined(ENABLE_DEBUG_LOG)
 1175                         in_parse_block--;
 1176                         SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block);
 1177 #endif
 1178                         return;
 1179                     }
 1180 
 1181                     node = TY_(InferredTag)(doc, TidyTag_UL);
 1182                     AddClassNoIndent(doc, node);
 1183                 }
 1184                 else if ( TY_(nodeHasCM)(node, CM_DEFLIST) )
 1185                 {
 1186                     if ( nodeIsDL(element->parent) )
 1187                     {
 1188                         TrimSpaces( doc, element );
 1189 #if defined(ENABLE_DEBUG_LOG)
 1190                         in_parse_block--;
 1191                         SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block);
 1192 #endif
 1193                         return;
 1194                     }
 1195 
 1196                     node = TY_(InferredTag)(doc, TidyTag_DL);
 1197                 }
 1198                 else if ( TY_(nodeHasCM)(node, CM_TABLE) || TY_(nodeHasCM)(node, CM_ROW) )
 1199                 {
 1200                     /* http://tidy.sf.net/issue/1316307 */
 1201                     /* In exiled mode, return so table processing can 
 1202                        continue. */
 1203                     if (lexer->exiled) {
 1204 #if defined(ENABLE_DEBUG_LOG)
 1205                         in_parse_block--;
 1206                         SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block);
 1207 #endif
 1208                         return;
 1209                     }
 1210                     node = TY_(InferredTag)(doc, TidyTag_TABLE);
 1211                 }
 1212                 else if ( TY_(nodeHasCM)(element, CM_OBJECT) )
 1213                 {
 1214                     /* pop inline stack */
 1215                     while ( lexer->istacksize > lexer->istackbase )
 1216                         TY_(PopInline)( doc, NULL );
 1217                     lexer->istackbase = istackbase;
 1218                     TrimSpaces( doc, element );
 1219 #if defined(ENABLE_DEBUG_LOG)
 1220                     in_parse_block--;
 1221                     SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block);
 1222 #endif
 1223                     return;
 1224 
 1225                 }
 1226                 else
 1227                 {
 1228                     TrimSpaces( doc, element );
 1229 #if defined(ENABLE_DEBUG_LOG)
 1230                     in_parse_block--;
 1231                     SPRTF("Exit ParseBlock 9 %d...\n",in_parse_block);
 1232 #endif
 1233                     return;
 1234                 }
 1235             }
 1236         }
 1237 
 1238         /*\
 1239          *  Issue #307 - an <A> tag to ends any open <A> element
 1240          *  Like #427827 - fixed by Randy Waki and Bjoern Hoehrmann 23 Aug 00
 1241          *  in ParseInline(), fix copied HERE to ParseBlock()
 1242          *  href: http://www.w3.org/TR/html-markup/a.html
 1243          *  The interactive element a must not appear as a descendant of the a element.
 1244         \*/
 1245         if ( nodeIsA(node) && !node->implicit && 
 1246              (nodeIsA(element) || DescendantOf(element, TidyTag_A)) )
 1247         {
 1248             if (node->type != EndTag && node->attributes == NULL
 1249                 && cfgBool(doc, TidyCoerceEndTags) )
 1250             {
 1251                 node->type = EndTag;
 1252                 TY_(Report)(doc, element, node, COERCE_TO_ENDTAG);
 1253                 TY_(UngetToken)( doc );
 1254                 continue;
 1255             }
 1256 
 1257             if (nodeIsA(element))
 1258             {
 1259                 TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE);
 1260                 TY_(UngetToken)( doc );
 1261             }
 1262             else
 1263             {
 1264                 /* Issue #597 - if we not 'UngetToken' then it is being discarded.
 1265                    Add message, and 'FreeNode' - thanks @ralfjunker */
 1266                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 1267                 TY_(FreeNode)(doc, node);
 1268             }
 1269 
 1270             if (!(mode & Preformatted))
 1271                 TrimSpaces(doc, element);
 1272 
 1273 #if defined(ENABLE_DEBUG_LOG)
 1274             in_parse_block--;
 1275             SPRTF("Exit ParseBlock 9b %d...\n",in_parse_block);
 1276 #endif
 1277             return;
 1278         }
 1279 
 1280         /* parse known element */
 1281         if (TY_(nodeIsElement)(node))
 1282         {
 1283             if (node->tag->model & CM_INLINE)
 1284             {
 1285                 if (checkstack && !node->implicit)
 1286                 {
 1287                     checkstack = no;
 1288 
 1289                     if (!(element->tag->model & CM_MIXED)) /* #431731 - fix by Randy Waki 25 Dec 00 */
 1290                     {
 1291                         if ( TY_(InlineDup)(doc, node) > 0 )
 1292                             continue;
 1293                     }
 1294                 }
 1295 
 1296                 mode = MixedContent;
 1297             }
 1298             else
 1299             {
 1300                 checkstack = yes;
 1301                 mode = IgnoreWhitespace;
 1302             }
 1303 
 1304             /* trim white space before <br> */
 1305             if ( nodeIsBR(node) )
 1306                 TrimSpaces( doc, element );
 1307 
 1308             TY_(InsertNodeAtEnd)(element, node);
 1309             
 1310             if (node->implicit)
 1311                 TY_(Report)(doc, element, node, INSERTING_TAG );
 1312 
 1313             /* Issue #212 - WHY is this hard coded to 'IgnoreWhitespace' while an 
 1314                effort has been made above to set a 'MixedContent' mode in some cases?
 1315                WHY IS THE 'mode' VARIABLE NOT USED HERE???? */
 1316             ParseTag( doc, node, IgnoreWhitespace /*MixedContent*/ );
 1317             continue;
 1318         }
 1319 
 1320         /* discard unexpected tags */
 1321         if (node->type == EndTag)
 1322             TY_(PopInline)( doc, node );  /* if inline end tag */
 1323 
 1324         TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1325         TY_(FreeNode)( doc, node );
 1326         continue;
 1327     }
 1328 
 1329     if (!(element->tag->model & CM_OPT))
 1330         TY_(Report)(doc, element, node, MISSING_ENDTAG_FOR);
 1331 
 1332     if (element->tag->model & CM_OBJECT)
 1333     {
 1334         /* pop inline stack */
 1335         while ( lexer->istacksize > lexer->istackbase )
 1336             TY_(PopInline)( doc, NULL );
 1337         lexer->istackbase = istackbase;
 1338     }
 1339 
 1340     TrimSpaces( doc, element );
 1341 #if defined(ENABLE_DEBUG_LOG)
 1342     in_parse_block--;
 1343     SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block);
 1344 #endif
 1345 }
 1346 
 1347 /* [i_a] svg / math */
 1348 
/*
 Search state passed (as the opaque 'propagate' pointer) from
 FindMatchingDescendant() to its traversal callback FindDescendant_cb().
*/
struct MatchingDescendantData
{
    /* output: */
    Node *found_node;           /* set by the callback to the node that matched */
    Bool *passed_marker_node;   /* optional (may be NULL): when set, the callback
                                   stores 'yes' through it once it visits
                                   marker_node without that node matching */

    /* input: */
    TidyTagId matching_tagId;   /* tag id a visited node must have to match */
    Node *node_to_find;         /* its element name is compared as well when
                                   matching_tagId is TidyTag_UNKNOWN */
    Node *marker_node;          /* node the callback watches for; see
                                   passed_marker_node */
};
 1359 
 1360 static NodeTraversalSignal FindDescendant_cb(TidyDocImpl* ARG_UNUSED(doc), Node* node, void *propagate)
 1361 {
 1362     struct MatchingDescendantData *cb_data = (struct MatchingDescendantData *)propagate;
 1363 
 1364     if (TagId(node) == cb_data->matching_tagId)
 1365     {
 1366         /* make sure we match up 'unknown' tags exactly! */
 1367         if (cb_data->matching_tagId != TidyTag_UNKNOWN ||
 1368             (node->element != NULL &&
 1369             cb_data->node_to_find != NULL &&
 1370             cb_data->node_to_find->element != NULL &&
 1371             0 == TY_(tmbstrcmp)(cb_data->node_to_find->element, node->element)))
 1372         {
 1373             cb_data->found_node = node;
 1374             return ExitTraversal;
 1375         }
 1376     }
 1377 
 1378     if (cb_data->passed_marker_node && node == cb_data->marker_node)
 1379         *cb_data->passed_marker_node = yes;
 1380 
 1381     return VisitParent;
 1382 }
 1383 
 1384 /*
 1385 Search the parent chain (from 'parent' upwards up to the root) for a node matching the
 1386 given 'node'.
 1387 
 1388 When the search passes beyond the 'marker_node' (which is assumed to sit in the
 1389 parent chain), this will be flagged by setting the boolean referenced by
 1390 'is_parent_of_marker' to yes.
 1391 
 1392 'is_parent_of_marker' and 'marker_node' are optional parameters and may be NULL.
 1393 */
 1394 static Node *FindMatchingDescendant( Node *parent, Node *node, Node *marker_node, Bool *is_parent_of_marker )
 1395 {
 1396     struct MatchingDescendantData cb_data = { 0 };
 1397     cb_data.matching_tagId = TagId(node);
 1398     cb_data.node_to_find = node;
 1399     cb_data.marker_node = marker_node;
 1400 
 1401     assert(node);
 1402 
 1403     if (is_parent_of_marker)
 1404         *is_parent_of_marker = no;
 1405 
 1406     TY_(TraverseNodeTree)(NULL, parent, FindDescendant_cb, &cb_data);
 1407     return cb_data.found_node;
 1408 }
 1409 
 1410 /*
 1411    Act as a generic XML (sub)tree parser: collect each node and add it to the DOM, without any further validation.
 1412    TODO : add schema- or other-hierarchy-definition-based validation of the subtree here...
 1413 */
 1414 void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode)
 1415 {
 1416     Lexer* lexer = doc->lexer;
 1417     Node *node;
 1418     Node *parent = basenode;
 1419     uint istackbase;
 1420     AttVal* av; /* #130 MathML attr and entity fix! */
 1421 
 1422     /* a la <table>: defer popping elements off the inline stack */
 1423     TY_(DeferDup)( doc );
 1424     istackbase = lexer->istackbase;
 1425     lexer->istackbase = lexer->istacksize;
 1426 
 1427     mode = OtherNamespace; /* Preformatted; IgnoreWhitespace; */
 1428 
 1429     while ((node = TY_(GetToken)(doc, mode)) != NULL)
 1430     {
 1431         /*
 1432         fix check to skip action in InsertMisc for regular/empty
 1433         nodes, which we don't want here...
 1434 
 1435         The way we do it here is by checking and processing everything
 1436         and only what remains goes into InsertMisc()
 1437         */
 1438 
 1439         /* is this a close tag? And does it match the current parent node? */
 1440         if (node->type == EndTag)
 1441         {
 1442             /*
 1443             to prevent end tags flowing from one 'alternate namespace' we
 1444             check this in two phases: first we check if the tag is a
 1445             descendant of the current node, and when it is, we check whether
 1446             it is the end tag for a node /within/ or /outside/ the basenode.
 1447             */
 1448             Bool outside;
 1449             Node *mp = FindMatchingDescendant(parent, node, basenode, &outside);
 1450 
 1451             if (mp != NULL)
 1452             {
 1453                 /*
 1454                 when mp != parent as we might expect,
 1455                 infer end tags until we 'hit' the matched
 1456                 parent or the basenode
 1457                 */
 1458                 Node *n;
 1459 
 1460                 for (n = parent;
 1461                      n != NULL && n != basenode->parent && n != mp;
 1462                      n = n->parent)
 1463                 {
 1464                     /* n->implicit = yes; */
 1465                     n->closed = yes;
 1466                     TY_(Report)(doc, n->parent, n, MISSING_ENDTAG_BEFORE);
 1467                 }
 1468 
 1469                 /* Issue #369 - Since 'assert' is DEBUG only, and there are
 1470                    simple cases where these can be fired, removing them
 1471                    pending feedback from the original author!
 1472                    assert(outside == no ? n == mp : 1);
 1473                    assert(outside == yes ? n == basenode->parent : 1);
 1474                    =================================================== */
 1475 
 1476                 if (outside == no)
 1477                 {
 1478                     /* EndTag for a node within the basenode subtree. Roll on... */
 1479                     n->closed = yes;
 1480                     TY_(FreeNode)(doc, node);
 1481 
 1482                     node = n;
 1483                     parent = node->parent;
 1484                 }
 1485                 else
 1486                 {
 1487                     /* EndTag for a node outside the basenode subtree: let the caller handle that. */
 1488                     TY_(UngetToken)( doc );
 1489                     node = basenode;
 1490                     parent = node->parent;
 1491                 }
 1492 
 1493                 /* when we've arrived at the end-node for the base node, it's quitting time */
 1494                 if (node == basenode)
 1495                 {
 1496                     lexer->istackbase = istackbase;
 1497                     assert(basenode->closed == yes);
 1498                     return;
 1499                 }
 1500             }
 1501             else
 1502             {
 1503                 /* unmatched close tag: report an error and discard */
 1504                 /* TY_(Report)(doc, parent, node, NON_MATCHING_ENDTAG); Issue #308 - Seems wrong warning! */
 1505                 TY_(Report)(doc, parent, node, DISCARDING_UNEXPECTED);
 1506                 assert(parent);
 1507                 /* assert(parent->tag != node->tag); Issue #308 - Seems would always be true! */
 1508                 TY_(FreeNode)( doc, node); /* Issue #308 - Discard unexpected end tag memory */
 1509             }
 1510         }
 1511         else if (node->type == StartTag)
 1512         {
 1513             /* #130 MathML attr and entity fix! 
 1514                care if it has attributes, and 'accidently' any of those attributes match known */
 1515             for ( av = node->attributes; av; av = av->next )
 1516             {
 1517                 av->dict = 0; /* does something need to be freed? */
 1518             }
 1519             /* add another child to the current parent */
 1520             TY_(InsertNodeAtEnd)(parent, node);
 1521             parent = node;
 1522         }
 1523         else
 1524         {
 1525             /* #130 MathML attr and entity fix! 
 1526                care if it has attributes, and 'accidently' any of those attributes match known */
 1527             for ( av = node->attributes; av; av = av->next )
 1528             {
 1529                 av->dict = 0; /* does something need to be freed? */
 1530             }
 1531             TY_(InsertNodeAtEnd)(parent, node);
 1532         }
 1533     }
 1534 
 1535     TY_(Report)(doc, basenode->parent, basenode, MISSING_ENDTAG_FOR);
 1536 }
 1537 
 1538 
 1539 TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
 1540 {
 1541 #if defined(ENABLE_DEBUG_LOG)
 1542     static int in_parse_inline = 0;
 1543 #endif
 1544     Lexer* lexer = doc->lexer;
 1545     Node *node, *parent;
 1546 #if defined(ENABLE_DEBUG_LOG)
 1547     in_parse_inline++;
 1548     SPRTF("Entering ParseInline %d...\n",in_parse_inline);
 1549 #endif
 1550 
 1551     if (element->tag->model & CM_EMPTY) {
 1552 #if defined(ENABLE_DEBUG_LOG)
 1553         in_parse_inline--;
 1554         SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline);
 1555 #endif
 1556         return;
 1557     }
 1558 
 1559     /*
 1560      ParseInline is used for some block level elements like H1 to H6
 1561      For such elements we need to insert inline emphasis tags currently
 1562      on the inline stack. For Inline elements, we normally push them
 1563      onto the inline stack provided they aren't implicit or OBJECT/APPLET.
 1564      This test is carried out in PushInline and PopInline, see istack.c
 1565 
 1566      InlineDup(...) is not called for elements with a CM_MIXED (inline and
 1567      block) content model, e.g. <del> or <ins>, otherwise constructs like 
 1568 
 1569        <p>111<a name='foo'>222<del>333</del>444</a>555</p>
 1570        <p>111<span>222<del>333</del>444</span>555</p>
 1571        <p>111<em>222<del>333</del>444</em>555</p>
 1572 
 1573      will get corrupted.
 1574     */
 1575     if ((TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element)) &&
 1576         !TY_(nodeHasCM)(element, CM_MIXED))
 1577         TY_(InlineDup)(doc, NULL);
 1578     else if (TY_(nodeHasCM)(element, CM_INLINE))
 1579         TY_(PushInline)(doc, element);
 1580 
 1581     if ( nodeIsNOBR(element) )
 1582         doc->badLayout |= USING_NOBR;
 1583     else if ( nodeIsFONT(element) )
 1584         doc->badLayout |= USING_FONT;
 1585 
 1586     /* Inline elements may or may not be within a preformatted element */
 1587     if (mode != Preformatted)
 1588         mode = MixedContent;
 1589 
 1590     while ((node = TY_(GetToken)(doc, mode)) != NULL)
 1591     {
 1592         /* end tag for current element */
 1593         if (node->tag == element->tag && node->type == EndTag)
 1594         {
 1595             if (element->tag->model & CM_INLINE)
 1596                 TY_(PopInline)( doc, node );
 1597 
 1598             TY_(FreeNode)( doc, node );
 1599 
 1600             if (!(mode & Preformatted))
 1601                 TrimSpaces(doc, element);
 1602 
 1603             /*
 1604              if a font element wraps an anchor and nothing else
 1605              then move the font element inside the anchor since
 1606              otherwise it won't alter the anchor text color
 1607             */
 1608             if ( nodeIsFONT(element) && 
 1609                  element->content && element->content == element->last )
 1610             {
 1611                 Node *child = element->content;
 1612 
 1613                 if ( nodeIsA(child) )
 1614                 {
 1615                     child->parent = element->parent;
 1616                     child->next = element->next;
 1617                     child->prev = element->prev;
 1618 
 1619                     element->next = NULL;
 1620                     element->prev = NULL;
 1621                     element->parent = child;
 1622 
 1623                     element->content = child->content;
 1624                     element->last = child->last;
 1625                     child->content = element;
 1626 
 1627                     TY_(FixNodeLinks)(child);
 1628                     TY_(FixNodeLinks)(element);
 1629                 }
 1630             }
 1631 
 1632             element->closed = yes;
 1633             TrimSpaces( doc, element );
 1634 #if defined(ENABLE_DEBUG_LOG)
 1635             in_parse_inline--;
 1636             SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline);
 1637 #endif
 1638             return;
 1639         }
 1640 
 1641         /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
 1642         /* (see additional conditions below) */
 1643         /* otherwise emphasis nesting is probably unintentional */
 1644         /* big, small, sub, sup have cumulative effect to leave them alone */
 1645         if ( node->type == StartTag
 1646              && node->tag == element->tag
 1647              && TY_(IsPushed)( doc, node )
 1648              && !node->implicit
 1649              && !element->implicit
 1650              && node->tag && (node->tag->model & CM_INLINE)
 1651              && !nodeIsA(node)
 1652              && !nodeIsFONT(node)
 1653              && !nodeIsBIG(node)
 1654              && !nodeIsSMALL(node)
 1655              && !nodeIsSUB(node)
 1656              && !nodeIsSUP(node)
 1657              && !nodeIsQ(node)
 1658              && !nodeIsSPAN(node)
 1659              && cfgBool(doc, TidyCoerceEndTags)
 1660            )
 1661         {
 1662             /* proceeds only if "node" does not have any attribute and
 1663                follows a text node not finishing with a space */
 1664             if (element->content != NULL && node->attributes == NULL
 1665                 && TY_(nodeIsText)(element->last)
 1666                 && !TY_(TextNodeEndWithSpace)(doc->lexer, element->last) )
 1667             {
 1668                 TY_(Report)(doc, element, node, COERCE_TO_ENDTAG);
 1669                 node->type = EndTag;
 1670                 TY_(UngetToken)(doc);
 1671                 continue;
 1672             }
 1673 
 1674             if (node->attributes == NULL || element->attributes == NULL)
 1675                 TY_(Report)(doc, element, node, NESTED_EMPHASIS);
 1676         }
 1677         else if ( TY_(IsPushed)(doc, node) && node->type == StartTag && 
 1678                   nodeIsQ(node) )
 1679         {
 1680             /*\
 1681              * Issue #215 - such nested quotes are NOT a problem if HTML5, so
 1682              * only issue this warning if NOT HTML5 mode.
 1683             \*/
 1684             if (TY_(HTMLVersion)(doc) != HT50) 
 1685             {
 1686                 TY_(Report)(doc, element, node, NESTED_QUOTATION);
 1687             }
 1688         }
 1689 
 1690         if ( TY_(nodeIsText)(node) )
 1691         {
 1692             /* only called for 1st child */
 1693             if ( element->content == NULL && !(mode & Preformatted) )
 1694                 TrimSpaces( doc, element );
 1695 
 1696             if ( node->start >= node->end )
 1697             {
 1698                 TY_(FreeNode)( doc, node );
 1699                 continue;
 1700             }
 1701 
 1702             TY_(InsertNodeAtEnd)(element, node);
 1703             continue;
 1704         }
 1705 
 1706         /* mixed content model so allow text */
 1707         if (InsertMisc(element, node))
 1708             continue;
 1709 
 1710         /* deal with HTML tags */
 1711         if ( nodeIsHTML(node) )
 1712         {
 1713             if ( TY_(nodeIsElement)(node) )
 1714             {
 1715                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
 1716                 TY_(FreeNode)( doc, node );
 1717                 continue;
 1718             }
 1719 
 1720             /* otherwise infer end of inline element */
 1721             TY_(UngetToken)( doc );
 1722 
 1723             if (!(mode & Preformatted))
 1724                 TrimSpaces(doc, element);
 1725 #if defined(ENABLE_DEBUG_LOG)
 1726             in_parse_inline--;
 1727             SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline);
 1728 #endif
 1729             return;
 1730         }
 1731 
 1732         /* within <dt> or <pre> map <p> to <br> */
 1733         if ( nodeIsP(node) &&
 1734              node->type == StartTag &&
 1735              ( (mode & Preformatted) ||
 1736                nodeIsDT(element) || 
 1737                DescendantOf(element, TidyTag_DT )
 1738              )
 1739            )
 1740         {
 1741             node->tag = TY_(LookupTagDef)( TidyTag_BR );
 1742             TidyDocFree(doc, node->element);
 1743             node->element = TY_(tmbstrdup)(doc->allocator, "br");
 1744             TrimSpaces(doc, element);
 1745             TY_(InsertNodeAtEnd)(element, node);
 1746             continue;
 1747         }
 1748 
 1749         /* <p> allowed within <address> in HTML 4.01 Transitional */
 1750         if ( nodeIsP(node) &&
 1751              node->type == StartTag &&
 1752              nodeIsADDRESS(element) )
 1753         {
 1754             TY_(ConstrainVersion)( doc, ~VERS_HTML40_STRICT );
 1755             TY_(InsertNodeAtEnd)(element, node);
 1756             (*node->tag->parser)( doc, node, mode );
 1757             continue;
 1758         }
 1759 
 1760         /* ignore unknown and PARAM tags */
 1761         if ( node->tag == NULL || nodeIsPARAM(node) )
 1762         {
 1763             TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 1764             TY_(FreeNode)( doc, node );
 1765             continue;
 1766         }
 1767 
 1768         if ( nodeIsBR(node) && node->type == EndTag )
 1769             node->type = StartTag;
 1770 
 1771         if ( node->type == EndTag )
 1772         {
 1773            /* coerce </br> to <br> */
 1774            if ( nodeIsBR(node) )
 1775                 node->type = StartTag;
 1776            else if ( nodeIsP(node) )
 1777            {
 1778                /* coerce unmatched </p> to <br><br> */
 1779                 if ( !DescendantOf(element, TidyTag_P) )
 1780                 {
 1781                     TY_(CoerceNode)(doc, node, TidyTag_BR, no, no);
 1782                     TrimSpaces( doc, element );
 1783                     TY_(InsertNodeAtEnd)( element, node );
 1784                     node = TY_(InferredTag)(doc, TidyTag_BR);
 1785                     TY_(InsertNodeAtEnd)( element, node ); /* todo: check this */
 1786                     continue;
 1787                 }
 1788            }
 1789            else if ( TY_(nodeHasCM)(node, CM_INLINE)
 1790                      && !nodeIsA(node)
 1791                      && !TY_(nodeHasCM)(node, CM_OBJECT)
 1792                      && TY_(nodeHasCM)(element, CM_INLINE) )
 1793             {
 1794                 /* allow any inline end tag to end current element */
 1795 
 1796                 /* http://tidy.sf.net/issue/1426419 */
 1797                 /* but, like the browser, retain an earlier inline element.
 1798                    This is implemented by setting the lexer into a mode
 1799                    where it gets tokens from the inline stack rather than
 1800                    from the input stream. Check if the scenerio fits. */
 1801                 if ( !nodeIsA(element)
 1802                      && (node->tag != element->tag)
 1803                      && TY_(IsPushed)( doc, node )
 1804                      && TY_(IsPushed)( doc, element ) )
 1805                 {
 1806                     /* we have something like
 1807                        <b>bold <i>bold and italic</b> italics</i> */
 1808                     if ( TY_(SwitchInline)( doc, element, node ) )
 1809                     {
 1810                         TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG);
 1811                         TY_(UngetToken)( doc ); /* put this back */
 1812                         TY_(InlineDup1)( doc, NULL, element ); /* dupe the <i>, after </b> */
 1813                         if (!(mode & Preformatted))
 1814                             TrimSpaces( doc, element );
 1815 #if defined(ENABLE_DEBUG_LOG)
 1816                         in_parse_inline--;
 1817                         SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline);
 1818 #endif
 1819                         return; /* close <i>, but will re-open it, after </b> */
 1820                     }
 1821                 }
 1822                 TY_(PopInline)( doc, element );
 1823 
 1824                 if ( !nodeIsA(element) )
 1825                 {
 1826                     if ( nodeIsA(node) && node->tag != element->tag )
 1827                     {
 1828                        TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
 1829                        TY_(UngetToken)( doc );
 1830                     }
 1831                     else
 1832                     {
 1833                         TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG);
 1834                         TY_(FreeNode)( doc, node);
 1835                     }
 1836 
 1837                     if (!(mode & Preformatted))
 1838                         TrimSpaces(doc, element);
 1839 #if defined(ENABLE_DEBUG_LOG)
 1840                     in_parse_inline--;
 1841                     SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline);
 1842 #endif
 1843                     return;
 1844                 }
 1845 
 1846                 /* if parent is <a> then discard unexpected inline end tag */
 1847                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 1848                 TY_(FreeNode)( doc, node);
 1849                 continue;
 1850             }  /* special case </tr> etc. for stuff moved in front of table */
 1851             else if ( lexer->exiled
 1852                      && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) )
 1853             {
 1854                 TY_(UngetToken)( doc );
 1855                 TrimSpaces(doc, element);
 1856 #if defined(ENABLE_DEBUG_LOG)
 1857                 in_parse_inline--;
 1858                 SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline);
 1859 #endif
 1860                 return;
 1861             }
 1862         }
 1863 
 1864         /* allow any header tag to end current header */
 1865         if ( TY_(nodeHasCM)(node, CM_HEADING) && TY_(nodeHasCM)(element, CM_HEADING) )
 1866         {
 1867 
 1868             if ( node->tag == element->tag )
 1869             {
 1870                 TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG );
 1871                 TY_(FreeNode)( doc, node);
 1872             }
 1873             else
 1874             {
 1875                 TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
 1876                 TY_(UngetToken)( doc );
 1877             }
 1878 
 1879             if (!(mode & Preformatted))
 1880                 TrimSpaces(doc, element);
 1881 
 1882 #if defined(ENABLE_DEBUG_LOG)
 1883             in_parse_inline--;
 1884             SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline);
 1885 #endif
 1886             return;
 1887         }
 1888 
 1889         /*
 1890            an <A> tag to ends any open <A> element
 1891            but <A href=...> is mapped to </A><A href=...>
 1892         */
 1893         /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
 1894         /* if (node->tag == doc->tags.tag_a && !node->implicit && TY_(IsPushed)(doc, node)) */
 1895         if ( nodeIsA(node) && !node->implicit && 
 1896              (nodeIsA(element) || DescendantOf(element, TidyTag_A)) )
 1897         {
 1898             /* coerce <a> to </a> unless it has some attributes */
 1899             /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
 1900             /* other fixes by Dave Raggett */
 1901             /* if (node->attributes == NULL) */
 1902             if (node->type != EndTag && node->attributes == NULL
 1903                 && cfgBool(doc, TidyCoerceEndTags) )
 1904             {
 1905                 node->type = EndTag;
 1906                 TY_(Report)(doc, element, node, COERCE_TO_ENDTAG);
 1907                 /* TY_(PopInline)( doc, node ); */
 1908                 TY_(UngetToken)( doc );
 1909                 continue;
 1910             }
 1911 
 1912             TY_(UngetToken)( doc );
 1913             TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE);
 1914             /* TY_(PopInline)( doc, element ); */
 1915 
 1916             if (!(mode & Preformatted))
 1917                 TrimSpaces(doc, element);
 1918 
 1919 #if defined(ENABLE_DEBUG_LOG)
 1920             in_parse_inline--;
 1921             SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline);
 1922 #endif
 1923             return;
 1924         }
 1925 
 1926         if (element->tag->model & CM_HEADING)
 1927         {
 1928             if ( nodeIsCENTER(node) || nodeIsDIV(node) )
 1929             {
 1930                 if (!TY_(nodeIsElement)(node))
 1931                 {
 1932                     TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 1933                     TY_(FreeNode)( doc, node);
 1934                     continue;
 1935                 }
 1936 
 1937                 TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN);
 1938 
 1939                 /* insert center as parent if heading is empty */
 1940                 if (element->content == NULL)
 1941                 {
 1942                     InsertNodeAsParent(element, node);
 1943                     continue;
 1944                 }
 1945 
 1946                 /* split heading and make center parent of 2nd part */
 1947                 TY_(InsertNodeAfterElement)(element, node);
 1948 
 1949                 if (!(mode & Preformatted))
 1950                     TrimSpaces(doc, element);
 1951 
 1952                 element = TY_(CloneNode)( doc, element );
 1953                 TY_(InsertNodeAtEnd)(node, element);
 1954                 continue;
 1955             }
 1956 
 1957             if ( nodeIsHR(node) )
 1958             {
 1959                 if ( !TY_(nodeIsElement)(node) )
 1960                 {
 1961                     TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 1962                     TY_(FreeNode)( doc, node);
 1963                     continue;
 1964                 }
 1965 
 1966                 TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN);
 1967 
 1968                 /* insert hr before heading if heading is empty */
 1969                 if (element->content == NULL)
 1970                 {
 1971                     TY_(InsertNodeBeforeElement)(element, node);
 1972                     continue;
 1973                 }
 1974 
 1975                 /* split heading and insert hr before 2nd part */
 1976                 TY_(InsertNodeAfterElement)(element, node);
 1977 
 1978                 if (!(mode & Preformatted))
 1979                     TrimSpaces(doc, element);
 1980 
 1981                 element = TY_(CloneNode)( doc, element );
 1982                 TY_(InsertNodeAfterElement)(node, element);
 1983                 continue;
 1984             }
 1985         }
 1986 
 1987         if ( nodeIsDT(element) )
 1988         {
 1989             if ( nodeIsHR(node) )
 1990             {
 1991                 Node *dd;
 1992                 if ( !TY_(nodeIsElement)(node) )
 1993                 {
 1994                     TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 1995                     TY_(FreeNode)( doc, node);
 1996                     continue;
 1997                 }
 1998 
 1999                 TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN);
 2000                 dd = TY_(InferredTag)(doc, TidyTag_DD);
 2001 
 2002                 /* insert hr within dd before dt if dt is empty */
 2003                 if (element->content == NULL)
 2004                 {
 2005                     TY_(InsertNodeBeforeElement)(element, dd);
 2006                     TY_(InsertNodeAtEnd)(dd, node);
 2007                     continue;
 2008                 }
 2009 
 2010                 /* split dt and insert hr within dd before 2nd part */
 2011                 TY_(InsertNodeAfterElement)(element, dd);
 2012                 TY_(InsertNodeAtEnd)(dd, node);
 2013 
 2014                 if (!(mode & Preformatted))
 2015                     TrimSpaces(doc, element);
 2016 
 2017                 element = TY_(CloneNode)( doc, element );
 2018                 TY_(InsertNodeAfterElement)(dd, element);
 2019                 continue;
 2020             }
 2021         }
 2022 
 2023 
 2024         /* 
 2025           if this is the end tag for an ancestor element
 2026           then infer end tag for this element
 2027         */
 2028         if (node->type == EndTag)
 2029         {
 2030             for (parent = element->parent;
 2031                     parent != NULL; parent = parent->parent)
 2032             {
 2033                 if (node->tag == parent->tag)
 2034                 {
 2035                     if (!(element->tag->model & CM_OPT) && !element->implicit)
 2036                         TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE);
 2037 
 2038                     if( TY_(IsPushedLast)( doc, element, node ) ) 
 2039                         TY_(PopInline)( doc, element );
 2040                     TY_(UngetToken)( doc );
 2041 
 2042                     if (!(mode & Preformatted))
 2043                         TrimSpaces(doc, element);
 2044 
 2045 #if defined(ENABLE_DEBUG_LOG)
 2046                     in_parse_inline--;
 2047                     SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline);
 2048 #endif
 2049                     return;
 2050                 }
 2051             }
 2052         }
 2053 
 2054         /*\
 2055          *  block level tags end this element 
 2056          *  Issue #333 - There seems an exception if the element is a 'span',
 2057          *  and the node just collected is a 'meta'. The 'meta' can not have
 2058          *  CM_INLINE added, nor can the 'span' have CM_MIXED added without
 2059          *  big consequences.
 2060          *  There may be other exceptions to be added...
 2061         \*/
 2062         if (!(node->tag->model & CM_INLINE) &&
 2063             !(element->tag->model & CM_MIXED) &&
 2064             !(nodeIsSPAN(element) && nodeIsMETA(node)) )
 2065         {
 2066             if ( !TY_(nodeIsElement)(node) )
 2067             {
 2068                 TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 2069                 TY_(FreeNode)( doc, node);
 2070                 continue;
 2071             }
 2072             /* HTML5 */
 2073             if (nodeIsDATALIST(element)) {
 2074                 TY_(ConstrainVersion)( doc, ~VERS_HTML5 );
 2075             } else
 2076             if (!(element->tag->model & CM_OPT))
 2077                 TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE);
 2078 
 2079             if (node->tag->model & CM_HEAD && !(node->tag->model & CM_BLOCK))
 2080             {
 2081                 MoveToHead(doc, element, node);
 2082                 continue;
 2083             }
 2084 
 2085             /*
 2086                prevent anchors from propagating into block tags
 2087                except for headings h1 to h6
 2088             */
 2089             if ( nodeIsA(element) )
 2090             {
 2091                 if (node->tag && !(node->tag->model & CM_HEADING))
 2092                     TY_(PopInline)( doc, element );
 2093                 else if (!(element->content))
 2094                 {
 2095                     TY_(DiscardElement)( doc, element );
 2096                     TY_(UngetToken)( doc );
 2097 #if defined(ENABLE_DEBUG_LOG)
 2098                     in_parse_inline--;
 2099                     SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline);
 2100 #endif
 2101                     return;
 2102                 }
 2103             }
 2104 
 2105             TY_(UngetToken)( doc );
 2106 
 2107             if (!(mode & Preformatted))
 2108                 TrimSpaces(doc, element);
 2109 
 2110 #if defined(ENABLE_DEBUG_LOG)
 2111             in_parse_inline--;
 2112             SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline);
 2113 #endif
 2114             return;
 2115         }
 2116 
 2117         /* parse inline element */
 2118         if (TY_(nodeIsElement)(node))
 2119         {
 2120             if (node->implicit)
 2121                 TY_(Report)(doc, element, node, INSERTING_TAG);
 2122 
 2123             /* trim white space before <br> */
 2124             if ( nodeIsBR(node) )
 2125                 TrimSpaces(doc, element);
 2126             
 2127             TY_(InsertNodeAtEnd)(element, node);
 2128             ParseTag(doc, node, mode);
 2129             continue;
 2130         }
 2131 
 2132         /* discard unexpected tags */
 2133         TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
 2134         TY_(FreeNode)( doc, node );
 2135         continue;
 2136     }
 2137 
 2138     if (!(element->tag->model & CM_OPT))
 2139         TY_(Report)(doc, element, node, MISSING_ENDTAG_FOR);
 2140 
 2141 #if defined(ENABLE_DEBUG_LOG)
 2142     in_parse_inline--;
 2143     SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline);
 2144 #endif
 2145 }
 2146 
 2147 void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
 2148 {
 2149     Lexer* lexer = doc->lexer;
 2150     if ( lexer->isvoyager )
 2151     {
 2152         Node *node = TY_(GetToken)( doc, mode);
 2153         if ( node )
 2154         {
 2155             if ( !(node->type == EndTag && node->tag == element->tag) )
 2156             {
 2157                 /* TY_(Report)(doc, element, node, ELEMENT_NOT_EMPTY); */
 2158                 TY_(UngetToken)( doc );
 2159             }
 2160             else
 2161             {
 2162                 TY_(FreeNode)( doc, node );
 2163             }
 2164         }
 2165     }
 2166 }
 2167 
/*
  Parse the children of a definition list element (`list`).

  Tokens are always fetched with IgnoreWhitespace; the `mode` argument is
  only forwarded to ParseTag when parsing the content of a <center> that
  interrupts the list.

  The function returns when:
    - the matching end tag for `list` is seen (list->closed is set),
    - an end tag matching an ancestor (below BODY) is seen; it is pushed
      back and MISSING_ENDTAG_BEFORE is reported,
    - a token that cannot live inside the list is seen; it is pushed back,
    - the token stream is exhausted (MISSING_ENDTAG_FOR is reported).

  Note that `list` may be re-pointed to a freshly inferred <dl> after a
  <center> has split the list (see below).
*/
void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
{
    Lexer* lexer = doc->lexer;
    Node *node, *parent;

    /* nothing to parse for elements whose content model is empty */
    if (list->tag->model & CM_EMPTY)
        return;

    lexer->insert = NULL;  /* defer implicit inline start tags */

    while ((node = TY_(GetToken)( doc, IgnoreWhitespace)) != NULL)
    {
        /* matching end tag: the list is properly closed */
        if (node->tag == list->tag && node->type == EndTag)
        {
            TY_(FreeNode)( doc, node);
            list->closed = yes;
            return;
        }

        /* deal with comments etc. */
        if (InsertMisc(list, node))
            continue;

        /* bare text in the list: push it back and wrap it in an inferred <dt> */
        if (TY_(nodeIsText)(node))
        {
            TY_(UngetToken)( doc );
            node = TY_(InferredTag)(doc, TidyTag_DT);
            TY_(Report)(doc, list, node, MISSING_STARTTAG);
        }

        /* unknown tags are dropped */
        if (node->tag == NULL)
        {
            TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, node);
            continue;
        }

        /* 
          if this is the end tag for an ancestor element
          then infer end tag for this element
        */
        if (node->type == EndTag)
        {
            /* set when the ancestor walk reaches BODY without a match */
            Bool discardIt = no;

            /* a stray </form> is flagged via BadForm and discarded */
            if ( nodeIsFORM(node) )
            {
                BadForm( doc );
                TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
                TY_(FreeNode)( doc, node );
                continue;
            }

            for (parent = list->parent;
                    parent != NULL; parent = parent->parent)
            {
               /* Do not match across BODY to avoid infinite loop
                  between ParseBody and this parser,
                  See http://tidy.sf.net/bug/1098012. */
                if (nodeIsBODY(parent))
                {
                    discardIt = yes;
                    break;
                }
                if (node->tag == parent->tag)
                {
                    TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE);

                    /* push the end tag back before returning */
                    TY_(UngetToken)( doc );
                    return;
                }
            }
            if (discardIt)
            {
                TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
                TY_(FreeNode)( doc, node);
                continue;
            }
            /* no BODY ancestor and no match: fall through with the end tag */
        }

        /* center in a dt or a dl breaks the dl list in two */
        if ( nodeIsCENTER(node) )
        {
            /* place <center> after a non-empty list, or before an empty one */
            if (list->content)
                TY_(InsertNodeAfterElement)(list, node);
            else /* trim empty dl list */
            {
                TY_(InsertNodeBeforeElement)(list, node);
            }

            /* #426885 - fix by Glenn Carroll 19 Apr 00, and
                         Gary Dechaines 11 Aug 00 */
            /* ParseTag can destroy node, if it finds that
             * this <center> is followed immediately by </center>.
             * It's awkward but necessary to determine if this
             * has happened.
             */
            parent = node->parent;

            /* and parse contents of center */
            /* note: excludeBlocks is forced to yes afterwards, not restored
               to whatever value it had before */
            lexer->excludeBlocks = no;
            ParseTag( doc, node, mode);
            lexer->excludeBlocks = yes;

            /* now create a new dl element,
             * unless node has been blown away because the
             * center was empty, as above.
             */
            if (parent->last == node)
            {
                /* subsequent tokens are collected into the new <dl> */
                list = TY_(InferredTag)(doc, TidyTag_DL);
                TY_(InsertNodeAfterElement)(node, list);
            }
            continue;
        }

        /* anything other than <dt>/<dd>: push it back and decide whether
           it can be wrapped in an inferred <dd> */
        if ( !(nodeIsDT(node) || nodeIsDD(node)) )
        {
            TY_(UngetToken)( doc );

            /* neither block nor inline: cannot appear here, end the list */
            if (!(node->tag->model & (CM_BLOCK | CM_INLINE)))
            {
                TY_(Report)(doc, list, node, TAG_NOT_ALLOWED_IN);
                return;
            }

            /* if DD appeared directly in BODY then exclude blocks */
            if (!(node->tag->model & CM_INLINE) && lexer->excludeBlocks)
                return;

            /* the pushed-back token will be read again while parsing the
               inferred <dd> */
            node = TY_(InferredTag)(doc, TidyTag_DD);
            TY_(Report)(doc, list, node, MISSING_STARTTAG);
        }

        /* node is a DT or DD here; an end tag of either kind is unexpected */
        if (node->type == EndTag)
        {
            TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, node);
            continue;
        }
        
        /* node should be <DT> or <DD>*/
        TY_(InsertNodeAtEnd)(list, node);
        ParseTag( doc, node, IgnoreWhitespace);
    }

    /* the loop only exits here when GetToken returned NULL, so node is NULL */
    TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR);
}
 2315 
 2316 static Bool FindLastLI( Node *list, Node **lastli )
 2317 {
 2318     Node *node;
 2319 
 2320     *lastli = NULL;
 2321     for ( node = list->content; node ; node = node->next )
 2322         if ( nodeIsLI(node) && node->type == StartTag )
 2323             *lastli=node;
 2324     return *lastli ? yes:no;
 2325 }
 2326 
/*
  Parse the children of a list element (`list`).

  Tokens are always fetched with IgnoreWhitespace; the `mode` argument is
  unused.  Children that are not <li> are either inserted as-is (HTML5
  mode, see below), routed into the last existing <li> of an <ol>, or
  wrapped in an inferred <li>.

  The function returns when:
    - the matching end tag for `list` is seen (list->closed is set),
    - an end tag matching an ancestor (below BODY) is seen; it is pushed
      back and MISSING_ENDTAG_BEFORE is reported,
    - a block tag is seen while lexer->excludeBlocks is set,
    - a table-related tag is seen while lexer->exiled is set,
    - the token stream is exhausted (MISSING_ENDTAG_FOR is reported).

  When ENABLE_DEBUG_LOG is defined, a static nesting counter is kept and
  entry/exit traces are emitted through SPRTF.
*/
void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
{
#if defined(ENABLE_DEBUG_LOG)
    static int in_parse_list = 0;
#endif
    Lexer* lexer = doc->lexer;
    Node *node, *parent, *lastli;
    Bool wasblock;
    Bool nodeisOL = nodeIsOL(list);   /* cached: is `list` an <ol>? */

#if defined(ENABLE_DEBUG_LOG)
    in_parse_list++;
    SPRTF("Entering ParseList %d...\n",in_parse_list);
#endif
    /* nothing to parse for elements whose content model is empty */
    if (list->tag->model & CM_EMPTY)
    {
#if defined(ENABLE_DEBUG_LOG)
        in_parse_list--;
        SPRTF("Exit ParseList 1 %d... CM_EMPTY\n",in_parse_list);
#endif
        return;
    }
    lexer->insert = NULL;  /* defer implicit inline start tags */

    while ((node = TY_(GetToken)( doc, IgnoreWhitespace)) != NULL)
    {
        /* set below when an <ol> already has an <li> to receive this node */
        Bool foundLI = no;

        /* matching end tag: the list is properly closed */
        if (node->tag == list->tag && node->type == EndTag)
        {
            TY_(FreeNode)( doc, node);
            list->closed = yes;
#if defined(ENABLE_DEBUG_LOG)
            in_parse_list--;
            SPRTF("Exit ParseList 2 %d... Endtag\n",in_parse_list);
#endif
            return;
        }

        /* deal with comments etc. */
        if (InsertMisc(list, node))
            continue;

        /* unknown (non-text) tags are dropped */
        if (node->type != TextNode && node->tag == NULL)
        {
            TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, node);
            continue;
        }
        /* note: lexer was already dereferenced above, so the NULL test
           here cannot be what guards this block */
        if (lexer && (node->type == TextNode))
        {
            uint ch, ix = node->start;
            /* Issue #572 - Skip whitespace. */
            while (ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff))
                && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'))
                ++ix;
            if (ix >= node->end)
            {
                /* Issue #572 - Discard if ALL whitespace. */
                TY_(FreeNode)(doc, node);
                continue;
            }
        }


        /* 
          if this is the end tag for an ancestor element
          then infer end tag for this element
        */
        if (node->type == EndTag)
        {
            /* a stray </form> is flagged via BadForm and discarded */
            if ( nodeIsFORM(node) )
            {
                BadForm( doc );
                TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
                TY_(FreeNode)( doc, node );
                continue;
            }

            /* a stray inline end tag is discarded; PopInline is also
               called for it before the node is freed */
            if (TY_(nodeHasCM)(node,CM_INLINE))
            {
                TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
                TY_(PopInline)( doc, node );
                TY_(FreeNode)( doc, node);
                continue;
            }

            for ( parent = list->parent;
                  parent != NULL; parent = parent->parent )
            {
               /* Do not match across BODY to avoid infinite loop
                  between ParseBody and this parser,
                  See http://tidy.sf.net/bug/1053626. */
                if (nodeIsBODY(parent))
                    break;
                if (node->tag == parent->tag)
                {
                    /* end tag of an ancestor: push it back and end the list */
                    TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE);
                    TY_(UngetToken)( doc );
#if defined(ENABLE_DEBUG_LOG)
                    in_parse_list--;
                    SPRTF("Exit ParseList 3 %d... No End Tag\n",in_parse_list);
#endif
                    return;
                }
            }

            /* no ancestor matched: every remaining end tag is discarded */
            TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, node);
            continue;
        }

        if ( !nodeIsLI(node) && nodeisOL )
        {
            /* Issue #572 - A <ol><li> can have nested <ol> elements */
            foundLI = FindLastLI(list, &lastli); /* find last <li> */
        }

        if ( nodeIsLI(node) || (TY_(IsHTML5Mode)(doc) && !foundLI) )
        {
            /* node is <LI> OR
               Issue #396 - A <ul> can have Zero or more <li> elements
             */
            TY_(InsertNodeAtEnd)(list,node);
        }
        else
        {
            /* not an <li>: push the token back so it is read again while
               parsing the <li> chosen below */
            TY_(UngetToken)( doc );

            if (TY_(nodeHasCM)(node,CM_BLOCK) && lexer->excludeBlocks)
            {
                TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE);
#if defined(ENABLE_DEBUG_LOG)
                in_parse_list--;
                SPRTF("Exit ParseList 4 %d... No End Tag\n",in_parse_list);
#endif
                return;
            }
            /* http://tidy.sf.net/issue/1316307 */
            /* In exiled mode, return so table processing can continue. */
            else if ( lexer->exiled
                      && (TY_(nodeHasCM)(node, CM_TABLE|CM_ROWGRP|CM_ROW)
                          || nodeIsTABLE(node)) )
            {
#if defined(ENABLE_DEBUG_LOG)
                in_parse_list--;
                SPRTF("Exit ParseList 5 %d... exiled\n",in_parse_list);
#endif
                return;
            }
            /* http://tidy.sf.net/issue/836462
               If "list" is an ordered list (<ol>) that already has an <li>,
               insert the next tag within the last <li> to preserve the
               numbering to match the visual rendering of most browsers. */
            if ( nodeIsOL(list) && FindLastLI(list, &lastli) )
            {
                /* Create a node for error reporting */
                node = TY_(InferredTag)(doc, TidyTag_LI);
                TY_(Report)(doc, list, node, MISSING_STARTTAG );
                TY_(FreeNode)( doc, node);
                /* the existing last <li> is parsed again below */
                node = lastli;
            }
            else
            {
                /* Add an inferred <li> */
                /* remember whether the pushed-back token was block level
                   before node is re-pointed to the inferred <li> */
                wasblock = TY_(nodeHasCM)(node,CM_BLOCK);
                node = TY_(InferredTag)(doc, TidyTag_LI);
                /* Add "display: inline" to avoid a blank line after <li> with 
                   Internet Explorer. See http://tidy.sf.net/issue/836462 */
                TY_(AddStyleProperty)( doc, node,
                                       wasblock
                                       ? "list-style: none; display: inline"
                                       : "list-style: none" 
                                       );
                TY_(Report)(doc, list, node, MISSING_STARTTAG );
                TY_(InsertNodeAtEnd)(list,node);
            }
        }

        /* parse the content of the inserted node or of the chosen <li> */
        ParseTag( doc, node, IgnoreWhitespace);
    }

    /* the loop only exits here when GetToken returned NULL, so node is NULL */
    TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR);
#if defined(ENABLE_DEBUG_LOG)
    in_parse_list--;
    SPRTF("Exit ParseList 6 %d... missing end tag\n",in_parse_list);
#endif
}
 2514 
 2515 /*
 2516  unexpected content in table row is moved to just before
 2517  the table in accordance with Netscape and IE. This code
 2518  assumes that node hasn't been inserted into the row.
 2519 */
 2520 static void MoveBeforeTable( TidyDocImpl* ARG_UNUSED(doc), Node *row,
 2521                              Node *node )
 2522 {
 2523     Node *table;
 2524 
 2525     /* first find the table element */
 2526     for (table = row->parent; table; table = table->parent)
 2527     {
 2528         if ( nodeIsTABLE(table) )
 2529         {
 2530             TY_(InsertNodeBeforeElement)( table, node );
 2531             return;
 2532         }
 2533     }
 2534     /* No table element */
 2535     TY_(InsertNodeBeforeElement)( row->parent, node );
 2536 }
 2537 
 2538 /*
 2539  if a table row is empty then insert an empty cell
 2540  this practice is consistent with browser behavior
 2541  and avoids potential problems with row spanning cells
 2542 */
 2543 static void FixEmptyRow(TidyDocImpl* doc, Node *row)
 2544 {
 2545     Node *cell;
 2546 
 2547     if (row->content == NULL)
 2548     {
 2549         cell = TY_(InferredTag)(doc, TidyTag_TD);
 2550         TY_(InsertNodeAtEnd)(row, cell);
 2551         TY_(Report)(doc, row, cell, MISSING_STARTTAG);
 2552     }
 2553 }
 2554 
 2555 void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
 2556 {
 2557     Lexer* lexer = doc->lexer;
 2558     Node *node;
 2559     Bool exclude_state;
 2560 
 2561     if (row->tag->model & CM_EMPTY)
 2562         return;
 2563 
 2564     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 2565     {
 2566         if (node->tag == row->tag)
 2567         {
 2568             if (node->type == EndTag)
 2569             {
 2570                 TY_(FreeNode)( doc, node);
 2571                 row->closed = yes;
 2572                 FixEmptyRow( doc, row);
 2573                 return;
 2574             }
 2575 
 2576             /* New row start implies end of current row */
 2577             TY_(UngetToken)( doc );
 2578             FixEmptyRow( doc, row);
 2579             return;
 2580         }
 2581 
 2582         /* 
 2583           if this is the end tag for an ancestor element
 2584           then infer end tag for this element
 2585         */
 2586         if ( node->type == EndTag )
 2587         {
 2588             if ( (TY_(nodeHasCM)(node, CM_HTML|CM_TABLE) || nodeIsTABLE(node))
 2589                  && DescendantOf(row, TagId(node)) )
 2590             {
 2591                 TY_(UngetToken)( doc );
 2592                 return;
 2593             }
 2594 
 2595             if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) )
 2596             {
 2597                 if ( nodeIsFORM(node) )
 2598                     BadForm( doc );
 2599 
 2600                 TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED);
 2601                 TY_(FreeNode)( doc, node);
 2602                 continue;
 2603             }
 2604 
 2605             if ( nodeIsTD(node) || nodeIsTH(node) )
 2606             {
 2607                 TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED);
 2608                 TY_(FreeNode)( doc, node);
 2609                 continue;
 2610             }
 2611         }
 2612 
 2613         /* deal with comments etc. */
 2614         if (InsertMisc(row, node))
 2615             continue;
 2616 
 2617         /* discard unknown tags */
 2618         if (node->tag == NULL && node->type != TextNode)
 2619         {
 2620             TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED);
 2621             TY_(FreeNode)( doc, node);
 2622             continue;
 2623         }
 2624 
 2625         /* discard unexpected <table> element */
 2626         if ( nodeIsTABLE(node) )
 2627         {
 2628             TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED);
 2629             TY_(FreeNode)( doc, node);
 2630             continue;
 2631         }
 2632 
 2633         /* THEAD, TFOOT or TBODY */
 2634         if ( TY_(nodeHasCM)(node, CM_ROWGRP) )
 2635         {
 2636             TY_(UngetToken)( doc );
 2637             return;
 2638         }
 2639 
 2640         if (node->type == EndTag)
 2641         {
 2642             TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED);
 2643             TY_(FreeNode)( doc, node);
 2644             continue;
 2645         }
 2646 
 2647         /*
 2648           if text or inline or block move before table
 2649           if head content move to head
 2650         */
 2651 
 2652         if (node->type != EndTag)
 2653         {
 2654             if ( nodeIsFORM(node) )
 2655             {
 2656                 TY_(UngetToken)( doc );
 2657                 node = TY_(InferredTag)(doc, TidyTag_TD);
 2658                 TY_(Report)(doc, row, node, MISSING_STARTTAG);
 2659             }
 2660             else if ( TY_(nodeIsText)(node)
 2661                       || TY_(nodeHasCM)(node, CM_BLOCK | CM_INLINE) )
 2662             {
 2663                 MoveBeforeTable( doc, row, node );
 2664                 TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN);
 2665                 lexer->exiled = yes;
 2666                 exclude_state = lexer->excludeBlocks;
 2667                 lexer->excludeBlocks = no;
 2668 
 2669                 if (node->type != TextNode)
 2670                     ParseTag( doc, node, IgnoreWhitespace);
 2671 
 2672                 lexer->exiled = no;
 2673                 lexer->excludeBlocks = exclude_state;
 2674                 continue;
 2675             }
 2676             else if (node->tag->model & CM_HEAD)
 2677             {
 2678                 TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN);
 2679                 MoveToHead( doc, row, node);
 2680                 continue;
 2681             }
 2682         }
 2683 
 2684         if ( !(nodeIsTD(node) || nodeIsTH(node)) )
 2685         {
 2686             TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN);
 2687             TY_(FreeNode)( doc, node);
 2688             continue;
 2689         }
 2690         
 2691         /* node should be <TD> or <TH> */
 2692         TY_(InsertNodeAtEnd)(row, node);
 2693         exclude_state = lexer->excludeBlocks;
 2694         lexer->excludeBlocks = no;
 2695         ParseTag( doc, node, IgnoreWhitespace);
 2696         lexer->excludeBlocks = exclude_state;
 2697 
 2698         /* pop inline stack */
 2699 
 2700         while ( lexer->istacksize > lexer->istackbase )
 2701             TY_(PopInline)( doc, NULL );
 2702     }
 2703 
 2704 }
 2705 
 2706 void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSED(mode))
 2707 {
 2708     Lexer* lexer = doc->lexer;
 2709     Node *node, *parent;
 2710 
 2711     if (rowgroup->tag->model & CM_EMPTY)
 2712         return;
 2713 
 2714     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 2715     {
 2716         if (node->tag == rowgroup->tag)
 2717         {
 2718             if (node->type == EndTag)
 2719             {
 2720                 rowgroup->closed = yes;
 2721                 TY_(FreeNode)( doc, node);
 2722                 return;
 2723             }
 2724 
 2725             TY_(UngetToken)( doc );
 2726             return;
 2727         }
 2728 
 2729         /* if </table> infer end tag */
 2730         if ( nodeIsTABLE(node) && node->type == EndTag )
 2731         {
 2732             TY_(UngetToken)( doc );
 2733             return;
 2734         }
 2735 
 2736         /* deal with comments etc. */
 2737         if (InsertMisc(rowgroup, node))
 2738             continue;
 2739 
 2740         /* discard unknown tags */
 2741         if (node->tag == NULL && node->type != TextNode)
 2742         {
 2743             TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED);
 2744             TY_(FreeNode)( doc, node);
 2745             continue;
 2746         }
 2747 
 2748         /*
 2749           if TD or TH then infer <TR>
 2750           if text or inline or block move before table
 2751           if head content move to head
 2752         */
 2753 
 2754         if (node->type != EndTag)
 2755         {
 2756             if ( nodeIsTD(node) || nodeIsTH(node) )
 2757             {
 2758                 TY_(UngetToken)( doc );
 2759                 node = TY_(InferredTag)(doc, TidyTag_TR);
 2760                 TY_(Report)(doc, rowgroup, node, MISSING_STARTTAG);
 2761             }
 2762             else if ( TY_(nodeIsText)(node)
 2763                       || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) )
 2764             {
 2765                 MoveBeforeTable( doc, rowgroup, node );
 2766                 TY_(Report)(doc, rowgroup, node, TAG_NOT_ALLOWED_IN);
 2767                 lexer->exiled = yes;
 2768 
 2769                 if (node->type != TextNode)
 2770                     ParseTag(doc, node, IgnoreWhitespace);
 2771 
 2772                 lexer->exiled = no;
 2773                 continue;
 2774             }
 2775             else if (node->tag->model & CM_HEAD)
 2776             {
 2777                 TY_(Report)(doc, rowgroup, node, TAG_NOT_ALLOWED_IN);
 2778                 MoveToHead(doc, rowgroup, node);
 2779                 continue;
 2780             }
 2781         }
 2782 
 2783         /* 
 2784           if this is the end tag for ancestor element
 2785           then infer end tag for this element
 2786         */
 2787         if (node->type == EndTag)
 2788         {
 2789             if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) )
 2790             {
 2791                 if ( nodeIsFORM(node) )
 2792                     BadForm( doc );
 2793 
 2794                 TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED);
 2795                 TY_(FreeNode)( doc, node);
 2796                 continue;
 2797             }
 2798 
 2799             if ( nodeIsTR(node) || nodeIsTD(node) || nodeIsTH(node) )
 2800             {
 2801                 TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED);
 2802                 TY_(FreeNode)( doc, node);
 2803                 continue;
 2804             }
 2805 
 2806             for ( parent = rowgroup->parent;
 2807                   parent != NULL;
 2808                   parent = parent->parent )
 2809             {
 2810                 if (node->tag == parent->tag)
 2811                 {
 2812                     TY_(UngetToken)( doc );
 2813                     return;
 2814                 }
 2815             }
 2816         }
 2817 
 2818         /*
 2819           if THEAD, TFOOT or TBODY then implied end tag
 2820 
 2821         */
 2822         if (node->tag->model & CM_ROWGRP)
 2823         {
 2824             if (node->type != EndTag)
 2825             {
 2826                 TY_(UngetToken)( doc );
 2827                 return;
 2828             }
 2829         }
 2830 
 2831         if (node->type == EndTag)
 2832         {
 2833             TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED);
 2834             TY_(FreeNode)( doc, node);
 2835             continue;
 2836         }
 2837         
 2838         if ( !nodeIsTR(node) )
 2839         {
 2840             node = TY_(InferredTag)(doc, TidyTag_TR);
 2841             TY_(Report)(doc, rowgroup, node, MISSING_STARTTAG);
 2842             TY_(UngetToken)( doc );
 2843         }
 2844 
 2845        /* node should be <TR> */
 2846         TY_(InsertNodeAtEnd)(rowgroup, node);
 2847         ParseTag(doc, node, IgnoreWhitespace);
 2848     }
 2849 
 2850 }
 2851 
 2852 void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSED(mode))
 2853 {
 2854     Node *node, *parent;
 2855 
 2856     if (colgroup->tag->model & CM_EMPTY)
 2857         return;
 2858 
 2859     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 2860     {
 2861         if (node->tag == colgroup->tag && node->type == EndTag)
 2862         {
 2863             TY_(FreeNode)( doc, node);
 2864             colgroup->closed = yes;
 2865             return;
 2866         }
 2867 
 2868         /* 
 2869           if this is the end tag for an ancestor element
 2870           then infer end tag for this element
 2871         */
 2872         if (node->type == EndTag)
 2873         {
 2874             if ( nodeIsFORM(node) )
 2875             {
 2876                 BadForm( doc );
 2877                 TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED);
 2878                 TY_(FreeNode)( doc, node);
 2879                 continue;
 2880             }
 2881 
 2882             for ( parent = colgroup->parent;
 2883                   parent != NULL;
 2884                   parent = parent->parent )
 2885             {
 2886                 if (node->tag == parent->tag)
 2887                 {
 2888                     TY_(UngetToken)( doc );
 2889                     return;
 2890                 }
 2891             }
 2892         }
 2893 
 2894         if (TY_(nodeIsText)(node))
 2895         {
 2896             TY_(UngetToken)( doc );
 2897             return;
 2898         }
 2899 
 2900         /* deal with comments etc. */
 2901         if (InsertMisc(colgroup, node))
 2902             continue;
 2903 
 2904         /* discard unknown tags */
 2905         if (node->tag == NULL)
 2906         {
 2907             TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED);
 2908             TY_(FreeNode)( doc, node);
 2909             continue;
 2910         }
 2911 
 2912         if ( !nodeIsCOL(node) )
 2913         {
 2914             TY_(UngetToken)( doc );
 2915             return;
 2916         }
 2917 
 2918         if (node->type == EndTag)
 2919         {
 2920             TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED);
 2921             TY_(FreeNode)( doc, node);
 2922             continue;
 2923         }
 2924         
 2925         /* node should be <COL> */
 2926         TY_(InsertNodeAtEnd)(colgroup, node);
 2927         ParseTag(doc, node, IgnoreWhitespace);
 2928     }
 2929 }
 2930 
 2931 void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(mode))
 2932 {
 2933 #if defined(ENABLE_DEBUG_LOG)
 2934     static int in_parse_table = 0;
 2935 #endif
 2936     Lexer* lexer = doc->lexer;
 2937     Node *node, *parent;
 2938     uint istackbase;
 2939 
 2940     TY_(DeferDup)( doc );
 2941     istackbase = lexer->istackbase;
 2942     lexer->istackbase = lexer->istacksize;
 2943 #if defined(ENABLE_DEBUG_LOG)
 2944     in_parse_table++;
 2945     SPRTF("Entering ParseTableTag %d...\n",in_parse_table);
 2946 #endif
 2947     
 2948     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 2949     {
 2950         if (node->tag == table->tag )
 2951         {
 2952             if (node->type == EndTag)
 2953             {
 2954                 TY_(FreeNode)(doc, node);
 2955             }
 2956             else
 2957             {
 2958                 /* Issue #498 - If a <table> in a <table>
 2959                  * just close the current table, and issue a 
 2960                  * warning. The previous action was to discard
 2961                  * this second <table>
 2962                  */
 2963                 TY_(UngetToken)(doc);
 2964                 TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN);
 2965             }
 2966             lexer->istackbase = istackbase;
 2967             table->closed = yes;
 2968 #if defined(ENABLE_DEBUG_LOG)
 2969             in_parse_table--;
 2970             SPRTF("Exit ParseTableTag 1 %d... EndTag\n",in_parse_table);
 2971 #endif
 2972             return;
 2973         }
 2974 
 2975         /* deal with comments etc. */
 2976         if (InsertMisc(table, node))
 2977             continue;
 2978 
 2979         /* discard unknown tags */
 2980         if (node->tag == NULL && node->type != TextNode)
 2981         {
 2982             TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED);
 2983             TY_(FreeNode)( doc, node);
 2984             continue;
 2985         }
 2986 
 2987         /* if TD or TH or text or inline or block then infer <TR> */
 2988 
 2989         if (node->type != EndTag)
 2990         {
 2991             if ( nodeIsTD(node) || nodeIsTH(node) || nodeIsTABLE(node) )
 2992             {
 2993                 TY_(UngetToken)( doc );
 2994                 node = TY_(InferredTag)(doc, TidyTag_TR);
 2995                 TY_(Report)(doc, table, node, MISSING_STARTTAG);
 2996             }
 2997             else if ( TY_(nodeIsText)(node) ||TY_(nodeHasCM)(node,CM_BLOCK|CM_INLINE) )
 2998             {
 2999                 TY_(InsertNodeBeforeElement)(table, node);
 3000                 TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN);
 3001                 lexer->exiled = yes;
 3002 
 3003                 if (node->type != TextNode) 
 3004                     ParseTag(doc, node, IgnoreWhitespace);
 3005 
 3006                 lexer->exiled = no;
 3007                 continue;
 3008             }
 3009             else if (node->tag->model & CM_HEAD)
 3010             {
 3011                 MoveToHead(doc, table, node);
 3012                 continue;
 3013             }
 3014         }
 3015 
 3016         /* 
 3017           if this is the end tag for an ancestor element
 3018           then infer end tag for this element
 3019         */
 3020         if (node->type == EndTag)
 3021         {
 3022             if ( nodeIsFORM(node) )
 3023             {
 3024                 BadForm( doc );
 3025                 TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED);
 3026                 TY_(FreeNode)( doc, node);
 3027                 continue;
 3028             }
 3029 
 3030             /* best to discard unexpected block/inline end tags */
 3031             if ( TY_(nodeHasCM)(node, CM_TABLE|CM_ROW) ||
 3032                  TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) )
 3033             {
 3034                 TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED);
 3035                 TY_(FreeNode)( doc, node);
 3036                 continue;
 3037             }
 3038 
 3039             for ( parent = table->parent;
 3040                   parent != NULL;
 3041                   parent = parent->parent )
 3042             {
 3043                 if (node->tag == parent->tag)
 3044                 {
 3045                     TY_(Report)(doc, table, node, MISSING_ENDTAG_BEFORE );
 3046                     TY_(UngetToken)( doc );
 3047                     lexer->istackbase = istackbase;
 3048 #if defined(ENABLE_DEBUG_LOG)
 3049                     in_parse_table--;
 3050                     SPRTF("Exit ParseTableTag 2 %d... missing EndTag\n",in_parse_table);
 3051 #endif
 3052                     return;
 3053                 }
 3054             }
 3055         }
 3056 
 3057         if (!(node->tag->model & CM_TABLE))
 3058         {
 3059             TY_(UngetToken)( doc );
 3060             TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN);
 3061             lexer->istackbase = istackbase;
 3062 #if defined(ENABLE_DEBUG_LOG)
 3063             in_parse_table--;
 3064             SPRTF("Exit ParseTableTag 3 %d... CM_TABLE\n",in_parse_table);
 3065 #endif
 3066             return;
 3067         }
 3068 
 3069         if (TY_(nodeIsElement)(node))
 3070         {
 3071             TY_(InsertNodeAtEnd)(table, node);
 3072             ParseTag(doc, node, IgnoreWhitespace);
 3073             continue;
 3074         }
 3075 
 3076         /* discard unexpected text nodes and end tags */
 3077         TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED);
 3078         TY_(FreeNode)( doc, node);
 3079     }
 3080 
 3081     TY_(Report)(doc, table, node, MISSING_ENDTAG_FOR);
 3082     lexer->istackbase = istackbase;
 3083 #if defined(ENABLE_DEBUG_LOG)
 3084     in_parse_table--;
 3085     SPRTF("Exit ParseTableTag 4 %d... missing end\n",in_parse_table);
 3086 #endif
 3087 }
 3088 
 3089 /* acceptable content for pre elements */
 3090 static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node )
 3091 {
 3092     /* p is coerced to br's, Text OK too */
 3093     if ( nodeIsP(node) || TY_(nodeIsText)(node) )
 3094         return yes;
 3095 
 3096     if ( node->tag == NULL ||
 3097          nodeIsPARAM(node) ||
 3098          !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) )
 3099         return no;
 3100 
 3101     return yes;
 3102 }
 3103 
 3104 void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
 3105 {
 3106     Node *node;
 3107 
 3108     if (pre->tag->model & CM_EMPTY)
 3109         return;
 3110 
 3111     TY_(InlineDup)( doc, NULL ); /* tell lexer to insert inlines if needed */
 3112 
 3113     while ((node = TY_(GetToken)(doc, Preformatted)) != NULL)
 3114     {
 3115         if ( node->type == EndTag && 
 3116              (node->tag == pre->tag || DescendantOf(pre, TagId(node))) )
 3117         {
 3118             if (nodeIsBODY(node) || nodeIsHTML(node))
 3119             {
 3120                 TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED);
 3121                 TY_(FreeNode)(doc, node);
 3122                 continue;
 3123             }
 3124             if (node->tag == pre->tag)
 3125             {
 3126                 TY_(FreeNode)(doc, node);
 3127             }
 3128             else
 3129             {
 3130                 TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE );
 3131                 TY_(UngetToken)( doc );
 3132             }
 3133             pre->closed = yes;
 3134             TrimSpaces(doc, pre);
 3135             return;
 3136         }
 3137 
 3138         if (TY_(nodeIsText)(node))
 3139         {
 3140             TY_(InsertNodeAtEnd)(pre, node);
 3141             continue;
 3142         }
 3143 
 3144         /* deal with comments etc. */
 3145         if (InsertMisc(pre, node))
 3146             continue;
 3147 
 3148         if (node->tag == NULL)
 3149         {
 3150             TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED);
 3151             TY_(FreeNode)(doc, node);
 3152             continue;
 3153         }
 3154 
 3155         /* strip unexpected tags */
 3156         if ( !PreContent(doc, node) )
 3157         {
 3158             Node *newnode;
 3159 
 3160             /* fix for http://tidy.sf.net/bug/772205 */
 3161             if (node->type == EndTag)
 3162             {
 3163                 /* http://tidy.sf.net/issue/1590220 */ 
 3164                if ( doc->lexer->exiled
 3165                    && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) )
 3166                {
 3167                   TY_(UngetToken)(doc);
 3168                   TrimSpaces(doc, pre);
 3169                   return;
 3170                }
 3171 
 3172                TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED);
 3173                TY_(FreeNode)(doc, node);
 3174                continue;
 3175             }
 3176             /* http://tidy.sf.net/issue/1590220 */
 3177             else if (TY_(nodeHasCM)(node, CM_TABLE|CM_ROW)
 3178                      || nodeIsTABLE(node) )
 3179             {
 3180                 if (!doc->lexer->exiled)
 3181                     /* No missing close warning if exiled. */
 3182                     TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE);
 3183 
 3184                 TY_(UngetToken)(doc);
 3185                 return;
 3186             }
 3187 
 3188             /*
 3189               This is basically what Tidy 04 August 2000 did and far more accurate
 3190               with respect to browser behaivour than the code commented out above.
 3191               Tidy could try to propagate the <pre> into each disallowed child where
 3192               <pre> is allowed in order to replicate some browsers behaivour, but
 3193               there are a lot of exceptions, e.g. Internet Explorer does not propagate
 3194               <pre> into table cells while Mozilla does. Opera 6 never propagates
 3195               <pre> into blocklevel elements while Opera 7 behaves much like Mozilla.
 3196 
 3197               Tidy behaves thus mostly like Opera 6 except for nested <pre> elements
 3198               which are handled like Mozilla takes them (Opera6 closes all <pre> after
 3199               the first </pre>).
 3200 
 3201               There are similar issues like replacing <p> in <pre> with <br>, for
 3202               example
 3203 
 3204                 <pre>...<p>...</pre>                 (Input)
 3205                 <pre>...<br>...</pre>                (Tidy)
 3206                 <pre>...<br>...</pre>                (Opera 7 and Internet Explorer)
 3207                 <pre>...<br><br>...</pre>            (Opera 6 and Mozilla)
 3208 
 3209                 <pre>...<p>...</p>...</pre>          (Input)
 3210                 <pre>...<br>......</pre>             (Tidy, BUG!)
 3211                 <pre>...<br>...<br>...</pre>         (Internet Explorer)
 3212                 <pre>...<br><br>...<br><br>...</pre> (Mozilla, Opera 6)
 3213                 <pre>...<br>...<br><br>...</pre>     (Opera 7)
 3214                 
 3215               or something similar, they could also be closing the <pre> and propagate
 3216               the <pre> into the newly opened <p>.
 3217 
 3218               Todo: IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, and BASEFONT are
 3219               dissallowed in <pre>, Tidy neither detects this nor does it perform any
 3220               cleanup operation. Tidy should at least issue a warning if it encounters
 3221               such constructs.
 3222 
 3223               Todo: discarding </p> is abviously a bug, it should be replaced by <br>.
 3224             */
 3225             TY_(InsertNodeAfterElement)(pre, node);
 3226             TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE);
 3227             ParseTag(doc, node, IgnoreWhitespace);
 3228 
 3229             newnode = TY_(InferredTag)(doc, TidyTag_PRE);
 3230             TY_(Report)(doc, pre, newnode, INSERTING_TAG);
 3231             pre = newnode;
 3232             TY_(InsertNodeAfterElement)(node, pre);
 3233 
 3234             continue;
 3235         }
 3236 
 3237         if ( nodeIsP(node) )
 3238         {
 3239             if (node->type == StartTag)
 3240             {
 3241                 TY_(Report)(doc, pre, node, USING_BR_INPLACE_OF);
 3242 
 3243                 /* trim white space before <p> in <pre>*/
 3244                 TrimSpaces(doc, pre);
 3245             
 3246                 /* coerce both <p> and </p> to <br> */
 3247                 TY_(CoerceNode)(doc, node, TidyTag_BR, no, no);
 3248                 TY_(FreeAttrs)( doc, node ); /* discard align attribute etc. */
 3249                 TY_(InsertNodeAtEnd)( pre, node );
 3250             }
 3251             else
 3252             {
 3253                 TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED);
 3254                 TY_(FreeNode)( doc, node);
 3255             }
 3256             continue;
 3257         }
 3258 
 3259         if ( TY_(nodeIsElement)(node) )
 3260         {
 3261             /* trim white space before <br> */
 3262             if ( nodeIsBR(node) )
 3263                 TrimSpaces(doc, pre);
 3264             
 3265             TY_(InsertNodeAtEnd)(pre, node);
 3266             ParseTag(doc, node, Preformatted);
 3267             continue;
 3268         }
 3269 
 3270         /* discard unexpected tags */
 3271         TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED);
 3272         TY_(FreeNode)( doc, node);
 3273     }
 3274 
 3275     TY_(Report)(doc, pre, node, MISSING_ENDTAG_FOR);
 3276 }
 3277 
 3278 void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
 3279 {
 3280     Lexer* lexer = doc->lexer;
 3281     Node *node;
 3282 
 3283     lexer->insert = NULL;  /* defer implicit inline start tags */
 3284 
 3285     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 3286     {
 3287         if (node->tag == field->tag && node->type == EndTag)
 3288         {
 3289             TY_(FreeNode)( doc, node);
 3290             field->closed = yes;
 3291             TrimSpaces(doc, field);
 3292             return;
 3293         }
 3294 
 3295         /* deal with comments etc. */
 3296         if (InsertMisc(field, node))
 3297             continue;
 3298 
 3299         if ( node->type == StartTag && 
 3300              (nodeIsOPTION(node) || nodeIsOPTGROUP(node)) )
 3301         {
 3302             if ( nodeIsOPTGROUP(node) )
 3303                 TY_(Report)(doc, field, node, CANT_BE_NESTED);
 3304 
 3305             TY_(InsertNodeAtEnd)(field, node);
 3306             ParseTag(doc, node, MixedContent);
 3307             continue;
 3308         }
 3309 
 3310         /* discard unexpected tags */
 3311         TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED );
 3312         TY_(FreeNode)( doc, node);
 3313     }
 3314 }
 3315 
 3316 
 3317 void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
 3318 {
 3319 #if defined(ENABLE_DEBUG_LOG)
 3320     static int in_parse_select = 0;
 3321 #endif
 3322     Lexer* lexer = doc->lexer;
 3323     Node *node;
 3324 
 3325     lexer->insert = NULL;  /* defer implicit inline start tags */
 3326 #if defined(ENABLE_DEBUG_LOG)
 3327     in_parse_select++;
 3328     SPRTF("Entering ParseSelect %d...\n",in_parse_select);
 3329 #endif
 3330 
 3331     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 3332     {
 3333         if (node->tag == field->tag && node->type == EndTag)
 3334         {
 3335             TY_(FreeNode)( doc, node);
 3336             field->closed = yes;
 3337             TrimSpaces(doc, field);
 3338 #if defined(ENABLE_DEBUG_LOG)
 3339             in_parse_select--;
 3340             SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select);
 3341 #endif
 3342             return;
 3343         }
 3344 
 3345         /* deal with comments etc. */
 3346         if (InsertMisc(field, node))
 3347             continue;
 3348 
 3349         if ( node->type == StartTag && 
 3350              ( nodeIsOPTION(node)   ||
 3351                nodeIsOPTGROUP(node) ||
 3352                nodeIsDATALIST(node) ||
 3353                nodeIsSCRIPT(node)) 
 3354            )
 3355         {
 3356             TY_(InsertNodeAtEnd)(field, node);
 3357             ParseTag(doc, node, IgnoreWhitespace);
 3358             continue;
 3359         }
 3360 
 3361         /* discard unexpected tags */
 3362         TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED);
 3363         TY_(FreeNode)( doc, node);
 3364     }
 3365 
 3366     TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR);
 3367 #if defined(ENABLE_DEBUG_LOG)
 3368     in_parse_select--;
 3369     SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select);
 3370 #endif
 3371 }
 3372 
 3373 /* HTML5 */
 3374 void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
 3375 {
 3376 #if defined(ENABLE_DEBUG_LOG)
 3377     static int in_parse_datalist = 0;
 3378 #endif
 3379     Lexer* lexer = doc->lexer;
 3380     Node *node;
 3381 
 3382     lexer->insert = NULL;  /* defer implicit inline start tags */
 3383 #if defined(ENABLE_DEBUG_LOG)
 3384     in_parse_datalist++;
 3385     SPRTF("Entering ParseDatalist %d...\n",in_parse_datalist);
 3386 #endif
 3387 
 3388     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 3389     {
 3390         if (node->tag == field->tag && node->type == EndTag)
 3391         {
 3392             TY_(FreeNode)( doc, node);
 3393             field->closed = yes;
 3394             TrimSpaces(doc, field);
 3395 #if defined(ENABLE_DEBUG_LOG)
 3396             in_parse_datalist--;
 3397             SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist);
 3398 #endif
 3399             return;
 3400         }
 3401 
 3402         /* deal with comments etc. */
 3403         if (InsertMisc(field, node))
 3404             continue;
 3405 
 3406         if ( node->type == StartTag && 
 3407              ( nodeIsOPTION(node)   ||
 3408                nodeIsOPTGROUP(node) ||
 3409                nodeIsDATALIST(node) ||
 3410                nodeIsSCRIPT(node)) 
 3411            )
 3412         {
 3413             TY_(InsertNodeAtEnd)(field, node);
 3414             ParseTag(doc, node, IgnoreWhitespace);
 3415             continue;
 3416         }
 3417 
 3418         /* discard unexpected tags */
 3419         TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED);
 3420         TY_(FreeNode)( doc, node);
 3421     }
 3422 
 3423     TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR);
 3424 #if defined(ENABLE_DEBUG_LOG)
 3425     in_parse_datalist--;
 3426     SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist);
 3427 #endif
 3428 }
 3429 
 3430 
 3431 
 3432 
 3433 void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
 3434 {
 3435     Lexer* lexer = doc->lexer;
 3436     Node *node;
 3437 
 3438     lexer->insert = NULL;  /* defer implicit inline start tags */
 3439 
 3440     if ( nodeIsTEXTAREA(field) )
 3441         mode = Preformatted;
 3442     else
 3443         mode = MixedContent;  /* kludge for font tags */
 3444 
 3445     while ((node = TY_(GetToken)(doc, mode)) != NULL)
 3446     {
 3447         if (node->tag == field->tag && node->type == EndTag)
 3448         {
 3449             TY_(FreeNode)( doc, node);
 3450             field->closed = yes;
 3451             TrimSpaces(doc, field);
 3452             return;
 3453         }
 3454 
 3455         /* deal with comments etc. */
 3456         if (InsertMisc(field, node))
 3457             continue;
 3458 
 3459         if (TY_(nodeIsText)(node))
 3460         {
 3461             /* only called for 1st child */
 3462             if (field->content == NULL && !(mode & Preformatted))
 3463                 TrimSpaces(doc, field);
 3464 
 3465             if (node->start >= node->end)
 3466             {
 3467                 TY_(FreeNode)( doc, node);
 3468                 continue;
 3469             }
 3470 
 3471             TY_(InsertNodeAtEnd)(field, node);
 3472             continue;
 3473         }
 3474 
 3475         /* for textarea should all cases of < and & be escaped? */
 3476 
 3477         /* discard inline tags e.g. font */
 3478         if (   node->tag 
 3479             && node->tag->model & CM_INLINE
 3480             && !(node->tag->model & CM_FIELD)) /* #487283 - fix by Lee Passey 25 Jan 02 */
 3481         {
 3482             TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED);
 3483             TY_(FreeNode)( doc, node);
 3484             continue;
 3485         }
 3486 
 3487         /* terminate element on other tags */
 3488         if (!(field->tag->model & CM_OPT))
 3489             TY_(Report)(doc, field, node, MISSING_ENDTAG_BEFORE);
 3490 
 3491         TY_(UngetToken)( doc );
 3492         TrimSpaces(doc, field);
 3493         return;
 3494     }
 3495 
 3496     if (!(field->tag->model & CM_OPT))
 3497         TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR);
 3498 }
 3499 
 3500 
 3501 void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode))
 3502 {
 3503     Node *node;
 3504     while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
 3505     {
 3506         if (node->tag == title->tag && node->type == StartTag
 3507             && cfgBool(doc, TidyCoerceEndTags) )
 3508         {
 3509             TY_(Report)(doc, title, node, COERCE_TO_ENDTAG);
 3510             node->type = EndTag;
 3511             TY_(UngetToken)( doc );
 3512             continue;
 3513         }
 3514         else if (node->tag == title->tag && node->type == EndTag)
 3515         {
 3516             TY_(FreeNode)( doc, node);
 3517             title->closed = yes;
 3518             TrimSpaces(doc, title);
 3519             return;
 3520         }
 3521 
 3522         if (TY_(nodeIsText)(node))
 3523         {
 3524             /* only called for 1st child */
 3525             if (title->content == NULL)
 3526                 TrimInitialSpace(doc, title, node);
 3527 
 3528             if (node->start >= node->end)
 3529             {
 3530                 TY_(FreeNode)( doc, node);
 3531                 continue;
 3532             }
 3533 
 3534             TY_(InsertNodeAtEnd)(title, node);
 3535             continue;
 3536         }
 3537 
 3538         /* deal with comments etc. */
 3539         if (InsertMisc(title, node))
 3540             continue;
 3541 
 3542         /* discard unknown tags */
 3543         if (node->tag == NULL)
 3544         {
 3545             TY_(Report)(doc, title, node, DISCARDING_UNEXPECTED);
 3546             TY_(FreeNode)( doc, node);
 3547             continue;
 3548         }
 3549 
 3550         /* pushback unexpected tokens */
 3551         TY_(Report)(doc, title, node, MISSING_ENDTAG_BEFORE);
 3552         TY_(UngetToken)( doc );
 3553         TrimSpaces(doc, title);
 3554         return;
 3555     }
 3556 
 3557     TY_(Report)(doc, title, node, MISSING_ENDTAG_FOR);
 3558 }
 3559 
 3560 /*
 3561   This isn't quite right for CDATA content as it recognises
 3562   tags within the content and parses them accordingly.
 3563   This will unfortunately screw up scripts which include
 3564   < + letter,  < + !, < + ?  or  < + / + letter
 3565 */
 3566 
 3567 void TY_(ParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mode))
 3568 {
 3569     Node *node;
 3570     
 3571     doc->lexer->parent = script;
 3572     node = TY_(GetToken)(doc, CdataContent);
 3573     doc->lexer->parent = NULL;
 3574 
 3575     if (node)
 3576     {
 3577         TY_(InsertNodeAtEnd)(script, node);
 3578     }
 3579     else
 3580     {
 3581         /* handle e.g. a document like "<script>" */
 3582         TY_(Report)(doc, script, NULL, MISSING_ENDTAG_FOR);
 3583         return;
 3584     }
 3585 
 3586     node = TY_(GetToken)(doc, IgnoreWhitespace);
 3587 
 3588     if (!(node && node->type == EndTag && node->tag &&
 3589         node->tag->id == script->tag->id))
 3590     {
 3591         TY_(Report)(doc, script, node, MISSING_ENDTAG_FOR);
 3592 
 3593         if (node)
 3594             TY_(UngetToken)(doc);
 3595     }
 3596     else
 3597     {
 3598         TY_(FreeNode)(doc, node);
 3599     }
 3600 }
 3601 
 3602 Bool TY_(IsJavaScript)(Node *node)
 3603 {
 3604     Bool result = no;
 3605     AttVal *attr;
 3606 
 3607     if (node->attributes == NULL)
 3608         return yes;
 3609 
 3610     for (attr = node->attributes; attr; attr = attr->next)
 3611     {
 3612         if ( (attrIsLANGUAGE(attr) || attrIsTYPE(attr))
 3613              && AttrContains(attr, "javascript") )
 3614         {
 3615             result = yes;
 3616             break;
 3617         }
 3618     }
 3619 
 3620     return result;
 3621 }
 3622 
 3623 void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
 3624 {
 3625     Lexer* lexer = doc->lexer;
 3626     Node *node;
 3627     int HasTitle = 0;
 3628     int HasBase = 0;
 3629 
 3630     DEBUG_LOG(SPRTF("Enter ParseHead...\n"));
 3631     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 3632     {
 3633         if (node->tag == head->tag && node->type == EndTag)
 3634         {
 3635             TY_(FreeNode)( doc, node);
 3636             head->closed = yes;
 3637             break;
 3638         }
 3639 
 3640         /* find and discard multiple <head> elements */
 3641         /* find and discard <html> in <head> elements */
 3642         if ((node->tag == head->tag || nodeIsHTML(node)) && node->type == StartTag)
 3643         {
 3644             TY_(Report)(doc, head, node, DISCARDING_UNEXPECTED);
 3645             TY_(FreeNode)(doc, node);
 3646             continue;
 3647         }
 3648 
 3649         if (TY_(nodeIsText)(node))
 3650         {
 3651             /*\ Issue #132 - avoid warning for missing body tag,
 3652              *  if configured to --omit-otpional-tags yes
 3653              *  Issue #314 - and if --show-body-only
 3654             \*/
 3655             if (!cfgBool( doc, TidyOmitOptionalTags ) &&
 3656                 !showingBodyOnly(doc) )
 3657             {
 3658                 TY_(Report)(doc, head, node, TAG_NOT_ALLOWED_IN);
 3659             }
 3660             TY_(UngetToken)( doc );
 3661             break;
 3662         }
 3663 
 3664         if (node->type == ProcInsTag && node->element &&
 3665             TY_(tmbstrcmp)(node->element, "xml-stylesheet") == 0)
 3666         {
 3667             TY_(Report)(doc, head, node, TAG_NOT_ALLOWED_IN);
 3668             TY_(InsertNodeBeforeElement)(TY_(FindHTML)(doc), node);
 3669             continue;
 3670         }
 3671 
 3672         /* deal with comments etc. */
 3673         if (InsertMisc(head, node))
 3674             continue;
 3675 
 3676         if (node->type == DocTypeTag)
 3677         {
 3678             InsertDocType(doc, head, node);
 3679             continue;
 3680         }
 3681 
 3682         /* discard unknown tags */
 3683         if (node->tag == NULL)
 3684         {
 3685             TY_(Report)(doc, head, node, DISCARDING_UNEXPECTED);
 3686             TY_(FreeNode)( doc, node);
 3687             continue;
 3688         }
 3689         
 3690         /*
 3691          if it doesn't belong in the head then
 3692          treat as implicit end of head and deal
 3693          with as part of the body
 3694         */
 3695         if (!(node->tag->model & CM_HEAD))
 3696         {
 3697             /* #545067 Implicit closing of head broken - warn only for XHTML input */
 3698             if ( lexer->isvoyager )
 3699                 TY_(Report)(doc, head, node, TAG_NOT_ALLOWED_IN );
 3700             TY_(UngetToken)( doc );
 3701             break;
 3702         }
 3703 
 3704         if (TY_(nodeIsElement)(node))
 3705         {
 3706             if ( nodeIsTITLE(node) )
 3707             {
 3708                 ++HasTitle;
 3709 
 3710                 if (HasTitle > 1)
 3711                     TY_(Report)(doc, head, node,
 3712                                      head ?
 3713                                      TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
 3714             }
 3715             else if ( nodeIsBASE(node) )
 3716             {
 3717                 ++HasBase;
 3718 
 3719                 if (HasBase > 1)
 3720                     TY_(Report)(doc, head, node,
 3721                                      head ?
 3722                                      TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
 3723             }
 3724 
 3725             TY_(InsertNodeAtEnd)(head, node);
 3726             ParseTag(doc, node, IgnoreWhitespace);
 3727             continue;
 3728         }
 3729 
 3730         /* discard unexpected text nodes and end tags */
 3731         TY_(Report)(doc, head, node, DISCARDING_UNEXPECTED);
 3732         TY_(FreeNode)( doc, node);
 3733     }
 3734     DEBUG_LOG(SPRTF("Exit ParseHead 1...\n"));
 3735 }
 3736 
 3737 /*\ 
 3738  *  Issue #166 - repeated <main> element
 3739  *  But this service is generalised to check for other duplicate elements
 3740 \*/
 3741 static Bool TY_(FindNodeWithId)( Node *node, TidyTagId tid )
 3742 {
 3743     Node *content;
 3744     while (node)
 3745     {
 3746         if (TagIsId(node,tid))
 3747             return yes;
 3748         /*\ 
 3749          *   Issue #459 - Under certain circumstances, with many node this use of
 3750          *   'for (content = node->content; content; content = content->content)'
 3751          *   would produce a **forever** circle, or at least a very extended loop...
 3752          *   It is sufficient to test the content, if it exists,
 3753          *   to quickly iterate all nodes. Now all nodes are tested only once.
 3754         \*/ 
 3755         content = node->content;
 3756         if (content)
 3757         {
 3758             if (TY_(FindNodeWithId)(content,tid))
 3759                 return yes;
 3760         }
 3761         node = node->next;
 3762     }
 3763     return no;
 3764 }
 3765 
 3766 
 3767 /*\ 
 3768  *  Issue #166 - repeated <main> element
 3769  *  Do a global search for an element
 3770 \*/
 3771 static Bool TY_(FindNodeById)( TidyDocImpl* doc, TidyTagId tid )
 3772 {
 3773     Node *node = (doc ? doc->root.content : NULL);
 3774     return TY_(FindNodeWithId)(node,tid);
 3775 }
 3776 
 3777 
 3778 void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
 3779 {
 3780     Lexer* lexer = doc->lexer;
 3781     Node *node;
 3782     Bool checkstack, iswhitenode;
 3783 
 3784     mode = IgnoreWhitespace;
 3785     checkstack = yes;
 3786 
 3787     TY_(BumpObject)( doc, body->parent );
 3788 
 3789     DEBUG_LOG(SPRTF("Enter ParseBody...\n"));
 3790     while ((node = TY_(GetToken)(doc, mode)) != NULL)
 3791     {
 3792         /* find and discard multiple <body> elements */
 3793         if (node->tag == body->tag && node->type == StartTag)
 3794         {
 3795             TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
 3796             TY_(FreeNode)(doc, node);
 3797             continue;
 3798         }
 3799 
 3800         /* #538536 Extra endtags not detected */
 3801         if ( nodeIsHTML(node) )
 3802         {
 3803             if (TY_(nodeIsElement)(node) || lexer->seenEndHtml) 
 3804                 TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
 3805             else
 3806                 lexer->seenEndHtml = 1;
 3807 
 3808             TY_(FreeNode)( doc, node);
 3809             continue;
 3810         }
 3811 
 3812         if ( lexer->seenEndBody && 
 3813              ( node->type == StartTag ||
 3814                node->type == EndTag   ||
 3815                node->type == StartEndTag ) )
 3816         {
 3817             TY_(Report)(doc, body, node, CONTENT_AFTER_BODY );
 3818         }
 3819 
 3820         if ( node->tag == body->tag && node->type == EndTag )
 3821         {
 3822             body->closed = yes;
 3823             TrimSpaces(doc, body);
 3824             TY_(FreeNode)( doc, node);
 3825             lexer->seenEndBody = 1;
 3826             mode = IgnoreWhitespace;
 3827 
 3828             if ( nodeIsNOFRAMES(body->parent) )
 3829                 break;
 3830 
 3831             continue;
 3832         }
 3833 
 3834         if ( nodeIsNOFRAMES(node) )
 3835         {
 3836             if (node->type == StartTag)
 3837             {
 3838                 TY_(InsertNodeAtEnd)(body, node);
 3839                 TY_(ParseBlock)(doc, node, mode);
 3840                 continue;
 3841             }
 3842 
 3843             if (node->type == EndTag && nodeIsNOFRAMES(body->parent) )
 3844             {
 3845                 TrimSpaces(doc, body);
 3846                 TY_(UngetToken)( doc );
 3847                 break;
 3848             }
 3849         }
 3850 
 3851         if ( (nodeIsFRAME(node) || nodeIsFRAMESET(node))
 3852              && nodeIsNOFRAMES(body->parent) )
 3853         {
 3854             TrimSpaces(doc, body);
 3855             TY_(UngetToken)( doc );
 3856             break;
 3857         }
 3858         
 3859         iswhitenode = no;
 3860 
 3861         if ( TY_(nodeIsText)(node) &&
 3862              node->end <= node->start + 1 &&
 3863              lexer->lexbuf[node->start] == ' ' )
 3864             iswhitenode = yes;
 3865 
 3866         /* deal with comments etc. */
 3867         if (InsertMisc(body, node))
 3868             continue;
 3869 
 3870         /* mixed content model permits text */
 3871         if (TY_(nodeIsText)(node))
 3872         {
 3873             if (iswhitenode && mode == IgnoreWhitespace)
 3874             {
 3875                 TY_(FreeNode)( doc, node);
 3876                 continue;
 3877             }
 3878 
 3879             /* HTML 2 and HTML4 strict don't allow text here */
 3880             TY_(ConstrainVersion)(doc, ~(VERS_HTML40_STRICT | VERS_HTML20));
 3881 
 3882             if (checkstack)
 3883             {
 3884                 checkstack = no;
 3885 
 3886                 if ( TY_(InlineDup)(doc, node) > 0 )
 3887                     continue;
 3888             }
 3889 
 3890             TY_(InsertNodeAtEnd)(body, node);
 3891             mode = MixedContent;
 3892             continue;
 3893         }
 3894 
 3895         if (node->type == DocTypeTag)
 3896         {
 3897             InsertDocType(doc, body, node);
 3898             continue;
 3899         }
 3900         /* discard unknown  and PARAM tags */
 3901         if ( node->tag == NULL || nodeIsPARAM(node) )
 3902         {
 3903             TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
 3904             TY_(FreeNode)( doc, node);
 3905             continue;
 3906         }
 3907 
 3908         /*
 3909           Netscape allows LI and DD directly in BODY
 3910           We infer UL or DL respectively and use this
 3911           Bool to exclude block-level elements so as
 3912           to match Netscape's observed behaviour.
 3913         */
 3914         lexer->excludeBlocks = no;
 3915         
 3916         if (( nodeIsINPUT(node) ||
 3917              (!TY_(nodeHasCM)(node, CM_BLOCK) && !TY_(nodeHasCM)(node, CM_INLINE))
 3918            ) && !TY_(IsHTML5Mode)(doc) )
 3919         {
 3920             /* avoid this error message being issued twice */
 3921             if (!(node->tag->model & CM_HEAD))
 3922                 TY_(Report)(doc, body, node, TAG_NOT_ALLOWED_IN);
 3923 
 3924             if (node->tag->model & CM_HTML)
 3925             {
 3926                 /* copy body attributes if current body was inferred */
 3927                 if ( nodeIsBODY(node) && body->implicit 
 3928                      && body->attributes == NULL )
 3929                 {
 3930                     body->attributes = node->attributes;
 3931                     node->attributes = NULL;
 3932                 }
 3933 
 3934                 TY_(FreeNode)( doc, node);
 3935                 continue;
 3936             }
 3937 
 3938             if (node->tag->model & CM_HEAD)
 3939             {
 3940                 MoveToHead(doc, body, node);
 3941                 continue;
 3942             }
 3943 
 3944             if (node->tag->model & CM_LIST)
 3945             {
 3946                 TY_(UngetToken)( doc );
 3947                 node = TY_(InferredTag)(doc, TidyTag_UL);
 3948                 AddClassNoIndent(doc, node);
 3949                 lexer->excludeBlocks = yes;
 3950             }
 3951             else if (node->tag->model & CM_DEFLIST)
 3952             {
 3953                 TY_(UngetToken)( doc );
 3954                 node = TY_(InferredTag)(doc, TidyTag_DL);
 3955                 lexer->excludeBlocks = yes;
 3956             }
 3957             else if (node->tag->model & (CM_TABLE | CM_ROWGRP | CM_ROW))
 3958             {
 3959                 /* http://tidy.sf.net/issue/2855621 */
 3960                 if (node->type != EndTag) {
 3961                     TY_(UngetToken)( doc );
 3962                     node = TY_(InferredTag)(doc, TidyTag_TABLE);
 3963                 }
 3964                 lexer->excludeBlocks = yes;
 3965             }
 3966             else if ( nodeIsINPUT(node) )
 3967             {
 3968                 TY_(UngetToken)( doc );
 3969                 node = TY_(InferredTag)(doc, TidyTag_FORM);
 3970                 lexer->excludeBlocks = yes;
 3971             }
 3972             else
 3973             {
 3974                 if ( !TY_(nodeHasCM)(node, CM_ROW | CM_FIELD) )
 3975                 {
 3976                     TY_(UngetToken)( doc );
 3977                     return;
 3978                 }
 3979 
 3980                 /* ignore </td> </th> <option> etc. */
 3981                 TY_(FreeNode)( doc, node );
 3982                 continue;
 3983             }
 3984         }
 3985 
 3986         if (node->type == EndTag)
 3987         {
 3988             if ( nodeIsBR(node) )
 3989                 node->type = StartTag;
 3990             else if ( nodeIsP(node) )
 3991             {
 3992                 node->type = StartEndTag;
 3993                 node->implicit = yes;
 3994             }
 3995             else if ( TY_(nodeHasCM)(node, CM_INLINE) )
 3996                 TY_(PopInline)( doc, node );
 3997         }
 3998 
 3999         if (TY_(nodeIsElement)(node))
 4000         {
 4001             if (nodeIsMAIN(node)) {
 4002                 /*\ Issue #166 - repeated <main> element
 4003                  *  How to efficiently search for a previous main element?
 4004                 \*/
 4005                 if ( TY_(FindNodeById)(doc, TidyTag_MAIN) )
 4006                 {
 4007                     doc->badForm |= flg_BadMain; /* this is an ERROR in format */
 4008                     TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
 4009                     TY_(FreeNode)( doc, node);
 4010                     continue;
 4011                 }
 4012             }
 4013             /* Issue #20 - merging from Ger Hobbelt fork put back CM_MIXED, which had been
 4014                removed to fix this issue - reverting to fix 880221e
 4015              */
 4016             if ( TY_(nodeHasCM)(node, CM_INLINE) )
 4017             {
 4018                 /* HTML4 strict doesn't allow inline content here */
 4019                 /* but HTML2 does allow img elements as children of body */
 4020                 if ( nodeIsIMG(node) )
 4021                     TY_(ConstrainVersion)(doc, ~VERS_HTML40_STRICT);
 4022                 else
 4023                     TY_(ConstrainVersion)(doc, ~(VERS_HTML40_STRICT|VERS_HTML20));
 4024 
 4025                 if (checkstack && !node->implicit)
 4026                 {
 4027                     checkstack = no;
 4028 
 4029                     if ( TY_(InlineDup)(doc, node) > 0 )
 4030                         continue;
 4031                 }
 4032 
 4033                 mode = MixedContent;
 4034             }
 4035             else
 4036             {
 4037                 checkstack = yes;
 4038                 mode = IgnoreWhitespace;
 4039             }
 4040 
 4041             if (node->implicit)
 4042                 TY_(Report)(doc, body, node, INSERTING_TAG);
 4043 
 4044             TY_(InsertNodeAtEnd)(body, node);
 4045             ParseTag(doc, node, mode);
 4046             continue;
 4047         }
 4048 
 4049         /* discard unexpected tags */
 4050         TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
 4051         TY_(FreeNode)( doc, node);
 4052     }
 4053     DEBUG_LOG(SPRTF("Exit ParseBody 1...\n"));
 4054 }
 4055 
 4056 void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
 4057 {
 4058     Lexer* lexer = doc->lexer;
 4059     Node *node;
 4060 
 4061     if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
 4062     {
 4063         doc->badAccess |=  BA_USING_NOFRAMES;
 4064     }
 4065     mode = IgnoreWhitespace;
 4066 
 4067     while ( (node = TY_(GetToken)(doc, mode)) != NULL )
 4068     {
 4069         if ( node->tag == noframes->tag && node->type == EndTag )
 4070         {
 4071             TY_(FreeNode)( doc, node);
 4072             noframes->closed = yes;
 4073             TrimSpaces(doc, noframes);
 4074             return;
 4075         }
 4076 
 4077         if ( nodeIsFRAME(node) || nodeIsFRAMESET(node) )
 4078         {
 4079             TrimSpaces(doc, noframes);
 4080             if (node->type == EndTag)
 4081             {
 4082                 TY_(Report)(doc, noframes, node, DISCARDING_UNEXPECTED);
 4083                 TY_(FreeNode)( doc, node);       /* Throw it away */
 4084             }
 4085             else
 4086             {
 4087                 TY_(Report)(doc, noframes, node, MISSING_ENDTAG_BEFORE);
 4088                 TY_(UngetToken)( doc );
 4089             }
 4090             return;
 4091         }
 4092 
 4093         if ( nodeIsHTML(node) )
 4094         {
 4095             if (TY_(nodeIsElement)(node))
 4096                 TY_(Report)(doc, noframes, node, DISCARDING_UNEXPECTED);
 4097 
 4098             TY_(FreeNode)( doc, node);
 4099             continue;
 4100         }
 4101 
 4102         /* deal with comments etc. */
 4103         if (InsertMisc(noframes, node))
 4104             continue;
 4105 
 4106         if ( nodeIsBODY(node) && node->type == StartTag )
 4107         {
 4108             Bool seen_body = lexer->seenEndBody;
 4109             TY_(InsertNodeAtEnd)(noframes, node);
 4110             ParseTag(doc, node, IgnoreWhitespace /*MixedContent*/);
 4111 
 4112             /* fix for bug http://tidy.sf.net/bug/887259 */
 4113             if (seen_body && TY_(FindBody)(doc) != node)
 4114             {
 4115                 TY_(CoerceNode)(doc, node, TidyTag_DIV, no, no);
 4116                 MoveNodeToBody(doc, node);
 4117             }
 4118             continue;
 4119         }
 4120 
 4121         /* implicit body element inferred */
 4122         if (TY_(nodeIsText)(node) || (node->tag && node->type != EndTag))
 4123         {
 4124             Node *body = TY_(FindBody)( doc );
 4125             if ( body || lexer->seenEndBody )
 4126             {
 4127                 if ( body == NULL )
 4128                 {
 4129                     TY_(Report)(doc, noframes, node, DISCARDING_UNEXPECTED);
 4130                     TY_(FreeNode)( doc, node);
 4131                     continue;
 4132                 }
 4133                 if ( TY_(nodeIsText)(node) )
 4134                 {
 4135                     TY_(UngetToken)( doc );
 4136                     node = TY_(InferredTag)(doc, TidyTag_P);
 4137                     TY_(Report)(doc, noframes, node, CONTENT_AFTER_BODY );
 4138                 }
 4139                 TY_(InsertNodeAtEnd)( body, node );
 4140             }
 4141             else
 4142             {
 4143                 TY_(UngetToken)( doc );
 4144                 node = TY_(InferredTag)(doc, TidyTag_BODY);
 4145                 if ( cfgBool(doc, TidyXmlOut) )
 4146                     TY_(Report)(doc, noframes, node, INSERTING_TAG);
 4147                 TY_(InsertNodeAtEnd)( noframes, node );
 4148             }
 4149 
 4150             ParseTag( doc, node, IgnoreWhitespace /*MixedContent*/ );
 4151             continue;
 4152         }
 4153 
 4154         /* discard unexpected end tags */
 4155         TY_(Report)(doc, noframes, node, DISCARDING_UNEXPECTED);
 4156         TY_(FreeNode)( doc, node);
 4157     }
 4158 
 4159     TY_(Report)(doc, noframes, node, MISSING_ENDTAG_FOR);
 4160 }
 4161 
 4162 void TY_(ParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSED(mode))
 4163 {
 4164     Lexer* lexer = doc->lexer;
 4165     Node *node;
 4166 
 4167     if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
 4168     {
 4169         doc->badAccess |= BA_USING_FRAMES;
 4170     }
 4171     
 4172     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 4173     {
 4174         if (node->tag == frameset->tag && node->type == EndTag)
 4175         {
 4176             TY_(FreeNode)( doc, node);
 4177             frameset->closed = yes;
 4178             TrimSpaces(doc, frameset);
 4179             return;
 4180         }
 4181 
 4182         /* deal with comments etc. */
 4183         if (InsertMisc(frameset, node))
 4184             continue;
 4185 
 4186         if (node->tag == NULL)
 4187         {
 4188             TY_(Report)(doc, frameset, node, DISCARDING_UNEXPECTED);
 4189             TY_(FreeNode)( doc, node);
 4190             continue; 
 4191         }
 4192 
 4193         if (TY_(nodeIsElement)(node))
 4194         {
 4195             if (node->tag && node->tag->model & CM_HEAD)
 4196             {
 4197                 MoveToHead(doc, frameset, node);
 4198                 continue;
 4199             }
 4200         }
 4201 
 4202         if ( nodeIsBODY(node) )
 4203         {
 4204             TY_(UngetToken)( doc );
 4205             node = TY_(InferredTag)(doc, TidyTag_NOFRAMES);
 4206             TY_(Report)(doc, frameset, node, INSERTING_TAG);
 4207         }
 4208 
 4209         if (node->type == StartTag && (node->tag->model & CM_FRAMES))
 4210         {
 4211             TY_(InsertNodeAtEnd)(frameset, node);
 4212             lexer->excludeBlocks = no;
 4213             ParseTag(doc, node, MixedContent);
 4214             continue;
 4215         }
 4216         else if (node->type == StartEndTag && (node->tag->model & CM_FRAMES))
 4217         {
 4218             TY_(InsertNodeAtEnd)(frameset, node);
 4219             continue;
 4220         }
 4221 
 4222         /* discard unexpected tags */
 4223         /* WAI [6.5.1.4] link is being discarded outside of NOFRAME */
 4224         if ( nodeIsA(node) )
 4225            doc->badAccess |= BA_INVALID_LINK_NOFRAMES;
 4226 
 4227         TY_(Report)(doc, frameset, node, DISCARDING_UNEXPECTED);
 4228         TY_(FreeNode)( doc, node);
 4229     }
 4230 
 4231     TY_(Report)(doc, frameset, node, MISSING_ENDTAG_FOR);
 4232 }
 4233 
 4234 void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
 4235 {
 4236     Node *node, *head;
 4237     Node *frameset = NULL;
 4238     Node *noframes = NULL;
 4239 
 4240     DEBUG_LOG(SPRTF("Entering ParseHTML...\n"));
 4241     TY_(SetOptionBool)( doc, TidyXmlTags, no );
 4242 
 4243     for (;;)
 4244     {
 4245         node = TY_(GetToken)(doc, IgnoreWhitespace);
 4246 
 4247         if (node == NULL)
 4248         {
 4249             node = TY_(InferredTag)(doc, TidyTag_HEAD);
 4250             break;
 4251         }
 4252 
 4253         if ( nodeIsHEAD(node) )
 4254             break;
 4255 
 4256         if (node->tag == html->tag && node->type == EndTag)
 4257         {
 4258             TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4259             TY_(FreeNode)( doc, node);
 4260             continue;
 4261         }
 4262 
 4263         /* find and discard multiple <html> elements */
 4264         if (node->tag == html->tag && node->type == StartTag)
 4265         {
 4266             TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4267             TY_(FreeNode)(doc, node);
 4268             continue;
 4269         }
 4270 
 4271         /* deal with comments etc. */
 4272         if (InsertMisc(html, node))
 4273             continue;
 4274 
 4275         TY_(UngetToken)( doc );
 4276         node = TY_(InferredTag)(doc, TidyTag_HEAD);
 4277         break;
 4278     }
 4279 
 4280     head = node;
 4281     TY_(InsertNodeAtEnd)(html, head);
 4282     TY_(ParseHead)(doc, head, mode);
 4283 
 4284     for (;;)
 4285     {
 4286         node = TY_(GetToken)(doc, IgnoreWhitespace);
 4287 
 4288         if (node == NULL)
 4289         {
 4290             if (frameset == NULL) /* implied body */
 4291             {
 4292                 node = TY_(InferredTag)(doc, TidyTag_BODY);
 4293                 TY_(InsertNodeAtEnd)(html, node);
 4294                 TY_(ParseBody)(doc, node, mode);
 4295             }
 4296 
 4297             DEBUG_LOG(SPRTF("Exit ParseHTML 1...\n"));
 4298             return;
 4299         }
 4300 
 4301         /* robustly handle html tags */
 4302         if (node->tag == html->tag)
 4303         {
 4304             if (node->type != StartTag && frameset == NULL)
 4305                 TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4306 
 4307             TY_(FreeNode)( doc, node);
 4308             continue;
 4309         }
 4310 
 4311         /* deal with comments etc. */
 4312         if (InsertMisc(html, node))
 4313             continue;
 4314 
 4315         /* if frameset document coerce <body> to <noframes> */
 4316         if ( nodeIsBODY(node) )
 4317         {
 4318             if (node->type != StartTag)
 4319             {
 4320                 TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4321                 TY_(FreeNode)( doc, node);
 4322                 continue;
 4323             }
 4324 
 4325             if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
 4326             {
 4327                 if (frameset != NULL)
 4328                 {
 4329                     TY_(UngetToken)( doc );
 4330 
 4331                     if (noframes == NULL)
 4332                     {
 4333                         noframes = TY_(InferredTag)(doc, TidyTag_NOFRAMES);
 4334                         TY_(InsertNodeAtEnd)(frameset, noframes);
 4335                         TY_(Report)(doc, html, noframes, INSERTING_TAG);
 4336                     }
 4337                     else
 4338                     {
 4339                         if (noframes->type == StartEndTag)
 4340                             noframes->type = StartTag;
 4341                     }
 4342 
 4343                     ParseTag(doc, noframes, mode);
 4344                     continue;
 4345                 }
 4346             }
 4347 
 4348             TY_(ConstrainVersion)(doc, ~VERS_FRAMESET);
 4349             break;  /* to parse body */
 4350         }
 4351 
 4352         /* flag an error if we see more than one frameset */
 4353         if ( nodeIsFRAMESET(node) )
 4354         {
 4355             if (node->type != StartTag)
 4356             {
 4357                 TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4358                 TY_(FreeNode)( doc, node);
 4359                 continue;
 4360             }
 4361 
 4362             if (frameset != NULL)
 4363                 TY_(Report)(doc, html, node, DUPLICATE_FRAMESET);
 4364             else
 4365                 frameset = node;
 4366 
 4367             TY_(InsertNodeAtEnd)(html, node);
 4368             ParseTag(doc, node, mode);
 4369 
 4370             /*
 4371               see if it includes a noframes element so
 4372               that we can merge subsequent noframes elements
 4373             */
 4374 
 4375             for (node = frameset->content; node; node = node->next)
 4376             {
 4377                 if ( nodeIsNOFRAMES(node) )
 4378                     noframes = node;
 4379             }
 4380             continue;
 4381         }
 4382 
 4383         /* if not a frameset document coerce <noframes> to <body> */
 4384         if ( nodeIsNOFRAMES(node) )
 4385         {
 4386             if (node->type != StartTag)
 4387             {
 4388                 TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4389                 TY_(FreeNode)( doc, node);
 4390                 continue;
 4391             }
 4392 
 4393             if (frameset == NULL)
 4394             {
 4395                 TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4396                 TY_(FreeNode)( doc, node);
 4397                 node = TY_(InferredTag)(doc, TidyTag_BODY);
 4398                 break;
 4399             }
 4400 
 4401             if (noframes == NULL)
 4402             {
 4403                 noframes = node;
 4404                 TY_(InsertNodeAtEnd)(frameset, noframes);
 4405             }
 4406             else
 4407                 TY_(FreeNode)( doc, node);
 4408 
 4409             ParseTag(doc, noframes, mode);
 4410             continue;
 4411         }
 4412 
 4413         if (TY_(nodeIsElement)(node))
 4414         {
 4415             if (node->tag && node->tag->model & CM_HEAD)
 4416             {
 4417                 MoveToHead(doc, html, node);
 4418                 continue;
 4419             }
 4420 
 4421             /* discard illegal frame element following a frameset */
 4422             if ( frameset != NULL && nodeIsFRAME(node) )
 4423             {
 4424                 TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
 4425                 TY_(FreeNode)(doc, node);
 4426                 continue;
 4427             }
 4428         }
 4429 
 4430         TY_(UngetToken)( doc );
 4431 
 4432         /* insert other content into noframes element */
 4433 
 4434         if (frameset)
 4435         {
 4436             if (noframes == NULL)
 4437             {
 4438                 noframes = TY_(InferredTag)(doc, TidyTag_NOFRAMES);
 4439                 TY_(InsertNodeAtEnd)(frameset, noframes);
 4440             }
 4441             else
 4442             {
 4443                 TY_(Report)(doc, html, node, NOFRAMES_CONTENT);
 4444                 if (noframes->type == StartEndTag)
 4445                     noframes->type = StartTag;
 4446             }
 4447 
 4448             TY_(ConstrainVersion)(doc, VERS_FRAMESET);
 4449             ParseTag(doc, noframes, mode);
 4450             continue;
 4451         }
 4452 
 4453         node = TY_(InferredTag)(doc, TidyTag_BODY);
 4454         /* Issue #132 - disable inserting BODY tag warning
 4455            BUT only if NOT --show-body-only yes */
 4456         if (!showingBodyOnly(doc))
 4457             TY_(Report)(doc, html, node, INSERTING_TAG );
 4458         TY_(ConstrainVersion)(doc, ~VERS_FRAMESET);
 4459         break;
 4460     }
 4461 
 4462     /* node must be body */
 4463 
 4464     TY_(InsertNodeAtEnd)(html, node);
 4465     ParseTag(doc, node, mode);
 4466     DEBUG_LOG(SPRTF("Exit ParseHTML 2...\n"));
 4467 }
 4468 
 4469 static Bool nodeCMIsOnlyInline( Node* node )
 4470 {
 4471     return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK );
 4472 }
 4473 
 4474 static void EncloseBodyText(TidyDocImpl* doc)
 4475 {
 4476     Node* node;
 4477     Node* body = TY_(FindBody)(doc);
 4478 
 4479     if (!body)
 4480         return;
 4481 
 4482     node = body->content;
 4483 
 4484     while (node)
 4485     {
 4486         if ((TY_(nodeIsText)(node) && !TY_(IsBlank)(doc->lexer, node)) ||
 4487             (TY_(nodeIsElement)(node) && nodeCMIsOnlyInline(node)))
 4488         {
 4489             Node* p = TY_(InferredTag)(doc, TidyTag_P);
 4490             TY_(InsertNodeBeforeElement)(node, p);
 4491             while (node && (!TY_(nodeIsElement)(node) || nodeCMIsOnlyInline(node)))
 4492             {
 4493                 Node* next = node->next;
 4494                 TY_(RemoveNode)(node);
 4495                 TY_(InsertNodeAtEnd)(p, node);
 4496                 node = next;
 4497             }
 4498             TrimSpaces(doc, p);
 4499             continue;
 4500         }
 4501         node = node->next;
 4502     }
 4503 }
 4504 
 4505 /* <form>, <blockquote> and <noscript> do not allow #PCDATA in
 4506    HTML 4.01 Strict (%block; model instead of %flow;).
 4507   When requested, text nodes in these elements are wrapped in <p>. */
 4508 static void EncloseBlockText(TidyDocImpl* doc, Node* node)
 4509 {
 4510     Node *next;
 4511     Node *block;
 4512 
 4513     while (node)
 4514     {
 4515         next = node->next;
 4516 
 4517         if (node->content)
 4518             EncloseBlockText(doc, node->content);
 4519 
 4520         if (!(nodeIsFORM(node) || nodeIsNOSCRIPT(node) ||
 4521               nodeIsBLOCKQUOTE(node))
 4522             || !node->content)
 4523         {
 4524             node = next;
 4525             continue;
 4526         }
 4527 
 4528         block = node->content;
 4529 
 4530         if ((TY_(nodeIsText)(block) && !TY_(IsBlank)(doc->lexer, block)) ||
 4531             (TY_(nodeIsElement)(block) && nodeCMIsOnlyInline(block)))
 4532         {
 4533             Node* p = TY_(InferredTag)(doc, TidyTag_P);
 4534             TY_(InsertNodeBeforeElement)(block, p);
 4535             while (block &&
 4536                    (!TY_(nodeIsElement)(block) || nodeCMIsOnlyInline(block)))
 4537             {
 4538                 Node* tempNext = block->next;
 4539                 TY_(RemoveNode)(block);
 4540                 TY_(InsertNodeAtEnd)(p, block);
 4541                 block = tempNext;
 4542             }
 4543             TrimSpaces(doc, p);
 4544             continue;
 4545         }
 4546 
 4547         node = next;
 4548     }
 4549 }
 4550 
 4551 static void ReplaceObsoleteElements(TidyDocImpl* doc, Node* node)
 4552 {
 4553     Node *next;
 4554 
 4555     while (node)
 4556     {
 4557         next = node->next;
 4558 
 4559         /* if (nodeIsDIR(node) || nodeIsMENU(node)) */
 4560         /* HTML5 - <menu ... > is no longer obsolete */
 4561         if (nodeIsDIR(node))
 4562             TY_(CoerceNode)(doc, node, TidyTag_UL, yes, yes);
 4563 
 4564         if (nodeIsXMP(node) || nodeIsLISTING(node) ||
 4565             (node->tag && node->tag->id == TidyTag_PLAINTEXT))
 4566             TY_(CoerceNode)(doc, node, TidyTag_PRE, yes, yes);
 4567 
 4568         if (node->content)
 4569             ReplaceObsoleteElements(doc, node->content);
 4570 
 4571         node = next;
 4572     }
 4573 }
 4574 
 4575 static void AttributeChecks(TidyDocImpl* doc, Node* node)
 4576 {
 4577     Node *next;
 4578 
 4579     while (node)
 4580     {
 4581         next = node->next;
 4582 
 4583         if (TY_(nodeIsElement)(node))
 4584         {
 4585             if (node->tag && node->tag->chkattrs) /* [i_a]2 fix crash after adding SVG support with alt/unknown tag subtree insertion there */
 4586                 node->tag->chkattrs(doc, node);
 4587             else
 4588                 TY_(CheckAttributes)(doc, node);
 4589         }
 4590 
 4591         if (node->content)
 4592             AttributeChecks(doc, node->content);
 4593 
 4594         assert( next != node ); /* http://tidy.sf.net/issue/1603538 */
 4595         node = next;
 4596     }
 4597 }
 4598 
 4599 /*
 4600   HTML is the top level element
 4601 */
 4602 void TY_(ParseDocument)(TidyDocImpl* doc)
 4603 {
 4604     Node *node, *html, *doctype = NULL;
 4605 
 4606     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 4607     {
 4608         if (node->type == XmlDecl)
 4609         {
 4610             doc->xmlDetected = yes;
 4611 
 4612             if (TY_(FindXmlDecl)(doc) && doc->root.content)
 4613             {
 4614                 TY_(Report)(doc, &doc->root, node, DISCARDING_UNEXPECTED);
 4615                 TY_(FreeNode)(doc, node);
 4616                 continue;
 4617             }
 4618             if (node->line > 1 || node->column != 1)
 4619             {
 4620                 TY_(Report)(doc, &doc->root, node, SPACE_PRECEDING_XMLDECL);
 4621             }
 4622         }
 4623 
 4624         /* deal with comments etc. */
 4625         if (InsertMisc( &doc->root, node ))
 4626             continue;
 4627 
 4628         if (node->type == DocTypeTag)
 4629         {
 4630             if (doctype == NULL)
 4631             {
 4632                 TY_(InsertNodeAtEnd)( &doc->root, node);
 4633                 doctype = node;
 4634             }
 4635             else
 4636             {
 4637                 TY_(Report)(doc, &doc->root, node, DISCARDING_UNEXPECTED);
 4638                 TY_(FreeNode)( doc, node);
 4639             }
 4640             continue;
 4641         }
 4642 
 4643         if (node->type == EndTag)
 4644         {
 4645             TY_(Report)(doc, &doc->root, node, DISCARDING_UNEXPECTED);
 4646             TY_(FreeNode)( doc, node);
 4647             continue;
 4648         }
 4649 
 4650         if (node->type == StartTag && nodeIsHTML(node))
 4651         {
 4652             AttVal *xmlns;
 4653 
 4654             xmlns = TY_(AttrGetById)(node, TidyAttr_XMLNS);
 4655 
 4656             if (AttrValueIs(xmlns, XHTML_NAMESPACE))
 4657             {
 4658                 Bool htmlOut = cfgBool( doc, TidyHtmlOut );
 4659                 doc->lexer->isvoyager = yes;                  /* Unless plain HTML */
 4660                 TY_(SetOptionBool)( doc, TidyXhtmlOut, !htmlOut ); /* is specified, output*/
 4661                 TY_(SetOptionBool)( doc, TidyXmlOut, !htmlOut );   /* will be XHTML. */
 4662 
 4663                 /* adjust other config options, just as in config.c */
 4664                 if ( !htmlOut )
 4665                 {
 4666                     TY_(SetOptionBool)( doc, TidyUpperCaseTags, no );
 4667                     TY_(SetOptionInt)( doc, TidyUpperCaseAttrs, no );
 4668                 }
 4669             }
 4670         }
 4671 
 4672         if ( node->type != StartTag || !nodeIsHTML(node) )
 4673         {
 4674             TY_(UngetToken)( doc );
 4675             html = TY_(InferredTag)(doc, TidyTag_HTML);
 4676         }
 4677         else
 4678             html = node;
 4679 
 4680         /*\
 4681          *  #72, avoid MISSING_DOCTYPE if show-body-only. 
 4682          *  #191, also if --doctype omit, that is TidyDoctypeOmit
 4683          *  #342, adjust tags to html4-- if not 'auto' or 'html5'
 4684         \*/
 4685         if (!TY_(FindDocType)(doc)) 
 4686         {
 4687             ulong dtmode = cfg( doc, TidyDoctypeMode );
 4688             if ((dtmode != TidyDoctypeOmit) && !showingBodyOnly(doc))
 4689                 TY_(Report)(doc, NULL, NULL, MISSING_DOCTYPE);
 4690             if ((dtmode != TidyDoctypeAuto) && (dtmode != TidyDoctypeHtml5))
 4691             {
 4692                 /*\
 4693                  *  Issue #342 - if not doctype 'auto', or 'html5'
 4694                  *  then reset mode htm4-- parsing
 4695                 \*/
 4696                 TY_(AdjustTags)(doc); /* Dynamically modify the tags table to html4-- mode */
 4697             }
 4698         }
 4699         TY_(InsertNodeAtEnd)( &doc->root, html);
 4700         TY_(ParseHTML)( doc, html, IgnoreWhitespace );
 4701         break;
 4702     }
 4703 
 4704     /* do this before any more document fixes */
 4705     if ( cfg( doc, TidyAccessibilityCheckLevel ) > 0 )
 4706         TY_(AccessibilityChecks)( doc );
 4707 
 4708     if (!TY_(FindHTML)(doc))
 4709     {
 4710         /* a later check should complain if <body> is empty */
 4711         html = TY_(InferredTag)(doc, TidyTag_HTML);
 4712         TY_(InsertNodeAtEnd)( &doc->root, html);
 4713         TY_(ParseHTML)(doc, html, IgnoreWhitespace);
 4714     }
 4715 
 4716     node = TY_(FindTITLE)(doc);
 4717     if (!node)
 4718     {
 4719         Node* head = TY_(FindHEAD)(doc);
 4720         /* #72, avoid MISSING_TITLE_ELEMENT if show-body-only (but allow InsertNodeAtEnd to avoid new warning) */
 4721         if (!showingBodyOnly(doc))
 4722         {
 4723             TY_(Report)(doc, head, NULL, MISSING_TITLE_ELEMENT);
 4724         }
 4725         TY_(InsertNodeAtEnd)(head, TY_(InferredTag)(doc, TidyTag_TITLE));
 4726     }
 4727     else if (!node->content && !showingBodyOnly(doc))
 4728     {
 4729         /* Is #839 - warn node is blank in HTML5 */
 4730         if (TY_(IsHTML5Mode)(doc))
 4731         {
 4732             TY_(Report)(doc, node, NULL, BLANK_TITLE_ELEMENT);
 4733         }
 4734     }
 4735 
 4736     AttributeChecks(doc, &doc->root);
 4737     ReplaceObsoleteElements(doc, &doc->root);
 4738     TY_(DropEmptyElements)(doc, &doc->root);
 4739     CleanSpaces(doc, &doc->root);
 4740 
 4741     if (cfgBool(doc, TidyEncloseBodyText))
 4742         EncloseBodyText(doc);
 4743     if (cfgBool(doc, TidyEncloseBlockText))
 4744         EncloseBlockText(doc, &doc->root);
 4745 }
 4746 
 4747 Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element)
 4748 {
 4749     AttVal *attribute;
 4750 
 4751     /* search attributes for xml:space */
 4752     for (attribute = element->attributes; attribute; attribute = attribute->next)
 4753     {
 4754         if (attrIsXML_SPACE(attribute))
 4755         {
 4756             if (AttrValueIs(attribute, "preserve"))
 4757                 return yes;
 4758 
 4759             return no;
 4760         }
 4761     }
 4762 
 4763     if (element->element == NULL)
 4764         return no;
 4765         
 4766     /* kludge for html docs without explicit xml:space attribute */
 4767     if (nodeIsPRE(element)    ||
 4768         nodeIsSCRIPT(element) ||
 4769         nodeIsSTYLE(element)  ||
 4770         TY_(FindParser)(doc, element) == TY_(ParsePre))
 4771         return yes;
 4772 
 4773     /* kludge for XSL docs */
 4774     if ( TY_(tmbstrcasecmp)(element->element, "xsl:text") == 0 )
 4775         return yes;
 4776 
 4777     return no;
 4778 }
 4779 
 4780 /*
 4781   XML documents
 4782 */
 4783 static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
 4784 {
 4785     Lexer* lexer = doc->lexer;
 4786     Node *node;
 4787 
 4788     /* if node is pre or has xml:space="preserve" then do so */
 4789 
 4790     if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
 4791         mode = Preformatted;
 4792 
 4793     while ((node = TY_(GetToken)(doc, mode)) != NULL)
 4794     {
 4795         if (node->type == EndTag &&
 4796            node->element && element->element &&
 4797            TY_(tmbstrcmp)(node->element, element->element) == 0)
 4798         {
 4799             TY_(FreeNode)( doc, node);
 4800             element->closed = yes;
 4801             break;
 4802         }
 4803 
 4804         /* discard unexpected end tags */
 4805         if (node->type == EndTag)
 4806         {
 4807             if (element)
 4808                 TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN);
 4809             else
 4810                 TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_ERR);
 4811 
 4812             TY_(FreeNode)( doc, node);
 4813             continue;
 4814         }
 4815 
 4816         /* parse content on seeing start tag */
 4817         if (node->type == StartTag)
 4818             ParseXMLElement( doc, node, mode );
 4819 
 4820         TY_(InsertNodeAtEnd)(element, node);
 4821     }
 4822 
 4823     /*
 4824      if first child is text then trim initial space and
 4825      delete text node if it is empty.
 4826     */
 4827 
 4828     node = element->content;
 4829 
 4830     if (TY_(nodeIsText)(node) && mode != Preformatted)
 4831     {
 4832         if ( lexer->lexbuf[node->start] == ' ' )
 4833         {
 4834             node->start++;
 4835 
 4836             if (node->start >= node->end)
 4837                 TY_(DiscardElement)( doc, node );
 4838         }
 4839     }
 4840 
 4841     /*
 4842      if last child is text then trim final space and
 4843      delete the text node if it is empty
 4844     */
 4845 
 4846     node = element->last;
 4847 
 4848     if (TY_(nodeIsText)(node) && mode != Preformatted)
 4849     {
 4850         if ( lexer->lexbuf[node->end - 1] == ' ' )
 4851         {
 4852             node->end--;
 4853 
 4854             if (node->start >= node->end)
 4855                 TY_(DiscardElement)( doc, node );
 4856         }
 4857     }
 4858 }
 4859 
 4860 void TY_(ParseXMLDocument)(TidyDocImpl* doc)
 4861 {
 4862     Node *node, *doctype = NULL;
 4863 
 4864     TY_(SetOptionBool)( doc, TidyXmlTags, yes );
 4865 
 4866     doc->xmlDetected = yes;
 4867 
 4868     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
 4869     {
 4870         /* discard unexpected end tags */
 4871         if (node->type == EndTag)
 4872         {
 4873             TY_(Report)(doc, NULL, node, UNEXPECTED_ENDTAG);
 4874             TY_(FreeNode)( doc, node);
 4875             continue;
 4876         }
 4877 
 4878          /* deal with comments etc. */
 4879         if (InsertMisc( &doc->root, node))
 4880             continue;
 4881 
 4882         if (node->type == DocTypeTag)
 4883         {
 4884             if (doctype == NULL)
 4885             {
 4886                 TY_(InsertNodeAtEnd)( &doc->root, node);
 4887                 doctype = node;
 4888             }
 4889             else
 4890             {
 4891                 TY_(Report)(doc, &doc->root, node, DISCARDING_UNEXPECTED);
 4892                 TY_(FreeNode)( doc, node);
 4893             }
 4894             continue;
 4895         }
 4896 
 4897         if (node->type == StartEndTag)
 4898         {
 4899             TY_(InsertNodeAtEnd)( &doc->root, node);
 4900             continue;
 4901         }
 4902 
 4903        /* if start tag then parse element's content */
 4904         if (node->type == StartTag)
 4905         {
 4906             TY_(InsertNodeAtEnd)( &doc->root, node );
 4907             ParseXMLElement( doc, node, IgnoreWhitespace );
 4908             continue;
 4909         }
 4910 
 4911         TY_(Report)(doc, &doc->root, node, DISCARDING_UNEXPECTED);
 4912         TY_(FreeNode)( doc, node);
 4913     }
 4914 
 4915     /* ensure presence of initial <?xml version="1.0"?> */
 4916     if ( cfgBool(doc, TidyXmlDecl) )
 4917         TY_(FixXmlDecl)( doc );
 4918 }
 4919 
 4920 
 4921 /*
 4922  * local variables:
 4923  * mode: c
 4924  * indent-tabs-mode: nil
 4925  * c-basic-offset: 4
 4926  * eval: (c-set-offset 'substatement-open 0)
 4927  * end:
 4928  */