"Fossies" - the Fresh Open Source Software Archive

Member "tidy-html5-5.8.0/src/tidylib.c" (16 Jul 2021, 79048 Bytes) of package /linux/www/tidy-html5-5.8.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "tidylib.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.7.28_vs_5.8.0.

    1 /* tidylib.c -- internal library definitions
    2 
    3   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
    4   See tidy.h for the copyright notice.
    5 
    6   Defines HTML Tidy API implemented by tidy library.
    7 
    8   Very rough initial cut for discussion purposes.
    9 
   10   Public interface is const-correct and doesn't explicitly depend
   11   on any globals.  Thus, thread-safety may be introduced w/out
   12   changing the interface.
   13 
   14   Looking ahead to a C++ wrapper, C functions always pass
   15   this-equivalent as 1st arg.
   16 
   17   Created 2001-05-20 by Charles Reitzel
   18 
   19 */
   20 
   21 #include <errno.h>
   22 
   23 #include "tidy-int.h"
   24 #include "parser.h"
   25 #include "clean.h"
   26 #include "gdoc.h"
   27 #include "config.h"
   28 #include "message.h"
   29 #include "messageobj.h"
   30 #include "pprint.h"
   31 #include "entities.h"
   32 #include "tmbstr.h"
   33 #include "utf8.h"
   34 #include "mappedio.h"
   35 #include "language.h"
   36 #include "attrs.h"
   37 #include "sprtf.h"
   38 #if SUPPORT_LOCALIZATIONS
   39 #  include "stdlib.h"
   40 #  include "locale.h"
   41 #endif
   42 
   43 /* Create/Destroy a Tidy "document" object */
   44 static TidyDocImpl* tidyDocCreate( TidyAllocator *allocator );
   45 static void         tidyDocRelease( TidyDocImpl* impl );
   46 
   47 static int          tidyDocStatus( TidyDocImpl* impl );
   48 
   49 /* Parse Markup */
   50 static int          tidyDocParseFile( TidyDocImpl* impl, ctmbstr htmlfil );
   51 static int          tidyDocParseStdin( TidyDocImpl* impl );
   52 static int          tidyDocParseString( TidyDocImpl* impl, ctmbstr content );
   53 static int          tidyDocParseBuffer( TidyDocImpl* impl, TidyBuffer* inbuf );
   54 static int          tidyDocParseSource( TidyDocImpl* impl, TidyInputSource* docIn );
   55 
   56 
   57 /* Execute post-parse diagnostics and cleanup.
   58 ** Note, the order is important.  You will get different
   59 ** results from the diagnostics depending on if they are run
   60 ** pre-or-post repair.
   61 */
   62 static int          tidyDocRunDiagnostics( TidyDocImpl* doc );
   63 static void         tidyDocReportDoctype( TidyDocImpl* doc );
   64 static int          tidyDocCleanAndRepair( TidyDocImpl* doc );
   65 
   66 
   67 /* Save cleaned up file to file/buffer/sink */
   68 static int          tidyDocSaveFile( TidyDocImpl* impl, ctmbstr htmlfil );
   69 static int          tidyDocSaveStdout( TidyDocImpl* impl );
   70 static int          tidyDocSaveString( TidyDocImpl* impl, tmbstr buffer, uint* buflen );
   71 static int          tidyDocSaveBuffer( TidyDocImpl* impl, TidyBuffer* outbuf );
   72 static int          tidyDocSaveSink( TidyDocImpl* impl, TidyOutputSink* docOut );
   73 static int          tidyDocSaveStream( TidyDocImpl* impl, StreamOut* out );
   74 
   75 
   76 /* Tidy public interface
   77 **
   78 ** Most functions return an integer:
   79 **
   80 ** 0    -> SUCCESS
   81 ** >0   -> WARNING
   82 ** <0   -> ERROR
   83 **
   84 */
   85 
   86 TidyDoc TIDY_CALL       tidyCreate(void)
   87 {
   88   TidyDocImpl* impl = tidyDocCreate( &TY_(g_default_allocator) );
   89   return tidyImplToDoc( impl );
   90 }
   91 
   92 TidyDoc TIDY_CALL tidyCreateWithAllocator( TidyAllocator *allocator )
   93 {
   94   TidyDocImpl* impl = tidyDocCreate( allocator );
   95   return tidyImplToDoc( impl );
   96 }
   97 
   98 void TIDY_CALL          tidyRelease( TidyDoc tdoc )
   99 {
  100   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  101   tidyDocRelease( impl );
  102 }
  103 
  104 TidyDocImpl* tidyDocCreate( TidyAllocator *allocator )
  105 {
  106     TidyDocImpl* doc = (TidyDocImpl*)TidyAlloc( allocator, sizeof(TidyDocImpl) );
  107     TidyClearMemory( doc, sizeof(*doc) );
  108     doc->allocator = allocator;
  109 
  110     TY_(InitMap)();
  111     TY_(InitTags)( doc );
  112     TY_(InitAttrs)( doc );
  113     TY_(InitConfig)( doc );
  114     TY_(InitPrintBuf)( doc );
  115 
  116     /* Set the locale for tidy's output. This both configures
  117     ** LibTidy to use the environment's locale as well as the
  118     ** standard library.
  119     */
  120 #if SUPPORT_LOCALIZATIONS
  121     if ( TY_(tidyGetLanguageSetByUser)() == no )
  122     {
  123         if( ! TY_(tidySetLanguage)( getenv( "LC_MESSAGES" ) ) )
  124         {
  125             if( ! TY_(tidySetLanguage)( getenv( "LANG" ) ) )
  126             {
  127                 /*\
  128                 *  Is. #770 #783 #780 #790 and maybe others -
  129                 *  TY_(tidySetLanguage)( setlocale( LC_ALL, "" ) );
  130                 *  this seems a 'bad' choice!
  131                \*/
  132             }
  133         }
  134     }
  135 #endif
  136 
  137     /* By default, wire tidy messages to standard error.
  138     ** Document input will be set by parsing routines.
  139     ** Document output will be set by pretty print routines.
  140     ** Config input will be set by config parsing routines.
  141     ** But we need to start off with a way to report errors.
  142     */
  143     doc->errout = TY_(StdErrOutput)();
  144     return doc;
  145 }
  146 
  147 void          tidyDocRelease( TidyDocImpl* doc )
  148 {
  149     /* doc in/out opened and closed by parse/print routines */
  150     if ( doc )
  151     {
  152         assert( doc->docIn == NULL );
  153         assert( doc->docOut == NULL );
  154 
  155         TY_(ReleaseStreamOut)( doc, doc->errout );
  156         doc->errout = NULL;
  157 
  158         TY_(FreePrintBuf)( doc );
  159         TY_(FreeNode)(doc, &doc->root);
  160         TidyClearMemory(&doc->root, sizeof(Node));
  161 
  162         if (doc->givenDoctype)
  163             TidyDocFree(doc, doc->givenDoctype);
  164 
  165         TY_(FreeConfig)( doc );
  166         TY_(FreeAttrTable)( doc );
  167         TY_(FreeAttrPriorityList)( doc );
  168         TY_(FreeMutedMessageList( doc ));
  169         TY_(FreeTags)( doc );
  170         /*\ 
  171          *  Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
  172          *  to determine which hash is to be used, so free it last.
  173         \*/
  174         TY_(FreeLexer)( doc );
  175         TidyDocFree( doc, doc );
  176     }
  177 }
  178 
  179 /* Let application store a chunk of data w/ each Tidy tdocance.
  180 ** Useful for callbacks.
  181 */
  182 void TIDY_CALL        tidySetAppData( TidyDoc tdoc, void* appData )
  183 {
  184   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  185   if ( impl )
  186     impl->appData = appData;
  187 }
  188 void* TIDY_CALL       tidyGetAppData( TidyDoc tdoc )
  189 {
  190   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  191   if ( impl )
  192     return impl->appData;
  193   return NULL;
  194 }
  195 
  196 ctmbstr TIDY_CALL     tidyReleaseDate(void)
  197 {
  198     return TY_(ReleaseDate)();
  199 }
  200 
  201 ctmbstr TIDY_CALL     tidyLibraryVersion(void)
  202 {
  203     return TY_(tidyLibraryVersion)();
  204 }
  205 
  206 ctmbstr TIDY_CALL     tidyPlatform(void)
  207 {
  208 #ifdef PLATFORM_NAME
  209     return PLATFORM_NAME;
  210 #else
  211     return NULL;
  212 #endif
  213 }
  214 
  215 
  216 /* Get/set configuration options
  217 */
  218 Bool TIDY_CALL     tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
  219 {
  220   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  221   if ( impl )
  222   {
  223     impl->pOptCallback = pOptCallback;
  224     return yes;
  225   }
  226   return no;
  227 }
  228 
  229 Bool TIDY_CALL     tidySetConfigCallback(TidyDoc tdoc, TidyConfigCallback pConfigCallback)
  230 {
  231   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  232   if ( impl )
  233   {
  234     impl->pConfigCallback = pConfigCallback;
  235     return yes;
  236   }
  237   return no;
  238 }
  239 
  240 Bool TIDY_CALL    tidySetConfigChangeCallback(TidyDoc tdoc, TidyConfigChangeCallback pCallback)
  241 {
  242   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  243   if ( impl )
  244   {
  245     impl->pConfigChangeCallback = pCallback;
  246     return yes;
  247   }
  248   return no;
  249 }
  250 
  251 
  252 
  253 int TIDY_CALL     tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
  254 {
  255     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  256     if ( impl )
  257         return TY_(ParseConfigFile)( impl, cfgfil );
  258     return -EINVAL;
  259 }
  260 
  261 int TIDY_CALL     tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
  262 {
  263     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  264     if ( impl )
  265         return TY_(ParseConfigFileEnc)( impl, cfgfil, charenc );
  266     return -EINVAL;
  267 }
  268 
  269 int TIDY_CALL         tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
  270 {
  271     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  272     if ( impl )
  273     {
  274         int enc = TY_(CharEncodingId)( impl, encnam );
  275         if ( enc >= 0 && TY_(AdjustCharEncoding)(impl, enc) )
  276             return 0;
  277 
  278         TY_(ReportBadArgument)( impl, "char-encoding" );
  279     }
  280     return -EINVAL;
  281 }
  282 
  283 int TIDY_CALL           tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
  284 {
  285     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  286     if ( impl )
  287     {
  288         int enc = TY_(CharEncodingId)( impl, encnam );
  289         if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyInCharEncoding, enc ) )
  290             return 0;
  291 
  292         TY_(ReportBadArgument)( impl, "in-char-encoding" );
  293     }
  294     return -EINVAL;
  295 }
  296 
  297 int TIDY_CALL           tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
  298 {
  299     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  300     if ( impl )
  301     {
  302         int enc = TY_(CharEncodingId)( impl, encnam );
  303         if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyOutCharEncoding, enc ) )
  304             return 0;
  305 
  306         TY_(ReportBadArgument)( impl, "out-char-encoding" );
  307     }
  308     return -EINVAL;
  309 }
  310 
  311 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
  312 {
  313     const TidyOptionImpl* option = TY_(lookupOption)( optnam );
  314     if ( option )
  315         return option->id;
  316     return N_TIDY_OPTIONS;  /* Error */
  317 }
  318 
  319 TidyIterator TIDY_CALL  tidyGetOptionList( TidyDoc tdoc )
  320 {
  321     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  322     if ( impl )
  323         return TY_(getOptionList)( impl );
  324     return (TidyIterator) -1;
  325 }
  326 
  327 TidyOption TIDY_CALL    tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
  328 {
  329     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  330     const TidyOptionImpl* option = NULL;
  331     if ( impl )
  332         option = TY_(getNextOption)( impl, pos );
  333     else if ( pos )
  334         *pos = 0;
  335     return tidyImplToOption( option );
  336 }
  337 
  338 
  339 TidyOption TIDY_CALL    tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
  340 {
  341     const TidyOptionImpl* option = TY_(getOption)( optId );
  342     return tidyImplToOption( option );
  343 }
  344 TidyOption TIDY_CALL    tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
  345 {
  346     const TidyOptionImpl* option = TY_(lookupOption)( optnam );
  347     return tidyImplToOption( option );
  348 }
  349 
  350 TidyOptionId TIDY_CALL  tidyOptGetId( TidyOption topt )
  351 {
  352     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  353     if ( option )
  354         return option->id;
  355     return N_TIDY_OPTIONS;
  356 }
  357 ctmbstr TIDY_CALL       tidyOptGetName( TidyOption topt )
  358 {
  359     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  360     if ( option )
  361         return option->name;
  362     return NULL;
  363 }
  364 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
  365 {
  366     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  367     if ( option )
  368         return option->type;
  369     return (TidyOptionType) -1;
  370 }
  371 Bool TIDY_CALL           tidyOptionIsList( TidyOption opt )
  372 {
  373     const TidyOptionImpl* option = tidyOptionToImpl( opt );
  374     if ( option )
  375         return TY_(getOptionIsList)( option->id );
  376     return no;
  377 }
  378 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
  379 {
  380     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  381     if ( option )
  382         return option->category;
  383     return (TidyConfigCategory) -1;
  384 }
  385 ctmbstr TIDY_CALL       tidyOptGetDefault( TidyOption topt )
  386 {
  387     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  388     /* Special case for TidyDoctype, because it is declared as string */
  389     if ( option && option->id == TidyDoctype )
  390     {
  391         const TidyOptionImpl* newopt = TY_(getOption)( TidyDoctypeMode );
  392         return TY_(GetPickListLabelForPick)( TidyDoctypeMode, newopt->dflt );
  393     }
  394     if ( option && option->type == TidyString )
  395         return option->pdflt; /* Issue #306 - fix an old typo hidden by a cast! */
  396     return NULL;
  397 }
  398 ulong TIDY_CALL          tidyOptGetDefaultInt( TidyOption topt )
  399 {
  400     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  401     if ( option && option->type != TidyString )
  402         return option->dflt;
  403 
  404     /* Special case for TidyDoctype, because it has a picklist */
  405     if ( option->id == TidyDoctype )
  406     {
  407         const TidyOptionImpl* newopt = TY_(getOption)( TidyDoctypeMode );
  408         return newopt->dflt;
  409     }
  410 
  411     return ~0U;
  412 }
  413 Bool TIDY_CALL          tidyOptGetDefaultBool( TidyOption topt )
  414 {
  415     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  416     if ( option && option->type != TidyString )
  417         return ( option->dflt ? yes : no );
  418     return no;
  419 }
  420 Bool TIDY_CALL          tidyOptIsReadOnly( TidyOption topt )
  421 {
  422     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  423     if ( option  )
  424         return ( option->parser == NULL );
  425     return yes;
  426 }
  427 
  428 
  429 TidyIterator TIDY_CALL  tidyOptGetPickList( TidyOption topt )
  430 {
  431     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  432     if ( option )
  433       return TY_(getOptionPickList)( option );
  434     return (TidyIterator) -1;
  435 }
  436 ctmbstr TIDY_CALL       tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
  437 {
  438     const TidyOptionImpl* option = tidyOptionToImpl( topt );
  439     if ( option )
  440         return TY_(getNextOptionPick)( option, pos );
  441     return NULL;
  442 }
  443 
  444 
  445 ctmbstr TIDY_CALL       tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
  446 {
  447     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  448     ctmbstr optval = NULL;
  449     if ( impl )
  450     {
  451         if ( optId == TidyDoctype )
  452         {
  453             /* Special case for TidyDoctype, because it has a picklist and is a string. */
  454             uint pick = tidyOptGetInt( tdoc, TidyDoctypeMode );
  455             if ( pick != TidyDoctypeUser )
  456             {
  457                 optval = TY_(GetPickListLabelForPick)( TidyDoctypeMode, pick );
  458             } else {
  459                 optval = cfgStr( impl, optId );
  460             }
  461         } else {
  462             /* Standard case. */
  463             optval = cfgStr( impl, optId );
  464         }
  465     }
  466     return optval;
  467 }
  468 Bool TIDY_CALL        tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
  469 {
  470   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  471   if ( impl )
  472     return TY_(ParseConfigValue)( impl, optId, val );
  473   return no;
  474 }
  475 Bool TIDY_CALL        tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
  476 {
  477   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  478   if ( impl )
  479     return TY_(ParseConfigOption)( impl, optnam, val );
  480   return no;
  481 }
  482 
  483 ulong TIDY_CALL        tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
  484 {
  485     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  486     ulong opti = 0;
  487     if ( impl )
  488     {
  489         /* Special case for TidyDoctype, because it has a picklist */
  490         if ( optId == TidyDoctype )
  491             opti = cfg( impl, TidyDoctypeMode);
  492         else
  493             opti = cfg( impl, optId );
  494     }
  495     return opti;
  496 }
  497 
  498 Bool TIDY_CALL        tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
  499 {
  500     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  501     if ( impl )
  502     {
  503         /* Special case for TidyDoctype, because it has a picklist */
  504         if ( optId == TidyDoctype )
  505             return TY_(SetOptionInt)( impl, TidyDoctypeMode, val );
  506         else
  507             return TY_(SetOptionInt)( impl, optId, val );
  508     }
  509     return no;
  510 }
  511 
  512 Bool TIDY_CALL         tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
  513 {
  514     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  515     Bool optb = no;
  516     if ( impl )
  517     {
  518         const TidyOptionImpl* option = TY_(getOption)( optId );
  519         if ( option )
  520         {
  521             optb = cfgBool( impl, optId );
  522         }
  523     }
  524     return optb;
  525 }
  526 
  527 Bool TIDY_CALL        tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
  528 {
  529     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  530     if ( impl )
  531         return TY_(SetOptionBool)( impl, optId, val );
  532     return no;
  533 }
  534 
  535 ctmbstr TIDY_CALL       tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
  536 {
  537   uint enc = tidyOptGetInt( tdoc, optId );
  538   return TY_(CharEncodingOptName)( enc );
  539 }
  540 
  541 ctmbstr TIDY_CALL       tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
  542 {
  543     uint pick = tidyOptGetInt( tdoc, optId );
  544     return TY_(GetPickListLabelForPick)( optId, pick );
  545 }
  546 
  547 
  548 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
  549 {
  550     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  551     TidyIterator declIter = 0;
  552     if ( impl )
  553         declIter = TY_(GetDeclaredTagList)( impl );
  554     return declIter;
  555 }
  556 
  557 ctmbstr TIDY_CALL       tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
  558                                      TidyIterator* iter )
  559 {
  560     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  561     ctmbstr tagnam = NULL;
  562     if ( impl )
  563     {
  564         UserTagType tagtyp = tagtype_null;
  565         if ( optId == TidyInlineTags )
  566             tagtyp = tagtype_inline;
  567         else if ( optId == TidyBlockTags )
  568             tagtyp = tagtype_block;
  569         else if ( optId == TidyEmptyTags )
  570             tagtyp = tagtype_empty;
  571         else if ( optId == TidyPreTags )
  572             tagtyp = tagtype_pre;
  573         if ( tagtyp != tagtype_null )
  574             tagnam = TY_(GetNextDeclaredTag)( impl, tagtyp, iter );
  575     }
  576     return tagnam;
  577 }
  578 
  579 TidyIterator TIDY_CALL tidyOptGetPriorityAttrList( TidyDoc tdoc )
  580 {
  581     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  582     if ( impl )
  583         return TY_(getPriorityAttrList)( impl );
  584     return (TidyIterator) -1;
  585 }
  586 
  587 ctmbstr TIDY_CALL      tidyOptGetNextPriorityAttr(TidyDoc tdoc, TidyIterator* iter )
  588 {
  589     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  590     ctmbstr result = NULL;
  591     if ( impl )
  592         result = TY_(getNextPriorityAttr)( impl, iter );
  593     else if ( iter )
  594         *iter = 0;
  595     return result;
  596 }
  597 
  598 TidyIterator TIDY_CALL tidyOptGetMutedMessageList( TidyDoc tdoc )
  599 {
  600     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  601     if ( impl )
  602         return TY_(getMutedMessageList)( impl );
  603     return (TidyIterator) -1;
  604 }
  605 
  606 ctmbstr TIDY_CALL      tidyOptGetNextMutedMessage(TidyDoc tdoc, TidyIterator* iter )
  607 {
  608     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  609     ctmbstr result = NULL;
  610     if ( impl )
  611         result = TY_(getNextMutedMessage)( impl, iter );
  612     else if ( iter )
  613         *iter = 0;
  614     return result;
  615 }
  616 
  617 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
  618 {
  619     const TidyOptionId optId = tidyOptGetId( opt );
  620     return tidyLocalizedString(optId);
  621 }
  622 
  623 #if SUPPORT_CONSOLE_APP
  624 /* TODO - GROUP ALL CONSOLE-ONLY FUNCTIONS */
  625 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
  626 {
  627     const TidyOptionId optId = tidyOptGetId( opt );
  628     const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId );
  629     if (docDesc && docDesc->links)
  630         return (TidyIterator)docDesc->links;
  631     return (TidyIterator)NULL;
  632 }
  633 #endif /* SUPPORT_CONSOLE_APP */
  634 
  635 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
  636 {
  637     const TidyOptionId* curr = (const TidyOptionId *)*pos;
  638     TidyOption opt;
  639 
  640     if (*curr == TidyUnknownOption)
  641     {
  642         *pos = (TidyIterator)NULL;
  643         return (TidyOption)0;
  644     }
  645     opt = tidyGetOption(tdoc, *curr);
  646     curr++;
  647     *pos = (*curr == TidyUnknownOption ) ?
  648         (TidyIterator)NULL:(TidyIterator)curr;
  649     return opt;
  650 }
  651 
  652 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
  653 {
  654     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  655     if ( impl )
  656         return TY_(SaveConfigFile)( impl, cfgfil );
  657     return -EINVAL;
  658 }
  659 
  660 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
  661 {
  662     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  663     if ( impl )
  664         return TY_(SaveConfigSink)( impl, sink );
  665     return -EINVAL;
  666 }
  667 
  668 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
  669 {
  670     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  671     if ( impl )
  672     {
  673         TY_(TakeConfigSnapshot)( impl );
  674         return yes;
  675     }
  676     return no;
  677 }
  678 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
  679 {
  680     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  681     if ( impl )
  682     {
  683         TY_(ResetConfigToSnapshot)( impl );
  684         return yes;
  685     }
  686     return no;
  687 }
  688 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
  689 {
  690     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  691     if ( impl )
  692     {
  693         TY_(ResetConfigToDefault)( impl );
  694         return yes;
  695     }
  696     return no;
  697 }
  698 
  699 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
  700 {
  701     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  702     if ( impl )
  703         return TY_(ResetOptionToDefault)( impl, optId );
  704     return no;
  705 }
  706 
  707 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
  708 {
  709     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  710     if ( impl )
  711         return TY_(ConfigDiffThanDefault)( impl );
  712     return no;
  713 }
  714 Bool TIDY_CALL          tidyOptDiffThanSnapshot( TidyDoc tdoc )
  715 {
  716     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  717     if ( impl )
  718         return TY_(ConfigDiffThanSnapshot)( impl );
  719     return no;
  720 }
  721 
  722 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
  723 {
  724     TidyDocImpl* docTo = tidyDocToImpl( to );
  725     TidyDocImpl* docFrom = tidyDocToImpl( from );
  726     if ( docTo && docFrom )
  727     {
  728         TY_(CopyConfig)( docTo, docFrom );
  729         return yes;
  730     }
  731     return no;
  732 }
  733 
  734 
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use an instance of input and
** output handlers for standard C buffered I/O (i.e. FILE* stdin,
** FILE* stdout and FILE* stderr for content input, content output and
** diagnostic output, respectively).  A FILE* cfgFile input handler will
** be used for config files.  Command line options will just be set
** directly.
*/
  743 
  744 void TIDY_CALL tidySetEmacsFile( TidyDoc tdoc, ctmbstr filePath )
  745 {
  746     tidyOptSetValue( tdoc, TidyEmacsFile, filePath );
  747 }
  748 
  749 ctmbstr TIDY_CALL tidyGetEmacsFile( TidyDoc tdoc )
  750 {
  751     return tidyOptGetValue( tdoc, TidyEmacsFile );
  752 }
  753 
  754 
  755 /* Use TidyReportFilter to filter messages by diagnostic level:
  756 ** info, warning, etc.  Just set diagnostic output
  757 ** handler to redirect all diagnostics output.  Return true
  758 ** to proceed with output, false to cancel.
  759 */
  760 Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
  761 {
  762   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  763   if ( impl )
  764   {
  765     impl->reportFilter = filt;
  766     return yes;
  767   }
  768   return no;
  769 }
  770 
  771 /* tidySetReportCallback functions similar to TidyReportFilter, but provides the
  772  * string version of the internal enum name so that LibTidy users can use
  773 ** the string as a lookup key for providing their own error localizations.
  774 ** See the string key definitions in tidyenum.h.
  775 */
  776 Bool TIDY_CALL tidySetReportCallback( TidyDoc tdoc, TidyReportCallback filt )
  777 {
  778   TidyDocImpl* impl = tidyDocToImpl( tdoc );
  779   if ( impl )
  780   {
  781     impl->reportCallback = filt;
  782     return yes;
  783   }
  784   return no;
  785 }
  786 
  787 Bool TIDY_CALL tidySetMessageCallback( TidyDoc tdoc, TidyMessageCallback filt )
  788 {
  789     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  790     if ( impl )
  791     {
  792         impl->messageCallback = filt;
  793         return yes;
  794     }
  795     return no;
  796 }
  797 
  798 TidyDoc TIDY_CALL tidyGetMessageDoc( TidyMessage tmessage )
  799 {
  800     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  801     TidyDocImpl* doc = TY_(getMessageDoc)(*message);
  802     return tidyImplToDoc(doc);
  803 }
  804 
  805 uint TIDY_CALL tidyGetMessageCode( TidyMessage tmessage )
  806 {
  807     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  808     return TY_(getMessageCode)(*message);
  809 }
  810 
  811 ctmbstr TIDY_CALL tidyGetMessageKey( TidyMessage tmessage )
  812 {
  813     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  814     return TY_(getMessageKey)(*message);
  815 }
  816 
  817 int TIDY_CALL tidyGetMessageLine( TidyMessage tmessage )
  818 {
  819     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  820     return TY_(getMessageLine)(*message);
  821 }
  822 
  823 int TIDY_CALL tidyGetMessageColumn( TidyMessage tmessage )
  824 {
  825     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  826     return TY_(getMessageColumn)(*message);
  827 }
  828 
  829 TidyReportLevel TIDY_CALL tidyGetMessageLevel( TidyMessage tmessage )
  830 {
  831     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  832     return TY_(getMessageLevel)(*message);
  833 }
  834 
  835 Bool TIDY_CALL tidyGetMessageIsMuted( TidyMessage tmessage )
  836 {
  837     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  838     return TY_(getMessageIsMuted)(*message);
  839 }
  840 
  841 ctmbstr TIDY_CALL tidyGetMessageFormatDefault( TidyMessage tmessage )
  842 {
  843     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  844     return TY_(getMessageFormatDefault)(*message);
  845 }
  846 
  847 ctmbstr TIDY_CALL tidyGetMessageFormat( TidyMessage tmessage )
  848 {
  849     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  850     return TY_(getMessageFormat)(*message);
  851 }
  852 
  853 ctmbstr TIDY_CALL tidyGetMessageDefault( TidyMessage tmessage )
  854 {
  855     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  856     return TY_(getMessageDefault)(*message);
  857 }
  858 
  859 ctmbstr TIDY_CALL tidyGetMessage( TidyMessage tmessage )
  860 {
  861     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  862     return TY_(getMessage)(*message);
  863 }
  864 
  865 ctmbstr TIDY_CALL tidyGetMessagePosDefault( TidyMessage tmessage )
  866 {
  867     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  868     return TY_(getMessagePosDefault)(*message);
  869 }
  870 
  871 ctmbstr TIDY_CALL tidyGetMessagePos( TidyMessage tmessage )
  872 {
  873     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  874     return TY_(getMessagePos)(*message);
  875 }
  876 
  877 ctmbstr TIDY_CALL tidyGetMessagePrefixDefault( TidyMessage tmessage )
  878 {
  879     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  880     return TY_(getMessagePrefixDefault)(*message);
  881 }
  882 
  883 ctmbstr TIDY_CALL tidyGetMessagePrefix( TidyMessage tmessage )
  884 {
  885     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  886     return TY_(getMessagePrefix)(*message);
  887 }
  888 
  889 
  890 ctmbstr TIDY_CALL tidyGetMessageOutputDefault( TidyMessage tmessage )
  891 {
  892     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  893     return TY_(getMessageOutputDefault)(*message);
  894 }
  895 
  896 ctmbstr TIDY_CALL tidyGetMessageOutput( TidyMessage tmessage )
  897 {
  898     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  899     return TY_(getMessageOutput)(*message);
  900 }
  901 
  902 TidyIterator TIDY_CALL tidyGetMessageArguments( TidyMessage tmessage )
  903 {
  904     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  905     return TY_(getMessageArguments)(*message);
  906 }
  907 
  908 TidyMessageArgument TIDY_CALL tidyGetNextMessageArgument( TidyMessage tmessage, TidyIterator* iter )
  909 {
  910     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  911     return TY_(getNextMessageArgument)(*message, iter);
  912 }
  913 
  914 TidyFormatParameterType TIDY_CALL tidyGetArgType( TidyMessage tmessage, TidyMessageArgument* arg )
  915 {
  916     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  917     return TY_(getArgType)(*message, arg);
  918 }
  919 
  920 ctmbstr TIDY_CALL tidyGetArgFormat( TidyMessage tmessage, TidyMessageArgument* arg )
  921 {
  922     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  923     return TY_(getArgFormat)(*message, arg);
  924 }
  925 
  926 ctmbstr TIDY_CALL tidyGetArgValueString( TidyMessage tmessage, TidyMessageArgument* arg )
  927 {
  928     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  929     return TY_(getArgValueString)(*message, arg);
  930 }
  931 
  932 uint TIDY_CALL tidyGetArgValueUInt( TidyMessage tmessage, TidyMessageArgument* arg )
  933 {
  934     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  935     return TY_(getArgValueUInt)(*message, arg);
  936 }
  937 
  938 int TIDY_CALL tidyGetArgValueInt( TidyMessage tmessage, TidyMessageArgument* arg )
  939 {
  940     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  941     return TY_(getArgValueInt)(*message, arg);
  942 }
  943 
  944 double TIDY_CALL tidyGetArgValueDouble( TidyMessage tmessage, TidyMessageArgument* arg )
  945 {
  946     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
  947     return TY_(getArgValueDouble)(*message, arg);
  948 }
  949 
  950 
  951 FILE* TIDY_CALL   tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
  952 {
  953     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  954     if ( impl )
  955     {
  956         FILE* errout = fopen( errfilnam, "wb" );
  957         if ( errout )
  958         {
  959             uint outenc = cfg( impl, TidyOutCharEncoding );
  960             uint nl = cfg( impl, TidyNewline );
  961             TY_(ReleaseStreamOut)( impl, impl->errout );
  962             impl->errout = TY_(FileOutput)( impl, errout, outenc, nl );
  963             return errout;
  964         }
  965         else /* Emit message to current error sink */
  966             TY_(ReportFileError)( impl, errfilnam, FILE_CANT_OPEN );
  967     }
  968     return NULL;
  969 }
  970 
  971 int TIDY_CALL    tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
  972 {
  973     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  974     if ( impl )
  975     {
  976         uint outenc = cfg( impl, TidyOutCharEncoding );
  977         uint nl = cfg( impl, TidyNewline );
  978         TY_(ReleaseStreamOut)( impl, impl->errout );
  979         impl->errout = TY_(BufferOutput)( impl, errbuf, outenc, nl );
  980         return ( impl->errout ? 0 : -ENOMEM );
  981     }
  982     return -EINVAL;
  983 }
  984 
  985 int TIDY_CALL    tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
  986 {
  987     TidyDocImpl* impl = tidyDocToImpl( tdoc );
  988     if ( impl )
  989     {
  990         uint outenc = cfg( impl, TidyOutCharEncoding );
  991         uint nl = cfg( impl, TidyNewline );
  992         TY_(ReleaseStreamOut)( impl, impl->errout );
  993         impl->errout = TY_(UserOutput)( impl, sink, outenc, nl );
  994         return ( impl->errout ? 0 : -ENOMEM );
  995     }
  996     return -EINVAL;
  997 }
  998 
  999 /* Use TidyPPProgress to monitor the progress of the pretty printer.
 1000  */
 1001 Bool TIDY_CALL        tidySetPrettyPrinterCallback(TidyDoc tdoc, TidyPPProgress callback)
 1002 {
 1003     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1004     if ( impl )
 1005     {
 1006         impl->progressCallback = callback;
 1007         return yes;
 1008     }
 1009     return no;
 1010 }
 1011 
 1012 
 1013 /* Document info */
 1014 int TIDY_CALL        tidyStatus( TidyDoc tdoc )
 1015 {
 1016     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1017     int tidyStat = -EINVAL;
 1018     if ( impl )
 1019         tidyStat = tidyDocStatus( impl );
 1020     return tidyStat;
 1021 }
 1022 int TIDY_CALL        tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
 1023 {
 1024     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1025     return TY_(HTMLVersionNumberFromCode)( impl->lexer->versionEmitted );
 1026 }
 1027 
 1028 Bool TIDY_CALL        tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
 1029 {
 1030     TidyDocImpl* impl = tidyDocToImpl( tdoc ); 
 1031     return impl->lexer->isvoyager;
 1032 }
 1033 Bool TIDY_CALL        tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
 1034 {
 1035     TidyDocImpl* impl = tidyDocToImpl( tdoc ); 
 1036     return impl->xmlDetected;
 1037 }
 1038 
 1039 uint TIDY_CALL       tidyErrorCount( TidyDoc tdoc )
 1040 {
 1041     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1042     uint count = 0xFFFFFFFF;
 1043     if ( impl )
 1044         count = impl->errors;
 1045     return count;
 1046 }
 1047 uint TIDY_CALL       tidyWarningCount( TidyDoc tdoc )
 1048 {
 1049     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1050     uint count = 0xFFFFFFFF;
 1051     if ( impl )
 1052         count = impl->warnings;
 1053     return count;
 1054 }
 1055 uint TIDY_CALL       tidyAccessWarningCount( TidyDoc tdoc )
 1056 {
 1057     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1058     uint count = 0xFFFFFFFF;
 1059     if ( impl )
 1060         count = impl->accessErrors;
 1061     return count;
 1062 }
 1063 uint TIDY_CALL       tidyConfigErrorCount( TidyDoc tdoc )
 1064 {
 1065     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1066     uint count = 0xFFFFFFFF;
 1067     if ( impl )
 1068         count = impl->optionErrors;
 1069     return count;
 1070 }
 1071 
 1072 
 1073 /* Error reporting functions
 1074 */
 1075 void TIDY_CALL         tidyErrorSummary( TidyDoc tdoc )
 1076 {
 1077     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1078     if ( impl )
 1079         TY_(ErrorSummary)( impl );
 1080 }
 1081 void TIDY_CALL         tidyGeneralInfo( TidyDoc tdoc )
 1082 {
 1083     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1084     if ( impl )
 1085     {
 1086         TY_(Dialogue)( impl, TEXT_GENERAL_INFO );
 1087         TY_(Dialogue)( impl, TEXT_GENERAL_INFO_PLEA );
 1088     }
 1089 }
 1090 
 1091 
 1092 /* I/O Functions
 1093 **
 1094 ** Initial version supports only whole-file operations.
 1095 ** Do not expose Tidy StreamIn or Out data structures - yet.
 1096 */
 1097 
 1098 /* Parse/load Functions
 1099 **
 1100 ** HTML/XHTML version determined from input.
 1101 */
 1102 int TIDY_CALL  tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
 1103 {
 1104     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1105     return tidyDocParseFile( doc, filnam );
 1106 }
 1107 int TIDY_CALL  tidyParseStdin( TidyDoc tdoc )
 1108 {
 1109     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1110     return tidyDocParseStdin( doc );
 1111 }
 1112 int TIDY_CALL  tidyParseString( TidyDoc tdoc, ctmbstr content )
 1113 {
 1114     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1115     return tidyDocParseString( doc, content );
 1116 }
 1117 int TIDY_CALL  tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
 1118 {
 1119     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1120     return tidyDocParseBuffer( doc, inbuf );
 1121 }
 1122 int TIDY_CALL  tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
 1123 {
 1124     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1125     return tidyDocParseSource( doc, source );
 1126 }
 1127 
 1128 #ifdef WIN32
 1129 #define M_IS_DIR _S_IFDIR
 1130 #else // !WIN32
 1131 #define M_IS_DIR S_IFDIR
 1132 #endif
 1133 int   tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
 1134 {
 1135     int status = -ENOENT;
 1136     FILE* fin = 0;
 1137     struct stat sbuf = { 0 }; /* Is. #681 - read-only files */
 1138     if ( stat(filnam,&sbuf) != 0 )
 1139     {
 1140         TY_(ReportFileError)( doc, filnam, FILE_NOT_FILE );
 1141         return status;
 1142     }
 1143     if (sbuf.st_mode & M_IS_DIR) /* and /NOT/ if a DIRECTORY */
 1144     {
 1145         TY_(ReportFileError)(doc, filnam, FILE_NOT_FILE);
 1146         return status;
 1147     }
 1148 
 1149 #ifdef _WIN32
 1150     return TY_(DocParseFileWithMappedFile)( doc, filnam );
 1151 #else
 1152 
 1153     fin = fopen( filnam, "rb" );
 1154 
 1155 #if PRESERVE_FILE_TIMES
 1156     {
 1157         /* get last modified time */
 1158         TidyClearMemory(&doc->filetimes, sizeof(doc->filetimes));
 1159         if (fin && cfgBool(doc, TidyKeepFileTimes) &&
 1160             fstat(fileno(fin), &sbuf) != -1)
 1161         {
 1162             doc->filetimes.actime = sbuf.st_atime;
 1163             doc->filetimes.modtime = sbuf.st_mtime;
 1164         }
 1165     }
 1166 #endif
 1167 
 1168     if ( fin )
 1169     {
 1170         StreamIn* in = TY_(FileInput)( doc, fin, cfg( doc, TidyInCharEncoding ));
 1171         if ( !in )
 1172         {
 1173             fclose( fin );
 1174             return status;
 1175         }
 1176         status = TY_(DocParseStream)( doc, in );
 1177         TY_(freeFileSource)(&in->source, yes);
 1178         TY_(freeStreamIn)(in);
 1179     }
 1180     else /* Error message! */
 1181         TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
 1182     return status;
 1183 #endif
 1184 }
 1185 
 1186 int   tidyDocParseStdin( TidyDocImpl* doc )
 1187 {
 1188     StreamIn* in = TY_(FileInput)( doc, stdin, cfg( doc, TidyInCharEncoding ));
 1189     int status = TY_(DocParseStream)( doc, in );
 1190     TY_(freeFileSource)(&in->source, yes);
 1191     TY_(freeStreamIn)(in);
 1192     return status;
 1193 }
 1194 
 1195 int   tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
 1196 {
 1197     int status = -EINVAL;
 1198     if ( inbuf )
 1199     {
 1200         StreamIn* in = TY_(BufferInput)( doc, inbuf, cfg( doc, TidyInCharEncoding ));
 1201         status = TY_(DocParseStream)( doc, in );
 1202         TY_(freeStreamIn)(in);
 1203     }
 1204     return status;
 1205 }
 1206 
 1207 int   tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
 1208 {
 1209     int status = -EINVAL;
 1210     TidyBuffer inbuf;
 1211     StreamIn* in = NULL;
 1212 
 1213     if ( content )
 1214     {
 1215         tidyBufInitWithAllocator( &inbuf, doc->allocator );
 1216         tidyBufAttach( &inbuf, (byte*)content, TY_(tmbstrlen)(content)+1 );
 1217         in = TY_(BufferInput)( doc, &inbuf, cfg( doc, TidyInCharEncoding ));
 1218         status = TY_(DocParseStream)( doc, in );
 1219         tidyBufDetach( &inbuf );
 1220         TY_(freeStreamIn)(in);
 1221     }
 1222     return status;
 1223 }
 1224 
 1225 int   tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
 1226 {
 1227     StreamIn* in = TY_(UserInput)( doc, source, cfg( doc, TidyInCharEncoding ));
 1228     int status = TY_(DocParseStream)( doc, in );
 1229     TY_(freeStreamIn)(in);
 1230     return status;
 1231 }
 1232 
 1233 
 1234 /* Print/save Functions
 1235 **
 1236 */
 1237 int TIDY_CALL        tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
 1238 {
 1239     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1240     return tidyDocSaveFile( doc, filnam );
 1241 }
 1242 int TIDY_CALL        tidySaveStdout( TidyDoc tdoc )
 1243 {
 1244     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1245     return tidyDocSaveStdout( doc );
 1246 }
 1247 int TIDY_CALL        tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
 1248 {
 1249     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1250     return tidyDocSaveString( doc, buffer, buflen );
 1251 }
 1252 int TIDY_CALL        tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
 1253 {
 1254     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1255     return tidyDocSaveBuffer( doc, outbuf );
 1256 }
 1257 int TIDY_CALL        tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
 1258 {
 1259     TidyDocImpl* doc = tidyDocToImpl( tdoc );
 1260     return tidyDocSaveSink( doc, sink );
 1261 }
 1262 
 1263 int         tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
 1264 {
 1265     int status = -ENOENT;
 1266     FILE* fout = NULL;
 1267 
 1268     /* Don't zap input file if no output */
 1269     if ( doc->errors > 0 &&
 1270          cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
 1271         status = tidyDocStatus( doc );
 1272     else
 1273         fout = fopen( filnam, "wb" );
 1274 
 1275     if ( fout )
 1276     {
 1277         uint outenc = cfg( doc, TidyOutCharEncoding );
 1278         uint nl = cfg( doc, TidyNewline );
 1279         StreamOut* out = TY_(FileOutput)( doc, fout, outenc, nl );
 1280 
 1281         status = tidyDocSaveStream( doc, out );
 1282 
 1283         fclose( fout );
 1284         TidyDocFree( doc, out );
 1285 
 1286 #if PRESERVE_FILE_TIMES
 1287         if ( doc->filetimes.actime )
 1288         {
 1289             /* set file last accessed/modified times to original values */
 1290             utime( filnam, &doc->filetimes );
 1291             TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
 1292         }
 1293 #endif /* PRESERVFILETIMES */
 1294     }
 1295     if ( status < 0 ) /* Error message! */
 1296         TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
 1297     return status;
 1298 }
 1299 
 1300 
 1301 
 1302 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
 1303 ** The code has been left in in case it works w/ other compilers
 1304 ** or operating systems.  If stdout is in Text mode, be aware that
 1305 ** it will garble UTF16 documents.  In text mode, when it encounters
 1306 ** a single byte of value 10 (0xA), it will insert a single byte
 1307 ** value 13 (0xD) just before it.  This has the effect of garbling
 1308 ** the entire document.
 1309 */
 1310 
 1311 #if !defined(NO_SETMODE_SUPPORT)
 1312 #  if defined(_WIN32) || defined(OS2_OS)
 1313 #   include <fcntl.h>
 1314 #     include <io.h>
 1315 #   endif
 1316 #endif
 1317 
 1318 int         tidyDocSaveStdout( TidyDocImpl* doc )
 1319 {
 1320 #if !defined(NO_SETMODE_SUPPORT)
 1321 #  if defined(_WIN32) || defined(OS2_OS)
 1322     int oldstdoutmode = -1, oldstderrmode = -1;
 1323 #  endif
 1324 #endif
 1325 
 1326     int status = 0;
 1327     uint outenc = cfg( doc, TidyOutCharEncoding );
 1328     uint nl = cfg( doc, TidyNewline );
 1329     StreamOut* out = TY_(FileOutput)( doc, stdout, outenc, nl );
 1330 
 1331 #if !defined(NO_SETMODE_SUPPORT)
 1332 #  if defined(_WIN32) || defined(OS2_OS)
 1333     oldstdoutmode = setmode( fileno(stdout), _O_BINARY );
 1334     oldstderrmode = setmode( fileno(stderr), _O_BINARY );
 1335 #  endif
 1336 #endif
 1337 
 1338     if ( 0 == status )
 1339       status = tidyDocSaveStream( doc, out );
 1340 
 1341     fflush(stdout);
 1342     fflush(stderr);
 1343 
 1344 #if !defined(NO_SETMODE_SUPPORT)
 1345 #  if defined(_WIN32) || defined(OS2_OS)
 1346     if ( oldstdoutmode != -1 )
 1347         oldstdoutmode = setmode( fileno(stdout), oldstdoutmode );
 1348     if ( oldstderrmode != -1 )
 1349         oldstderrmode = setmode( fileno(stderr), oldstderrmode );
 1350 #  endif
 1351 #endif
 1352 
 1353     TidyDocFree( doc, out );
 1354     return status;
 1355 }
 1356 
 1357 int         tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
 1358 {
 1359     uint outenc = cfg( doc, TidyOutCharEncoding );
 1360     uint nl = cfg( doc, TidyNewline );
 1361     TidyBuffer outbuf;
 1362     StreamOut* out;
 1363     int status;
 1364 
 1365     tidyBufInitWithAllocator( &outbuf, doc->allocator );
 1366     out = TY_(BufferOutput)( doc, &outbuf, outenc, nl );
 1367     status = tidyDocSaveStream( doc, out );
 1368 
 1369     if ( outbuf.size > *buflen )
 1370         status = -ENOMEM;
 1371     else
 1372         memcpy( buffer, outbuf.bp, outbuf.size );
 1373 
 1374     *buflen = outbuf.size;
 1375     tidyBufFree( &outbuf );
 1376     TidyDocFree( doc, out );
 1377     return status;
 1378 }
 1379 
 1380 int         tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
 1381 {
 1382     int status = -EINVAL;
 1383     if ( outbuf )
 1384     {
 1385         uint outenc = cfg( doc, TidyOutCharEncoding );
 1386         uint nl = cfg( doc, TidyNewline );
 1387         StreamOut* out = TY_(BufferOutput)( doc, outbuf, outenc, nl );
 1388 
 1389         status = tidyDocSaveStream( doc, out );
 1390         TidyDocFree( doc, out );
 1391     }
 1392     return status;
 1393 }
 1394 
 1395 int         tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
 1396 {
 1397     uint outenc = cfg( doc, TidyOutCharEncoding );
 1398     uint nl = cfg( doc, TidyNewline );
 1399     StreamOut* out = TY_(UserOutput)( doc, sink, outenc, nl );
 1400     int status = tidyDocSaveStream( doc, out );
 1401     TidyDocFree( doc, out );
 1402     return status;
 1403 }
 1404 
 1405 int         tidyDocStatus( TidyDocImpl* doc )
 1406 {
 1407     if ( doc->errors > 0 )
 1408         return 2;
 1409     if ( doc->warnings > 0 || doc->accessErrors > 0 )
 1410         return 1;
 1411     return 0;
 1412 }
 1413 
 1414 
 1415 
 1416 int TIDY_CALL        tidyCleanAndRepair( TidyDoc tdoc )
 1417 {
 1418     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1419     if ( impl )
 1420       return tidyDocCleanAndRepair( impl );
 1421     return -EINVAL;
 1422 }
 1423 
 1424 int TIDY_CALL        tidyRunDiagnostics( TidyDoc tdoc )
 1425 {
 1426     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1427     if ( impl )
 1428       return tidyDocRunDiagnostics( impl );
 1429     return -EINVAL;
 1430 }
 1431 
 1432 int TIDY_CALL        tidyReportDoctype( TidyDoc tdoc )
 1433 {
 1434     int iret = -EINVAL;
 1435     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 1436     if ( impl ) {
 1437       tidyDocReportDoctype( impl );
 1438       iret = 0;
 1439     }
 1440     return iret;
 1441 }
 1442 
 1443 /* Workhorse functions.
 1444 **
 1445 ** Parse requires input source, all input config items
 1446 ** and diagnostic sink to have all been set before calling.
 1447 **
 1448 ** Emit likewise requires that document sink and all
 1449 ** pretty printing options have been set.
 1450 */
 1451 static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
 1452 
 1453 int         TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in )
 1454 {
 1455     Bool xmlIn = cfgBool( doc, TidyXmlTags );
 1456     TidyConfigChangeCallback callback = doc->pConfigChangeCallback;
 1457     
 1458     int bomEnc;
 1459     doc->pConfigChangeCallback = NULL;
 1460 
 1461     assert( doc != NULL && in != NULL );
 1462     assert( doc->docIn == NULL );
 1463     doc->docIn = in;
 1464 
 1465     TY_(ResetTags)(doc);             /* Reset table to html5 mode */
 1466     TY_(TakeConfigSnapshot)( doc );  /* Save config state */
 1467     TY_(AdjustConfig)( doc );        /* Ensure config internal consistency */
 1468     TY_(FreeAnchors)( doc );
 1469 
 1470     TY_(FreeNode)(doc, &doc->root);
 1471     TidyClearMemory(&doc->root, sizeof(Node));
 1472 
 1473     if (doc->givenDoctype)
 1474         TidyDocFree(doc, doc->givenDoctype);
 1475     /*\ 
 1476      *  Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
 1477      *  to determine which hash is to be used, so free it last.
 1478     \*/
 1479     TY_(FreeLexer)( doc );
 1480     doc->givenDoctype = NULL;
 1481 
 1482     doc->lexer = TY_(NewLexer)( doc );
 1483     /* doc->lexer->root = &doc->root; */
 1484     doc->root.line = doc->lexer->lines;
 1485     doc->root.column = doc->lexer->columns;
 1486     doc->inputHadBOM = no;
 1487     doc->xmlDetected = no;
 1488 
 1489     bomEnc = TY_(ReadBOMEncoding)(in);
 1490 
 1491     if (bomEnc != -1)
 1492     {
 1493         in->encoding = bomEnc;
 1494         TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc);
 1495     }
 1496 
 1497     /* Tidy doesn't alter the doctype for generic XML docs */
 1498     if ( xmlIn )
 1499     {
 1500         TY_(ParseXMLDocument)( doc );
 1501         if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
 1502             TidyPanic( doc->allocator, integrity );
 1503     }
 1504     else
 1505     {
 1506         doc->warnings = 0;
 1507         TY_(ParseDocument)( doc );
 1508         if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
 1509             TidyPanic( doc->allocator, integrity );
 1510     }
 1511 
 1512     doc->docIn = NULL;
 1513     doc->pConfigChangeCallback = callback;
 1514 
 1515     return tidyDocStatus( doc );
 1516 }
 1517 
 1518 int         tidyDocRunDiagnostics( TidyDocImpl* doc )
 1519 {
 1520     TY_(ReportMarkupVersion)( doc );
 1521     TY_(ReportNumWarnings)( doc );
 1522 
 1523     if ( doc->errors > 0 && !cfgBool( doc, TidyForceOutput ) )
 1524         TY_(Dialogue)(doc, STRING_NEEDS_INTERVENTION );
 1525 
 1526      return tidyDocStatus( doc );
 1527 }
 1528 
 1529 void         tidyDocReportDoctype( TidyDocImpl* doc )
 1530 {
 1531         TY_(ReportMarkupVersion)( doc );
 1532 }
 1533 
 1534 
 1535 /*****************************************************************************
 1536  *  HTML5 STUFF
 1537  *****************************************************************************/
 1538 #if 0 && defined(ENABLE_DEBUG_LOG)
 1539 extern void show_not_html5(void);
 1540 /* -----------------------------
 1541 List tags that do not have version HTML5 (HT50|XH50)
 1542 
 1543 acronym applet basefont big center dir font frame frameset isindex
 1544 listing noframes plaintext rb rbc rtc strike tt xmp nextid
 1545 align bgsound blink comment ilayer layer marquee multicol nobr noembed
 1546 nolayer nosave server servlet spacer
 1547 
 1548 Listed total 35 tags that do not have version 393216
 1549    ------------------------------ */
 1550 
/* Debug helper (compiled out by the enclosing `#if 0`): calls
** show_not_html5() the first time it is invoked and does nothing on
** later calls.
*/
static void list_not_html5(void)
{
    static Bool done_list = no;   /* set once the list has been shown */
    if (done_list == no) {
        done_list = yes;
        show_not_html5();
    }
}
 1559 #endif
 1560 
/* Table of element names and tag ids consulted by inRemovedInfo().
** The list ends with a { 0, 0 } sentinel entry.
** What about <blink>, <s> strike-through, <u> underline
*/
static struct _html5Info
{
    const char *tag;   /* element name; 0 marks the end of the table */
    uint id;           /* matching TidyTag_* id */
} const html5Info[] = {
    {"acronym", TidyTag_ACRONYM},
    {"applet", TidyTag_APPLET  },
    {"basefont",TidyTag_BASEFONT },
    { "big", TidyTag_BIG },
    { "center", TidyTag_CENTER },
    { "dir", TidyTag_DIR },
    { "font", TidyTag_FONT },
    { "frame", TidyTag_FRAME},
    { "frameset", TidyTag_FRAMESET},
    { "noframes", TidyTag_NOFRAMES },
    { "strike", TidyTag_STRIKE },
    { "tt", TidyTag_TT },
    { 0, 0 }
};
 1581 static Bool inRemovedInfo( uint tid )
 1582 {
 1583     int i;
 1584     for (i = 0; ; i++) {
 1585         if (html5Info[i].tag == 0)
 1586             break;
 1587         if (html5Info[i].id == tid)
 1588             return yes;
 1589     }
 1590     return no;
 1591 }
 1592 
/* Things that should not be in an HTML5 body. This is special for CheckHTML5(),
 and we might just want to remove CheckHTML5()'s output altogether and count
 on the default --strict-tags-attributes.
 CheckHTML5() walks this list until it reaches TidyAttr_UNKNOWN, so that
 value must remain the final entry.
 */
static int BadBody5Attribs[] = {
    TidyAttr_BACKGROUND,
    TidyAttr_BGCOLOR,
    TidyAttr_TEXT,
    TidyAttr_LINK,
    TidyAttr_VLINK,
    TidyAttr_ALINK,
    TidyAttr_UNKNOWN /* Must be last! */
};
 1606 
 1607 static Bool nodeHasAlignAttr( Node *node )
 1608 {
 1609     /* #define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN  ) */
 1610     AttVal* av;
 1611     for ( av = node->attributes; av != NULL; av = av->next ) {
 1612         if (attrIsALIGN(av))
 1613             return yes;
 1614     }
 1615     return no;
 1616 }
 1617 
 1618 /*
 1619  *  Perform special checks for HTML, even when we're not using the default
 1620  *  option `--strict-tags-attributes yes`. This will ensure that HTML5 warning
 1621  *  and error output is given regardless of the new option, and ensure that
 1622  *  cleanup takes place. This provides mostly consistent Tidy behavior even with
 1623  *  the introduction of this new option. Note that strings have changed, though,
 1624  *  in order to maintain consistency with the `--strict-tags-attributes` 
 1625  *  messages.
 1626  *
 1627  *  See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
 1628  */
 1629 static void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
 1630 {
 1631     Bool clean = cfgBool( doc, TidyMakeClean );
 1632     Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
 1633     Node* body = TY_(FindBody)( doc );
 1634     Bool warn = yes;    /* should this be a warning, error, or report??? */
 1635     AttVal* attr = NULL;
 1636     int i = 0;
 1637 
 1638     while (node)
 1639     {
 1640         if ( nodeHasAlignAttr( node ) ) {
 1641             /* @todo: Is this for ALL elements that accept an 'align' attribute,
 1642              * or should this be a sub-set test?
 1643              */
 1644 
 1645             /* We will only emit this message if `--strict-tags-attributes==no`;
 1646              * otherwise if yes this message will be output during later
 1647              * checking.
 1648              */
 1649             if ( !already_strict )
 1650                 TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
 1651         }
 1652         if ( node == body ) {
 1653             i = 0;
 1654             /* We will only emit these messages if `--strict-tags-attributes==no`;
 1655              * otherwise if yes these messages will be output during later
 1656              * checking.
 1657              */
 1658             if ( !already_strict ) {
 1659                 while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
 1660                     attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
 1661                     if ( attr )
 1662                         TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
 1663                     i++;
 1664                 }
 1665             }
 1666         } else
 1667         if ( nodeIsACRONYM(node) ) {
 1668             if (clean) {
 1669                 /* Replace with 'abbr' with warning to that effect.
 1670                  * Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
 1671                  */
 1672                 TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
 1673             } else {
 1674                 if ( !already_strict )
 1675                     TY_(Report)(doc, node, node, REMOVED_HTML5);
 1676             }
 1677         } else
 1678         if ( nodeIsAPPLET(node) ) {
 1679             if (clean) {
 1680                 /* replace with 'object' with warning to that effect
 1681                  * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
 1682                  */
 1683                 TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
 1684             } else {
 1685                 if ( !already_strict )
 1686                     TY_(Report)(doc, node, node, REMOVED_HTML5);
 1687             }
 1688         } else
 1689         if ( nodeIsBASEFONT(node) ) {
 1690             /* basefont: CSS equivalent 'font-size', 'font-family' and 'color' 
 1691              * on body or class on each subsequent element.
 1692              * Difficult - If it is the first body element, then could consider
 1693              * adding that to the <body> as a whole, else could perhaps apply it
 1694              * to all subsequent elements. But also in consideration is the fact
 1695              * that it was NOT supported in many browsers.
 1696              * - For now just report a warning
 1697              */
 1698             if ( !already_strict )
 1699                 TY_(Report)(doc, node, node, REMOVED_HTML5);
 1700         } else
 1701         if ( nodeIsBIG(node) ) {
 1702             /* big: CSS equivalent 'font-size:larger'
 1703              * so could replace the <big> ... </big> with
 1704              * <span style="font-size: larger"> ... </span>
 1705              * then replace <big> with <span>
 1706              * Need to think about that...
 1707              * Could use -
 1708              *   TY_(AddStyleProperty)( doc, node, "font-size: larger" );
 1709              *   TY_(CoerceNode)(doc, node, TidyTag_SPAN, no, no);
 1710              * Alternatively generated a <style> but how to get the style name
 1711              * TY_(AddAttribute)( doc, node, "class", "????" );
 1712              * Also maybe need a specific message like
 1713              * Element '%s' replaced with 'span' with a 'font-size: larger style attribute
 1714              * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
 1715              */
 1716             if (clean) {
 1717                 TY_(AddStyleProperty)( doc, node, "font-size: larger" );
 1718                 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
 1719             } else {
 1720                 if ( !already_strict )
 1721                     TY_(Report)(doc, node, node, REMOVED_HTML5);
 1722             }
 1723         } else
 1724         if ( nodeIsCENTER(node) ) {
 1725             /* center: CSS equivalent 'text-align:center'
 1726              * and 'margin-left:auto; margin-right:auto' on descendant blocks
 1727              * Tidy already handles this if 'clean' by SILENTLY generating the
 1728              * <style> and adding a <div class="c1"> around the elements.
 1729              * see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
 1730              */
 1731             if ( !already_strict )
 1732                 TY_(Report)(doc, node, node, REMOVED_HTML5);
 1733         } else
 1734         if ( nodeIsDIR(node) ) {
 1735             /* dir: replace by <ul>
 1736              * Tidy already actions this and issues a warning
 1737              * Should this be CHANGED???
 1738              */
 1739             if ( !already_strict )
 1740                 TY_(Report)(doc, node, node, REMOVED_HTML5);
 1741         } else
 1742         if ( nodeIsFONT(node) ) {
 1743             /* Tidy already handles this -
 1744              * If 'clean' replaced by CSS, else
 1745              * if is NOT clean, and doctype html5 then warnings issued
 1746              * done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
 1747              */
 1748             if ( !already_strict )
 1749                 TY_(Report)(doc, node, node, REMOVED_HTML5);
 1750         } else
 1751         if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
 1752             /* YOW: What to do here?????? Maybe <iframe>????
 1753              */
 1754             if ( !already_strict )
 1755                 TY_(Report)(doc, node, node, REMOVED_HTML5);
 1756         } else
 1757         if ( nodeIsSTRIKE(node) ) {
 1758             /* strike: CSS equivalent 'text-decoration:line-through'
 1759              * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
 1760              */
 1761             if (clean) {
 1762                 TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
 1763                 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
 1764             } else {
 1765                 if ( !already_strict )
 1766                     TY_(Report)(doc, node, node, REMOVED_HTML5);
 1767             }
 1768         } else
 1769         if ( nodeIsTT(node) ) {
 1770             /* tt: CSS equivalent 'font-family:monospace'
 1771              * Tidy presently does nothing. Tidy5 issues a warning
 1772              * But like the 'clean' <font> replacement this could also be replaced with CSS
 1773              * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
 1774              */
 1775             if (clean) {
 1776                 TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
 1777                 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
 1778             } else {
 1779                 if ( !already_strict )
 1780                     TY_(Report)(doc, node, node, REMOVED_HTML5);
 1781             }
 1782         } else
 1783             if (TY_(nodeIsElement)(node)) {
 1784                 if (node->tag) {
 1785                     if ( (!(node->tag->versions & VERS_HTML5) && !(node->tag->versions & VERS_PROPRIETARY)) || (inRemovedInfo(node->tag->id)) ) {
 1786                         if ( !already_strict )
 1787                             TY_(Report)(doc, node, node, REMOVED_HTML5);
 1788                     }
 1789                 }
 1790             }
 1791 
 1792         if (node->content)
 1793             TY_(CheckHTML5)( doc, node->content );
 1794         
 1795         node = node->next;
 1796     }
 1797 }
 1798 /*****************************************************************************
 1799  *  END HTML5 STUFF
 1800  *****************************************************************************/
 1801 
 1802 
 1803 /*
 1804  * Check and report HTML tags and attributes that are:
 1805  *  - Proprietary, and/or
 1806  *  - Not supported in the current version of HTML, defined as the version
 1807  *    of HTML that we are emitting.
 1808  * Proprietary items are reported as WARNINGS, and version mismatches will
 1809  * be reported as WARNING or ERROR in the following conditions:
 1810  *  - ERROR if the emitted doctype is a strict doctype.
 1811  *  - WARNING if the emitted doctype is a non-strict doctype.
 1812  * The propriety checks are *always* run as they have always been an integral
 1813  * part of Tidy. The version checks are controlled by `strict-tags-attributes`.
 1814  */
 1815 static void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
 1816 {
 1817     uint versionEmitted = doc->lexer->versionEmitted;
 1818     uint declared = doc->lexer->doctype;
 1819     uint version = versionEmitted == 0 ? declared : versionEmitted;
 1820     int tagReportType = VERS_STRICT & version ? ELEMENT_VERS_MISMATCH_ERROR : ELEMENT_VERS_MISMATCH_WARN;
 1821     int attrReportType = VERS_STRICT & version ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN;
 1822     Bool check_versions = cfgBool( doc, TidyStrictTagsAttr );
 1823     AttVal *next_attr, *attval;
 1824     Bool attrIsProprietary = no;
 1825     Bool attrIsMismatched = yes;
 1826     Bool tagLooksCustom = no;
 1827     Bool htmlIs5 = (doc->lexer->doctype & VERS_HTML5) > 0;
 1828 
 1829     while (node)
 1830     {
 1831         /* This bit here handles our HTML tags */
 1832         if ( TY_(nodeIsElement)(node) && node->tag ) {
 1833 
 1834             /* Leave XML stuff alone. */
 1835             if ( !cfgBool(doc, TidyXmlTags) )
 1836             {
 1837                 /* Version mismatches take priority. */
 1838                 if ( check_versions && !(node->tag->versions & version) )
 1839                 {
 1840                     TY_(Report)(doc, NULL, node, tagReportType );
 1841                 }
 1842                 /* If it's not mismatched, it could still be proprietary. */
 1843                 else if ( node->tag->versions & VERS_PROPRIETARY )
 1844                 {
 1845                     if ( !cfgBool(doc, TidyMakeClean) ||
 1846                         ( !nodeIsNOBR(node) && !nodeIsWBR(node) ) )
 1847                     {
 1848                         /* It looks custom, despite whether it's a known tag. */
 1849                         tagLooksCustom = TY_(nodeIsAutonomousCustomFormat)( node );
 1850 
 1851                         /* If we're in HTML5 mode and the tag does not look
 1852                            like a valid custom tag, then issue a warning.
 1853                            Appearance is good enough because invalid tags have
 1854                            been dropped. Also, if we're not in HTML5 mode, then
 1855                            then everything that reaches here gets the warning.
 1856                            Everything else can be ignored. */
 1857 
 1858                         if ( (htmlIs5 && !tagLooksCustom) || !htmlIs5 )
 1859                         {
 1860                             TY_(Report)(doc, NULL, node, PROPRIETARY_ELEMENT );
 1861                         }
 1862 
 1863                         if ( nodeIsLAYER(node) )
 1864                             doc->badLayout |= USING_LAYER;
 1865                         else if ( nodeIsSPACER(node) )
 1866                             doc->badLayout |= USING_SPACER;
 1867                         else if ( nodeIsNOBR(node) )
 1868                             doc->badLayout |= USING_NOBR;
 1869                     }
 1870                 }
 1871             }
 1872         }
 1873 
 1874         /* And this bit here handles our attributes */
 1875         if (TY_(nodeIsElement)(node))
 1876         {
 1877             attval = node->attributes;
 1878 
 1879             while (attval)
 1880             {
 1881                 next_attr = attval->next;
 1882 
 1883                 attrIsProprietary = TY_(AttributeIsProprietary)(node, attval);
 1884                 /* Is. #729 - always check version match if HTML5 */
 1885                 attrIsMismatched = (check_versions | htmlIs5) ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
 1886                 /* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
 1887                 if ( attrIsProprietary )
 1888                 {
 1889                     if ( cfgBool(doc, TidyWarnPropAttrs) )
 1890                         TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
 1891                 }
 1892                 else if ( attrIsMismatched )
 1893                 {
 1894                     if (htmlIs5) 
 1895                     { 
 1896                         /* Is. #729 - In html5 TidyStrictTagsAttr controls error or warn */
 1897                         TY_(ReportAttrError)(doc, node, attval,
 1898                             check_versions ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN);
 1899                     }
 1900                     else
 1901                         TY_(ReportAttrError)(doc, node, attval, attrReportType);
 1902 
 1903                 }
 1904 
 1905                 /* @todo: do we need a new option to drop mismatches? Or should we
 1906                  simply drop them? */
 1907                 if ( ( attrIsProprietary || attrIsMismatched ) && cfgBool(doc, TidyDropPropAttrs) )
 1908                     TY_(RemoveAttribute)( doc, node, attval );
 1909 
 1910                 attval = next_attr;
 1911             }
 1912         }
 1913 
 1914         if (node->content)
 1915             TY_(CheckHTMLTagsAttribsVersions)( doc, node->content );
 1916         
 1917         node = node->next;
 1918     }
 1919 }
 1920 
 1921 
 1922 #if defined(ENABLE_DEBUG_LOG)
 1923 /* *** FOR DEBUG ONLY *** */
 1924 const char *dbg_get_lexer_type( void *vp )
 1925 {
 1926     Node *node = (Node *)vp;
 1927     switch ( node->type )
 1928     {
 1929     case RootNode:      return "Root";
 1930     case DocTypeTag:    return "DocType";
 1931     case CommentTag:    return "Comment";
 1932     case ProcInsTag:    return "ProcIns";
 1933     case TextNode:      return "Text";
 1934     case StartTag:      return "StartTag";
 1935     case EndTag:        return "EndTag";
 1936     case StartEndTag:   return "StartEnd";
 1937     case CDATATag:      return "CDATA";
 1938     case SectionTag:    return "Section";
 1939     case AspTag:        return "Asp";
 1940     case JsteTag:       return "Jste";
 1941     case PhpTag:        return "Php";
 1942     case XmlDecl:       return "XmlDecl";
 1943     }
 1944     return "Uncased";
 1945 }
 1946 
 1947 /* NOTE: THis matches the above lexer type, except when element has a name */
 1948 const char *dbg_get_element_name( void *vp )
 1949 {
 1950     Node *node = (Node *)vp;
 1951     switch ( node->type )
 1952     {
 1953     case TidyNode_Root:       return "Root";
 1954     case TidyNode_DocType:    return "DocType";
 1955     case TidyNode_Comment:    return "Comment";
 1956     case TidyNode_ProcIns:    return "ProcIns";
 1957     case TidyNode_Text:       return "Text";
 1958     case TidyNode_CDATA:      return "CDATA";
 1959     case TidyNode_Section:    return "Section";
 1960     case TidyNode_Asp:        return "Asp";
 1961     case TidyNode_Jste:       return "Jste";
 1962     case TidyNode_Php:        return "Php";
 1963     case TidyNode_XmlDecl:    return "XmlDecl";
 1964 
 1965     case TidyNode_Start:
 1966     case TidyNode_End:
 1967     case TidyNode_StartEnd:
 1968     default:
 1969         if (node->element)
 1970             return node->element;
 1971     }
 1972     return "Unknown";
 1973 }
 1974 
 1975 void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent )
 1976 {
 1977     AttVal* av;
 1978     Lexer* lexer = doc->lexer;
 1979     ctmbstr call = "";
 1980     ctmbstr name = dbg_get_element_name(node);
 1981     ctmbstr type = dbg_get_lexer_type(node);
 1982     ctmbstr impl = node->implicit ? "implicit" : "";
 1983     switch ( caller )
 1984     {
 1985     case 1: call = "discard";   break;
 1986     case 2: call = "trim";      break;
 1987     case 3: call = "test";      break;
 1988     }
 1989     while (indent--)
 1990         SPRTF(" ");
 1991     if (strcmp(type,name))
 1992         SPRTF("%s %s %s %s", type, name, impl, call );
 1993     else
 1994         SPRTF("%s %s %s", name, impl, call );
 1995     if (lexer && (strcmp("Text",name) == 0)) {
 1996         uint len = node->end - node->start;
 1997         uint i;
 1998         SPRTF(" (%d) '", len);
 1999         if (len < 40) {
 2000             /* show it all */
 2001             for (i = node->start; i < node->end; i++) {
 2002                 SPRTF("%c", lexer->lexbuf[i]);
 2003             }
 2004         } else {
 2005             /* partial display */
 2006             uint max = 19;
 2007             for (i = node->start; i < max; i++) {
 2008                 SPRTF("%c", lexer->lexbuf[i]);
 2009             }
 2010             SPRTF("...");
 2011             i = node->end - 19;
 2012             for (; i < node->end; i++) {
 2013                 SPRTF("%c", lexer->lexbuf[i]);
 2014             }
 2015         }
 2016         SPRTF("'");
 2017     }
 2018     for (av = node->attributes; av; av = av->next) {
 2019         name = av->attribute;
 2020         if (name) {
 2021             SPRTF(" %s",name);
 2022             if (av->value) {
 2023                 SPRTF("=\"%s\"", av->value);
 2024             }
 2025         }
 2026     }
 2027 
 2028     SPRTF("\n");
 2029 }
 2030 
 2031 void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
 2032 {
 2033     while (node)
 2034     {
 2035         dbg_show_node( doc, node, 0, indent );
 2036         dbg_show_all_nodes( doc, node->content, indent + 1 );
 2037         node = node->next;
 2038     }
 2039 }
 2040 
 2041 #endif
 2042 
 2043 int         tidyDocCleanAndRepair( TidyDocImpl* doc )
 2044 {
 2045     Bool word2K   = cfgBool( doc, TidyWord2000 );
 2046     Bool logical  = cfgBool( doc, TidyLogicalEmphasis );
 2047     Bool clean    = cfgBool( doc, TidyMakeClean );
 2048     Bool gdoc     = cfgBool( doc, TidyGDocClean );
 2049     Bool htmlOut  = cfgBool( doc, TidyHtmlOut );
 2050     Bool xmlOut   = cfgBool( doc, TidyXmlOut );
 2051     Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
 2052     Bool xmlDecl  = cfgBool( doc, TidyXmlDecl );
 2053     Bool tidyMark = cfgBool( doc, TidyMark );
 2054     Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
 2055     Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
 2056     Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
 2057     Node* node;
 2058     TidyConfigChangeCallback callback = doc->pConfigChangeCallback;
 2059     doc->pConfigChangeCallback = NULL;
 2060 
 2061 #if defined(ENABLE_DEBUG_LOG)
 2062     SPRTF("All nodes BEFORE clean and repair\n");
 2063     dbg_show_all_nodes( doc, &doc->root, 0  );
 2064 #endif
 2065     if (tidyXmlTags)
 2066     {
 2067         doc->pConfigChangeCallback = callback;
 2068         return tidyDocStatus( doc );
 2069     }
 2070 
 2071     /* Issue #567 - move style elements from body to head */
 2072     TY_(CleanStyle)(doc, &doc->root);
 2073 
 2074     /* simplifies <b><b> ... </b> ...</b> etc. */
 2075     if ( mergeEmphasis )
 2076         TY_(NestedEmphasis)( doc, &doc->root );
 2077 
 2078     /* cleans up <dir>indented text</dir> etc. */
 2079     TY_(List2BQ)( doc, &doc->root );
 2080     TY_(BQ2Div)( doc, &doc->root );
 2081 
 2082     /* replaces i by em and b by strong */
 2083     if ( logical )
 2084         TY_(EmFromI)( doc, &doc->root );
 2085 
 2086     if ( word2K && TY_(IsWord2000)(doc) )
 2087     {
 2088         /* prune Word2000's <![if ...]> ... <![endif]> */
 2089         TY_(DropSections)( doc, &doc->root );
 2090 
 2091         /* drop style & class attributes and empty p, span elements */
 2092         TY_(CleanWord2000)( doc, &doc->root );
 2093         TY_(DropEmptyElements)(doc, &doc->root);
 2094     }
 2095 
 2096     /* replaces presentational markup by style rules */
 2097     if ( clean )
 2098         TY_(CleanDocument)( doc );
 2099 
 2100     /* clean up html exported by Google Docs */
 2101     if ( gdoc )
 2102         TY_(CleanGoogleDocument)( doc );
 2103 
 2104     /*  Reconcile http-equiv meta element with output encoding  */
 2105     TY_(TidyMetaCharset)(doc);
 2106 
 2107     if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
 2108         TidyPanic( doc->allocator, integrity );
 2109 
 2110     /* remember given doctype for reporting */
 2111     node = TY_(FindDocType)(doc);
 2112 
 2113     if (node)
 2114     {
 2115         AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
 2116         if (AttrHasValue(fpi))
 2117         {
 2118             if (doc->givenDoctype)
 2119                 TidyDocFree(doc, doc->givenDoctype);
 2120             doc->givenDoctype = TY_(tmbstrdup)(doc->allocator,fpi->value);
 2121         }
 2122     }
 2123 
 2124     if ( doc->root.content )
 2125     {
 2126         /* If we had XHTML input but want HTML output */
 2127         if ( htmlOut && doc->lexer->isvoyager )
 2128         {
 2129             Node* node = TY_(FindDocType)(doc);
 2130             /* Remove reference, but do not free */
 2131             if (node)
 2132               TY_(RemoveNode)(node);
 2133         }
 2134 
 2135         if (xhtmlOut && !htmlOut)
 2136         {
 2137             TY_(SetXHTMLDocType)(doc);
 2138             TY_(FixAnchors)(doc, &doc->root, wantNameAttr, yes);
 2139             TY_(FixXhtmlNamespace)(doc, yes);
 2140             TY_(FixLanguageInformation)(doc, &doc->root, yes, yes);
 2141         }
 2142         else
 2143         {
 2144             TY_(FixDocType)(doc);
 2145             TY_(FixAnchors)(doc, &doc->root, wantNameAttr, yes);
 2146             TY_(FixXhtmlNamespace)(doc, no);
 2147             TY_(FixLanguageInformation)(doc, &doc->root, no, yes);
 2148         }
 2149 
 2150         if (tidyMark )
 2151             TY_(AddGenerator)(doc);
 2152 
 2153     }
 2154 
 2155     /* ensure presence of initial <?xml version="1.0"?> */
 2156     if ( xmlOut && xmlDecl )
 2157         TY_(FixXmlDecl)( doc );
 2158 
 2159     /* At this point the apparent doctype is going to be as stable as
 2160        it can ever be, so we can start detecting things that shouldn't
 2161        be in this version of HTML
 2162      */
 2163     if (doc->lexer) 
 2164     {
 2165         /*\ 
 2166          *  Issue #429 #426 - These services can only be used
 2167          *  when there is a document loaded, ie a lexer created.
 2168          *  But really should not be calling a Clean and Repair
 2169          *  service with no doc!
 2170         \*/
 2171         if (doc->lexer->versionEmitted & VERS_HTML5)
 2172             TY_(CheckHTML5)( doc, &doc->root );
 2173         TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );
 2174 
 2175         if ( !doc->lexer->isvoyager && doc->xmlDetected )
 2176         {
 2177             TY_(Report)(doc, NULL, TY_(FindXmlDecl)(doc), XML_DECLARATION_DETECTED );
 2178 
 2179         }
 2180     }
 2181 
 2182     TY_(CleanHead)(doc); /* Is #692 - discard multiple <title> tags */
 2183 
 2184 #if defined(ENABLE_DEBUG_LOG)
 2185     SPRTF("All nodes AFTER clean and repair\n");
 2186     dbg_show_all_nodes( doc, &doc->root, 0  );
 2187 #endif
 2188 
 2189     doc->pConfigChangeCallback = callback;
 2190     return tidyDocStatus( doc );
 2191 }
 2192 
 2193 static
 2194 Bool showBodyOnly( TidyDocImpl* doc, TidyTriState bodyOnly )
 2195 {
 2196     Node* node;
 2197 
 2198     switch( bodyOnly )
 2199     {
 2200     case TidyNoState:
 2201         return no;
 2202     case TidyYesState:
 2203         return yes;
 2204     default:
 2205         node = TY_(FindBody)( doc );
 2206         if (node && node->implicit )
 2207             return yes;
 2208     }
 2209     return no;
 2210 }
 2211 
 2212 
 2213 int         tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
 2214 {
 2215     Bool showMarkup  = cfgBool( doc, TidyShowMarkup );
 2216     Bool forceOutput = cfgBool( doc, TidyForceOutput );
 2217     Bool outputBOM   = ( cfgAutoBool(doc, TidyOutputBOM) == TidyYesState );
 2218     Bool smartBOM    = ( cfgAutoBool(doc, TidyOutputBOM) == TidyAutoState );
 2219     Bool xmlOut      = cfgBool( doc, TidyXmlOut );
 2220     Bool xhtmlOut    = cfgBool( doc, TidyXhtmlOut );
 2221     TidyTriState bodyOnly    = cfgAutoBool( doc, TidyBodyOnly );
 2222 
 2223     Bool dropComments = cfgBool(doc, TidyHideComments);
 2224     Bool makeClean    = cfgBool(doc, TidyMakeClean);
 2225     Bool asciiChars   = cfgBool(doc, TidyAsciiChars);
 2226     Bool makeBare     = cfgBool(doc, TidyMakeBare);
 2227     Bool escapeCDATA  = cfgBool(doc, TidyEscapeCdata);
 2228     Bool ppWithTabs   = cfgBool(doc, TidyPPrintTabs);
 2229     TidyAttrSortStrategy sortAttrStrat = cfg(doc, TidySortAttributes);
 2230     TidyConfigChangeCallback callback = doc->pConfigChangeCallback;
 2231     doc->pConfigChangeCallback = NULL;
 2232 
 2233     if (ppWithTabs)
 2234         TY_(PPrintTabs)();
 2235     else
 2236         TY_(PPrintSpaces)();
 2237 
 2238     if (escapeCDATA)
 2239         TY_(ConvertCDATANodes)(doc, &doc->root);
 2240 
 2241     if (dropComments)
 2242         TY_(DropComments)(doc, &doc->root);
 2243 
 2244     if (makeClean)
 2245     {
 2246         /* noop */
 2247         TY_(DropFontElements)(doc, &doc->root, NULL);
 2248     }
 2249 
 2250     if ((makeClean && asciiChars) || makeBare)
 2251         TY_(DowngradeTypography)(doc, &doc->root);
 2252 
 2253     if (makeBare)
 2254         /* Note: no longer replaces &nbsp; in */
 2255         /* attribute values / non-text tokens */
 2256         TY_(NormalizeSpaces)(doc->lexer, &doc->root);
 2257     else
 2258         TY_(ReplacePreformattedSpaces)(doc, &doc->root);
 2259 
 2260     TY_(SortAttributes)(doc, &doc->root, sortAttrStrat);
 2261 
 2262     if ( showMarkup && (doc->errors == 0 || forceOutput) )
 2263     {
 2264         /* Output a Byte Order Mark if required */
 2265         if ( outputBOM || (doc->inputHadBOM && smartBOM) )
 2266             TY_(outBOM)( out );
 2267 
 2268         /* No longer necessary. No DOCTYPE == HTML 3.2,
 2269         ** which gives you only the basic character entities,
 2270         ** which are safe in any browser.
 2271         ** if ( !TY_(FindDocType)(doc) )
 2272         **    TY_(SetOptionBool)( doc, TidyNumEntities, yes );
 2273         */
 2274 
 2275         doc->docOut = out;
 2276         if ( xmlOut && !xhtmlOut )
 2277             TY_(PPrintXMLTree)( doc, NORMAL, 0, &doc->root );
 2278         else if ( showBodyOnly( doc, bodyOnly ) )
 2279             TY_(PrintBody)( doc );
 2280         else
 2281             TY_(PPrintTree)( doc, NORMAL, 0, &doc->root );
 2282 
 2283         TY_(PFlushLine)( doc, 0 );
 2284         doc->docOut = NULL;
 2285     }
 2286 
 2287     /* @jsd: removing this should solve #673, and allow saving of the buffer multiple times. */
 2288 //    TY_(ResetConfigToSnapshot)( doc );
 2289     doc->pConfigChangeCallback = callback;
 2290     
 2291     return tidyDocStatus( doc );
 2292 }
 2293 
 2294 /* Tree traversal functions
 2295 **
 2296 ** The big issue here is the degree to which we should mimic
 2297 ** a DOM and/or SAX nodes.
 2298 **
 2299 ** Is it 100% possible (and, if so, how difficult is it) to
 2300 ** emit SAX events from this API?  If SAX events are possible,
 2301 ** is that 100% of data needed to build a DOM?
 2302 */
 2303 
 2304 TidyNode TIDY_CALL   tidyGetRoot( TidyDoc tdoc )
 2305 {
 2306     TidyDocImpl* impl = tidyDocToImpl( tdoc );
 2307     Node* node = NULL;
 2308     if ( impl )
 2309         node = &impl->root;
 2310     return tidyImplToNode( node );
 2311 }
 2312 
 2313 TidyNode TIDY_CALL   tidyGetHtml( TidyDoc tdoc )
 2314 {
 2315   TidyDocImpl* impl = tidyDocToImpl( tdoc );
 2316   Node* node = NULL;
 2317   if ( impl )
 2318       node = TY_(FindHTML)( impl );
 2319   return tidyImplToNode( node );
 2320 }
 2321 
 2322 TidyNode TIDY_CALL    tidyGetHead( TidyDoc tdoc )
 2323 {
 2324   TidyDocImpl* impl = tidyDocToImpl( tdoc );
 2325   Node* node = NULL;
 2326   if ( impl )
 2327       node = TY_(FindHEAD)( impl );
 2328   return tidyImplToNode( node );
 2329 }
 2330 
 2331 TidyNode TIDY_CALL    tidyGetBody( TidyDoc tdoc )
 2332 {
 2333   TidyDocImpl* impl = tidyDocToImpl( tdoc );
 2334   Node* node = NULL;
 2335   if ( impl )
 2336       node = TY_(FindBody)( impl );
 2337   return tidyImplToNode( node );
 2338 }
 2339 
 2340 /* parent / child */
 2341 TidyNode TIDY_CALL    tidyGetParent( TidyNode tnod )
 2342 {
 2343   Node* nimp = tidyNodeToImpl( tnod );
 2344   return tidyImplToNode( nimp->parent );
 2345 }
 2346 TidyNode TIDY_CALL    tidyGetChild( TidyNode tnod )
 2347 {
 2348   Node* nimp = tidyNodeToImpl( tnod );
 2349   return tidyImplToNode( nimp->content );
 2350 }
 2351 
 2352 /* remove a node */
 2353 TidyNode TIDY_CALL    tidyDiscardElement( TidyDoc tdoc, TidyNode tnod )
 2354 {
 2355   TidyDocImpl* doc = tidyDocToImpl( tdoc );
 2356   Node* nimp = tidyNodeToImpl( tnod );
 2357   Node* next = TY_(DiscardElement)( doc, nimp );
 2358   return tidyImplToNode( next );
 2359 }
 2360 
 2361 /* siblings */
 2362 TidyNode TIDY_CALL    tidyGetNext( TidyNode tnod )
 2363 {
 2364   Node* nimp = tidyNodeToImpl( tnod );
 2365   return tidyImplToNode( nimp->next );
 2366 }
 2367 TidyNode TIDY_CALL    tidyGetPrev( TidyNode tnod )
 2368 {
 2369   Node* nimp = tidyNodeToImpl( tnod );
 2370   return tidyImplToNode( nimp->prev );
 2371 }
 2372 
 2373 /* Node info */
 2374 TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod )
 2375 {
 2376   Node* nimp = tidyNodeToImpl( tnod );
 2377   TidyNodeType ntyp = TidyNode_Root;
 2378   if ( nimp )
 2379     ntyp = (TidyNodeType) nimp->type;
 2380   return ntyp;
 2381 }
 2382 
 2383 uint TIDY_CALL tidyNodeLine( TidyNode tnod )
 2384 {
 2385   Node* nimp = tidyNodeToImpl( tnod );
 2386   uint line = 0;
 2387   if ( nimp )
 2388     line = nimp->line;
 2389   return line;
 2390 }
 2391 uint TIDY_CALL tidyNodeColumn( TidyNode tnod )
 2392 {
 2393   Node* nimp = tidyNodeToImpl( tnod );
 2394   uint col = 0;
 2395   if ( nimp )
 2396     col = nimp->column;
 2397   return col;
 2398 }
 2399 
 2400 ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod )
 2401 {
 2402   Node* nimp = tidyNodeToImpl( tnod );
 2403   ctmbstr nnam = NULL;
 2404   if ( nimp )
 2405     nnam = nimp->element;
 2406   return nnam;
 2407 }
 2408 
 2409 
 2410 Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod )
 2411 {
 2412   TidyDocImpl* doc = tidyDocToImpl( tdoc );
 2413   if ( doc )
 2414       return TY_(nodeHasText)( doc, tidyNodeToImpl(tnod) );
 2415   return no;
 2416 }
 2417 
 2418 
 2419 Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf )
 2420 {
 2421   TidyDocImpl* doc = tidyDocToImpl( tdoc );
 2422   Node* nimp = tidyNodeToImpl( tnod );
 2423   if ( doc && nimp && outbuf )
 2424   {
 2425       uint outenc     = cfg( doc, TidyOutCharEncoding );
 2426       uint nl         = cfg( doc, TidyNewline );
 2427       StreamOut* out  = TY_(BufferOutput)( doc, outbuf, outenc, nl );
 2428       Bool xmlOut     = cfgBool( doc, TidyXmlOut );
 2429       Bool xhtmlOut   = cfgBool( doc, TidyXhtmlOut );
 2430 
 2431       doc->docOut = out;
 2432       if ( xmlOut && !xhtmlOut )
 2433           TY_(PPrintXMLTree)( doc, NORMAL, 0, nimp );
 2434       else
 2435           TY_(PPrintTree)( doc, NORMAL, 0, nimp );
 2436 
 2437       TY_(PFlushLine)( doc, 0 );
 2438       doc->docOut = NULL;
 2439 
 2440       TidyDocFree( doc, out );
 2441       return yes;
 2442   }
 2443   return no;
 2444 }
 2445 
 2446 Bool TIDY_CALL tidyNodeGetValue( TidyDoc tdoc, TidyNode tnod, TidyBuffer* buf )
 2447 {
 2448     TidyDocImpl *doc = tidyDocToImpl( tdoc );
 2449     Node *node = tidyNodeToImpl( tnod );
 2450     if ( doc == NULL || node == NULL || buf == NULL )
 2451         return no;
 2452 
 2453     switch( node->type ) {
 2454     case TextNode:
 2455     case CDATATag:
 2456     case CommentTag:
 2457     case ProcInsTag:
 2458     case SectionTag:
 2459     case AspTag:
 2460     case JsteTag:
 2461     case PhpTag:
 2462     {
 2463         tidyBufClear( buf );
 2464         tidyBufAppend( buf, doc->lexer->lexbuf + node->start,
 2465                        node->end - node->start );
 2466         break;
 2467     }
 2468     default:
 2469         /* The node doesn't have a value */
 2470         return no;
 2471     }
 2472 
 2473     return yes;
 2474 }
 2475 
 2476 Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod )
 2477 {
 2478   Node* nimp = tidyNodeToImpl( tnod );
 2479   Bool isProprietary = yes;
 2480   if ( nimp )
 2481   {
 2482     switch ( nimp->type )
 2483     {
 2484     case RootNode:
 2485     case DocTypeTag:
 2486     case CommentTag:
 2487     case XmlDecl:
 2488     case ProcInsTag:
 2489     case TextNode:
 2490     case CDATATag:
 2491         isProprietary = no;
 2492         break;
 2493 
 2494     case SectionTag:
 2495     case AspTag:
 2496     case JsteTag:
 2497     case PhpTag:
 2498         isProprietary = yes;
 2499         break;
 2500 
 2501     case StartTag:
 2502     case EndTag:
 2503     case StartEndTag:
 2504         isProprietary = ( nimp->tag
 2505                           ? (nimp->tag->versions&VERS_PROPRIETARY)!=0
 2506                           : yes );
 2507         break;
 2508     }
 2509   }
 2510   return isProprietary;
 2511 }
 2512 
 2513 TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod)
 2514 {
 2515     Node* nimp = tidyNodeToImpl(tnod);
 2516 
 2517     TidyTagId tagId = TidyTag_UNKNOWN;
 2518     if (nimp && nimp->tag)
 2519         tagId = nimp->tag->id;
 2520 
 2521     return tagId;
 2522 }
 2523 
 2524 
 2525 /* Iterate over attribute values */
 2526 TidyAttr TIDY_CALL   tidyAttrFirst( TidyNode tnod )
 2527 {
 2528   Node* nimp = tidyNodeToImpl( tnod );
 2529   AttVal* attval = NULL;
 2530   if ( nimp )
 2531     attval = nimp->attributes;
 2532   return tidyImplToAttr( attval );
 2533 }
 2534 TidyAttr TIDY_CALL    tidyAttrNext( TidyAttr tattr )
 2535 {
 2536   AttVal* attval = tidyAttrToImpl( tattr );
 2537   AttVal* nxtval = NULL;
 2538   if ( attval )
 2539     nxtval = attval->next;
 2540   return tidyImplToAttr( nxtval );
 2541 }
 2542 
 2543 ctmbstr TIDY_CALL       tidyAttrName( TidyAttr tattr )
 2544 {
 2545   AttVal* attval = tidyAttrToImpl( tattr );
 2546   ctmbstr anam = NULL;
 2547   if ( attval )
 2548     anam = attval->attribute;
 2549   return anam;
 2550 }
 2551 ctmbstr TIDY_CALL       tidyAttrValue( TidyAttr tattr )
 2552 {
 2553   AttVal* attval = tidyAttrToImpl( tattr );
 2554   ctmbstr aval = NULL;
 2555   if ( attval )
 2556     aval = attval->value;
 2557   return aval;
 2558 }
 2559 
 2560 void TIDY_CALL           tidyAttrDiscard( TidyDoc tdoc, TidyNode tnod, TidyAttr tattr )
 2561 {
 2562   TidyDocImpl* impl = tidyDocToImpl( tdoc );
 2563   Node* nimp = tidyNodeToImpl( tnod );
 2564   AttVal* attval = tidyAttrToImpl( tattr );
 2565   TY_(RemoveAttribute)( impl, nimp, attval );
 2566 }
 2567 
 2568 TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr )
 2569 {
 2570   AttVal* attval = tidyAttrToImpl( tattr );
 2571   TidyAttrId attrId = TidyAttr_UNKNOWN;
 2572   if ( attval && attval->dict )
 2573     attrId = attval->dict->id;
 2574   return attrId;
 2575 }
 2576 
 2577 TidyAttr TIDY_CALL tidyAttrGetById( TidyNode tnod, TidyAttrId attId )
 2578 {
 2579     Node* nimp = tidyNodeToImpl(tnod);
 2580     return tidyImplToAttr( TY_(AttrGetById)( nimp, attId ) );
 2581 }
 2582 
 2583 
 2584 Bool TIDY_CALL tidyAttrIsEvent( TidyAttr tattr )
 2585 {
 2586     return TY_(attrIsEvent)( tidyAttrToImpl(tattr) );
 2587 }
 2588 
 2589 
 2590 /*******************************************************************
 2591  ** Message Key Management
 2592  *******************************************************************/
 2593 ctmbstr TIDY_CALL tidyErrorCodeAsKey(uint code)
 2594 {
 2595     return TY_(tidyErrorCodeAsKey)( code );
 2596 }
 2597 
 2598 uint TIDY_CALL tidyErrorCodeFromKey(ctmbstr code)
 2599 {
 2600     return TY_(tidyErrorCodeFromKey)( code );
 2601 }
 2602 
 2603 TidyIterator TIDY_CALL getErrorCodeList()
 2604 {
 2605     return TY_(getErrorCodeList)();
 2606 }
 2607 
 2608 uint TIDY_CALL getNextErrorCode( TidyIterator* iter )
 2609 {
 2610     return TY_(getNextErrorCode)(iter);
 2611 }
 2612 
 2613 
 2614 /*******************************************************************
 2615  ** Localization Support
 2616  *******************************************************************/
 2617 
 2618 
 2619 Bool TIDY_CALL tidySetLanguage( ctmbstr languageCode )
 2620 {
 2621     Bool result = TY_(tidySetLanguage)( languageCode );
 2622 
 2623     if ( result )
 2624         TY_(tidySetLanguageSetByUser)();
 2625 
 2626     return result;
 2627 }
 2628 
 2629 ctmbstr TIDY_CALL tidyGetLanguage()
 2630 {
 2631     return TY_(tidyGetLanguage)();
 2632 }
 2633 
 2634 ctmbstr TIDY_CALL tidyLocalizedStringN( uint messageType, uint quantity )
 2635 {
 2636     return TY_(tidyLocalizedStringN)( messageType, quantity);
 2637 }
 2638 
 2639 ctmbstr TIDY_CALL tidyLocalizedString( uint messageType )
 2640 {
 2641     return TY_(tidyLocalizedString)( messageType );
 2642 }
 2643 
 2644 ctmbstr TIDY_CALL tidyDefaultString( uint messageType )
 2645 {
 2646     return TY_(tidyDefaultString)( messageType );
 2647 }
 2648 
 2649 TidyIterator TIDY_CALL getStringKeyList()
 2650 {
 2651     return TY_(getStringKeyList)();
 2652 }
 2653 
 2654 uint TIDY_CALL getNextStringKey( TidyIterator* iter )
 2655 {
 2656     return TY_(getNextStringKey)( iter );
 2657 }
 2658 
 2659 TidyIterator TIDY_CALL getWindowsLanguageList()
 2660 {
 2661     return TY_(getWindowsLanguageList)();
 2662 }
 2663 
 2664 //#define tidyOptionToImpl( topt )    ((const TidyOptionImpl*)(topt))
 2665 //#define tidyImplToOption( option )  ((TidyOption)(option))
 2666 
 2667 const tidyLocaleMapItem* TIDY_CALL getNextWindowsLanguage( TidyIterator* iter )
 2668 {
 2669     /* Get a real structure */
 2670     const tidyLocaleMapItemImpl *item = TY_(getNextWindowsLanguage)( iter );
 2671 
 2672     /* Return it as the opaque version */
 2673     return ((tidyLocaleMapItem*)(item));
 2674 }
 2675 
 2676 
 2677 ctmbstr TIDY_CALL TidyLangWindowsName( const tidyLocaleMapItem *item )
 2678 {
 2679     return TY_(TidyLangWindowsName)( (tidyLocaleMapItemImpl*)(item) );
 2680 }
 2681 
 2682 
 2683 ctmbstr TIDY_CALL TidyLangPosixName( const tidyLocaleMapItem *item )
 2684 {
 2685     return TY_(TidyLangPosixName)( (tidyLocaleMapItemImpl*)(item) );
 2686 }
 2687 
 2688 
 2689 TidyIterator TIDY_CALL getInstalledLanguageList()
 2690 {
 2691     return TY_(getInstalledLanguageList)();
 2692 }
 2693 
 2694 
 2695 ctmbstr TIDY_CALL getNextInstalledLanguage( TidyIterator* iter )
 2696 {
 2697     return TY_(getNextInstalledLanguage)( iter );
 2698 }
 2699 
 2700 
 2701 
 2702 
 2703 /*
 2704  * local variables:
 2705  * mode: c
 2706  * indent-tabs-mode: nil
 2707  * c-basic-offset: 4
 2708  * eval: (c-set-offset 'substatement-open 0)
 2709  * end:
 2710  */