"Fossies" - the Fresh Open Source Software Archive

Member "highlight-3.47/src/core/codegenerator.cpp" (13 Oct 2018, 77057 Bytes) of package /linux/www/highlight-3.47.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "codegenerator.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.46_vs_3.47.

    1 /***************************************************************************
    2                           codegenerator.cpp  -  description
    3                              -------------------
    4     begin                : Die Jul 9 2002
    5     copyright            : (C) 2002-2018 by Andre Simon
    6     email                : andre.simon1@gmx.de
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #include <climits>
   29 #include <memory>
   30 #include <boost/xpressive/xpressive_dynamic.hpp>
   31 
   32 #include "codegenerator.h"
   33 
   34 #include "htmlgenerator.h"
   35 #include "xhtmlgenerator.h"
   36 #include "rtfgenerator.h"
   37 #include "latexgenerator.h"
   38 #include "texgenerator.h"
   39 #include "svggenerator.h"
   40 #include "bbcodegenerator.h"
   41 #include "pangogenerator.h"
   42 #include "odtgenerator.h"
   43 #include "astyle/astyle.h"
   44 #include "astyle/ASStreamIterator.h"
   45 
   46 #if !defined (QT)
   47 #include "ansigenerator.h"
   48 #include "xterm256generator.h"
   49 #endif
   50 
   51 namespace highlight
   52 {
   53 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
   54 
   55 const string CodeGenerator::STY_NAME_STD="std";
   56 const string CodeGenerator::STY_NAME_STR="str";
   57 const string CodeGenerator::STY_NAME_NUM="num";
   58 const string CodeGenerator::STY_NAME_SLC="slc";
   59 const string CodeGenerator::STY_NAME_COM="com";
   60 const string CodeGenerator::STY_NAME_ESC="esc";
   61 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
   62 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
   63 const string CodeGenerator::STY_NAME_LIN="lin";
   64 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
   65 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
   66 
   67 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
   68 
   69 
   70 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
   71 {
   72     CodeGenerator* generator=NULL;
   73     switch ( type ) {
   74     case HTML:
   75         generator = new HtmlGenerator();
   76         break;
   77     case XHTML:
   78         generator = new XHtmlGenerator();
   79         break;
   80     case TEX:
   81         generator = new TexGenerator ();
   82         break;
   83     case LATEX:
   84         generator = new LatexGenerator();
   85         break;
   86     case RTF:
   87         generator = new RtfGenerator ();
   88         break;
   89     case SVG:
   90         generator = new SVGGenerator();
   91         break;
   92     case BBCODE:
   93         generator = new BBCodeGenerator();
   94         break;
   95     case PANGO:
   96         generator = new PangoGenerator();
   97         break;
   98     case ODTFLAT:
   99         generator = new ODTGenerator();
  100         break;
  101 #if !defined (QT)
  102     case ESC_ANSI:
  103         generator = new AnsiGenerator();
  104         break;
  105     case ESC_XTERM256:
  106     case ESC_TRUECOLOR:
  107         generator = new Xterm256Generator();
  108         generator->setESCTrueColor(type==ESC_TRUECOLOR);
  109         break;
  110 #endif
  111     default:
  112         break;
  113     }
  114     return generator;
  115 }
  116 
  117 
  118 CodeGenerator::CodeGenerator ( highlight::OutputType type )
  119     :currentSyntax(NULL),
  120      in ( NULL ),
  121      out ( NULL ),
  122      encoding ( "none" ),
  123      docTitle ( "Source file" ),
  124      maskWs ( false ),
  125      excludeWs ( false ),
  126      fragmentOutput ( false ),
  127      keepInjections( false ),
  128      showLineNumbers ( false ),
  129      lineNumberFillZeroes ( false ),
  130      printNewLines(true),
  131      omitVersionComment(false),
  132      baseFontSize("10"),
  133      lineNumber ( 0 ),
  134      lineNumberOffset ( 0 ),
  135      currentState ( _UNKNOWN ),
  136      currentKeywordClass ( 0 ),
  137      includeStyleDef ( false ),
  138      numberCurrentLine ( false ),
  139      lineIndex ( 0 ),
  140      lastLineLength( 0 ),
  141      syntaxChangeIndex(UINT_MAX),
  142      syntaxChangeLineNo(UINT_MAX),
  143      lineNumberWidth ( 5 ),
  144      startLineCnt( 1 ),
  145      startLineCntCurFile( 1 ),
  146      maxLineCnt ( UINT_MAX ),
  147      inputFilesCnt (0),
  148      processedFilesCnt (0),
  149      terminatingChar ( '\0' ),
  150      formatter ( NULL ),
  151      formattingEnabled ( false ),
  152      formattingPossible ( false ),
  153      validateInput ( false ),
  154      numberWrappedLines ( true ),
  155      noTrailingNewLine(false),
  156      resultOfHook(false),
  157      lineContainedTestCase(false),
  158      applySyntaxTestCase(false),
  159      keywordCase ( StringTools::CASE_UNCHANGED ),
  160      eolDelimiter ('\n'),
  161      outputType ( type )
  162 {
  163 }
  164 
  165 
  166 CodeGenerator::~CodeGenerator()
  167 {
  168     delete formatter;
  169 
  170     for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
  171         delete it->second;
  172     }
  173     
  174     for (unsigned int i=0; i<pluginChunks.size(); i++) {
  175         delete pluginChunks[i];
  176     }
  177     pluginChunks.clear();
  178 }
  179 
  180 
  181 bool CodeGenerator::initTheme ( const string& themePath )
  182 {
  183     this->themePath=themePath;
  184     bool loadOK = docStyle.load ( themePath, outputType );
  185     initOutputTags();
  186     return loadOK;
  187 }
  188 
  189 const string& CodeGenerator::getStyleName()
  190 {
  191     return themePath;
  192 }
  193 
  194 void CodeGenerator::setLineNumberWidth ( int w )
  195 {
  196     lineNumberWidth=w;
  197 }
  198 
  199 int CodeGenerator::getLineNumberWidth()
  200 {
  201     return lineNumberWidth;
  202 }
  203 
  204 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
  205 {
  206     showLineNumbers=flag;
  207     lineNumberOffset = startCnt-1;
  208 }
  209 
  210 bool CodeGenerator::getPrintLineNumbers()
  211 {
  212     return showLineNumbers;
  213 }
  214 
  215 void CodeGenerator::setPrintZeroes ( bool flag )
  216 {
  217     lineNumberFillZeroes=flag;
  218 }
  219 
  220 bool CodeGenerator::getPrintZeroes()
  221 {
  222     return lineNumberFillZeroes;
  223 }
  224 
  225 void CodeGenerator::setIncludeStyle ( bool flag )
  226 {
  227     includeStyleDef = flag;
  228 }
  229 
  230 void CodeGenerator::disableTrailingNL ( bool flag )
  231 {
  232     noTrailingNewLine = flag;
  233 }
  234 
  235 void CodeGenerator::setStyleInputPath ( const string& path )
  236 {
  237     styleInputPath = path;
  238 }
  239 
  240 void CodeGenerator::setStyleOutputPath ( const string& path )
  241 {
  242     styleOutputPath = path;
  243 }
  244 
  245 void CodeGenerator::setPluginParameter ( const string& param )
  246 {
  247     pluginParameter = param;
  248 }
  249 
  250 const string&  CodeGenerator::getStyleInputPath()
  251 {
  252     return styleInputPath;
  253 }
  254 
  255 const string&  CodeGenerator::getStyleOutputPath()
  256 {
  257     return styleOutputPath;
  258 }
  259 
  260 void CodeGenerator::setFragmentCode ( bool flag )
  261 {
  262     fragmentOutput=flag;
  263 }
  264 
  265 bool CodeGenerator::getFragmentCode()
  266 {
  267     return fragmentOutput;
  268 }
  269 void CodeGenerator::setKeepInjections ( bool flag )
  270 {
  271     keepInjections=flag;
  272 }
  273 
  274 bool CodeGenerator::getKeepInjections()
  275 {
  276     return keepInjections;
  277 }
  278 void CodeGenerator::setValidateInput ( bool flag )
  279 {
  280     validateInput=flag;
  281 }
  282 
  283 bool CodeGenerator::getValidateInput()
  284 {
  285     return validateInput;
  286 }
  287 
  288 
  289 void CodeGenerator::setNumberWrappedLines ( bool flag )
  290 {
  291     numberWrappedLines=flag;
  292 }
  293 
  294 bool CodeGenerator::getNumberWrappedLines()
  295 {
  296     return numberWrappedLines;
  297 }
  298 
  299 void CodeGenerator::setOmitVersionComment ( bool flag )
  300 {
  301     omitVersionComment=flag;
  302 }
  303 
  304 bool CodeGenerator::getOmitVersionComment ()
  305 {
  306     return omitVersionComment;
  307 }
  308 
  309 void CodeGenerator::setBaseFont ( const string& fontName )
  310 {
  311     baseFont = fontName;
  312 }
  313 
  314 void CodeGenerator::setBaseFontSize ( const string& fontSize)
  315 {
  316     baseFontSize = fontSize;
  317 }
  318 
  319 void CodeGenerator::setStartingNestedLang(const string &langName)
  320 {
  321     embedLangStart = langName;
  322 }
  323 
  324 const string CodeGenerator::getBaseFont() const
  325 {
  326     if ( !baseFont.empty() ) return baseFont;
  327     switch ( outputType ) {
  328     case HTML:
  329     case XHTML:
  330     case SVG:
  331         return "'Courier New',monospace";
  332         break;
  333     case LATEX:
  334         return "ttfamily";
  335         break;
  336     case TEX:
  337         return "tt";
  338         break;
  339     default:
  340         return "Courier New";
  341     }
  342 }
  343 
  344 const string CodeGenerator::getBaseFontSize()
  345 {
  346     return baseFontSize;
  347 }
  348 
  349 void CodeGenerator::setTitle ( const string & title )
  350 {
  351     if ( !title.empty() ) docTitle= title;
  352 }
  353 
  354 string CodeGenerator::getTitle()
  355 {
  356     return docTitle;
  357 }
  358 
  359 void CodeGenerator::setEncoding ( const string& encodingName )
  360 {
  361     encoding = encodingName;
  362 }
  363 
  364 bool CodeGenerator::formattingDisabled()
  365 {
  366     return !formattingEnabled;
  367 }
  368 
  369 void CodeGenerator::setStartingInputLine ( unsigned int begin )
  370 {
  371     startLineCnt = startLineCntCurFile = begin;
  372 }
  373 
  374 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
  375 {
  376     maxLineCnt = cnt;
  377 }
  378 
  379 void CodeGenerator::setFilesCnt ( unsigned int cnt )
  380 {
  381     inputFilesCnt = cnt;
  382 }
  383 
  384 bool CodeGenerator::formattingIsPossible()
  385 {
  386     return formattingPossible;
  387 }
  388 
  389 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
  390                                        unsigned int lineLength,
  391                                        int numberSpaces )
  392 {
  393     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
  394     bool replaceTabs = numberSpaces > 0;
  395 
  396     if ( enableWrap || replaceTabs ) {
  397         preFormatter.setWrap ( enableWrap );
  398         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
  399         preFormatter.setWrapLineLength ( lineLength );
  400         preFormatter.setReplaceTabs ( replaceTabs );
  401         preFormatter.setNumberSpaces ( numberSpaces );
  402     }
  403 }
  404 
  405 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
  406 {
  407     keywordCase = keyCase;
  408 }
  409 
  410 void CodeGenerator::setEOLDelimiter(char delim)
  411 {
  412     eolDelimiter = delim;
  413 }
  414 
  415 void CodeGenerator::reset()
  416 {
  417     lineIndex = 0;
  418     lineNumber = 0;
  419     line.clear();
  420     preFormatter.reset();
  421     inFile.clear();
  422     outFile.clear();
  423     embedLangDefPath.clear();
  424     printNewLines=true;
  425     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  426     startLineCntCurFile = startLineCnt;
  427     applySyntaxTestCase=lineContainedTestCase=false;
  428 }
  429 
  430 string CodeGenerator::getThemeInitError()
  431 {
  432     return  docStyle.getErrorMessage();
  433 }
  434 
  435 string CodeGenerator::getPluginScriptError()
  436 {
  437     return userScriptError;
  438 }
  439 
  440 string CodeGenerator::getSyntaxRegexError()
  441 {
  442     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
  443 }
  444 string CodeGenerator::getSyntaxLuaError()
  445 {
  446     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
  447 
  448 }
  449 string CodeGenerator::getSyntaxDescription()
  450 {
  451     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
  452 
  453 }
  454 string CodeGenerator::getThemeDescription()
  455 {
  456     return docStyle.getDescription();
  457 
  458 }
  459 unsigned int CodeGenerator::getLineNumber()
  460 {
  461     return lineNumber;
  462 }
  463 
  464 bool CodeGenerator::readNewLine ( string &newLine )
  465 {
  466     bool eof=false;
  467     
  468     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
  469     
  470     while (!eof && startLineCntCurFile>0) {
  471         if ( formattingPossible && formattingEnabled ) {
  472             eof=!formatter->hasMoreLines();
  473             if ( !eof ) {
  474                 newLine = formatter->nextLine();
  475             }
  476         } else {
  477             eof = ! getline ( *in, newLine, eolDelimiter );
  478         }
  479         --startLineCntCurFile;
  480     }
  481     startLineCntCurFile=1;
  482 #ifndef _WIN32
  483     // drop CR of CRLF files
  484     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
  485         newLine.erase(newLine.size() - 1);
  486 #endif
  487 
  488     return eof || ( lineNumber == maxLineCnt );
  489 }
  490 
  491 void CodeGenerator::matchRegex ( const string &line, State skipState)
  492 {
  493     regexGroups.clear();
  494     int matchBegin=0;
  495     int groupID=0;
  496 
  497     // cycle through all regex, save the start and ending indices of matches to report them later
  498     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
  499         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
  500 
  501         if (regexElem->open == skipState) continue;
  502         
  503         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
  504         boost::xpressive::sregex_iterator end;
  505 
  506         for( ; cur != end; ++cur )  {
  507             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
  508             matchBegin =  cur->position(groupID);
  509             regexGroups.insert (
  510                 make_pair ( matchBegin + 1, ReGroup ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
  511         }
  512     }
  513 }
  514 
  515 unsigned char CodeGenerator::getInputChar()
  516 {
  517     // end of line?
  518     if ( lineIndex == line.length() ) {
  519         bool eof=false;
  520         if ( preFormatter.isEnabled() ) {
  521             if ( !preFormatter.hasMoreLines() ) {
  522                 eof=readNewLine ( line );
  523                 preFormatter.setLine ( line );
  524                 ++lineNumber;
  525                 numberCurrentLine = true;
  526             } else {
  527                 if(numberWrappedLines)
  528                     ++lineNumber;
  529                 numberCurrentLine = numberWrappedLines;
  530             }
  531 
  532             line = preFormatter.getNextLine();
  533         } else {
  534             eof=readNewLine ( line );
  535             ++lineNumber;
  536 
  537             numberCurrentLine = true;
  538         }
  539         lastLineLength=lineIndex;
  540         lineIndex=0;
  541         
  542         if (!lineContainedTestCase && applySyntaxTestCase){
  543             stateTraceTest = stateTraceCurrent;
  544             stateTraceCurrent.clear();
  545         } 
  546         
  547         lineContainedTestCase=false;
  548             
  549         matchRegex ( line );
  550         stateTrace.clear();
  551         return ( eof ) ?'\0':'\n';
  552     }
  553 
  554     return line[lineIndex++];
  555 }
  556 
  557 /** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS, Coffeescript with block regex, Pas + ASM) 
  558     especially nested syntax in one line

          Determines the state of the next piece of input and stores the matching text in token.
          If token is not empty on entry, lineIndex is moved back by token.length()-1 and the
          first character of token is examined again; otherwise a character is fetched with
          getInputChar().

          @param oldState state passed in by the caller; it is used for the test position check
                          and handed on to validateState()
          @return _EOL, _EOF, _WS or _TESTPOS for the corresponding characters; the result of
                  validateState() if a regex group is registered at lineIndex; STANDARD otherwise
  559  */
  560 State CodeGenerator::getCurrentState (State oldState)
  561 {
  562     unsigned char c='\0';
  563 
          // fetch new input only if there is no token left over from an earlier call
  564     if ( token.length() ==0 ) {
  565         c=getInputChar();
  566     } else {
  567         lineIndex-= ( token.length()-1 );
  568         c=token[0];
  569     }
          // getInputChar() returns '\n' after fetching a new line and '\0' at the end of input
  570     if ( c=='\n' ) {
  571         return _EOL;   // End of line
  572     }
  573 
  574     if ( c=='\0' ) {
  575         return _EOF;   // End of file
  576     }
  577 
  578     if ( c==' ' || c=='\t' ) {
  579         token= c;
  580         return _WS;
  581     }
  582     
  583     //TODO add control flag
          // test position markers are only recognized while the previous state was a comment
  584     if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
  585         token= c;
  586         return _TESTPOS;
  587     }
  588         
  589     // at this position the syntax change takes place
          // both markers hold UINT_MAX while no change is pending (see constructor and reset())
  590     if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
  591         loadEmbeddedLang(embedLangDefPath);  // load new syntax                     
  592         matchRegex(line);                    // recognize new patterns in the (remaining) line
  593         syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  594     }
  595 
          // jump target: the lookup below is repeated after an embedded code opener was handled
  596 SKIP_EMBEDDED:
  597     
  598     // Test if a regular expression was found at the current position
          // matchRegex() stores every group under its start position + 1
  599     if ( !regexGroups.empty() ) {
  600         if ( regexGroups.count ( lineIndex ) ) {
                  // the token is the matched text, starting at lineIndex-1
  601             token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );
  602 
                  // advance lineIndex by the rest of the match
  603             unsigned int oldIndex= lineIndex;
  604             if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;
  605 
  606             if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
  607                 //do not handle a nested section if the syntax is marked as "sealed" 
  608                 if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
  609                     embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
  610                     //remember position 
  611                     syntaxChangeIndex = lineIndex+2;
  612                     syntaxChangeLineNo = lineNumber;
  613                 }
  614                 
  615                 // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
                      // this call rebuilds regexGroups and skips EMBEDDED_CODE_BEGIN elements, so this branch is not entered again
  616                 matchRegex(line, EMBEDDED_CODE_BEGIN);
  617                 lineIndex = oldIndex;
  618                 goto SKIP_EMBEDDED; // this is how it should be done
  619             }
  620 
  621             if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
                      // the keyword list lookup uses the case-changed token if the syntax ignores case
  622                 string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
  623                 currentKeywordClass=currentSyntax->isKeyword ( reservedWord ); //check in lists (no regex)
  624                 
  625                 // for positional Tests; will not be used again for actual input parsing
  626                 // FIXME not needed?
  627                 //if (currentKeywordClass)
  628                 //    regexGroups[oldIndex]=ReGroup ( KEYWORD, reservedWord.size(), currentKeywordClass, "" );  
  629                 
                      // not found in the lists: fall back to the class of the KEYWORD regex, if any
  630                 if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD )
  631                     currentKeywordClass = regexGroups[oldIndex].kwClass;
  632                  
  633                 return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState, currentKeywordClass);
  634             } else {
  635                 return validateState(regexGroups[oldIndex].state, oldState, 0);
  636             }
  637         }
  638     }
  639 
  640     // Character not referring to any state
  641     token = c;
  642     return STANDARD;
  643 }
  644 
  645 State CodeGenerator::validateState(State newState, State oldState, unsigned int kwClass)
  646 {
  647 
  648     if (currentSyntax->getValidateStateChangeFct()) {
  649         Diluculum::LuaValueList params;
  650         params.push_back(Diluculum::LuaValue(oldState));
  651         params.push_back(Diluculum::LuaValue(newState));
  652         params.push_back(Diluculum::LuaValue(token));
  653         params.push_back(Diluculum::LuaValue(kwClass));
  654 
  655         Diluculum::LuaValueList res=
  656             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
  657                     params,"getValidateStateChangeFct call")  ;
  658 
  659         resultOfHook = res.size()>=1;
  660         if (resultOfHook) {
  661             State validatedState = (State)res[0].asInteger();
  662             if ( validatedState== _REJECT) {
  663                 // proceed using only the first character of the token
  664                 lineIndex -= (token.length() -1);
  665                 token=token.substr(0, 1);
  666                 return oldState;
  667             }
  668             stateTrace.push_back(validatedState);
  669             if (stateTrace.size()>200) stateTrace.erase(stateTrace.begin(), stateTrace.begin() + 100 );
  670             return validatedState;
  671         }
  672     }
  673     resultOfHook  = false;
  674     stateTrace.push_back(newState);
  675     if (stateTrace.size()>200) stateTrace.erase(stateTrace.begin(), stateTrace.begin() + 100 );    
  676     return newState;
  677 }
  678 
  679 //it is faster to pass ostream reference
  680 void CodeGenerator::maskString ( ostream& ss, const string & s )
  681 {
  682     for ( unsigned int i=0; i< s.length(); i++ ) {
  683         ss << maskCharacter ( s[i] );
  684 
  685         if (applySyntaxTestCase) {
  686             PositionState ps(currentState, currentKeywordClass, false);
  687             stateTraceCurrent.push_back(ps);
  688             
  689             if (stateTraceCurrent.size()>200) 
  690                 stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 ); 
  691         }
  692     }
  693 }
  694 
  695 
  696 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string&token)
  697 {
  698     Diluculum::LuaValueList params;
  699     params.push_back(Diluculum::LuaValue(token));
  700     params.push_back(Diluculum::LuaValue(currentState));
  701     params.push_back(Diluculum::LuaValue(currentKeywordClass));
  702     string trace(";");
  703     string trace2(";");
  704     if (stateTrace.size()>1){
  705         for (size_t i=0; i<stateTrace.size()-1;i++){
  706             trace += std::to_string (stateTrace[i]);
  707             trace += ";";
  708         }
  709     }
  710     params.push_back(Diluculum::LuaValue(trace));
  711 
  712     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
  713             params,"getDecorateFct call")  ;
  714 }
  715 
  716 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
  717 {
  718     if ( flushWhiteSpace )
  719         flushWs(1);
  720     string caseToken = StringTools::change_case ( token, tcase );
  721     if (currentSyntax->getDecorateFct()) {
  722 
  723         Diluculum::LuaValueList res=callDecorateFct(caseToken);
  724         if (res.size()==1) {
  725             *out<<res[0].asString();
  726         } else {
  727             maskString ( *out, caseToken );
  728         }
  729     } else {
  730         maskString ( *out, caseToken );
  731     }
  732 
  733     token.clear();
  734 }
  735 
  736 bool CodeGenerator::styleFound()
  737 {
  738     return docStyle.found();
  739 }
  740 
  741 bool CodeGenerator::printIndexFile ( const vector<string> &fileList,
  742                                      const string &outPath )
  743 {
  744     return true;
  745 }
  746 
  747 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
  748 {
  749 
  750     if ( formatter!=NULL ) {
  751         return true;
  752     }
  753 
  754     if ( !indentScheme.size() ) return false;
  755 
  756     formatter=new astyle::ASFormatter();
  757 
  758     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
  759         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
  760     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
  761         formatter->setFormattingStyle ( astyle::STYLE_KR );
  762     } else if ( indentScheme=="java" ) {
  763         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
  764     } else if ( indentScheme=="stroustrup" ) {
  765         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
  766     } else if ( indentScheme=="whitesmith" ) {
  767         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
  768     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
  769         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
  770     } else if ( indentScheme=="gnu" ) {
  771         formatter->setFormattingStyle ( astyle::STYLE_GNU );
  772     } else if ( indentScheme=="linux" ) {
  773         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
  774     } else if ( indentScheme=="horstmann" ) {
  775         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
  776     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
  777         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
  778     } else if ( indentScheme=="google") {
  779         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
  780     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
  781         formatter->setFormattingStyle ( astyle::STYLE_PICO );
  782     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
  783         formatter->setFormattingStyle ( astyle::STYLE_LISP );
  784     } else if ( indentScheme=="vtk") {
  785         formatter->setFormattingStyle ( astyle::STYLE_VTK );
  786     } else if ( indentScheme=="mozilla") {
  787         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
  788     } else if ( indentScheme=="webkit") {
  789         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
  790     } else if ( indentScheme!="user" ){
  791         return false;
  792     }
  793     return formattingEnabled=true;
  794 }
  795 
  796 
  797 /*Helper functions for astyle option parsing*/
  798 string CodeGenerator::getParam(const string& arg, const char* op)
  799 {
  800     return arg.substr(strlen(op));
  801 }
  802 
  803 string CodeGenerator::getParam(const string& arg, const char* op1, const char* op2)
  804 {
  805     return isParamOption(arg, op1) ? getParam(arg, op1) : getParam(arg, op2);
  806 }
  807 
  808 bool CodeGenerator::isOption(const string& arg, const char* op)
  809 {
  810     return arg.compare(op) == 0;
  811 }
  812 
  813 bool CodeGenerator::isOption(const string& arg, const char* op1, const char* op2)
  814 {
  815     return (isOption(arg, op1) || isOption(arg, op2));
  816 }
  817 
  818 bool CodeGenerator::isParamOption(const string& arg, const char* option)
  819 {
  820     bool retVal = arg.compare(0, strlen(option), option) == 0;
  821     // if comparing for short option, 2nd char of arg must be numeric
  822     if (retVal && strlen(option) == 1 && arg.length() > 1)
  823         if (!isdigit((unsigned char) arg[1]))
  824             retVal = false;
  825     return retVal;
  826 }
  827 
  828 bool CodeGenerator::isParamOption(const string& arg, const char* option1, const char* option2)
  829 {
  830     return isParamOption(arg, option1) || isParamOption(arg, option2);
  831 }
  832 
  833 //apply the same options as astyle
  834 void CodeGenerator::setIndentationOptions (const vector<string>& options){
  835     if (formatter) {
  836         string arg;
  837         for (unsigned int i=0; i<options.size(); i++) {
  838             arg=options[i];
  839             
  840             if (isOption(arg, "mode=cs"))
  841             {
  842                 formatter->setSharpStyle();
  843                 formatter->setModeManuallySet(true);
  844             }
  845             else if (isOption(arg, "mode=c"))
  846             {
  847                 formatter->setCStyle();
  848                 formatter->setModeManuallySet(true);
  849             }
  850             else if (isOption(arg, "mode=java"))
  851             {
  852                 formatter->setJavaStyle();
  853                 formatter->setModeManuallySet(true);
  854             }
  855             else if (isParamOption(arg, "t", "indent=tab="))
  856             {
  857                 int spaceNum = 4;
  858                 string spaceNumParam = getParam(arg, "t", "indent=tab=");
  859                 if (spaceNumParam.length() > 0)
  860                     spaceNum = atoi(spaceNumParam.c_str());
  861                 if (spaceNum >= 2 && spaceNum <= 20)
  862                     formatter->setTabIndentation(spaceNum, false);
  863             }
  864             else if (isOption(arg, "indent=tab"))
  865             {
  866                 formatter->setTabIndentation(4);
  867             }
  868             else if (isParamOption(arg, "T", "indent=force-tab="))
  869             {
  870                 int spaceNum = 4;
  871                 string spaceNumParam = getParam(arg, "T", "indent=force-tab=");
  872                 if (spaceNumParam.length() > 0)
  873                     spaceNum = atoi(spaceNumParam.c_str());
  874                 if (spaceNum >= 2 && spaceNum <= 20)
  875                     formatter->setTabIndentation(spaceNum, true);
  876             }
  877             else if (isOption(arg, "indent=force-tab"))
  878             {
  879                 formatter->setTabIndentation(4, true);
  880             }
  881             else if (isParamOption(arg, "xT", "indent=force-tab-x="))
  882             {
  883                 int tabNum = 8;
  884                 string tabNumParam = getParam(arg, "xT", "indent=force-tab-x=");
  885                 if (tabNumParam.length() > 0)
  886                     tabNum = atoi(tabNumParam.c_str());
  887                 if (tabNum >= 2 && tabNum <= 20)
  888                     formatter->setForceTabXIndentation(tabNum);
  889                 
  890             }
  891             else if (isOption(arg, "indent=force-tab-x"))
  892             {
  893                 formatter->setForceTabXIndentation(8);
  894             }
  895             else if (isParamOption(arg, "s", "indent=spaces="))
  896             {
  897                 int spaceNum = 4;
  898                 string spaceNumParam = getParam(arg, "s", "indent=spaces=");
  899                 if (spaceNumParam.length() > 0)
  900                     spaceNum = atoi(spaceNumParam.c_str());
  901                 if (spaceNum >= 2 && spaceNum <= 20)
  902                     formatter->setSpaceIndentation(spaceNum);
  903             }
  904             else if (isOption(arg, "indent=spaces"))
  905             {
  906                 formatter->setSpaceIndentation(4);
  907             }
  908             else if (isParamOption(arg, "xt", "indent-continuation="))
  909             {
  910                 int contIndent = 1;
  911                 string contIndentParam = getParam(arg, "xt", "indent-continuation=");
  912                 if (contIndentParam.length() > 0)
  913                     contIndent = atoi(contIndentParam.c_str());
  914                 if (contIndent > 0 && contIndent < 5)
  915                     formatter->setContinuationIndentation(contIndent);
  916             }
  917             else if (isParamOption(arg, "m", "min-conditional-indent="))
  918             {
  919                 int minIndent = astyle::MINCOND_TWO;
  920                 string minIndentParam = getParam(arg, "m", "min-conditional-indent=");
  921                 if (minIndentParam.length() > 0)
  922                     minIndent = atoi(minIndentParam.c_str());
  923                 if (minIndent < astyle::MINCOND_END)
  924                     formatter->setMinConditionalIndentOption(minIndent);
  925             }
  926             else if (isParamOption(arg, "M", "max-continuation-indent="))
  927             {
  928                 int maxIndent = 40;
  929                 string maxIndentParam = getParam(arg, "M", "max-continuation-indent=");
  930                 if (maxIndentParam.length() > 0)
  931                     maxIndent = atoi(maxIndentParam.c_str());
  932                 if (maxIndent >= 40 && maxIndent <= 120)
  933                     formatter->setMaxContinuationIndentLength(maxIndent);
  934             }
  935             else if (isOption(arg, "N", "indent-namespaces"))
  936             {
  937                 formatter->setNamespaceIndent(true);
  938             }
  939             else if (isOption(arg, "C", "indent-classes"))
  940             {
  941                 formatter->setClassIndent(true);
  942             }
  943             else if (isOption(arg, "xG", "indent-modifiers"))
  944             {
  945                 formatter->setModifierIndent(true);
  946             }
  947             else if (isOption(arg, "S", "indent-switches"))
  948             {
  949                 formatter->setSwitchIndent(true);
  950             }
  951             else if (isOption(arg, "K", "indent-cases"))
  952             {
  953                 formatter->setCaseIndent(true);
  954             }
  955             else if (isOption(arg, "xU", "indent-after-parens"))
  956             {
  957                 formatter->setAfterParenIndent(true);
  958             }
  959             else if (isOption(arg, "L", "indent-labels"))
  960             {
  961                 formatter->setLabelIndent(true);
  962             }
  963             else if (isOption(arg, "xW", "indent-preproc-block"))
  964             {
  965                 formatter->setPreprocBlockIndent(true);
  966             }
  967             else if (isOption(arg, "w", "indent-preproc-define"))
  968             {
  969                 formatter->setPreprocDefineIndent(true);
  970             }
  971             else if (isOption(arg, "xw", "indent-preproc-cond"))
  972             {
  973                 formatter->setPreprocConditionalIndent(true);
  974             }
  975             else if (isOption(arg, "y", "break-closing-braces"))
  976             {
  977                 formatter->setBreakClosingHeaderBracesMode(true);
  978             }
  979             else if (isOption(arg, "O", "keep-one-line-blocks"))
  980             {
  981                 formatter->setBreakOneLineBlocksMode(false);
  982             }
  983             else if (isOption(arg, "o", "keep-one-line-statements"))
  984             {
  985                 formatter->setBreakOneLineStatementsMode(false);
  986             }
  987             else if (isOption(arg, "P", "pad-paren"))
  988             {
  989                 formatter->setParensOutsidePaddingMode(true);
  990                 formatter->setParensInsidePaddingMode(true);
  991             }
  992             else if (isOption(arg, "d", "pad-paren-out"))
  993             {
  994                 formatter->setParensOutsidePaddingMode(true);
  995             }
  996             else if (isOption(arg, "xd", "pad-first-paren-out"))
  997             {
  998                 formatter->setParensFirstPaddingMode(true);
  999             }
 1000             else if (isOption(arg, "D", "pad-paren-in"))
 1001             {
 1002                 formatter->setParensInsidePaddingMode(true);
 1003             }
 1004             else if (isOption(arg, "H", "pad-header"))
 1005             {
 1006                 formatter->setParensHeaderPaddingMode(true);
 1007             }
 1008             else if (isOption(arg, "U", "unpad-paren"))
 1009             {
 1010                 formatter->setParensUnPaddingMode(true);
 1011             }
 1012             else if (isOption(arg, "p", "pad-oper"))
 1013             {
 1014                 formatter->setOperatorPaddingMode(true);
 1015             }
 1016             else if (isOption(arg, "xg", "pad-comma"))
 1017             {
 1018                 formatter->setCommaPaddingMode(true);
 1019             }
 1020             else if (isOption(arg, "xe", "delete-empty-lines"))
 1021             {
 1022                 formatter->setDeleteEmptyLinesMode(true);
 1023             }
 1024             else if (isOption(arg, "E", "fill-empty-lines"))
 1025             {
 1026                 formatter->setEmptyLineFill(true);
 1027             }
 1028             else if (isOption(arg, "c", "convert-tabs"))
 1029             {
 1030                 formatter->setTabSpaceConversionMode(true);
 1031             }
 1032             else if (isOption(arg, "xy", "close-templates"))
 1033             {
 1034                 formatter->setCloseTemplatesMode(true);
 1035             }
 1036             else if (isOption(arg, "F", "break-blocks=all"))
 1037             {
 1038                 formatter->setBreakBlocksMode(true);
 1039                 formatter->setBreakClosingHeaderBlocksMode(true);
 1040             }
 1041             else if (isOption(arg, "f", "break-blocks"))
 1042             {
 1043                 formatter->setBreakBlocksMode(true);
 1044             }
 1045             else if (isOption(arg, "e", "break-elseifs"))
 1046             {
 1047                 formatter->setBreakElseIfsMode(true);
 1048             }
 1049             else if (isOption(arg, "xb", "break-one-line-headers"))
 1050             {
 1051                 formatter->setBreakOneLineHeadersMode(true);
 1052             }
 1053             else if (isOption(arg, "j", "add-braces"))
 1054             {
 1055                 formatter->setAddBracesMode(true);
 1056             }
 1057             else if (isOption(arg, "J", "add-one-line-braces"))
 1058             {
 1059                 formatter->setAddOneLineBracesMode(true);
 1060             }
 1061             else if (isOption(arg, "xj", "remove-braces"))
 1062             {
 1063                 formatter->setRemoveBracesMode(true);
 1064             }
 1065             else if (isOption(arg, "Y", "indent-col1-comments"))
 1066             {
 1067                 formatter->setIndentCol1CommentsMode(true);
 1068             }
 1069             else if (isOption(arg, "align-pointer=type"))
 1070             {
 1071                 formatter->setPointerAlignment(astyle::PTR_ALIGN_TYPE);
 1072             }
 1073             else if (isOption(arg, "align-pointer=middle"))
 1074             {
 1075                 formatter->setPointerAlignment(astyle::PTR_ALIGN_MIDDLE);
 1076             }
 1077             else if (isOption(arg, "align-pointer=name"))
 1078             {
 1079                 formatter->setPointerAlignment(astyle::PTR_ALIGN_NAME);
 1080             }
 1081             else if (isParamOption(arg, "k"))
 1082             {
 1083                 int align = 0;
 1084                 string styleParam = getParam(arg, "k");
 1085                 if (styleParam.length() > 0)
 1086                     align = atoi(styleParam.c_str());
 1087                 if (align == 1)
 1088                     formatter->setPointerAlignment(astyle::PTR_ALIGN_TYPE);
 1089                 else if (align == 2)
 1090                     formatter->setPointerAlignment(astyle::PTR_ALIGN_MIDDLE);
 1091                 else if (align == 3)
 1092                     formatter->setPointerAlignment(astyle::PTR_ALIGN_NAME);
 1093             }
 1094             else if (isOption(arg, "align-reference=none"))
 1095             {
 1096                 formatter->setReferenceAlignment(astyle::REF_ALIGN_NONE);
 1097             }
 1098             else if (isOption(arg, "align-reference=type"))
 1099             {
 1100                 formatter->setReferenceAlignment(astyle::REF_ALIGN_TYPE);
 1101             }
 1102             else if (isOption(arg, "align-reference=middle"))
 1103             {
 1104                 formatter->setReferenceAlignment(astyle::REF_ALIGN_MIDDLE);
 1105             }
 1106             else if (isOption(arg, "align-reference=name"))
 1107             {
 1108                 formatter->setReferenceAlignment(astyle::REF_ALIGN_NAME);
 1109             }
 1110             else if (isParamOption(arg, "W"))
 1111             {
 1112                 int align = 0;
 1113                 string styleParam = getParam(arg, "W");
 1114                 if (styleParam.length() > 0)
 1115                     align = atoi(styleParam.c_str());
 1116                 if (align == 0)
 1117                     formatter->setReferenceAlignment(astyle::REF_ALIGN_NONE);
 1118                 else if (align == 1)
 1119                     formatter->setReferenceAlignment(astyle::REF_ALIGN_TYPE);
 1120                 else if (align == 2)
 1121                     formatter->setReferenceAlignment(astyle::REF_ALIGN_MIDDLE);
 1122                 else if (align == 3)
 1123                     formatter->setReferenceAlignment(astyle::REF_ALIGN_NAME);
 1124             }
 1125             else if (isParamOption(arg, "max-code-length="))
 1126             {
 1127                 int maxLength = 50;
 1128                 string maxLengthParam = getParam(arg, "max-code-length=");
 1129                 if (maxLengthParam.length() > 0)
 1130                     maxLength = atoi(maxLengthParam.c_str());
 1131                 if (maxLength >= 50 && maxLength<= 200)
 1132                     formatter->setMaxCodeLength(maxLength);
 1133             }
 1134             else if (isParamOption(arg, "xC"))
 1135             {
 1136                 int maxLength = 50;
 1137                 string maxLengthParam = getParam(arg, "xC");
 1138                 if (maxLengthParam.length() > 0)
 1139                     maxLength = atoi(maxLengthParam.c_str());
 1140                 if (maxLength > 0 && maxLength<= 200)
 1141                     formatter->setMaxCodeLength(maxLength);
 1142             }
 1143             else if (isOption(arg, "xL", "break-after-logical"))
 1144             {
 1145                 formatter->setBreakAfterMode(true);
 1146             }
 1147             else if (isOption(arg, "xc", "attach-classes"))
 1148             {
 1149                 formatter->setAttachClass(true);
 1150             }
 1151             else if (isOption(arg, "xV", "attach-closing-while"))
 1152             {
 1153                 formatter->setAttachClosingWhile(true);
 1154             }
 1155             else if (isOption(arg, "xk", "attach-extern-c"))
 1156             {
 1157                 formatter->setAttachExternC(true);
 1158             }
 1159             else if (isOption(arg, "xn", "attach-namespaces"))
 1160             {
 1161                 formatter->setAttachNamespace(true);
 1162             }
 1163             else if (isOption(arg, "xl", "attach-inlines"))
 1164             {
 1165                 formatter->setAttachInline(true);
 1166             }
 1167             else if (isOption(arg, "xp", "remove-comment-prefix"))
 1168             {
 1169                 formatter->setStripCommentPrefix(true);
 1170             }
 1171             else if (isOption(arg, "xB", "break-return-type"))
 1172             {
 1173                 formatter->setBreakReturnType(true);
 1174             }
 1175             else if (isOption(arg, "xD", "break-return-type-decl"))
 1176             {
 1177                 formatter->setBreakReturnTypeDecl(true);
 1178             }
 1179             else if (isOption(arg, "xf", "attach-return-type"))
 1180             {
 1181                 formatter->setAttachReturnType(true);
 1182             }
 1183             else if (isOption(arg, "xh", "attach-return-type-decl"))
 1184             {
 1185                 formatter->setAttachReturnTypeDecl(true);
 1186             }
 1187             // Objective-C options
 1188             else if (isOption(arg, "xQ", "pad-method-prefix"))
 1189             {
 1190                 formatter->setMethodPrefixPaddingMode(true);
 1191             }
 1192             else if (isOption(arg, "xR", "unpad-method-prefix"))
 1193             {
 1194                 formatter->setMethodPrefixUnPaddingMode(true);
 1195             }
 1196             else if (isOption(arg, "xq", "pad-return-type"))
 1197             {
 1198                 formatter->setReturnTypePaddingMode(true);
 1199             }
 1200             else if (isOption(arg, "xr", "unpad-return-type"))
 1201             {
 1202                 formatter->setReturnTypeUnPaddingMode(true);
 1203             }
 1204             else if (isOption(arg, "xS", "pad-param-type"))
 1205             {
 1206                 formatter->setParamTypePaddingMode(true);
 1207             }
 1208             else if (isOption(arg, "xs", "unpad-param-type"))
 1209             {
 1210                 formatter->setParamTypeUnPaddingMode(true);
 1211             }
 1212             else if (isOption(arg, "xM", "align-method-colon"))
 1213             {
 1214                 formatter->setAlignMethodColon(true);
 1215             }
 1216             else if (isOption(arg, "xP0", "pad-method-colon=none"))
 1217             {
 1218                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_NONE);
 1219             }
 1220             else if (isOption(arg, "xP1", "pad-method-colon=all"))
 1221             {
 1222                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_ALL);
 1223             }
 1224             else if (isOption(arg, "xP2", "pad-method-colon=after"))
 1225             {
 1226                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_AFTER);
 1227             }
 1228             else if (isOption(arg, "xP3", "pad-method-colon=before"))
 1229             {
 1230                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_BEFORE);
 1231             }
 1232         }
 1233     }
 1234 }
 1235 
 1236 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
 1237 {
 1238 
 1239     if (!embedded) {
 1240         while (!nestedLangs.empty()) {
 1241             nestedLangs.pop();
 1242         }   
 1243     }
 1244     
 1245     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
 1246     LoadResult result=LOAD_OK;
 1247 
 1248     if ( reloadNecessary ) {
 1249         if (syntaxReaders.count(langDefPath)) {
 1250             currentSyntax=syntaxReaders[langDefPath];
 1251             result=LOAD_OK;
 1252         } else {
 1253             currentSyntax=new SyntaxReader();
 1254             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
 1255             syntaxReaders[langDefPath]=currentSyntax;
 1256         }
 1257 
 1258         if ( result==LOAD_OK ) {
 1259             formattingPossible=currentSyntax->enableReformatting();
 1260 
 1261             if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
 1262                 // remove dynamic keyword tag delimiters of the old language definition
 1263                 vector<string>::iterator keyStyleOpenBegin =
 1264                     openTags.begin() + NUMBER_BUILTIN_STATES;
 1265                 vector<string>::iterator keyStyleCloseBegin =
 1266                     closeTags.begin() + NUMBER_BUILTIN_STATES;
 1267                 openTags.erase ( keyStyleOpenBegin, openTags.end() );
 1268                 closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
 1269             }
 1270             // add new keyword tag delimiters
 1271             for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
 1272                 openTags.push_back ( getKeywordOpenTag ( i ) );
 1273                 closeTags.push_back ( getKeywordCloseTag ( i ) );
 1274             }
 1275             
 1276             //test balloon
 1277             string overrideSpacer(currentSyntax->getOverrideConfigVal("spacer"));
 1278             if (!overrideSpacer.empty()) {
 1279                 spacer = overrideSpacer;
 1280             }
 1281             string overrideMaskWS(currentSyntax->getOverrideConfigVal("maskws"));
 1282             if (!overrideMaskWS.empty()) {
 1283                 maskWs = overrideMaskWS=="true";
 1284             }
 1285             
 1286         }
 1287     }
 1288     return result;
 1289 }
 1290 
 1291 bool CodeGenerator::validateInputStream()
 1292 {
 1293     if ( !in ) return false;
 1294 
 1295     // it is not possible to move stream pointer back with stdin
 1296     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
 1297         return true;
 1298 
 1299     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
 1300     // Magic configuration of "file"
 1301     // This is intended for web plugins - only check filetypes often found in the net
 1302     char magic_gif[]    = {'G','I','F','8', 0};
 1303     char magic_png[]    = {'\x89','P','N','G', 0};
 1304     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
 1305     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
 1306     char magic_bmp[]    = {'B','M', 0};
 1307     char magic_pdf[]    = {'%','P','D','F', 0};
 1308     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
 1309     char magic_rar[]    = {'R','a','r','!', 0};
 1310     char magic_zip[]    = {'P','K','\x03','\x04', 0};
 1311     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
 1312     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
 1313     char magic_bzip[]   = {'B','Z', 0};
 1314 
 1315     char* magic_table[] = {magic_utf8,
 1316                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
 1317                            magic_java,
 1318                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
 1319                            0
 1320                           };
 1321 
 1322     char buffer [10]= {0};
 1323     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
 1324 
 1325     int magic_index=0;
 1326     while ( magic_table[magic_index] ) {
 1327         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
 1328             break;
 1329         }
 1330         magic_index++;
 1331     }
 1332     int streamReadPos=0;
 1333     if ( magic_table[magic_index] == magic_utf8 ) {
 1334         //setEncoding("utf-8");
 1335         streamReadPos=3; // remove UTF-8 magic number from output
 1336     }
 1337 
 1338     in -> seekg ( streamReadPos, ios::beg );
 1339     in-> clear();  // clear fail bit to continue reading
 1340 
 1341     return !magic_table[magic_index] // points to 0 if no pattern was found
 1342            || magic_table[magic_index] == magic_utf8;
 1343 }
 1344 
 1345 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
 1346     
 1347     if ( pluginChunks.size()) {
 1348     
 1349         Diluculum::LuaState luaState;
 1350 
 1351         Diluculum::LuaValueList chunkParams;
 1352         chunkParams.push_back(currentSyntax->getDescription());
 1353         for (unsigned int i=0; i<pluginChunks.size(); i++) {
 1354             luaState.call(*pluginChunks[i], chunkParams, "format user function");
 1355         }
 1356         
 1357         if (luaState.globals().count(fctName)) {
 1358             Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
 1359         
 1360             luaState["HL_INPUT_FILE"] = luaState["HL_PLUGIN_PARAM"] = pluginParameter;
 1361             luaState["HL_OUTPUT"] = outputType;
 1362             luaState["HL_FORMAT_HTML"]=HTML;
 1363             luaState["HL_FORMAT_XHTML"]=XHTML;
 1364             luaState["HL_FORMAT_TEX"]=TEX;
 1365             luaState["HL_FORMAT_LATEX"]=LATEX;
 1366             luaState["HL_FORMAT_RTF"]=RTF;
 1367             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
 1368             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
 1369             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
 1370             luaState["HL_FORMAT_SVG"]=SVG;
 1371             luaState["HL_FORMAT_BBCODE"]=BBCODE;
 1372             luaState["HL_FORMAT_PANGO"]=PANGO;
 1373             luaState["HL_FORMAT_ODT"]=ODTFLAT;
 1374             
 1375             Diluculum::LuaValueList params;
 1376             Diluculum::LuaValueMap options;
 1377             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );   
 1378             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);   
 1379             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);   
 1380             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());   
 1381             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());   
 1382 
 1383             params.push_back(inputFilesCnt);
 1384             params.push_back(processedFilesCnt);
 1385             params.push_back(options);
 1386             
 1387             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call")  ;
 1388             if (res.size()>=1) {
 1389                 *keepDefault=false;
 1390                 *result = res[0].asString();
 1391                 if (res.size()==2)
 1392                     *keepDefault = res[1].asBoolean();
 1393             }
 1394             delete documentFct;
 1395         }
 1396     }
 1397 }
 1398 
 1399 void CodeGenerator::printHeader()
 1400 {
 1401     bool keepDefaultHeader=true;
 1402     string pluginHeader;
 1403     
 1404     processedFilesCnt++;
 1405     
 1406     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
 1407 
 1408     if ( ! fragmentOutput && keepDefaultHeader)
 1409         *out << getHeader();
 1410     
 1411     *out << pluginHeader; 
 1412    
 1413     if ( !fragmentOutput || keepInjections)
 1414         *out << currentSyntax->getHeaderInjection();
 1415 }
 1416 
 1417 void CodeGenerator::printFooter()
 1418 {
 1419     
 1420     bool keepDefaultFooter=true;
 1421     string pluginFooter;
 1422     
 1423     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
 1424     
 1425     if ( !fragmentOutput || keepInjections)
 1426         *out << currentSyntax->getFooterInjection();
 1427 
 1428     *out << pluginFooter; 
 1429     
 1430     if ( ! fragmentOutput && keepDefaultFooter )
 1431         *out << getFooter();
 1432 }
 1433 
 1434 ParseError CodeGenerator::generateFile ( const string &inFileName,
 1435         const string &outFileName )
 1436 {
 1437     if ( !docStyle.found() ) {
 1438         return BAD_STYLE;
 1439     }
 1440 
 1441     reset();
 1442 
 1443     ParseError error=PARSE_OK;
 1444 
 1445     inFile=inFileName;
 1446     outFile=outFileName;
 1447         
 1448     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
 1449 
 1450     if ( validateInput )
 1451         if ( !validateInputStream() ) error= BAD_INPUT;
 1452 
 1453     if ( !in->fail() && error==PARSE_OK ) {
 1454         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
 1455         if ( out->fail() ) {
 1456             error=BAD_OUTPUT;
 1457         }
 1458     }
 1459 
 1460     if ( in->fail() ) {
 1461         error=BAD_INPUT;
 1462     }
 1463 
 1464     if ( error==PARSE_OK ) {
 1465         if ( formatter != NULL ) {
 1466             formatter->init ( new astyle::ASStreamIterator ( in ) );
 1467         }
 1468         printHeader();
 1469         printBody();
 1470         printFooter();
 1471     }
 1472 
 1473     if ( !outFileName.empty() ) {
 1474         delete out;
 1475         out=NULL;
 1476     }
 1477     if ( !inFileName.empty() ) {
 1478         delete in;
 1479         in=NULL;
 1480     }
 1481     return error;
 1482 }
 1483 
 1484 string CodeGenerator::generateString ( const string &input )
 1485 {
 1486 
 1487     if ( !docStyle.found() ) {
 1488         return "";
 1489     }
 1490 
 1491     reset();
 1492 
 1493     in = new istringstream ( input );
 1494     out = new ostringstream ();
 1495 
 1496     if ( in->fail() || out->fail() ) {
 1497         return "";
 1498     }
 1499 
 1500     if ( formatter != NULL ) {
 1501         formatter->init ( new astyle::ASStreamIterator ( in ) );
 1502     }
 1503     printHeader();
 1504     printBody();
 1505     printFooter();
 1506 
 1507     string result = static_cast<ostringstream*> ( out )->str();
 1508 
 1509     delete out;
 1510     out=NULL;
 1511     delete in;
 1512     in=NULL;
 1513 
 1514     return result;
 1515 }
 1516 
 1517 string CodeGenerator::generateStringFromFile ( const string &inFileName )
 1518 {
 1519 
 1520     if ( !docStyle.found() ) {
 1521         return "";
 1522     }
 1523 
 1524     reset();
 1525 
 1526     inFile = inFileName;
 1527     
 1528     in = new ifstream ( inFileName.c_str() );
 1529     out = new ostringstream ();
 1530 
 1531     if ( in->fail() || out->fail() ) {
 1532         return "";
 1533     }
 1534 
 1535     if ( validateInput && !validateInputStream() ) {
 1536         return "ERROR: detected binary input";
 1537     }
 1538 
 1539     if ( formatter != NULL ) {
 1540         formatter->init ( new astyle::ASStreamIterator ( in ) );
 1541     }
 1542     printHeader();
 1543     printBody();
 1544     printFooter();
 1545 
 1546     string result = static_cast<ostringstream*> ( out )->str();
 1547 
 1548     delete out;
 1549     out=NULL;
 1550     delete in;
 1551     in=NULL;
 1552 
 1553     return result;
 1554 }
 1555 
 1556 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
 1557 {
 1558     if ( s==KEYWORD && kwClassID ) {
 1559         return NUMBER_BUILTIN_STATES + kwClassID-1;
 1560     }
 1561     return ( unsigned int ) s ;
 1562 }
 1563 
 1564 void CodeGenerator::openTag ( State s )
 1565 {
 1566     *out << openTags[ ( unsigned int ) s];
 1567     currentState=s;
 1568 
 1569 }
 1570 
 1571 void CodeGenerator::closeTag ( State s )
 1572 {
 1573     *out << closeTags[ ( unsigned int ) s];
 1574     flushWs(2);
 1575     currentState=_UNKNOWN;
 1576 }
 1577 
 1578 void CodeGenerator::openKWTag ( unsigned int kwClassID )
 1579 {
 1580     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1581     currentState=KEYWORD;
 1582 }
 1583 
 1584 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
 1585 {
 1586     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1587     flushWs(3);
 1588     currentState=_UNKNOWN;
 1589 }
 1590 
 1591 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
 1592 {
 1593     if (nestedLangs.empty()) {
 1594         nestedLangs.push(currentSyntax->getCurrentPath() );
 1595     }
 1596     if (nestedLangs.top() != embedLangDefPath) {
 1597         nestedLangs.push(embedLangDefPath);
 1598     }
 1599     LoadResult res = loadLanguage(embedLangDefPath, true);
 1600     //pass end delimiter regex to syntax description
 1601     currentSyntax->restoreLangEndDelim(embedLangDefPath);
 1602     return res == LOAD_OK;
 1603 }
 1604 
 1605 ///////////////////////////////////////////////////////////////////////////////
 1606 
 1607 void CodeGenerator::processRootState()
 1608 {
 1609     bool eof=false,
 1610          firstLine=true; // avoid newline before printing the first output line
 1611 
 1612     applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
 1613     
 1614     if ( currentSyntax->highlightingDisabled() ) {
 1615         string line;
 1616         while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
 1617             ++lineNumber;
 1618             insertLineNumber ( !firstLine );
 1619             flushWs(4);
 1620             firstLine=false;
 1621             if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
 1622                 maskString ( *out, line );
 1623         }
 1624         *out << flush;
 1625         return;
 1626     }
 1627 
 1628     if (!embedLangStart.empty()) {
 1629         if (!loadEmbeddedLang(currentSyntax->getNewPath(embedLangStart))) return;
 1630     }
 1631 
 1632     State state=STANDARD;
 1633 
 1634     openTag ( STANDARD );
 1635     do {
 1636         // determine next state
 1637         state= getCurrentState(STANDARD);
 1638 
 1639         // handle current state
 1640         switch ( state ) {
 1641         case KEYWORD:
 1642             closeTag ( STANDARD );
 1643             eof=processKeywordState ( state );
 1644             openTag ( STANDARD );
 1645             break;
 1646         case NUMBER:
 1647             closeTag ( STANDARD );
 1648             eof=processNumberState();
 1649             openTag ( STANDARD );
 1650             break;
 1651         case ML_COMMENT:
 1652             closeTag ( STANDARD );
 1653             eof=processMultiLineCommentState();
 1654             openTag ( STANDARD );
 1655             break;
 1656         case SL_COMMENT:
 1657             closeTag ( STANDARD );
 1658             eof=processSingleLineCommentState();
 1659             openTag ( STANDARD );
 1660             break;
 1661         case STRING:
 1662             closeTag ( STANDARD );
 1663             eof=processStringState ( STANDARD );
 1664             openTag ( STANDARD );
 1665             break;
 1666         case DIRECTIVE:
 1667             closeTag ( STANDARD );
 1668             eof=processDirectiveState();
 1669             openTag ( STANDARD );
 1670             break;
 1671         case ESC_CHAR:
 1672             closeTag ( STANDARD );
 1673             eof=processEscapeCharState();
 1674             openTag ( STANDARD );
 1675             break;
 1676         case SYMBOL:
 1677             closeTag ( STANDARD );
 1678             eof=processSymbolState();
 1679             openTag ( STANDARD );
 1680             break;
 1681 
 1682         case EMBEDDED_CODE_END:
 1683             closeTag ( STANDARD );
 1684             eof=processSyntaxChangeState(state);
 1685             openTag ( STANDARD );
 1686             break;
 1687         case _EOL:
 1688 
 1689             // XTERM256 fix (issue with less cmd)
 1690             if  (!firstLine || showLineNumbers) {
 1691                 closeTag ( STANDARD );
 1692             }
 1693             insertLineNumber ( !firstLine );
 1694             if (!firstLine || showLineNumbers) {
 1695                 flushWs(5);
 1696                 stateTraceCurrent.clear();
 1697                 openTag ( STANDARD );
 1698             }
 1699             firstLine=false;
 1700             break;
 1701         case _EOF:
 1702             eof=true;
 1703             break;
 1704         case _WS:
 1705             processWsState();
 1706             break;
 1707         default:
 1708             printMaskedToken ();
 1709             break;
 1710         }
 1711     } while ( !eof );
 1712     closeTag ( STANDARD );
 1713 
 1714     if (currentSyntax->getDecorateLineEndFct()) {
 1715         Diluculum::LuaValueList res=callDecorateLineFct(false);
 1716         if (res.size()==1) {
 1717             *out << res[0].asString();
 1718         }
 1719     }
 1720 
 1721     printNewLines = !noTrailingNewLine;
 1722     *out << getNewLine();
 1723     *out << flush;
 1724 }
 1725 
 1726 bool CodeGenerator::processSyntaxChangeState(State myState)
 1727 {
 1728     State newState=STANDARD;
 1729     bool eof=false,
 1730          exitState=false;
 1731 
 1732     openTag ( KEYWORD );
 1733     do {
 1734 
 1735         if (myState==EMBEDDED_CODE_END) {
 1736             if (!nestedLangs.empty()) {
 1737                 nestedLangs.pop();
 1738             }
 1739             // load host language syntax
 1740             if (!nestedLangs.empty()) {
 1741                 loadLanguage(nestedLangs.top(), true);
 1742             }
 1743             matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
 1744         }
 1745         
 1746         printMaskedToken ( newState!=_WS );
 1747 
 1748         newState= getCurrentState(myState);
 1749 
 1750         switch ( newState ) {
 1751         case _WS:
 1752             processWsState();
 1753             break;
 1754         case _EOL:
 1755             insertLineNumber();
 1756             exitState=true;
 1757             break;
 1758         case _EOF:
 1759             eof = true;
 1760             break;
 1761         default:
 1762             exitState=true;
 1763             break;
 1764         }
 1765     } while (  !exitState  &&  !eof );
 1766     closeTag ( KEYWORD );
 1767 
 1768     return eof;
 1769 }
 1770 
 1771 
 1772 bool CodeGenerator::processKeywordState ( State myState )
 1773 {
 1774     State newState=STANDARD;
 1775     unsigned int myClassID=currentKeywordClass;
 1776     bool eof=false,
 1777          exitState=false;
 1778 
 1779     openKWTag ( myClassID );
 1780     do {
 1781         printMaskedToken ( newState!=_WS,
 1782                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
 1783         newState= getCurrentState(myState);
 1784         switch ( newState ) {
 1785         case _WS:
 1786             processWsState();
 1787             break;
 1788         case _EOL:
 1789             insertLineNumber();
 1790             exitState=true;
 1791             break;
 1792         case _EOF:
 1793             eof = true;
 1794             break;
 1795         case KEYWORD_END:
 1796             exitState=true;
 1797             break;
 1798         default:
 1799             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
 1800             break;
 1801         }
 1802     } while ( !exitState  &&  !eof );
 1803 
 1804     closeKWTag ( myClassID );
 1805 
 1806     currentKeywordClass=0;
 1807     return eof;
 1808 }
 1809 
 1810 bool CodeGenerator::processNumberState()
 1811 {
 1812     State newState=STANDARD;
 1813     bool eof=false,
 1814          exitState=false;
 1815     openTag ( NUMBER );
 1816     do {
 1817         printMaskedToken ( newState!=_WS );
 1818         newState= getCurrentState(NUMBER);
 1819         switch ( newState ) {
 1820         case _WS:
 1821             processWsState();
 1822             break;
 1823         case _EOL:
 1824             insertLineNumber();
 1825             exitState=true;
 1826             break;
 1827         case _EOF:
 1828             eof = true;
 1829             break;
 1830         default:
 1831             exitState=newState!=NUMBER;
 1832             break;
 1833         }
 1834     } while ( !exitState && !eof );
 1835 
 1836     closeTag ( NUMBER );
 1837     return eof;
 1838 }
 1839 
 1840 
 1841 
 1842 bool CodeGenerator::processMultiLineCommentState()
 1843 {
 1844     int commentCount=1;
 1845     int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
 1846     State newState=STANDARD;
 1847     bool eof=false, exitState=false;
 1848     unsigned int startColumn=lineIndex - token.size() ;
 1849     openTag ( ML_COMMENT );
 1850     do {
 1851         printMaskedToken (newState!=_WS );
 1852         newState= getCurrentState(ML_COMMENT);
 1853 
 1854         switch ( newState ) {
 1855         case _WS:
 1856             processWsState();
 1857             break;
 1858         case _EOL:
 1859             wsBuffer += closeTags[ML_COMMENT];
 1860             insertLineNumber();
 1861             wsBuffer += openTags[ML_COMMENT];
 1862             startColumn=0;
 1863             break;
 1864         case _EOF:
 1865             eof = true;
 1866             break;
 1867         case _TESTPOS:
 1868             runSyntaxTestcases(token=="<" ? startColumn : lineIndex-1 );
 1869             printMaskedToken();
 1870             
 1871             break;
 1872         case ML_COMMENT:
 1873 
 1874             if ( currentSyntax->allowNestedMLComments() ) {
 1875                 ++commentCount;
 1876             }
 1877             // if delimiters are equal, close the comment by continueing to
 1878             // ML_COMMENT_END section
 1879             if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;
 1880 
 1881         case ML_COMMENT_END:
 1882 
 1883             if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
 1884                 break;
 1885             }
 1886             commentCount--;
 1887             if ( !commentCount ) {
 1888                 printMaskedToken();
 1889                 exitState=true;
 1890             }
 1891             break;
 1892         default:
 1893             break;
 1894         }
 1895     } while ( !exitState  &&  !eof );
 1896 
 1897     closeTag ( ML_COMMENT );
 1898     
 1899     stateTraceCurrent.clear();
 1900     
 1901     return eof;
 1902 }
 1903 
 1904 
 1905 bool CodeGenerator::processSingleLineCommentState()
 1906 {
 1907     if ( checkSpecialCmd() ) {
 1908         return in->bad(); // if input stream is bad, report eof to calling method
 1909     }
 1910 
 1911     State newState=STANDARD;
 1912     bool eof=false, exitState=false;
 1913     unsigned int startColumn = lineIndex - token.size() ;
 1914 
 1915     openTag ( SL_COMMENT );
 1916     do {
 1917         printMaskedToken ( newState!=_WS );
 1918         newState= getCurrentState(SL_COMMENT);
 1919 
 1920         switch ( newState ) {
 1921         case _WS:
 1922             processWsState();
 1923             break;
 1924         case _EOL:
 1925             printMaskedToken();
 1926             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1927                 exitState=false;
 1928             } else {
 1929                 exitState=true;
 1930             }
 1931             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
 1932             insertLineNumber();
 1933             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
 1934 
 1935             break;
 1936         case _EOF:
 1937             eof = true;
 1938             break;
 1939         case _TESTPOS:
 1940             runSyntaxTestcases(token=="<" ? startColumn : lineIndex-1 );
 1941             printMaskedToken();
 1942             break;
 1943      
 1944         default:
 1945             break;
 1946         }
 1947     } while ( !exitState  &&  !eof );
 1948 
 1949     closeTag ( SL_COMMENT );
 1950     
 1951     stateTraceCurrent.clear();
 1952     
 1953     return eof;
 1954 }
 1955 
 1956 bool CodeGenerator::processDirectiveState()
 1957 {
 1958     State  newState=STANDARD;
 1959     bool eof=false, exitState=false;
 1960 
 1961     openTag ( DIRECTIVE );
 1962     do {
 1963         printMaskedToken ( newState!=_WS );
 1964         newState= getCurrentState(DIRECTIVE);
 1965         switch ( newState ) {
 1966         case _WS:
 1967             processWsState();
 1968             break;
 1969         case DIRECTIVE_END:
 1970             printMaskedToken();
 1971             exitState=true;
 1972             break;
 1973         case _EOL:
 1974             printMaskedToken();
 1975             
 1976             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1977                 exitState=false;
 1978             } else {
 1979                 if (currentSyntax->getContinuationChar()!=0x13){
 1980                     exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
 1981                 } 
 1982             }
 1983             if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
 1984             insertLineNumber();
 1985             if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
 1986             break;
 1987         case ML_COMMENT:
 1988             closeTag ( DIRECTIVE );
 1989             eof= processMultiLineCommentState();
 1990             openTag ( DIRECTIVE );
 1991             break;
 1992         case SL_COMMENT:
 1993             closeTag ( DIRECTIVE );
 1994             eof= processSingleLineCommentState();
 1995             openTag ( DIRECTIVE );
 1996             exitState=true;
 1997             break;
 1998         case STRING:
 1999             closeTag ( DIRECTIVE );
 2000             eof=processStringState ( DIRECTIVE );
 2001             openTag ( DIRECTIVE );
 2002             break;
 2003         case _EOF:
 2004             eof = true;
 2005             break;
 2006         default:
 2007             break;
 2008         }
 2009     } while ( !exitState && !eof );
 2010 
 2011     closeTag ( DIRECTIVE );
 2012     return eof;
 2013 }
 2014 
 2015 bool CodeGenerator::processStringState ( State oldState )
 2016 {
 2017     State newState=STANDARD;
 2018     bool eof=false, exitState=false;
 2019     bool returnedFromOtherState=false;
 2020 
 2021     State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
 2022 
 2023     int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
 2024     string openDelim=token;
 2025 
 2026     //Raw String by definition:
 2027     bool isRawString=currentSyntax->delimiterIsRawString(openDelimID);
 2028 
 2029     // Test if character before string open delimiter token equals to the
 2030     // raw string prefix (Example: r" ", r""" """ in Python)
 2031 
 2032     //Raw String Prefix:
 2033     if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
 2034         isRawString=true;
 2035     }
 2036 
 2037     openTag ( myState );
 2038     do {
 2039         // true if last token was an escape char
 2040         if ( !returnedFromOtherState ) {
 2041             printMaskedToken (newState!=_WS );
 2042         }
 2043         returnedFromOtherState=false;
 2044         newState= getCurrentState(myState);
 2045 
 2046         switch ( newState ) {
 2047         case _WS:
 2048             processWsState();
 2049             break;
 2050         case _EOL:
 2051             wsBuffer += closeTags[myState];
 2052             insertLineNumber();
 2053             wsBuffer += openTags[myState];
 2054             break;
 2055         case STRING_END:
 2056             if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
 2057                 if (currentSyntax->assertDelimEqualLength()) {
 2058                     exitState= openDelim.length()==token.length();
 2059                 } else {
 2060                     exitState= true;
 2061                 }
 2062                 printMaskedToken();
 2063             }
 2064             break;
 2065         case STRING:
 2066             // if there exist multiple string delimiters, close string if
 2067             // current delimiter is equal to the opening delimiter
 2068             exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  ))&&token==openDelim;
 2069             printMaskedToken();
 2070             break;
 2071         case ESC_CHAR:
 2072             if ( !isRawString ) {
 2073                 closeTag ( myState );
 2074                 eof=processEscapeCharState();
 2075                 openTag ( myState );
 2076                 returnedFromOtherState=true;
 2077             } else {
 2078                 // FIXME not a fix for Python r"""\"""
 2079                 exitState=token.size()>1 && token[1] == openDelim[0];
 2080                 printMaskedToken();
 2081             }
 2082             break;
 2083         case STRING_INTERPOLATION:
 2084             closeTag ( myState );
 2085             eof=processInterpolationState();
 2086             openTag ( myState );
 2087             returnedFromOtherState=true;
 2088             break;
 2089         case _EOF:
 2090             eof = true;
 2091             break;
 2092         default:
 2093             printMaskedToken();
 2094             break;
 2095         }
 2096     } while ( !exitState && !eof );
 2097 
 2098     closeTag ( myState );
 2099 
 2100     return eof;
 2101 }
 2102 
 2103 bool CodeGenerator::processSymbolState()
 2104 {
 2105 
 2106     State newState=STANDARD;
 2107     bool eof=false,
 2108          exitState=false;
 2109 
 2110     openTag ( SYMBOL );
 2111     do {
 2112         printMaskedToken ( newState!=_WS );
 2113         newState= getCurrentState(SYMBOL);
 2114         switch ( newState ) {
 2115         case _WS:
 2116             processWsState();
 2117             break;
 2118         case _EOL:
 2119             insertLineNumber();
 2120             exitState=true;
 2121             break;
 2122         case _EOF:
 2123             eof = true;
 2124             break;
 2125         default:
 2126             exitState=newState!=SYMBOL;
 2127             break;
 2128         }
 2129     } while ( !exitState && !eof );
 2130 
 2131     closeTag ( SYMBOL );
 2132     return eof;
 2133 }
 2134 
 2135 bool CodeGenerator::processEscapeCharState()
 2136 {
 2137     State newState=STANDARD;
 2138     bool eof=false, exitState=false;
 2139     openTag ( ESC_CHAR );
 2140     do {
 2141         printMaskedToken (newState!=_WS );
 2142         newState= getCurrentState(ESC_CHAR);
 2143         switch ( newState ) {
 2144         case _EOL:
 2145             insertLineNumber();
 2146             exitState=true;
 2147             break;
 2148         case _WS:
 2149             processWsState();
 2150             break;
 2151         case _EOF:
 2152             eof = true;
 2153             break;
 2154         default:
 2155             exitState=newState!=ESC_CHAR;
 2156             break;
 2157         }
 2158     } while ( !exitState && !eof );
 2159 
 2160     closeTag ( ESC_CHAR );
 2161     return eof;
 2162 }
 2163 
 2164 bool CodeGenerator::processInterpolationState()
 2165 {
 2166     State newState=STANDARD;
 2167     bool eof=false, exitState=false;
 2168     openTag ( STRING_INTERPOLATION );
 2169     do {
 2170         printMaskedToken (newState!=_WS );
 2171         newState= getCurrentState(STRING_INTERPOLATION);
 2172         switch ( newState ) {
 2173         case _EOL:
 2174             insertLineNumber();
 2175             exitState=true;
 2176             break;
 2177         case _WS:
 2178             processWsState();
 2179             break;
 2180         case _EOF:
 2181             eof = true;
 2182             break;
 2183         default:
 2184             exitState=newState!=STRING_INTERPOLATION;
 2185             break;
 2186         }
 2187     } while ( !exitState && !eof );
 2188 
 2189     closeTag ( STRING_INTERPOLATION );
 2190     return eof;
 2191 }
 2192 
 2193 void CodeGenerator::processWsState()
 2194 {
 2195     if ( !maskWs ) {
 2196         wsBuffer += token;
 2197         token.clear();
 2198         return;
 2199     }
 2200 
 2201     flushWs(6);
 2202 
 2203     int cntWs=0;
 2204     lineIndex--;
 2205     PositionState ps(currentState, 0, true);
 2206     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
 2207         ++cntWs;
 2208         ++lineIndex;
 2209     }
 2210     if ( cntWs>1 ) {
 2211 
 2212         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
 2213         if ( excludeWs && styleID!=_UNKNOWN ) {
 2214             *out << closeTags[styleID];
 2215         }
 2216         *out << maskWsBegin ;
 2217         for ( int i=0; i<cntWs; i++ ) {
 2218             *out <<  spacer;
 2219             if (applySyntaxTestCase)
 2220                 stateTraceCurrent.push_back(ps);
 2221         }
 2222         *out << maskWsEnd;
 2223         if ( excludeWs && styleID!=_UNKNOWN ) {
 2224             *out << openTags[styleID];
 2225         }
 2226     } else {
 2227     
 2228         *out << spacer; //Bugfix fehlender Space nach Strings
 2229         if (applySyntaxTestCase)
 2230             stateTraceCurrent.push_back(ps);
 2231     }
 2232     token.clear();
 2233 }
 2234 
 2235 void CodeGenerator::flushWs(int arg)
 2236 {
 2237      PositionState ps(currentState, 0, true);
 2238      //workaround condition
 2239      for ( size_t i=0; i<wsBuffer.size() && (arg !=2 || (arg==2 && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
 2240         stateTraceCurrent.push_back(ps);
 2241      }
 2242      
 2243      //fix canvas whitespace
 2244      if (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR){
 2245         *out<< maskWsBegin;
 2246      }
 2247     
 2248     *out<<wsBuffer;
 2249     wsBuffer.clear();
 2250 }
 2251 
 2252 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
 2253     switch (s) {
 2254         
 2255         case STANDARD:
 2256             return STY_NAME_STD;
 2257         case STRING:
 2258             return STY_NAME_STR;
 2259         case NUMBER:
 2260             return STY_NAME_NUM;
 2261         case SL_COMMENT:
 2262             return STY_NAME_SLC;
 2263         case ML_COMMENT:
 2264             return STY_NAME_COM;
 2265         case ESC_CHAR:
 2266             return STY_NAME_ESC;
 2267         case DIRECTIVE:
 2268             return STY_NAME_DIR;
 2269         case DIRECTIVE_STRING:
 2270             return STY_NAME_DST;
 2271         case SYMBOL:
 2272             return STY_NAME_SYM;
 2273         case STRING_INTERPOLATION:
 2274             return STY_NAME_IPL;
 2275         case _WS:
 2276             return "ws";
 2277         case KEYWORD: {
 2278             
 2279             if (!kwClass)
 2280                 return "ws";
 2281             
 2282             char kwName[5] = {0};
 2283             snprintf(kwName, sizeof(kwName), "kw%c", ('a'+kwClass-1));
 2284             return string(kwName);
 2285         }
 2286         default:
 2287             return "unknown_test";
 2288     }
 2289 }
 2290 
 2291 void CodeGenerator::runSyntaxTestcases(unsigned int column){
 2292     
 2293     
 2294     if (!stateTraceCurrent.size() /*|| lineIndex>stateTraceCurrent.size()*/)
 2295         return;
 2296     
 2297     unsigned int assertGroup=0;
 2298     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
 2299     State assertState=_UNKNOWN;
 2300     
 2301     if (!lineContainedTestCase){
 2302         stateTraceCurrent=stateTraceTest;
 2303     } 
 2304     
 2305     if (typeDescPos!=string::npos) {
 2306     
 2307         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
 2308             assertState=NUMBER;
 2309         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos)
 2310             assertState=STRING;
 2311         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
 2312             assertState=ESC_CHAR;
 2313         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
 2314             assertState=STRING_INTERPOLATION;
 2315         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
 2316             assertState=SYMBOL;
 2317         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
 2318             assertState=DIRECTIVE;
 2319         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
 2320             assertState=SL_COMMENT;
 2321         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
 2322             assertState=ML_COMMENT;
 2323         else if (line.find("ws", typeDescPos)==typeDescPos)
 2324             assertState=_WS;
 2325         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos)
 2326             assertState=STANDARD;
 2327         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
 2328             assertState=DIRECTIVE_STRING;
 2329         
 2330         else if (line.find("kw", typeDescPos)==typeDescPos) {
 2331             assertState=KEYWORD;
 2332             if (isalpha(line[typeDescPos+2]))
 2333                 assertGroup=line[typeDescPos+2] - 'a' +1;
 2334         }
 2335     
 2336         if (   (assertState!=_WS && stateTraceCurrent[column].state != assertState )
 2337             || (assertState==_WS && !stateTraceCurrent[column].isWhiteSpace)
 2338             || assertGroup != stateTraceCurrent[column].kwClass) {
 2339             ostringstream err;
 2340             err << inFile << " line " << lineNumber << ", column "<< column << ": got " << getTestcaseName(stateTraceCurrent[column].state, stateTraceCurrent[column].kwClass)  
 2341                 << " instead of " << getTestcaseName(assertState, assertGroup) ;
 2342             failedPosTests.push_back(err.str());
 2343         }
 2344         
 2345     }
 2346     
 2347     lineContainedTestCase=true; 
 2348 }
 2349 
 2350 
 2351 string CodeGenerator::getNewLine()
 2352 {
 2353     return (printNewLines) ? newLineTag : "";
 2354 }
 2355 
 2356 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
 2357 {
 2358 
 2359     Diluculum::LuaValueList params;
 2360     params.push_back(Diluculum::LuaValue(lineNumber));
 2361 
 2362     return currentSyntax->getLuaState()->call ( isLineStart ?
 2363             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
 2364             params,"getDecorateLineFct call")  ;
 2365 
 2366 }
 2367 
 2368 void CodeGenerator::insertLineNumber ( bool insertNewLine )
 2369 {
 2370     if ( insertNewLine ) {
 2371         if (currentSyntax->getDecorateLineEndFct()) {
 2372             Diluculum::LuaValueList res=callDecorateLineFct(false);
 2373             if (res.size()==1) {
 2374                 wsBuffer +=res[0].asString();
 2375             }
 2376         }
 2377 
 2378         wsBuffer += getNewLine();
 2379     }
 2380 
 2381 
 2382     if (currentSyntax->getDecorateLineBeginFct()) {
 2383         Diluculum::LuaValueList res=callDecorateLineFct(true);
 2384         if (res.size()==1) {
 2385             wsBuffer +=res[0].asString();
 2386         }
 2387     }
 2388 
 2389     if ( showLineNumbers ) {
 2390         ostringstream os;
 2391         ostringstream numberPrefix;
 2392 
 2393         os << setw ( getLineNumberWidth() ) << right;
 2394         if( numberCurrentLine ) {
 2395             if ( lineNumberFillZeroes ) {
 2396                 os.fill ( '0' );
 2397             }
 2398             os << lineNumber+lineNumberOffset;
 2399         } else {
 2400             os << "";
 2401         }
 2402 
 2403         numberPrefix << openTags[LINENUMBER];
 2404         maskString ( numberPrefix, os.str() );
 2405         numberPrefix << spacer << closeTags[LINENUMBER];
 2406 
 2407         wsBuffer += numberPrefix.str();
 2408     }
 2409 }
 2410 
 2411 unsigned int CodeGenerator::getLineIndex()
 2412 {
 2413     return lineIndex;
 2414 }
 2415 unsigned int CodeGenerator::getLastLineLength()
 2416 {
 2417     return lastLineLength;
 2418 }
 2419 
 2420 bool CodeGenerator::printExternalStyle ( const string &outFile )
 2421 {
 2422     if ( !includeStyleDef ) {
 2423         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
 2424         if ( !cssOutFile->fail() ) {
 2425             if (!omitVersionComment) {
 2426                 *cssOutFile << styleCommentOpen
 2427                             <<" Style definition file generated by highlight "
 2428                             << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
 2429                             << " " << styleCommentClose << "\n";
 2430             }
 2431             *cssOutFile << getStyleDefinition()
 2432                         << "\n";
 2433             *cssOutFile << readUserStyleDef();
 2434             if ( !outFile.empty() ) delete cssOutFile;
 2435         } else {
 2436             return false;
 2437         }
 2438     }
 2439     return true;
 2440 }
 2441 
 2442 string CodeGenerator::readUserStyleDef()
 2443 {
 2444     ostringstream ostr;
 2445     if ( !styleInputPath.empty() ) {
 2446         ifstream userStyleDef ( styleInputPath.c_str() );
 2447         if ( userStyleDef ) {
 2448             ostr    << "\n" << styleCommentOpen
 2449                     << " Content of " << styleInputPath
 2450                     << ": " <<styleCommentClose << "\n";
 2451             string line;
 2452             while ( getline ( userStyleDef, line ) ) {
 2453                 ostr << line << "\n";
 2454             }
 2455             userStyleDef.close();
 2456         } else {
 2457             ostr    << styleCommentOpen
 2458                     << " ERROR: Could not include " << styleInputPath
 2459                     << "." << styleCommentClose << "\n";
 2460         }
 2461     }
 2462 
 2463     string injections=docStyle.getInjections();
 2464     if (!injections.empty()) {
 2465         ostr    << "\n" << styleCommentOpen
 2466                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
 2467         ostr << injections<<"\n";
 2468     }
 2469     return ostr.str();
 2470 }
 2471 
 2472 bool CodeGenerator::initPluginScript(const string& script)
 2473 {
 2474 
 2475     if (script.empty()) return true;
 2476 
 2477     try {
 2478 
 2479         userScriptError="";
 2480         Diluculum::LuaState ls;
 2481         
 2482         ls.doFile (script);
 2483         int listIdx=1;
 2484 
 2485         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
 2486 
 2487             // Theme plugins
 2488             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
 2489                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2490                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2491                 }
 2492             }
 2493             // Syntax plugins
 2494             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
 2495                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2496                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2497                 }
 2498             }
 2499             // Format plugins
 2500             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
 2501                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2502                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2503                 }
 2504             }
 2505             
 2506             listIdx++;
 2507         }
 2508     }  catch (Diluculum::LuaError &err) {
 2509         userScriptError=err.what();
 2510         return false;
 2511     }
 2512     return true;
 2513 }
 2514 
 2515 bool CodeGenerator::checkSpecialCmd()
 2516 {
 2517     string noParseCmd="@highlight";
 2518     size_t cmdPos = line.find ( noParseCmd );
 2519 
 2520     if ( cmdPos!=string::npos ) {
 2521         *out<<line.substr ( noParseCmd.size() +cmdPos + 1 );
 2522 
 2523         // hide comment line from output
 2524         token.clear();
 2525         lineIndex=line.length();
 2526         getInputChar();
 2527         lineNumber--;
 2528         // end hide
 2529 
 2530         return true; // do not parse line as comment
 2531     }
 2532     return false; //parse comment as usual
 2533 }
 2534 
 2535 }