"Fossies" - the Fresh Open Source Software Archive

Member "highlight-3.49/src/core/codegenerator.cpp" (6 Feb 2019, 78504 Bytes) of package /linux/www/highlight-3.49.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "codegenerator.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.48_vs_3.49.

    1 /***************************************************************************
    2                           codegenerator.cpp  -  description
    3                              -------------------
    4     begin                : Die Jul 9 2002
    5     copyright            : (C) 2002-2019 by Andre Simon
    6     email                : a.simon@mailbox.org
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #include <climits>
   29 #include <memory>
   30 #include <boost/xpressive/xpressive_dynamic.hpp>
   31 
   32 #include "codegenerator.h"
   33 
   34 #include "htmlgenerator.h"
   35 #include "xhtmlgenerator.h"
   36 #include "rtfgenerator.h"
   37 #include "latexgenerator.h"
   38 #include "texgenerator.h"
   39 #include "svggenerator.h"
   40 #include "bbcodegenerator.h"
   41 #include "pangogenerator.h"
   42 #include "odtgenerator.h"
   43 #include "astyle/astyle.h"
   44 #include "astyle/ASStreamIterator.h"
   45 
   46 #if !defined (QT)
   47 #include "ansigenerator.h"
   48 #include "xterm256generator.h"
   49 #endif
   50 
   51 namespace highlight
   52 {
   53 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
   54 
   55 const string CodeGenerator::STY_NAME_STD="std";
   56 const string CodeGenerator::STY_NAME_STR="str";
   57 const string CodeGenerator::STY_NAME_NUM="num";
   58 const string CodeGenerator::STY_NAME_SLC="slc";
   59 const string CodeGenerator::STY_NAME_COM="com";
   60 const string CodeGenerator::STY_NAME_ESC="esc";
   61 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
   62 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
   63 const string CodeGenerator::STY_NAME_LIN="lin";
   64 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
   65 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
   66 
   67 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
   68 
   69 
   70 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
   71 {
   72     CodeGenerator* generator=NULL;
   73     switch ( type ) {
   74     case HTML:
   75         generator = new HtmlGenerator();
   76         break;
   77     case XHTML:
   78         generator = new XHtmlGenerator();
   79         break;
   80     case TEX:
   81         generator = new TexGenerator ();
   82         break;
   83     case LATEX:
   84         generator = new LatexGenerator();
   85         break;
   86     case RTF:
   87         generator = new RtfGenerator ();
   88         break;
   89     case SVG:
   90         generator = new SVGGenerator();
   91         break;
   92     case BBCODE:
   93         generator = new BBCodeGenerator();
   94         break;
   95     case PANGO:
   96         generator = new PangoGenerator();
   97         break;
   98     case ODTFLAT:
   99         generator = new ODTGenerator();
  100         break;
  101 #if !defined (QT)
  102     case ESC_ANSI:
  103         generator = new AnsiGenerator();
  104         break;
  105     case ESC_XTERM256:
  106     case ESC_TRUECOLOR:
  107         generator = new Xterm256Generator();
  108         generator->setESCTrueColor(type==ESC_TRUECOLOR);
  109         break;
  110 #endif
  111     default:
  112         break;
  113     }
  114     return generator;
  115 }
  116 
  117 
  118 CodeGenerator::CodeGenerator ( highlight::OutputType type )
  119     :currentSyntax(NULL),
  120      in ( NULL ),
  121      out ( NULL ),
  122      encoding ( "none" ),
  123      docTitle ( "Source file" ),
  124      maskWs ( false ),
  125      excludeWs ( false ),
  126      fragmentOutput ( false ),
  127      keepInjections( false ),
  128      showLineNumbers ( false ),
  129      lineNumberFillZeroes ( false ),
  130      printNewLines(true),
  131      omitVersionComment(false),
  132      baseFontSize("10"),
  133      lineNumber ( 0 ),
  134      lineNumberOffset ( 0 ),
  135      currentState ( _UNKNOWN ),
  136      currentKeywordClass ( 0 ),
  137      includeStyleDef ( false ),
  138      numberCurrentLine ( false ),
  139      lineIndex ( 0 ),
  140      lastLineLength( 0 ),
  141      syntaxChangeIndex(UINT_MAX),
  142      syntaxChangeLineNo(UINT_MAX),
  143      lineNumberWidth ( 5 ),
  144      startLineCnt( 1 ),
  145      startLineCntCurFile( 1 ),
  146      maxLineCnt ( UINT_MAX ),
  147      inputFilesCnt (0),
  148      processedFilesCnt (0),
  149      terminatingChar ( '\0' ),
  150      formatter ( NULL ),
  151      formattingEnabled ( false ),
  152      formattingPossible ( false ),
  153      validateInput ( false ),
  154      numberWrappedLines ( true ),
  155      noTrailingNewLine(false),
  156      resultOfHook(false),
  157      lineContainedTestCase(false),
  158      applySyntaxTestCase(false),
  159      keywordCase ( StringTools::CASE_UNCHANGED ),
  160      eolDelimiter ('\n'),
  161      outputType ( type )
  162 {
  163 }
  164 
  165 
  166 CodeGenerator::~CodeGenerator()
  167 {
  168     delete formatter;
  169 
  170     for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
  171         delete it->second;
  172     }
  173     
  174     for (unsigned int i=0; i<pluginChunks.size(); i++) {
  175         delete pluginChunks[i];
  176     }
  177     pluginChunks.clear();
  178 }
  179 
  180 
  181 bool CodeGenerator::initTheme ( const string& themePath )
  182 {
  183     this->themePath=themePath;
  184     bool loadOK = docStyle.load ( themePath, outputType );
  185     initOutputTags();
  186     return loadOK;
  187 }
  188 
  189 const string& CodeGenerator::getStyleName()
  190 {
  191     return themePath;
  192 }
  193 
  194 void CodeGenerator::setLineNumberWidth ( int w )
  195 {
  196     lineNumberWidth=w;
  197 }
  198 
  199 int CodeGenerator::getLineNumberWidth()
  200 {
  201     return lineNumberWidth;
  202 }
  203 
  204 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
  205 {
  206     showLineNumbers=flag;
  207     lineNumberOffset = startCnt-1;
  208 }
  209 
  210 bool CodeGenerator::getPrintLineNumbers()
  211 {
  212     return showLineNumbers;
  213 }
  214 
  215 void CodeGenerator::setPrintZeroes ( bool flag )
  216 {
  217     lineNumberFillZeroes=flag;
  218 }
  219 
  220 bool CodeGenerator::getPrintZeroes()
  221 {
  222     return lineNumberFillZeroes;
  223 }
  224 
  225 void CodeGenerator::setIncludeStyle ( bool flag )
  226 {
  227     includeStyleDef = flag;
  228 }
  229 
  230 void CodeGenerator::disableTrailingNL ( bool flag )
  231 {
  232     noTrailingNewLine = flag;
  233 }
  234 
  235 void CodeGenerator::setStyleInputPath ( const string& path )
  236 {
  237     styleInputPath = path;
  238 }
  239 
  240 void CodeGenerator::setStyleOutputPath ( const string& path )
  241 {
  242     styleOutputPath = path;
  243 }
  244 
  245 void CodeGenerator::setPluginParameter ( const string& param )
  246 {
  247     pluginParameter = param;
  248 }
  249 
  250 const string&  CodeGenerator::getStyleInputPath()
  251 {
  252     return styleInputPath;
  253 }
  254 
  255 const string&  CodeGenerator::getStyleOutputPath()
  256 {
  257     return styleOutputPath;
  258 }
  259 
  260 void CodeGenerator::setFragmentCode ( bool flag )
  261 {
  262     fragmentOutput=flag;
  263 }
  264 
  265 bool CodeGenerator::getFragmentCode()
  266 {
  267     return fragmentOutput;
  268 }
  269 void CodeGenerator::setKeepInjections ( bool flag )
  270 {
  271     keepInjections=flag;
  272 }
  273 
  274 bool CodeGenerator::getKeepInjections()
  275 {
  276     return keepInjections;
  277 }
  278 void CodeGenerator::setValidateInput ( bool flag )
  279 {
  280     validateInput=flag;
  281 }
  282 
  283 bool CodeGenerator::getValidateInput()
  284 {
  285     return validateInput;
  286 }
  287 
  288 
  289 void CodeGenerator::setNumberWrappedLines ( bool flag )
  290 {
  291     numberWrappedLines=flag;
  292 }
  293 
  294 bool CodeGenerator::getNumberWrappedLines()
  295 {
  296     return numberWrappedLines;
  297 }
  298 
  299 void CodeGenerator::setOmitVersionComment ( bool flag )
  300 {
  301     omitVersionComment=flag;
  302 }
  303 
  304 bool CodeGenerator::getOmitVersionComment ()
  305 {
  306     return omitVersionComment;
  307 }
  308 
  309 void CodeGenerator::setBaseFont ( const string& fontName )
  310 {
  311     baseFont = fontName;
  312 }
  313 
  314 void CodeGenerator::setBaseFontSize ( const string& fontSize)
  315 {
  316     baseFontSize = fontSize;
  317 }
  318 
  319 void CodeGenerator::setStartingNestedLang(const string &langName)
  320 {
  321     embedLangStart = langName;
  322 }
  323 
  324 const string CodeGenerator::getBaseFont() const
  325 {
  326     if ( !baseFont.empty() ) return baseFont;
  327     switch ( outputType ) {
  328     case HTML:
  329     case XHTML:
  330     case SVG:
  331         return "'Courier New',monospace";
  332         break;
  333     case LATEX:
  334         return "ttfamily";
  335         break;
  336     case TEX:
  337         return "tt";
  338         break;
  339     default:
  340         return "Courier New";
  341     }
  342 }
  343 
  344 const string CodeGenerator::getBaseFontSize()
  345 {
  346     return baseFontSize;
  347 }
  348 
  349 void CodeGenerator::setTitle ( const string & title )
  350 {
  351     if ( !title.empty() ) docTitle= title;
  352 }
  353 
  354 string CodeGenerator::getTitle()
  355 {
  356     return docTitle;
  357 }
  358 
  359 void CodeGenerator::setEncoding ( const string& encodingName )
  360 {
  361     encoding = encodingName;
  362 }
  363 
  364 bool CodeGenerator::formattingDisabled()
  365 {
  366     return !formattingEnabled;
  367 }
  368 
  369 void CodeGenerator::setStartingInputLine ( unsigned int begin )
  370 {
  371     startLineCnt = startLineCntCurFile = begin;
  372 }
  373 
  374 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
  375 {
  376     maxLineCnt = cnt;
  377 }
  378 
  379 void CodeGenerator::setFilesCnt ( unsigned int cnt )
  380 {
  381     inputFilesCnt = cnt;
  382 }
  383 
  384 bool CodeGenerator::formattingIsPossible()
  385 {
  386     return formattingPossible;
  387 }
  388 
  389 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
  390                                        unsigned int lineLength,
  391                                        int numberSpaces )
  392 {
  393     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
  394     bool replaceTabs = numberSpaces > 0;
  395 
  396     if ( enableWrap || replaceTabs ) {
  397         preFormatter.setWrap ( enableWrap );
  398         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
  399         preFormatter.setWrapLineLength ( lineLength );
  400         preFormatter.setReplaceTabs ( replaceTabs );
  401         preFormatter.setNumberSpaces ( numberSpaces );
  402     }
  403 }
  404 
  405 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
  406 {
  407     keywordCase = keyCase;
  408 }
  409 
  410 void CodeGenerator::setEOLDelimiter(char delim)
  411 {
  412     eolDelimiter = delim;
  413 }
  414 
  415 void CodeGenerator::reset()
  416 {
  417     lineIndex = 0;
  418     lineNumber = 0;
  419     line.clear();
  420     preFormatter.reset();
  421     inFile.clear();
  422     outFile.clear();
  423     embedLangDefPath.clear();
  424     printNewLines=true;
  425     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  426     startLineCntCurFile = startLineCnt;
  427     applySyntaxTestCase=lineContainedTestCase=false;
  428 }
  429 
  430 string CodeGenerator::getThemeInitError()
  431 {
  432     return  docStyle.getErrorMessage();
  433 }
  434 
  435 string CodeGenerator::getPluginScriptError()
  436 {
  437     return userScriptError;
  438 }
  439 
  440 string CodeGenerator::getSyntaxRegexError()
  441 {
  442     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
  443 }
  444 string CodeGenerator::getSyntaxLuaError()
  445 {
  446     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
  447 
  448 }
  449 string CodeGenerator::getSyntaxDescription()
  450 {
  451     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
  452 
  453 }
  454 string CodeGenerator::getThemeDescription()
  455 {
  456     return docStyle.getDescription();
  457 }
  458 
  459 string CodeGenerator::getSyntaxCatDescription(){
  460     return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
  461 }
  462 
  463 string CodeGenerator::getThemeCatDescription(){
  464     return docStyle.getCategoryDescription();
  465 }
  466 
  467 unsigned int CodeGenerator::getLineNumber()
  468 {
  469     return lineNumber;
  470 }
  471 
  472 bool CodeGenerator::readNewLine ( string &newLine )
  473 {
  474     bool eof=false;
  475     
  476     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
  477     
  478     while (!eof && startLineCntCurFile>0) {
  479         if ( formattingPossible && formattingEnabled ) {
  480             eof=!formatter->hasMoreLines();
  481             if ( !eof ) {
  482                 newLine = formatter->nextLine();
  483             }
  484         } else {
  485             eof = ! getline ( *in, newLine, eolDelimiter );
  486         }
  487         --startLineCntCurFile;
  488     }
  489     startLineCntCurFile=1;
  490 #ifndef _WIN32
  491     // drop CR of CRLF files
  492     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
  493         newLine.erase(newLine.size() - 1);
  494 #endif
  495 
  496     return eof || ( lineNumber == maxLineCnt );
  497 }
  498 
  499 void CodeGenerator::matchRegex ( const string &line, State skipState)
  500 {
  501     regexGroups.clear();
  502     int matchBegin=0;
  503     int groupID=0;
  504 
  505     // cycle through all regex, save the start and ending indices of matches to report them later
  506     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
  507         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
  508 
  509         if (regexElem->open == skipState) continue;
  510         
  511         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
  512         boost::xpressive::sregex_iterator end;
  513 
  514         for( ; cur != end; ++cur )  {
  515             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
  516             matchBegin =  cur->position(groupID);
  517             regexGroups.insert (
  518                 make_pair ( matchBegin + 1, ReGroup ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
  519         }
  520     }
  521 }
  522 
  523 unsigned char CodeGenerator::getInputChar()
  524 {
  525     // end of line?
  526     if ( lineIndex == line.length() ) {
  527         bool eof=false;
  528         if ( preFormatter.isEnabled() ) {
  529             if ( !preFormatter.hasMoreLines() ) {
  530                 eof=readNewLine ( line );
  531                 preFormatter.setLine ( line );
  532                 ++lineNumber;
  533                 numberCurrentLine = true;
  534             } else {
  535                 if(numberWrappedLines)
  536                     ++lineNumber;
  537                 numberCurrentLine = numberWrappedLines;
  538             }
  539 
  540             line = preFormatter.getNextLine();
  541         } else {
  542             eof=readNewLine ( line );
  543             ++lineNumber;
  544 
  545             numberCurrentLine = true;
  546         }
  547         lastLineLength=lineIndex;
  548         lineIndex=0;
  549         
  550         if (!lineContainedTestCase && applySyntaxTestCase){
  551             stateTraceTest = stateTraceCurrent;
  552             stateTraceCurrent.clear();
  553         } 
  554         
  555         lineContainedTestCase=false;
  556             
  557         matchRegex ( line );
  558         stateTrace.clear();
  559         return ( eof ) ?'\0':'\n';
  560     }
  561 
  562     return line[lineIndex++];
  563 }
  564 
  565 /** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS, Coffeescript with block regex, Pas + ASM) 
  566     especially nested syntax in one line
  567  */
      // Determines the state of the text at the current input position and
      // stores the text it belongs to in `token`.
      //
      // oldState: state that was active before this call; it is used for the
      //           test position check and is passed on to validateState().
      // Returns _EOL, _EOF, _WS or _TESTPOS for the special cases handled
      // first, the result of validateState() if a regex match starts at the
      // current position, and STANDARD otherwise.
  568 State CodeGenerator::getCurrentState (State oldState)
  569 {
  570     unsigned char c='\0';
  571 
  572     if ( token.length() ==0 ) {
              // no pending token: read the next input character
  573         c=getInputChar();
  574     } else {
              // a token is still pending: move lineIndex back to just behind
              // the token's first character and examine that character again
  575         lineIndex-= ( token.length()-1 );
  576         c=token[0];
  577     }
  578     if ( c=='\n' ) {
  579         return _EOL;   // End of line
  580     }
  581 
  582     if ( c=='\0' ) {
  583         return _EOF;   // End of file
  584     }
  585 
  586     if ( c==' ' || c=='\t' ) {
  587         token= c;
  588         return _WS;
  589     }
  590     
  591     //TODO add control flag
          // in syntax test mode, '^' and '<' inside a comment are reported as test positions
  592     if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
  593         token= c;
  594         return _TESTPOS;
  595     }
  596         
  597     // at this position the syntax change takes place
          // (both markers hold UINT_MAX while no change is pending, see the reset below)
  598     if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
  599         loadEmbeddedLang(embedLangDefPath);  // load new syntax                     
  600         matchRegex(line);                    // recognize new patterns in the (remaining) line
  601         syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  602     }
  603 
  604 SKIP_EMBEDDED:
  605     
  606     // Test if a regular expression was found at the current position
          // (matchRegex() stores each match under its position + 1, which is the
          //  value of lineIndex after the character at that position was read)
  607     if ( !regexGroups.empty() ) {
  608         if ( regexGroups.count ( lineIndex ) ) {
  609             token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );
  610 
                  // keep the match position and advance lineIndex to the end of the match
  611             unsigned int oldIndex= lineIndex;
  612             if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;
  613 
  614             if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
  615                 //do not handle a nested section if the syntax is marked as "sealed" 
  616                 if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
  617                     embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
  618                     //remember position 
  619                     syntaxChangeIndex = lineIndex+2;
  620                     syntaxChangeLineNo = lineNumber;
  621                 }
  622                 
  623                 // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
                      // (matchRegex() rebuilds regexGroups, so the position is restored and looked up again)
  624                 matchRegex(line, EMBEDDED_CODE_BEGIN);
  625                 lineIndex = oldIndex;
  626                 goto SKIP_EMBEDDED; // this is how it should be done
  627             }
  628 
  629             if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
                      // look the token up in the keyword lists; the lookup uses
                      // change_case() on the token if the syntax ignores case
  630                 string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
  631                 currentKeywordClass=currentSyntax->isKeyword ( reservedWord ); //check in lists (no regex)
  632                 // for positional Tests; will not be used again for actual input parsing
  633                 // FIXME not needed?
  634                 //if (currentKeywordClass)
  635                 //    regexGroups[oldIndex]=ReGroup ( KEYWORD, reservedWord.size(), currentKeywordClass, "" );  
  636                 
                      // not found in the lists, but matched by a keyword regex: use the class of that regex
  637                 if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
  638                     currentKeywordClass = regexGroups[oldIndex].kwClass;
  639                 }
  640                 return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
  641             } else {
  642                 return validateState(regexGroups[oldIndex].state, oldState);
  643             }
  644         }
  645     }
  646 
  647     // Character not referring to any state
  648     token = c;
  649     return STANDARD;
  650 }
  651 
  652 State CodeGenerator::validateState(State newState, State oldState)
  653 {
  654 
  655     if (currentSyntax->getValidateStateChangeFct()) {
  656         Diluculum::LuaValueList params;
  657         params.push_back(Diluculum::LuaValue(oldState));
  658         params.push_back(Diluculum::LuaValue(newState));
  659         params.push_back(Diluculum::LuaValue(token));
  660         params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
  661 
  662         Diluculum::LuaValueList res=
  663             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
  664                     params,"getValidateStateChangeFct call")  ;
  665 
  666         resultOfHook = res.size()>=1;
  667         if (resultOfHook) {
  668             State validatedState = (State)res[0].asInteger();
  669             if ( validatedState== _REJECT) {
  670                 // proceed using only the first character of the token
  671                 lineIndex -= (token.length() -1);
  672                 token=token.substr(0, 1);
  673                 return oldState;
  674             }
  675             stateTrace.push_back(validatedState);
  676             if (stateTrace.size()>200) stateTrace.erase(stateTrace.begin(), stateTrace.begin() + 100 );
  677             return validatedState;
  678         }
  679     }
  680     resultOfHook  = false;
  681     stateTrace.push_back(newState);
  682     if (stateTrace.size()>200) stateTrace.erase(stateTrace.begin(), stateTrace.begin() + 100 );    
  683     return newState;
  684 }
  685 
  686 
  687 unsigned int CodeGenerator::getCurrentKeywordClassId(){
  688     unsigned int kwClassId=0;
  689 
  690     // this vector contains the defined keyword classes, and currentKeywordClass is its index:
  691     vector<string> kwClasses=currentSyntax->getKeywordClasses();
  692     if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
  693         string kwClassName=kwClasses[currentKeywordClass-1];
  694         if (kwClassName.size()==3)
  695             kwClassId = kwClassName[2] - 'a' + 1;
  696     }
  697     return kwClassId;
  698 }
  699 
  700 //it is faster to pass ostream reference
  701 void CodeGenerator::maskString ( ostream& ss, const string & s )
  702 {
  703     for ( unsigned int i=0; i< s.length(); i++ ) {
  704         ss << maskCharacter ( s[i] );
  705 
  706         if (applySyntaxTestCase) {
  707             PositionState ps(currentState, getCurrentKeywordClassId(), false);
  708             stateTraceCurrent.push_back(ps);
  709             
  710             if (stateTraceCurrent.size()>200) 
  711                 stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 ); 
  712         }
  713     }
  714 }
  715 
  716 
  717 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string&token)
  718 {
  719     Diluculum::LuaValueList params;
  720     params.push_back(Diluculum::LuaValue(token));
  721     params.push_back(Diluculum::LuaValue(currentState));
  722     params.push_back(Diluculum::LuaValue(currentKeywordClass));
  723     string trace(";");
  724     if (stateTrace.size()>1){
  725         for (size_t i=0; i<stateTrace.size()-1;i++){
  726             trace += std::to_string (stateTrace[i]);
  727             trace += ";";
  728         }
  729     }
  730     
  731     //std::cerr <<"TRC1: "<<trace<<"\n";
  732     
  733     params.push_back(Diluculum::LuaValue(trace));
  734 
  735     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
  736             params,"getDecorateFct call")  ;
  737 }
  738 
  739 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
  740 {
  741     if ( flushWhiteSpace )
  742         flushWs(1);
  743     string caseToken = StringTools::change_case ( token, tcase );
  744     if (currentSyntax->getDecorateFct()) {
  745 
  746         Diluculum::LuaValueList res=callDecorateFct(caseToken);
  747         if (res.size()==1) {
  748             *out<<res[0].asString();
  749         } else {
  750             maskString ( *out, caseToken );
  751         }
  752     } else {
  753         maskString ( *out, caseToken );
  754     }
  755 
  756     token.clear();
  757 }
  758 
  759 bool CodeGenerator::styleFound()
  760 {
  761     return docStyle.found();
  762 }
  763 
  764 bool CodeGenerator::printIndexFile ( const vector<string> &fileList,
  765                                      const string &outPath )
  766 {
  767     return true;
  768 }
  769 
  770 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
  771 {
  772 
  773     if ( formatter!=NULL ) {
  774         return true;
  775     }
  776 
  777     if ( !indentScheme.size() ) return false;
  778 
  779     formatter=new astyle::ASFormatter();
  780 
  781     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
  782         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
  783     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
  784         formatter->setFormattingStyle ( astyle::STYLE_KR );
  785     } else if ( indentScheme=="java" ) {
  786         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
  787     } else if ( indentScheme=="stroustrup" ) {
  788         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
  789     } else if ( indentScheme=="whitesmith" ) {
  790         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
  791     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
  792         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
  793     } else if ( indentScheme=="gnu" ) {
  794         formatter->setFormattingStyle ( astyle::STYLE_GNU );
  795     } else if ( indentScheme=="linux" ) {
  796         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
  797     } else if ( indentScheme=="horstmann" ) {
  798         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
  799     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
  800         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
  801     } else if ( indentScheme=="google") {
  802         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
  803     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
  804         formatter->setFormattingStyle ( astyle::STYLE_PICO );
  805     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
  806         formatter->setFormattingStyle ( astyle::STYLE_LISP );
  807     } else if ( indentScheme=="vtk") {
  808         formatter->setFormattingStyle ( astyle::STYLE_VTK );
  809     } else if ( indentScheme=="mozilla") {
  810         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
  811     } else if ( indentScheme=="webkit") {
  812         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
  813     } else if ( indentScheme!="user" ){
  814         return false;
  815     }
  816     return formattingEnabled=true;
  817 }
  818 
  819 
  820 /*Helper functions for astyle option parsing*/
  821 string CodeGenerator::getParam(const string& arg, const char* op)
  822 {
  823     return arg.substr(strlen(op));
  824 }
  825 
  826 string CodeGenerator::getParam(const string& arg, const char* op1, const char* op2)
  827 {
  828     return isParamOption(arg, op1) ? getParam(arg, op1) : getParam(arg, op2);
  829 }
  830 
  831 bool CodeGenerator::isOption(const string& arg, const char* op)
  832 {
  833     return arg.compare(op) == 0;
  834 }
  835 
  836 bool CodeGenerator::isOption(const string& arg, const char* op1, const char* op2)
  837 {
  838     return (isOption(arg, op1) || isOption(arg, op2));
  839 }
  840 
  841 bool CodeGenerator::isParamOption(const string& arg, const char* option)
  842 {
  843     bool retVal = arg.compare(0, strlen(option), option) == 0;
  844     // if comparing for short option, 2nd char of arg must be numeric
  845     if (retVal && strlen(option) == 1 && arg.length() > 1)
  846         if (!isdigit((unsigned char) arg[1]))
  847             retVal = false;
  848     return retVal;
  849 }
  850 
  851 bool CodeGenerator::isParamOption(const string& arg, const char* option1, const char* option2)
  852 {
  853     return isParamOption(arg, option1) || isParamOption(arg, option2);
  854 }
  855 
  856 //apply the same options as astyle
  857 void CodeGenerator::setIndentationOptions (const vector<string>& options){
  858     if (formatter) {
  859         string arg;
  860         for (unsigned int i=0; i<options.size(); i++) {
  861             arg=options[i];
  862             
  863             if (isOption(arg, "mode=cs"))
  864             {
  865                 formatter->setSharpStyle();
  866                 formatter->setModeManuallySet(true);
  867             }
  868             else if (isOption(arg, "mode=c"))
  869             {
  870                 formatter->setCStyle();
  871                 formatter->setModeManuallySet(true);
  872             }
  873             else if (isOption(arg, "mode=java"))
  874             {
  875                 formatter->setJavaStyle();
  876                 formatter->setModeManuallySet(true);
  877             }
  878             else if (isParamOption(arg, "t", "indent=tab="))
  879             {
  880                 int spaceNum = 4;
  881                 string spaceNumParam = getParam(arg, "t", "indent=tab=");
  882                 if (spaceNumParam.length() > 0)
  883                     spaceNum = atoi(spaceNumParam.c_str());
  884                 if (spaceNum >= 2 && spaceNum <= 20)
  885                     formatter->setTabIndentation(spaceNum, false);
  886             }
  887             else if (isOption(arg, "indent=tab"))
  888             {
  889                 formatter->setTabIndentation(4);
  890             }
  891             else if (isParamOption(arg, "T", "indent=force-tab="))
  892             {
  893                 int spaceNum = 4;
  894                 string spaceNumParam = getParam(arg, "T", "indent=force-tab=");
  895                 if (spaceNumParam.length() > 0)
  896                     spaceNum = atoi(spaceNumParam.c_str());
  897                 if (spaceNum >= 2 && spaceNum <= 20)
  898                     formatter->setTabIndentation(spaceNum, true);
  899             }
  900             else if (isOption(arg, "indent=force-tab"))
  901             {
  902                 formatter->setTabIndentation(4, true);
  903             }
  904             else if (isParamOption(arg, "xT", "indent=force-tab-x="))
  905             {
  906                 int tabNum = 8;
  907                 string tabNumParam = getParam(arg, "xT", "indent=force-tab-x=");
  908                 if (tabNumParam.length() > 0)
  909                     tabNum = atoi(tabNumParam.c_str());
  910                 if (tabNum >= 2 && tabNum <= 20)
  911                     formatter->setForceTabXIndentation(tabNum);
  912                 
  913             }
  914             else if (isOption(arg, "indent=force-tab-x"))
  915             {
  916                 formatter->setForceTabXIndentation(8);
  917             }
  918             else if (isParamOption(arg, "s", "indent=spaces="))
  919             {
  920                 int spaceNum = 4;
  921                 string spaceNumParam = getParam(arg, "s", "indent=spaces=");
  922                 if (spaceNumParam.length() > 0)
  923                     spaceNum = atoi(spaceNumParam.c_str());
  924                 if (spaceNum >= 2 && spaceNum <= 20)
  925                     formatter->setSpaceIndentation(spaceNum);
  926             }
  927             else if (isOption(arg, "indent=spaces"))
  928             {
  929                 formatter->setSpaceIndentation(4);
  930             }
  931             else if (isParamOption(arg, "xt", "indent-continuation="))
  932             {
  933                 int contIndent = 1;
  934                 string contIndentParam = getParam(arg, "xt", "indent-continuation=");
  935                 if (contIndentParam.length() > 0)
  936                     contIndent = atoi(contIndentParam.c_str());
  937                 if (contIndent > 0 && contIndent < 5)
  938                     formatter->setContinuationIndentation(contIndent);
  939             }
  940             else if (isParamOption(arg, "m", "min-conditional-indent="))
  941             {
  942                 int minIndent = astyle::MINCOND_TWO;
  943                 string minIndentParam = getParam(arg, "m", "min-conditional-indent=");
  944                 if (minIndentParam.length() > 0)
  945                     minIndent = atoi(minIndentParam.c_str());
  946                 if (minIndent < astyle::MINCOND_END)
  947                     formatter->setMinConditionalIndentOption(minIndent);
  948             }
  949             else if (isParamOption(arg, "M", "max-continuation-indent="))
  950             {
  951                 int maxIndent = 40;
  952                 string maxIndentParam = getParam(arg, "M", "max-continuation-indent=");
  953                 if (maxIndentParam.length() > 0)
  954                     maxIndent = atoi(maxIndentParam.c_str());
  955                 if (maxIndent >= 40 && maxIndent <= 120)
  956                     formatter->setMaxContinuationIndentLength(maxIndent);
  957             }
  958             else if (isOption(arg, "N", "indent-namespaces"))
  959             {
  960                 formatter->setNamespaceIndent(true);
  961             }
  962             else if (isOption(arg, "C", "indent-classes"))
  963             {
  964                 formatter->setClassIndent(true);
  965             }
  966             else if (isOption(arg, "xG", "indent-modifiers"))
  967             {
  968                 formatter->setModifierIndent(true);
  969             }
  970             else if (isOption(arg, "S", "indent-switches"))
  971             {
  972                 formatter->setSwitchIndent(true);
  973             }
  974             else if (isOption(arg, "K", "indent-cases"))
  975             {
  976                 formatter->setCaseIndent(true);
  977             }
  978             else if (isOption(arg, "xU", "indent-after-parens"))
  979             {
  980                 formatter->setAfterParenIndent(true);
  981             }
  982             else if (isOption(arg, "L", "indent-labels"))
  983             {
  984                 formatter->setLabelIndent(true);
  985             }
  986             else if (isOption(arg, "xW", "indent-preproc-block"))
  987             {
  988                 formatter->setPreprocBlockIndent(true);
  989             }
  990             else if (isOption(arg, "w", "indent-preproc-define"))
  991             {
  992                 formatter->setPreprocDefineIndent(true);
  993             }
  994             else if (isOption(arg, "xw", "indent-preproc-cond"))
  995             {
  996                 formatter->setPreprocConditionalIndent(true);
  997             }
  998             else if (isOption(arg, "y", "break-closing-braces"))
  999             {
 1000                 formatter->setBreakClosingHeaderBracesMode(true);
 1001             }
 1002             else if (isOption(arg, "O", "keep-one-line-blocks"))
 1003             {
 1004                 formatter->setBreakOneLineBlocksMode(false);
 1005             }
 1006             else if (isOption(arg, "o", "keep-one-line-statements"))
 1007             {
 1008                 formatter->setBreakOneLineStatementsMode(false);
 1009             }
 1010             else if (isOption(arg, "P", "pad-paren"))
 1011             {
 1012                 formatter->setParensOutsidePaddingMode(true);
 1013                 formatter->setParensInsidePaddingMode(true);
 1014             }
 1015             else if (isOption(arg, "d", "pad-paren-out"))
 1016             {
 1017                 formatter->setParensOutsidePaddingMode(true);
 1018             }
 1019             else if (isOption(arg, "xd", "pad-first-paren-out"))
 1020             {
 1021                 formatter->setParensFirstPaddingMode(true);
 1022             }
 1023             else if (isOption(arg, "D", "pad-paren-in"))
 1024             {
 1025                 formatter->setParensInsidePaddingMode(true);
 1026             }
 1027             else if (isOption(arg, "H", "pad-header"))
 1028             {
 1029                 formatter->setParensHeaderPaddingMode(true);
 1030             }
 1031             else if (isOption(arg, "U", "unpad-paren"))
 1032             {
 1033                 formatter->setParensUnPaddingMode(true);
 1034             }
 1035             else if (isOption(arg, "p", "pad-oper"))
 1036             {
 1037                 formatter->setOperatorPaddingMode(true);
 1038             }
 1039             else if (isOption(arg, "xg", "pad-comma"))
 1040             {
 1041                 formatter->setCommaPaddingMode(true);
 1042             }
 1043             else if (isOption(arg, "xe", "delete-empty-lines"))
 1044             {
 1045                 formatter->setDeleteEmptyLinesMode(true);
 1046             }
 1047             else if (isOption(arg, "E", "fill-empty-lines"))
 1048             {
 1049                 formatter->setEmptyLineFill(true);
 1050             }
 1051             else if (isOption(arg, "c", "convert-tabs"))
 1052             {
 1053                 formatter->setTabSpaceConversionMode(true);
 1054             }
 1055             else if (isOption(arg, "xy", "close-templates"))
 1056             {
 1057                 formatter->setCloseTemplatesMode(true);
 1058             }
 1059             else if (isOption(arg, "F", "break-blocks=all"))
 1060             {
 1061                 formatter->setBreakBlocksMode(true);
 1062                 formatter->setBreakClosingHeaderBlocksMode(true);
 1063             }
 1064             else if (isOption(arg, "f", "break-blocks"))
 1065             {
 1066                 formatter->setBreakBlocksMode(true);
 1067             }
 1068             else if (isOption(arg, "e", "break-elseifs"))
 1069             {
 1070                 formatter->setBreakElseIfsMode(true);
 1071             }
 1072             else if (isOption(arg, "xb", "break-one-line-headers"))
 1073             {
 1074                 formatter->setBreakOneLineHeadersMode(true);
 1075             }
 1076             else if (isOption(arg, "j", "add-braces"))
 1077             {
 1078                 formatter->setAddBracesMode(true);
 1079             }
 1080             else if (isOption(arg, "J", "add-one-line-braces"))
 1081             {
 1082                 formatter->setAddOneLineBracesMode(true);
 1083             }
 1084             else if (isOption(arg, "xj", "remove-braces"))
 1085             {
 1086                 formatter->setRemoveBracesMode(true);
 1087             }
 1088             else if (isOption(arg, "Y", "indent-col1-comments"))
 1089             {
 1090                 formatter->setIndentCol1CommentsMode(true);
 1091             }
 1092             else if (isOption(arg, "align-pointer=type"))
 1093             {
 1094                 formatter->setPointerAlignment(astyle::PTR_ALIGN_TYPE);
 1095             }
 1096             else if (isOption(arg, "align-pointer=middle"))
 1097             {
 1098                 formatter->setPointerAlignment(astyle::PTR_ALIGN_MIDDLE);
 1099             }
 1100             else if (isOption(arg, "align-pointer=name"))
 1101             {
 1102                 formatter->setPointerAlignment(astyle::PTR_ALIGN_NAME);
 1103             }
 1104             else if (isParamOption(arg, "k"))
 1105             {
 1106                 int align = 0;
 1107                 string styleParam = getParam(arg, "k");
 1108                 if (styleParam.length() > 0)
 1109                     align = atoi(styleParam.c_str());
 1110                 if (align == 1)
 1111                     formatter->setPointerAlignment(astyle::PTR_ALIGN_TYPE);
 1112                 else if (align == 2)
 1113                     formatter->setPointerAlignment(astyle::PTR_ALIGN_MIDDLE);
 1114                 else if (align == 3)
 1115                     formatter->setPointerAlignment(astyle::PTR_ALIGN_NAME);
 1116             }
 1117             else if (isOption(arg, "align-reference=none"))
 1118             {
 1119                 formatter->setReferenceAlignment(astyle::REF_ALIGN_NONE);
 1120             }
 1121             else if (isOption(arg, "align-reference=type"))
 1122             {
 1123                 formatter->setReferenceAlignment(astyle::REF_ALIGN_TYPE);
 1124             }
 1125             else if (isOption(arg, "align-reference=middle"))
 1126             {
 1127                 formatter->setReferenceAlignment(astyle::REF_ALIGN_MIDDLE);
 1128             }
 1129             else if (isOption(arg, "align-reference=name"))
 1130             {
 1131                 formatter->setReferenceAlignment(astyle::REF_ALIGN_NAME);
 1132             }
 1133             else if (isParamOption(arg, "W"))
 1134             {
 1135                 int align = 0;
 1136                 string styleParam = getParam(arg, "W");
 1137                 if (styleParam.length() > 0)
 1138                     align = atoi(styleParam.c_str());
 1139                 if (align == 0)
 1140                     formatter->setReferenceAlignment(astyle::REF_ALIGN_NONE);
 1141                 else if (align == 1)
 1142                     formatter->setReferenceAlignment(astyle::REF_ALIGN_TYPE);
 1143                 else if (align == 2)
 1144                     formatter->setReferenceAlignment(astyle::REF_ALIGN_MIDDLE);
 1145                 else if (align == 3)
 1146                     formatter->setReferenceAlignment(astyle::REF_ALIGN_NAME);
 1147             }
 1148             else if (isParamOption(arg, "max-code-length="))
 1149             {
 1150                 int maxLength = 50;
 1151                 string maxLengthParam = getParam(arg, "max-code-length=");
 1152                 if (maxLengthParam.length() > 0)
 1153                     maxLength = atoi(maxLengthParam.c_str());
 1154                 if (maxLength >= 50 && maxLength<= 200)
 1155                     formatter->setMaxCodeLength(maxLength);
 1156             }
 1157             else if (isParamOption(arg, "xC"))
 1158             {
 1159                 int maxLength = 50;
 1160                 string maxLengthParam = getParam(arg, "xC");
 1161                 if (maxLengthParam.length() > 0)
 1162                     maxLength = atoi(maxLengthParam.c_str());
 1163                 if (maxLength > 0 && maxLength<= 200)
 1164                     formatter->setMaxCodeLength(maxLength);
 1165             }
 1166             else if (isOption(arg, "xL", "break-after-logical"))
 1167             {
 1168                 formatter->setBreakAfterMode(true);
 1169             }
 1170             else if (isOption(arg, "xc", "attach-classes"))
 1171             {
 1172                 formatter->setAttachClass(true);
 1173             }
 1174             else if (isOption(arg, "xV", "attach-closing-while"))
 1175             {
 1176                 formatter->setAttachClosingWhile(true);
 1177             }
 1178             else if (isOption(arg, "xk", "attach-extern-c"))
 1179             {
 1180                 formatter->setAttachExternC(true);
 1181             }
 1182             else if (isOption(arg, "xn", "attach-namespaces"))
 1183             {
 1184                 formatter->setAttachNamespace(true);
 1185             }
 1186             else if (isOption(arg, "xl", "attach-inlines"))
 1187             {
 1188                 formatter->setAttachInline(true);
 1189             }
 1190             else if (isOption(arg, "xp", "remove-comment-prefix"))
 1191             {
 1192                 formatter->setStripCommentPrefix(true);
 1193             }
 1194             else if (isOption(arg, "xB", "break-return-type"))
 1195             {
 1196                 formatter->setBreakReturnType(true);
 1197             }
 1198             else if (isOption(arg, "xD", "break-return-type-decl"))
 1199             {
 1200                 formatter->setBreakReturnTypeDecl(true);
 1201             }
 1202             else if (isOption(arg, "xf", "attach-return-type"))
 1203             {
 1204                 formatter->setAttachReturnType(true);
 1205             }
 1206             else if (isOption(arg, "xh", "attach-return-type-decl"))
 1207             {
 1208                 formatter->setAttachReturnTypeDecl(true);
 1209             }
 1210             // Objective-C options
 1211             else if (isOption(arg, "xQ", "pad-method-prefix"))
 1212             {
 1213                 formatter->setMethodPrefixPaddingMode(true);
 1214             }
 1215             else if (isOption(arg, "xR", "unpad-method-prefix"))
 1216             {
 1217                 formatter->setMethodPrefixUnPaddingMode(true);
 1218             }
 1219             else if (isOption(arg, "xq", "pad-return-type"))
 1220             {
 1221                 formatter->setReturnTypePaddingMode(true);
 1222             }
 1223             else if (isOption(arg, "xr", "unpad-return-type"))
 1224             {
 1225                 formatter->setReturnTypeUnPaddingMode(true);
 1226             }
 1227             else if (isOption(arg, "xS", "pad-param-type"))
 1228             {
 1229                 formatter->setParamTypePaddingMode(true);
 1230             }
 1231             else if (isOption(arg, "xs", "unpad-param-type"))
 1232             {
 1233                 formatter->setParamTypeUnPaddingMode(true);
 1234             }
 1235             else if (isOption(arg, "xM", "align-method-colon"))
 1236             {
 1237                 formatter->setAlignMethodColon(true);
 1238             }
 1239             else if (isOption(arg, "xP0", "pad-method-colon=none"))
 1240             {
 1241                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_NONE);
 1242             }
 1243             else if (isOption(arg, "xP1", "pad-method-colon=all"))
 1244             {
 1245                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_ALL);
 1246             }
 1247             else if (isOption(arg, "xP2", "pad-method-colon=after"))
 1248             {
 1249                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_AFTER);
 1250             }
 1251             else if (isOption(arg, "xP3", "pad-method-colon=before"))
 1252             {
 1253                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_BEFORE);
 1254             }
 1255         }
 1256     }
 1257 }
 1258 
 1259 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
 1260 {
 1261 
 1262     if (!embedded) {
 1263         while (!nestedLangs.empty()) {
 1264             nestedLangs.pop();
 1265         }   
 1266     }
 1267     
 1268     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
 1269     LoadResult result=LOAD_OK;
 1270 
 1271     if ( reloadNecessary ) {
 1272         if (syntaxReaders.count(langDefPath)) {
 1273             currentSyntax=syntaxReaders[langDefPath];
 1274             result=LOAD_OK;
 1275         } else {
 1276             currentSyntax=new SyntaxReader();
 1277             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
 1278             syntaxReaders[langDefPath]=currentSyntax;
 1279         }
 1280 
 1281         if ( result==LOAD_OK ) {
 1282             formattingPossible=currentSyntax->enableReformatting();
 1283 
 1284             if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
 1285                 // remove dynamic keyword tag delimiters of the old language definition
 1286                 vector<string>::iterator keyStyleOpenBegin =
 1287                     openTags.begin() + NUMBER_BUILTIN_STATES;
 1288                 vector<string>::iterator keyStyleCloseBegin =
 1289                     closeTags.begin() + NUMBER_BUILTIN_STATES;
 1290                 openTags.erase ( keyStyleOpenBegin, openTags.end() );
 1291                 closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
 1292             }
 1293             // add new keyword tag delimiters
 1294             for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
 1295                 openTags.push_back ( getKeywordOpenTag ( i ) );
 1296                 closeTags.push_back ( getKeywordCloseTag ( i ) );
 1297             }
 1298             
 1299             //test balloon
 1300             string overrideSpacer(currentSyntax->getOverrideConfigVal("spacer"));
 1301             if (!overrideSpacer.empty()) {
 1302                 spacer = overrideSpacer;
 1303             }
 1304             string overrideMaskWS(currentSyntax->getOverrideConfigVal("maskws"));
 1305             if (!overrideMaskWS.empty()) {
 1306                 maskWs = overrideMaskWS=="true";
 1307             }
 1308             
 1309         }
 1310     }
 1311     return result;
 1312 }
 1313 
 1314 bool CodeGenerator::validateInputStream()
 1315 {
 1316     if ( !in ) return false;
 1317 
 1318     // it is not possible to move stream pointer back with stdin
 1319     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
 1320         return true;
 1321 
 1322     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
 1323     // Magic configuration of "file"
 1324     // This is intended for web plugins - only check filetypes often found in the net
 1325     char magic_gif[]    = {'G','I','F','8', 0};
 1326     char magic_png[]    = {'\x89','P','N','G', 0};
 1327     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
 1328     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
 1329     char magic_bmp[]    = {'B','M', 0};
 1330     char magic_pdf[]    = {'%','P','D','F', 0};
 1331     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
 1332     char magic_rar[]    = {'R','a','r','!', 0};
 1333     char magic_zip[]    = {'P','K','\x03','\x04', 0};
 1334     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
 1335     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
 1336     char magic_bzip[]   = {'B','Z', 0};
 1337 
 1338     char* magic_table[] = {magic_utf8,
 1339                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
 1340                            magic_java,
 1341                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
 1342                            0
 1343                           };
 1344 
 1345     char buffer [10]= {0};
 1346     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
 1347 
 1348     int magic_index=0;
 1349     while ( magic_table[magic_index] ) {
 1350         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
 1351             break;
 1352         }
 1353         magic_index++;
 1354     }
 1355     int streamReadPos=0;
 1356     if ( magic_table[magic_index] == magic_utf8 ) {
 1357         //setEncoding("utf-8");
 1358         streamReadPos=3; // remove UTF-8 magic number from output
 1359     }
 1360 
 1361     in -> seekg ( streamReadPos, ios::beg );
 1362     in-> clear();  // clear fail bit to continue reading
 1363 
 1364     return !magic_table[magic_index] // points to 0 if no pattern was found
 1365            || magic_table[magic_index] == magic_utf8;
 1366 }
 1367 
 1368 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
 1369     
 1370     if ( pluginChunks.size()) {
 1371     
 1372         Diluculum::LuaState luaState;
 1373 
 1374         Diluculum::LuaValueList chunkParams;
 1375         chunkParams.push_back(currentSyntax->getDescription());
 1376         for (unsigned int i=0; i<pluginChunks.size(); i++) {
 1377             luaState.call(*pluginChunks[i], chunkParams, "format user function");
 1378         }
 1379         
 1380         if (luaState.globals().count(fctName)) {
 1381             Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
 1382         
 1383             luaState["HL_INPUT_FILE"] = luaState["HL_PLUGIN_PARAM"] = pluginParameter;
 1384             luaState["HL_OUTPUT"] = outputType;
 1385             luaState["HL_FORMAT_HTML"]=HTML;
 1386             luaState["HL_FORMAT_XHTML"]=XHTML;
 1387             luaState["HL_FORMAT_TEX"]=TEX;
 1388             luaState["HL_FORMAT_LATEX"]=LATEX;
 1389             luaState["HL_FORMAT_RTF"]=RTF;
 1390             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
 1391             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
 1392             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
 1393             luaState["HL_FORMAT_SVG"]=SVG;
 1394             luaState["HL_FORMAT_BBCODE"]=BBCODE;
 1395             luaState["HL_FORMAT_PANGO"]=PANGO;
 1396             luaState["HL_FORMAT_ODT"]=ODTFLAT;
 1397             
 1398             Diluculum::LuaValueList params;
 1399             Diluculum::LuaValueMap options;
 1400             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );   
 1401             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);   
 1402             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);   
 1403             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());   
 1404             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());   
 1405 
 1406             params.push_back(inputFilesCnt);
 1407             params.push_back(processedFilesCnt);
 1408             params.push_back(options);
 1409             
 1410             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call")  ;
 1411             if (res.size()>=1) {
 1412                 *keepDefault=false;
 1413                 *result = res[0].asString();
 1414                 if (res.size()==2)
 1415                     *keepDefault = res[1].asBoolean();
 1416             }
 1417             delete documentFct;
 1418         }
 1419     }
 1420 }
 1421 
 1422 void CodeGenerator::printHeader()
 1423 {
 1424     bool keepDefaultHeader=true;
 1425     string pluginHeader;
 1426     
 1427     processedFilesCnt++;
 1428     
 1429     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
 1430 
 1431     if ( ! fragmentOutput && keepDefaultHeader)
 1432         *out << getHeader();
 1433     
 1434     *out << pluginHeader; 
 1435    
 1436     if ( !fragmentOutput || keepInjections)
 1437         *out << currentSyntax->getHeaderInjection();
 1438 }
 1439 
 1440 void CodeGenerator::printFooter()
 1441 {
 1442     
 1443     bool keepDefaultFooter=true;
 1444     string pluginFooter;
 1445     
 1446     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
 1447     
 1448     if ( !fragmentOutput || keepInjections)
 1449         *out << currentSyntax->getFooterInjection();
 1450 
 1451     *out << pluginFooter; 
 1452     
 1453     if ( ! fragmentOutput && keepDefaultFooter )
 1454         *out << getFooter();
 1455 }
 1456 
 1457 ParseError CodeGenerator::generateFile ( const string &inFileName,
 1458         const string &outFileName )
 1459 {
 1460     if ( !docStyle.found() ) {
 1461         return BAD_STYLE;
 1462     }
 1463 
 1464     reset();
 1465 
 1466     ParseError error=PARSE_OK;
 1467 
 1468     inFile=inFileName;
 1469     outFile=outFileName;
 1470         
 1471     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
 1472 
 1473     if ( validateInput )
 1474         if ( !validateInputStream() ) error= BAD_INPUT;
 1475 
 1476     if ( !in->fail() && error==PARSE_OK ) {
 1477         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
 1478         if ( out->fail() ) {
 1479             error=BAD_OUTPUT;
 1480         }
 1481     }
 1482 
 1483     if ( in->fail() ) {
 1484         error=BAD_INPUT;
 1485     }
 1486 
 1487     if ( error==PARSE_OK ) {
 1488         if ( formatter != NULL ) {
 1489             formatter->init ( new astyle::ASStreamIterator ( in ) );
 1490         }
 1491         printHeader();
 1492         printBody();
 1493         printFooter();
 1494     }
 1495 
 1496     if ( !outFileName.empty() ) {
 1497         delete out;
 1498         out=NULL;
 1499     }
 1500     if ( !inFileName.empty() ) {
 1501         delete in;
 1502         in=NULL;
 1503     }
 1504     return error;
 1505 }
 1506 
 1507 string CodeGenerator::generateString ( const string &input )
 1508 {
 1509 
 1510     if ( !docStyle.found() ) {
 1511         return "";
 1512     }
 1513 
 1514     reset();
 1515 
 1516     in = new istringstream ( input );
 1517     out = new ostringstream ();
 1518 
 1519     if ( in->fail() || out->fail() ) {
 1520         return "";
 1521     }
 1522 
 1523     if ( formatter != NULL ) {
 1524         formatter->init ( new astyle::ASStreamIterator ( in ) );
 1525     }
 1526     printHeader();
 1527     printBody();
 1528     printFooter();
 1529 
 1530     string result = static_cast<ostringstream*> ( out )->str();
 1531 
 1532     delete out;
 1533     out=NULL;
 1534     delete in;
 1535     in=NULL;
 1536 
 1537     return result;
 1538 }
 1539 
 1540 string CodeGenerator::generateStringFromFile ( const string &inFileName )
 1541 {
 1542 
 1543     if ( !docStyle.found() ) {
 1544         return "";
 1545     }
 1546 
 1547     reset();
 1548 
 1549     inFile = inFileName;
 1550     
 1551     in = new ifstream ( inFileName.c_str() );
 1552     out = new ostringstream ();
 1553 
 1554     if ( in->fail() || out->fail() ) {
 1555         return "";
 1556     }
 1557 
 1558     if ( validateInput && !validateInputStream() ) {
 1559         return "ERROR: detected binary input";
 1560     }
 1561 
 1562     if ( formatter != NULL ) {
 1563         formatter->init ( new astyle::ASStreamIterator ( in ) );
 1564     }
 1565     printHeader();
 1566     printBody();
 1567     printFooter();
 1568 
 1569     string result = static_cast<ostringstream*> ( out )->str();
 1570 
 1571     delete out;
 1572     out=NULL;
 1573     delete in;
 1574     in=NULL;
 1575 
 1576     return result;
 1577 }
 1578 
 1579 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
 1580 {
 1581     if ( s==KEYWORD && kwClassID ) {
 1582         return NUMBER_BUILTIN_STATES + kwClassID-1;
 1583     }
 1584     return ( unsigned int ) s ;
 1585 }
 1586 
 1587 void CodeGenerator::openTag ( State s )
 1588 {
 1589     *out << openTags[ ( unsigned int ) s];
 1590     currentState=s;
 1591 
 1592 }
 1593 
 1594 void CodeGenerator::closeTag ( State s )
 1595 {
 1596     *out << closeTags[ ( unsigned int ) s];
 1597     flushWs(2);
 1598     currentState=_UNKNOWN;
 1599 }
 1600 
 1601 void CodeGenerator::openKWTag ( unsigned int kwClassID )
 1602 {
 1603     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1604     currentState=KEYWORD;
 1605 }
 1606 
 1607 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
 1608 {
 1609     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1610     flushWs(3);
 1611     currentState=_UNKNOWN;
 1612 }
 1613 
 1614 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
 1615 {
 1616     if (nestedLangs.empty()) {
 1617         nestedLangs.push(currentSyntax->getCurrentPath() );
 1618     }
 1619     if (nestedLangs.top() != embedLangDefPath) {
 1620         nestedLangs.push(embedLangDefPath);
 1621     }
 1622     LoadResult res = loadLanguage(embedLangDefPath, true);
 1623     //pass end delimiter regex to syntax description
 1624     currentSyntax->restoreLangEndDelim(embedLangDefPath);
 1625     return res == LOAD_OK;
 1626 }
 1627 
 1628 ///////////////////////////////////////////////////////////////////////////////
 1629 
 1630 void CodeGenerator::processRootState()
 1631 {
 1632     bool eof=false,
 1633          firstLine=true; // avoid newline before printing the first output line
 1634 
 1635     applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
 1636     
 1637     if ( currentSyntax->highlightingDisabled() ) {
 1638         string line;
 1639         while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
 1640             ++lineNumber;
 1641             insertLineNumber ( !firstLine );
 1642             flushWs(4);
 1643             firstLine=false;
 1644             if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
 1645                 maskString ( *out, line );
 1646         }
 1647         *out << flush;
 1648         return;
 1649     }
 1650 
 1651     if (!embedLangStart.empty()) {
 1652         if (!loadEmbeddedLang(currentSyntax->getNewPath(embedLangStart))) return;
 1653     }
 1654 
 1655     State state=STANDARD;
 1656 
 1657     openTag ( STANDARD );
 1658     do {
 1659         // determine next state
 1660         state= getCurrentState(STANDARD);
 1661 
 1662         // handle current state
 1663         switch ( state ) {
 1664         case KEYWORD:
 1665             closeTag ( STANDARD );
 1666             eof=processKeywordState ( state );
 1667             openTag ( STANDARD );
 1668             break;
 1669         case NUMBER:
 1670             closeTag ( STANDARD );
 1671             eof=processNumberState();
 1672             openTag ( STANDARD );
 1673             break;
 1674         case ML_COMMENT:
 1675             closeTag ( STANDARD );
 1676             eof=processMultiLineCommentState();
 1677             openTag ( STANDARD );
 1678             break;
 1679         case SL_COMMENT:
 1680             closeTag ( STANDARD );
 1681             eof=processSingleLineCommentState();
 1682             openTag ( STANDARD );
 1683             break;
 1684         case STRING:
 1685             closeTag ( STANDARD );
 1686             eof=processStringState ( STANDARD );
 1687             openTag ( STANDARD );
 1688             break;
 1689         case DIRECTIVE:
 1690             closeTag ( STANDARD );
 1691             eof=processDirectiveState();
 1692             openTag ( STANDARD );
 1693             break;
 1694         case ESC_CHAR:
 1695             closeTag ( STANDARD );
 1696             eof=processEscapeCharState();
 1697             openTag ( STANDARD );
 1698             break;
 1699         case SYMBOL:
 1700             closeTag ( STANDARD );
 1701             eof=processSymbolState();
 1702             openTag ( STANDARD );
 1703             break;
 1704 
 1705         case EMBEDDED_CODE_END:
 1706             closeTag ( STANDARD );
 1707             eof=processSyntaxChangeState(state);
 1708             openTag ( STANDARD );
 1709             break;
 1710         case _EOL:
 1711 
 1712             // XTERM256 fix (issue with less cmd)
 1713             if  (!firstLine || showLineNumbers) {
 1714                 closeTag ( STANDARD );
 1715             }
 1716             insertLineNumber ( !firstLine );
 1717             if (!firstLine || showLineNumbers) {
 1718                 flushWs(5);
 1719                 stateTraceCurrent.clear();
 1720                 openTag ( STANDARD );
 1721             }
 1722             firstLine=false;
 1723             break;
 1724         case _EOF:
 1725             eof=true;
 1726             break;
 1727         case _WS:
 1728             processWsState();
 1729             break;
 1730         default:
 1731             printMaskedToken ();
 1732             break;
 1733         }
 1734     } while ( !eof );
 1735     closeTag ( STANDARD );
 1736 
 1737     if (currentSyntax->getDecorateLineEndFct()) {
 1738         Diluculum::LuaValueList res=callDecorateLineFct(false);
 1739         if (res.size()==1) {
 1740             *out << res[0].asString();
 1741         }
 1742     }
 1743 
 1744     printNewLines = !noTrailingNewLine;
 1745     *out << getNewLine();
 1746     *out << flush;
 1747 }
 1748 
 1749 bool CodeGenerator::processSyntaxChangeState(State myState)
 1750 {
 1751     State newState=STANDARD;
 1752     bool eof=false,
 1753          exitState=false;
 1754 
 1755     openTag ( KEYWORD );
 1756     do {
 1757 
 1758         if (myState==EMBEDDED_CODE_END) {
 1759             if (!nestedLangs.empty()) {
 1760                 nestedLangs.pop();
 1761             }
 1762             // load host language syntax
 1763             if (!nestedLangs.empty()) {
 1764                 loadLanguage(nestedLangs.top(), true);
 1765             }
 1766             matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
 1767         }
 1768         
 1769         printMaskedToken ( newState!=_WS );
 1770 
 1771         newState= getCurrentState(myState);
 1772 
 1773         switch ( newState ) {
 1774         case _WS:
 1775             processWsState();
 1776             break;
 1777         case _EOL:
 1778             insertLineNumber();
 1779             exitState=true;
 1780             break;
 1781         case _EOF:
 1782             eof = true;
 1783             break;
 1784         default:
 1785             exitState=true;
 1786             break;
 1787         }
 1788     } while (  !exitState  &&  !eof );
 1789     closeTag ( KEYWORD );
 1790 
 1791     return eof;
 1792 }
 1793 
 1794 
 1795 bool CodeGenerator::processKeywordState ( State myState )
 1796 {
 1797     State newState=STANDARD;
 1798     unsigned int myClassID=currentKeywordClass;
 1799     bool eof=false,
 1800          exitState=false;
 1801 
 1802     openKWTag ( myClassID );
 1803     do {
 1804         printMaskedToken ( newState!=_WS,
 1805                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
 1806         newState= getCurrentState(myState);
 1807         switch ( newState ) {
 1808         case _WS:
 1809             processWsState();
 1810             break;
 1811         case _EOL:
 1812             insertLineNumber();
 1813             exitState=true;
 1814             
 1815             break;
 1816         case _EOF:
 1817             eof = true;
 1818             break;
 1819         case KEYWORD_END:
 1820             exitState=true;
 1821             break;
 1822         default:
 1823             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
 1824             break;
 1825         }
 1826     } while ( !exitState  &&  !eof );
 1827 
 1828     closeKWTag ( myClassID );
 1829 
 1830     currentKeywordClass=0;
 1831     return eof;
 1832 }
 1833 
 1834 bool CodeGenerator::processNumberState()
 1835 {
 1836     State newState=STANDARD;
 1837     bool eof=false,
 1838          exitState=false;
 1839     openTag ( NUMBER );
 1840     do {
 1841         printMaskedToken ( newState!=_WS );
 1842         newState= getCurrentState(NUMBER);
 1843         switch ( newState ) {
 1844         case _WS:
 1845             processWsState();
 1846             break;
 1847         case _EOL:
 1848             insertLineNumber();
 1849             exitState=true;
 1850             break;
 1851         case _EOF:
 1852             eof = true;
 1853             break;
 1854         default:
 1855             exitState=newState!=NUMBER;
 1856             break;
 1857         }
 1858     } while ( !exitState && !eof );
 1859 
 1860     closeTag ( NUMBER );
 1861     return eof;
 1862 }
 1863 
 1864 
 1865 
 1866 bool CodeGenerator::processMultiLineCommentState()
 1867 {
 1868     int commentCount=1;
 1869     int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
 1870     State newState=STANDARD;
 1871     bool eof=false, exitState=false, containedTestCase=false;
 1872     unsigned int startColumn=lineIndex - token.size() ;
 1873     openTag ( ML_COMMENT );
 1874     do {
 1875         printMaskedToken (newState!=_WS );
 1876         newState= getCurrentState(ML_COMMENT);
 1877 
 1878         switch ( newState ) {
 1879         case _WS:
 1880             processWsState();
 1881             break;
 1882         case _EOL:
 1883             wsBuffer += closeTags[ML_COMMENT];
 1884             insertLineNumber();
 1885             wsBuffer += openTags[ML_COMMENT];
 1886             startColumn=0;
 1887             break;
 1888         case _EOF:
 1889             eof = true;
 1890             break;
 1891         case _TESTPOS:
 1892             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1893             printMaskedToken();
 1894             containedTestCase=true;
 1895             break;
 1896         case ML_COMMENT:
 1897 
 1898             if ( currentSyntax->allowNestedMLComments() ) {
 1899                 ++commentCount;
 1900             }
 1901             // if delimiters are equal, close the comment by continueing to
 1902             // ML_COMMENT_END section
 1903             if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;
 1904 
 1905         case ML_COMMENT_END:
 1906 
 1907             if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
 1908                 break;
 1909             }
 1910             commentCount--;
 1911             if ( !commentCount ) {
 1912                 printMaskedToken();
 1913                 exitState=true;
 1914             }
 1915             break;
 1916         default:
 1917             break;
 1918         }
 1919     } while ( !exitState  &&  !eof );
 1920 
 1921     closeTag ( ML_COMMENT );
 1922    
 1923     if (containedTestCase){
 1924         stateTraceCurrent.clear();
 1925     }
 1926     return eof;
 1927 }
 1928 
 1929 
 1930 bool CodeGenerator::processSingleLineCommentState()
 1931 {
 1932     if ( checkSpecialCmd() ) {
 1933         return in->bad(); // if input stream is bad, report eof to calling method
 1934     }
 1935 
 1936     State newState=STANDARD;
 1937     bool eof=false, exitState=false, containedTestCase=false;
 1938     unsigned int startColumn = lineIndex - token.size() ;
 1939 
 1940     openTag ( SL_COMMENT );
 1941     do {
 1942         printMaskedToken ( newState!=_WS );
 1943         newState= getCurrentState(SL_COMMENT);
 1944 
 1945         switch ( newState ) {
 1946         case _WS:
 1947             processWsState();
 1948             break;
 1949         case _EOL:
 1950             printMaskedToken();
 1951             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1952                 exitState=false;
 1953             } else {
 1954                 exitState=true;
 1955             }
 1956             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
 1957             insertLineNumber();
 1958             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
 1959 
 1960             break;
 1961         case _EOF:
 1962             eof = true;
 1963             break;
 1964         case _TESTPOS:
 1965             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1966             printMaskedToken();
 1967             containedTestCase=true;
 1968             break;
 1969      
 1970         default:
 1971             break;
 1972         }
 1973     } while ( !exitState  &&  !eof );
 1974 
 1975     closeTag ( SL_COMMENT );
 1976     
 1977     if (containedTestCase) {
 1978         stateTraceCurrent.clear();
 1979     }
 1980     
 1981     return eof;
 1982 }
 1983 
 1984 bool CodeGenerator::processDirectiveState()
 1985 {
 1986     State  newState=STANDARD;
 1987     bool eof=false, exitState=false;
 1988 
 1989     openTag ( DIRECTIVE );
 1990     do {
 1991         printMaskedToken ( newState!=_WS );
 1992         newState= getCurrentState(DIRECTIVE);
 1993         switch ( newState ) {
 1994         case _WS:
 1995             processWsState();
 1996             break;
 1997         case DIRECTIVE_END:
 1998             printMaskedToken();
 1999             exitState=true;
 2000             break;
 2001         case _EOL:
 2002             printMaskedToken();
 2003             
 2004             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 2005                 exitState=false;
 2006             } else {
 2007                 if (currentSyntax->getContinuationChar()!=0x13){
 2008                     exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
 2009                 } 
 2010             }
 2011             if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
 2012             insertLineNumber();
 2013             if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
 2014             break;
 2015         case ML_COMMENT:
 2016             closeTag ( DIRECTIVE );
 2017             eof= processMultiLineCommentState();
 2018             openTag ( DIRECTIVE );
 2019             break;
 2020         case SL_COMMENT:
 2021             closeTag ( DIRECTIVE );
 2022             eof= processSingleLineCommentState();
 2023             openTag ( DIRECTIVE );
 2024             exitState=true;
 2025             break;
 2026         case STRING:
 2027             closeTag ( DIRECTIVE );
 2028             eof=processStringState ( DIRECTIVE );
 2029             openTag ( DIRECTIVE );
 2030             break;
 2031         case _EOF:
 2032             eof = true;
 2033             break;
 2034         default:
 2035             break;
 2036         }
 2037     } while ( !exitState && !eof );
 2038 
 2039     closeTag ( DIRECTIVE );
 2040     return eof;
 2041 }
 2042 
 2043 bool CodeGenerator::processStringState ( State oldState )
 2044 {
 2045     State newState=STANDARD;
 2046     bool eof=false, exitState=false;
 2047     bool returnedFromOtherState=false;
 2048 
 2049     State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
 2050 
 2051     int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
 2052     string openDelim=token;
 2053 
 2054     //Raw String by definition:
 2055     bool isRawString=currentSyntax->delimiterIsRawString(openDelimID);
 2056 
 2057     // Test if character before string open delimiter token equals to the
 2058     // raw string prefix (Example: r" ", r""" """ in Python)
 2059 
 2060     //Raw String Prefix:
 2061     if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
 2062         isRawString=true;
 2063     }
 2064 
 2065     openTag ( myState );
 2066     do {
 2067         // true if last token was an escape char
 2068         if ( !returnedFromOtherState ) {
 2069             printMaskedToken (newState!=_WS );
 2070         }
 2071         returnedFromOtherState=false;
 2072         newState= getCurrentState(myState);
 2073 
 2074         switch ( newState ) {
 2075         case _WS:
 2076             processWsState();
 2077             break;
 2078         case _EOL:
 2079             wsBuffer += closeTags[myState];
 2080             insertLineNumber();
 2081             wsBuffer += openTags[myState];
 2082             break;
 2083         case STRING_END:
 2084             if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
 2085                 if (currentSyntax->assertDelimEqualLength()) {
 2086                     exitState= openDelim.length()==token.length();
 2087                 } else {
 2088                     exitState= true;
 2089                 }
 2090                 printMaskedToken();
 2091             }
 2092             break;
 2093         case STRING:
 2094             // if there exist multiple string delimiters, close string if
 2095             // current delimiter is equal to the opening delimiter
 2096             exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  ))&&token==openDelim;
 2097             printMaskedToken();
 2098             break;
 2099         case ESC_CHAR:
 2100             if ( !isRawString ) {
 2101                 closeTag ( myState );
 2102                 eof=processEscapeCharState();
 2103                 openTag ( myState );
 2104                 returnedFromOtherState=true;
 2105             } else {
 2106                 // FIXME not a fix for Python r"""\"""
 2107                 exitState=token.size()>1 && token[1] == openDelim[0];
 2108                 printMaskedToken();
 2109             }
 2110             break;
 2111         case STRING_INTERPOLATION:
 2112             closeTag ( myState );
 2113             eof=processInterpolationState();
 2114             openTag ( myState );
 2115             returnedFromOtherState=true;
 2116             break;
 2117         case _EOF:
 2118             eof = true;
 2119             break;
 2120         default:
 2121             printMaskedToken();
 2122             break;
 2123         }
 2124     } while ( !exitState && !eof );
 2125 
 2126     closeTag ( myState );
 2127 
 2128     return eof;
 2129 }
 2130 
 2131 bool CodeGenerator::processSymbolState()
 2132 {
 2133 
 2134     State newState=STANDARD;
 2135     bool eof=false,
 2136          exitState=false;
 2137 
 2138     openTag ( SYMBOL );
 2139     do {
 2140         printMaskedToken ( newState!=_WS );
 2141         newState= getCurrentState(SYMBOL);
 2142         switch ( newState ) {
 2143         case _WS:
 2144             processWsState();
 2145             break;
 2146         case _EOL:
 2147             insertLineNumber();
 2148             exitState=true;
 2149             break;
 2150         case _EOF:
 2151             eof = true;
 2152             break;
 2153         default:
 2154             exitState=newState!=SYMBOL;
 2155             break;
 2156         }
 2157     } while ( !exitState && !eof );
 2158 
 2159     closeTag ( SYMBOL );
 2160     return eof;
 2161 }
 2162 
 2163 bool CodeGenerator::processEscapeCharState()
 2164 {
 2165     State newState=STANDARD;
 2166     bool eof=false, exitState=false;
 2167     openTag ( ESC_CHAR );
 2168     do {
 2169         printMaskedToken (newState!=_WS );
 2170         newState= getCurrentState(ESC_CHAR);
 2171         switch ( newState ) {
 2172         case _EOL:
 2173             insertLineNumber();
 2174             exitState=true;
 2175             break;
 2176         case _WS:
 2177             processWsState();
 2178             break;
 2179         case _EOF:
 2180             eof = true;
 2181             break;
 2182         default:
 2183             exitState=newState!=ESC_CHAR;
 2184             break;
 2185         }
 2186     } while ( !exitState && !eof );
 2187 
 2188     closeTag ( ESC_CHAR );
 2189     return eof;
 2190 }
 2191 
 2192 bool CodeGenerator::processInterpolationState()
 2193 {
 2194     State newState=STANDARD;
 2195     bool eof=false, exitState=false;
 2196     openTag ( STRING_INTERPOLATION );
 2197     do {
 2198         printMaskedToken (newState!=_WS );
 2199         newState= getCurrentState(STRING_INTERPOLATION);
 2200         switch ( newState ) {
 2201         case _EOL:
 2202             insertLineNumber();
 2203             exitState=true;
 2204             break;
 2205         case _WS:
 2206             processWsState();
 2207             break;
 2208         case _EOF:
 2209             eof = true;
 2210             break;
 2211         default:
 2212             exitState=newState!=STRING_INTERPOLATION;
 2213             break;
 2214         }
 2215     } while ( !exitState && !eof );
 2216 
 2217     closeTag ( STRING_INTERPOLATION );
 2218     return eof;
 2219 }
 2220 
 2221 void CodeGenerator::processWsState()
 2222 {
 2223     if ( !maskWs ) {
 2224         wsBuffer += token;
 2225         token.clear();
 2226         return;
 2227     }
 2228 
 2229     flushWs(6);
 2230 
 2231     int cntWs=0;
 2232     lineIndex--;
 2233     PositionState ps(currentState, 0, true);
 2234             
 2235     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
 2236         ++cntWs;
 2237         ++lineIndex;
 2238     }
 2239     if ( cntWs>1 ) {
 2240 
 2241         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
 2242         if ( excludeWs && styleID!=_UNKNOWN ) {
 2243             *out << closeTags[styleID];
 2244         }
 2245         *out << maskWsBegin ;
 2246         for ( int i=0; i<cntWs; i++ ) {
 2247             *out <<  spacer;
 2248             if (applySyntaxTestCase){
 2249                 stateTraceCurrent.push_back(ps);
 2250             }
 2251         }
 2252         *out << maskWsEnd;
 2253         if ( excludeWs && styleID!=_UNKNOWN ) {
 2254             *out << openTags[styleID];
 2255         }
 2256     } else {
 2257     
 2258         *out << spacer; //Bugfix fehlender Space nach Strings
 2259         if (applySyntaxTestCase){
 2260             stateTraceCurrent.push_back(ps);            
 2261         }
 2262     }
 2263     token.clear();
 2264 }
 2265 
 2266 void CodeGenerator::flushWs(int arg)
 2267 {
 2268      PositionState ps(currentState, 0, true);
 2269      //workaround condition
 2270      for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
 2271         stateTraceCurrent.push_back(ps);
 2272         //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;           
 2273     }
 2274      
 2275      //fix canvas whitespace
 2276      if (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR){
 2277         *out<< maskWsBegin;
 2278      }
 2279     
 2280     *out<<wsBuffer;
 2281     wsBuffer.clear();
 2282 }
 2283 
 2284 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
 2285     switch (s) {
 2286         
 2287         case STANDARD:
 2288             return STY_NAME_STD;
 2289         case STRING:
 2290             return STY_NAME_STR;
 2291         case NUMBER:
 2292             return STY_NAME_NUM;
 2293         case SL_COMMENT:
 2294             return STY_NAME_SLC;
 2295         case ML_COMMENT:
 2296             return STY_NAME_COM;
 2297         case ESC_CHAR:
 2298             return STY_NAME_ESC;
 2299         case DIRECTIVE:
 2300             return STY_NAME_DIR;
 2301         case DIRECTIVE_STRING:
 2302             return STY_NAME_DST;
 2303         case SYMBOL:
 2304             return STY_NAME_SYM;
 2305         case STRING_INTERPOLATION:
 2306             return STY_NAME_IPL;
 2307         case _WS:
 2308             return "ws";
 2309         case KEYWORD: {
 2310             
 2311             if (!kwClass)
 2312                 return "ws";
 2313             
 2314             char kwName[5] = {0};
 2315             snprintf(kwName, sizeof(kwName), "kw%c", ('a'+kwClass-1));
 2316             return string(kwName);
 2317         }
 2318         default:
 2319             return "unknown_test";
 2320     }
 2321 }
 2322 
 2323 void CodeGenerator::printTrace(const string &s){
 2324     std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
 2325     for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
 2326         std::cout<<" "<<stateTraceCurrent[i].state;
 2327     }
 2328     std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
 2329     for (unsigned int i=0; i< stateTraceTest.size(); i++) {
 2330         std::cout<<" "<<stateTraceTest[i].state;
 2331     }
 2332     /*
 2333     for (unsigned int i=0; i< stateTrace.size(); i++) {
 2334         std::cout<<" "<<stateTrace[i];
 2335     }
 2336    */
 2337     std::cout<<"\n";
 2338 }
 2339 
 2340 void CodeGenerator::runSyntaxTestcases(unsigned int column){
 2341     
 2342     unsigned int assertGroup=0;
 2343     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
 2344     State assertState=_UNKNOWN;
 2345     
 2346     //printTrace("trace 2");
 2347     
 2348     if (typeDescPos!=string::npos) {
 2349     
 2350         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
 2351             assertState=NUMBER;
 2352         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos)
 2353             assertState=STRING;
 2354         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
 2355             assertState=ESC_CHAR;
 2356         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
 2357             assertState=STRING_INTERPOLATION;
 2358         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
 2359             assertState=SYMBOL;
 2360         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
 2361             assertState=DIRECTIVE;
 2362         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
 2363             assertState=SL_COMMENT;
 2364         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
 2365             assertState=ML_COMMENT;
 2366         else if (line.find("ws", typeDescPos)==typeDescPos)
 2367             assertState=_WS;
 2368         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos)
 2369             assertState=STANDARD;
 2370         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
 2371             assertState=DIRECTIVE_STRING;
 2372         
 2373         else if (line.find("kw", typeDescPos)==typeDescPos) {
 2374             assertState=KEYWORD;
 2375             if (isalpha(line[typeDescPos+2]))
 2376                 assertGroup=line[typeDescPos+2] - 'a' +1;
 2377         }
 2378     
 2379         if (   (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
 2380             || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
 2381             || assertGroup != stateTraceTest[column].kwClass) {
 2382             ostringstream err;
 2383             err << inFile << " line " << lineNumber << ", column "<< column 
 2384                 << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)  
 2385                 << " instead of " << getTestcaseName(assertState, assertGroup);
 2386             failedPosTests.push_back(err.str());
 2387         }
 2388         
 2389     }
 2390     
 2391     lineContainedTestCase=true; 
 2392 }
 2393 
 2394 
 2395 string CodeGenerator::getNewLine()
 2396 {
 2397     return (printNewLines) ? newLineTag : "";
 2398 }
 2399 
 2400 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
 2401 {
 2402 
 2403     Diluculum::LuaValueList params;
 2404     params.push_back(Diluculum::LuaValue(lineNumber));
 2405 
 2406     return currentSyntax->getLuaState()->call ( isLineStart ?
 2407             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
 2408             params,"getDecorateLineFct call")  ;
 2409 
 2410 }
 2411 
 2412 void CodeGenerator::insertLineNumber ( bool insertNewLine )
 2413 {
 2414     if ( insertNewLine ) {
 2415         if (currentSyntax->getDecorateLineEndFct()) {
 2416             Diluculum::LuaValueList res=callDecorateLineFct(false);
 2417             if (res.size()==1) {
 2418                 wsBuffer +=res[0].asString();
 2419             }
 2420         }
 2421 
 2422         wsBuffer += getNewLine();
 2423     }
 2424 
 2425     if (currentSyntax->getDecorateLineBeginFct()) {
 2426         Diluculum::LuaValueList res=callDecorateLineFct(true);
 2427         if (res.size()==1) {
 2428             wsBuffer +=res[0].asString();
 2429         }
 2430     }
 2431 
 2432     if ( showLineNumbers ) {
 2433         ostringstream os;
 2434         ostringstream numberPrefix;
 2435 
 2436         os << setw ( getLineNumberWidth() ) << right;
 2437         if( numberCurrentLine ) {
 2438             if ( lineNumberFillZeroes ) {
 2439                 os.fill ( '0' );
 2440             }
 2441             os << lineNumber+lineNumberOffset;
 2442         } else {
 2443             os << "";
 2444         }
 2445 
 2446         numberPrefix << openTags[LINENUMBER];
 2447         maskString ( numberPrefix, os.str() );
 2448         numberPrefix << spacer << closeTags[LINENUMBER];
 2449 
 2450         wsBuffer += numberPrefix.str();
 2451     }
 2452 }
 2453 
 2454 unsigned int CodeGenerator::getLineIndex()
 2455 {
 2456     return lineIndex;
 2457 }
 2458 unsigned int CodeGenerator::getLastLineLength()
 2459 {
 2460     return lastLineLength;
 2461 }
 2462 
 2463 bool CodeGenerator::printExternalStyle ( const string &outFile )
 2464 {
 2465     if ( !includeStyleDef ) {
 2466         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
 2467         if ( !cssOutFile->fail() ) {
 2468             if (!omitVersionComment) {
 2469                 *cssOutFile << styleCommentOpen
 2470                             <<" Style definition file generated by highlight "
 2471                             << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
 2472                             << " " << styleCommentClose << "\n";
 2473             }
 2474             *cssOutFile << getStyleDefinition()
 2475                         << "\n";
 2476             *cssOutFile << readUserStyleDef();
 2477             if ( !outFile.empty() ) delete cssOutFile;
 2478         } else {
 2479             return false;
 2480         }
 2481     }
 2482     return true;
 2483 }
 2484 
 2485 string CodeGenerator::readUserStyleDef()
 2486 {
 2487     ostringstream ostr;
 2488     if ( !styleInputPath.empty() ) {
 2489         ifstream userStyleDef ( styleInputPath.c_str() );
 2490         if ( userStyleDef ) {
 2491             ostr    << "\n" << styleCommentOpen
 2492                     << " Content of " << styleInputPath
 2493                     << ": " <<styleCommentClose << "\n";
 2494             string line;
 2495             while ( getline ( userStyleDef, line ) ) {
 2496                 ostr << line << "\n";
 2497             }
 2498             userStyleDef.close();
 2499         } else {
 2500             ostr    << styleCommentOpen
 2501                     << " ERROR: Could not include " << styleInputPath
 2502                     << "." << styleCommentClose << "\n";
 2503         }
 2504     }
 2505 
 2506     string injections=docStyle.getInjections();
 2507     if (!injections.empty()) {
 2508         ostr    << "\n" << styleCommentOpen
 2509                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
 2510         ostr << injections<<"\n";
 2511     }
 2512     return ostr.str();
 2513 }
 2514 
 2515 bool CodeGenerator::initPluginScript(const string& script)
 2516 {
 2517 
 2518     if (script.empty()) return true;
 2519 
 2520     try {
 2521 
 2522         userScriptError="";
 2523         Diluculum::LuaState ls;
 2524         
 2525         ls.doFile (script);
 2526         int listIdx=1;
 2527 
 2528         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
 2529 
 2530             // Theme plugins
 2531             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
 2532                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2533                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2534                 }
 2535             }
 2536             // Syntax plugins
 2537             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
 2538                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2539                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2540                 }
 2541             }
 2542             // Format plugins
 2543             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
 2544                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2545                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2546                 }
 2547             }
 2548             
 2549             listIdx++;
 2550         }
 2551     }  catch (Diluculum::LuaError &err) {
 2552         userScriptError=err.what();
 2553         return false;
 2554     }
 2555     return true;
 2556 }
 2557 
 2558 bool CodeGenerator::checkSpecialCmd()
 2559 {
 2560     string noParseCmd="@highlight";
 2561     size_t cmdPos = line.find ( noParseCmd );
 2562 
 2563     if ( cmdPos!=string::npos ) {
 2564         *out<<line.substr ( noParseCmd.size() +cmdPos + 1 );
 2565 
 2566         // hide comment line from output
 2567         token.clear();
 2568         lineIndex=line.length();
 2569         getInputChar();
 2570         lineNumber--;
 2571         // end hide
 2572 
 2573         return true; // do not parse line as comment
 2574     }
 2575     return false; //parse comment as usual
 2576 }
 2577 
 2578 }