"Fossies" - the Fresh Open Source Software Archive

Member "highlight-3.51/src/core/codegenerator.cpp" (17 May 2019, 78512 Bytes) of package /linux/www/highlight-3.51.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "codegenerator.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.50_vs_3.51.

    1 /***************************************************************************
    2                           codegenerator.cpp  -  description
    3                              -------------------
    4     begin                : Die Jul 9 2002
    5     copyright            : (C) 2002-2019 by Andre Simon
    6     email                : a.simon@mailbox.org
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #include <climits>
   29 #include <memory>
   30 #include <boost/xpressive/xpressive_dynamic.hpp>
   31 
   32 #include "codegenerator.h"
   33 
   34 #include "htmlgenerator.h"
   35 #include "xhtmlgenerator.h"
   36 #include "rtfgenerator.h"
   37 #include "latexgenerator.h"
   38 #include "texgenerator.h"
   39 #include "svggenerator.h"
   40 #include "bbcodegenerator.h"
   41 #include "pangogenerator.h"
   42 #include "odtgenerator.h"
   43 #include "astyle/astyle.h"
   44 #include "astyle/ASStreamIterator.h"
   45 
   46 #if !defined (QT)
   47 #include "ansigenerator.h"
   48 #include "xterm256generator.h"
   49 #endif
   50 
   51 namespace highlight
   52 {
   53 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
   54 
   55 const string CodeGenerator::STY_NAME_STD="std";
   56 const string CodeGenerator::STY_NAME_STR="str";
   57 const string CodeGenerator::STY_NAME_NUM="num";
   58 const string CodeGenerator::STY_NAME_SLC="slc";
   59 const string CodeGenerator::STY_NAME_COM="com";
   60 const string CodeGenerator::STY_NAME_ESC="esc";
   61 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
   62 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
   63 const string CodeGenerator::STY_NAME_LIN="lin";
   64 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
   65 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
   66 
   67 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
   68 
   69 
   70 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
   71 {
   72     CodeGenerator* generator=NULL;
   73     switch ( type ) {
   74     case HTML:
   75         generator = new HtmlGenerator();
   76         break;
   77     case XHTML:
   78         generator = new XHtmlGenerator();
   79         break;
   80     case TEX:
   81         generator = new TexGenerator ();
   82         break;
   83     case LATEX:
   84         generator = new LatexGenerator();
   85         break;
   86     case RTF:
   87         generator = new RtfGenerator ();
   88         break;
   89     case SVG:
   90         generator = new SVGGenerator();
   91         break;
   92     case BBCODE:
   93         generator = new BBCodeGenerator();
   94         break;
   95     case PANGO:
   96         generator = new PangoGenerator();
   97         break;
   98     case ODTFLAT:
   99         generator = new ODTGenerator();
  100         break;
  101     case ESC_ANSI:
  102         generator = new AnsiGenerator();
  103         break;
  104     case ESC_XTERM256:
  105     case ESC_TRUECOLOR:
  106         generator = new Xterm256Generator();
  107         generator->setESCTrueColor(type==ESC_TRUECOLOR);
  108         break;
  109     default:
  110         break;
  111     }
  112     return generator;
  113 }
  114 
  115 
  116 CodeGenerator::CodeGenerator ( highlight::OutputType type )
  117     :currentSyntax(NULL),
  118      in ( NULL ),
  119      out ( NULL ),
  120      encoding ( "none" ),
  121      docTitle ( "Source file" ),
  122      maskWs ( false ),
  123      excludeWs ( false ),
  124      fragmentOutput ( false ),
  125      keepInjections( false ),
  126      showLineNumbers ( false ),
  127      lineNumberFillZeroes ( false ),
  128      printNewLines(true),
  129      omitVersionComment(false),
  130      isolateTags(false),
  131      baseFontSize("10"),
  132      lineNumber ( 0 ),
  133      lineNumberOffset ( 0 ),
  134      currentState ( _UNKNOWN ),
  135      currentKeywordClass ( 0 ),
  136      includeStyleDef ( false ),
  137      numberCurrentLine ( false ),
  138      lineIndex ( 0 ),
  139      lastLineLength( 0 ),
  140      syntaxChangeIndex(UINT_MAX),
  141      syntaxChangeLineNo(UINT_MAX),
  142      lineNumberWidth ( 5 ),
  143      startLineCnt( 1 ),
  144      startLineCntCurFile( 1 ),
  145      maxLineCnt ( UINT_MAX ),
  146      inputFilesCnt (0),
  147      processedFilesCnt (0),
  148      terminatingChar ( '\0' ),
  149      formatter ( NULL ),
  150      formattingEnabled ( false ),
  151      formattingPossible ( false ),
  152      validateInput ( false ),
  153      numberWrappedLines ( true ),
  154      noTrailingNewLine(false),
  155      resultOfHook(false),
  156      lineContainedTestCase(false),
  157      applySyntaxTestCase(false),
  158      keywordCase ( StringTools::CASE_UNCHANGED ),
  159      eolDelimiter ('\n'),
  160      outputType ( type )
  161 {
  162 }
  163 
  164 
  165 CodeGenerator::~CodeGenerator()
  166 {
  167     delete formatter;
  168 
  169     for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
  170         delete it->second;
  171     }
  172     
  173     for (unsigned int i=0; i<pluginChunks.size(); i++) {
  174         delete pluginChunks[i];
  175     }
  176     pluginChunks.clear();
  177 }
  178 
  179 
  180 bool CodeGenerator::initTheme ( const string& themePath )
  181 {
  182     this->themePath=themePath;
  183     bool loadOK = docStyle.load ( themePath, outputType );
  184     initOutputTags();
  185     return loadOK;
  186 }
  187 
  188 const string& CodeGenerator::getStyleName()
  189 {
  190     return themePath;
  191 }
  192 
  193 void CodeGenerator::setLineNumberWidth ( int w )
  194 {
  195     lineNumberWidth=w;
  196 }
  197 
  198 int CodeGenerator::getLineNumberWidth()
  199 {
  200     return lineNumberWidth;
  201 }
  202 
  203 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
  204 {
  205     showLineNumbers=flag;
  206     lineNumberOffset = startCnt-1;
  207 }
  208 
  209 bool CodeGenerator::getPrintLineNumbers()
  210 {
  211     return showLineNumbers;
  212 }
  213 
  214 void CodeGenerator::setPrintZeroes ( bool flag )
  215 {
  216     lineNumberFillZeroes=flag;
  217 }
  218 
  219 bool CodeGenerator::getPrintZeroes()
  220 {
  221     return lineNumberFillZeroes;
  222 }
  223 
  224 void CodeGenerator::setIncludeStyle ( bool flag )
  225 {
  226     includeStyleDef = flag;
  227 }
  228 
  229 void CodeGenerator::disableTrailingNL ( bool flag )
  230 {
  231     noTrailingNewLine = flag;
  232 }
  233 
  234 void CodeGenerator::setStyleInputPath ( const string& path )
  235 {
  236     styleInputPath = path;
  237 }
  238 
  239 void CodeGenerator::setStyleOutputPath ( const string& path )
  240 {
  241     styleOutputPath = path;
  242 }
  243 
  244 void CodeGenerator::setPluginParameter ( const string& param )
  245 {
  246     pluginParameter = param;
  247 }
  248 
  249 const string&  CodeGenerator::getStyleInputPath()
  250 {
  251     return styleInputPath;
  252 }
  253 
  254 const string&  CodeGenerator::getStyleOutputPath()
  255 {
  256     return styleOutputPath;
  257 }
  258 
  259 void CodeGenerator::setFragmentCode ( bool flag )
  260 {
  261     fragmentOutput=flag;
  262 }
  263 
  264 bool CodeGenerator::getFragmentCode()
  265 {
  266     return fragmentOutput;
  267 }
  268 void CodeGenerator::setKeepInjections ( bool flag )
  269 {
  270     keepInjections=flag;
  271 }
  272 
  273 bool CodeGenerator::getKeepInjections()
  274 {
  275     return keepInjections;
  276 }
  277 void CodeGenerator::setValidateInput ( bool flag )
  278 {
  279     validateInput=flag;
  280 }
  281 
  282 bool CodeGenerator::getValidateInput()
  283 {
  284     return validateInput;
  285 }
  286 
  287 
  288 void CodeGenerator::setNumberWrappedLines ( bool flag )
  289 {
  290     numberWrappedLines=flag;
  291 }
  292 
  293 bool CodeGenerator::getNumberWrappedLines()
  294 {
  295     return numberWrappedLines;
  296 }
  297 
  298 void CodeGenerator::setOmitVersionComment ( bool flag )
  299 {
  300     omitVersionComment=flag;
  301 }
  302 
  303 bool CodeGenerator::getOmitVersionComment ()
  304 {
  305     return omitVersionComment;
  306 }
  307 
  308 void CodeGenerator::setIsolateTags ( bool flag )
  309 {
  310     isolateTags=flag;
  311 }
  312 
  313 bool CodeGenerator::getIsolateTags ()
  314 {
  315     return isolateTags;
  316 }
  317 
  318 void CodeGenerator::setBaseFont ( const string& fontName )
  319 {
  320     baseFont = fontName;
  321 }
  322 
  323 void CodeGenerator::setBaseFontSize ( const string& fontSize)
  324 {
  325     baseFontSize = fontSize;
  326 }
  327 
  328 void CodeGenerator::setStartingNestedLang(const string &langName)
  329 {
  330     embedLangStart = langName;
  331 }
  332 
  333 const string CodeGenerator::getBaseFont() const
  334 {
  335     if ( !baseFont.empty() ) return baseFont;
  336     switch ( outputType ) {
  337     case HTML:
  338     case XHTML:
  339     case SVG:
  340         return "'Courier New',monospace";
  341         break;
  342     case LATEX:
  343         return "ttfamily";
  344         break;
  345     case TEX:
  346         return "tt";
  347         break;
  348     default:
  349         return "Courier New";
  350     }
  351 }
  352 
  353 const string CodeGenerator::getBaseFontSize()
  354 {
  355     return baseFontSize;
  356 }
  357 
  358 void CodeGenerator::setTitle ( const string & title )
  359 {
  360     if ( !title.empty() ) docTitle= title;
  361 }
  362 
  363 string CodeGenerator::getTitle()
  364 {
  365     return docTitle;
  366 }
  367 
  368 void CodeGenerator::setEncoding ( const string& encodingName )
  369 {
  370     encoding = encodingName;
  371 }
  372 
  373 bool CodeGenerator::formattingDisabled()
  374 {
  375     return !formattingEnabled;
  376 }
  377 
  378 void CodeGenerator::setStartingInputLine ( unsigned int begin )
  379 {
  380     startLineCnt = startLineCntCurFile = begin;
  381 }
  382 
  383 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
  384 {
  385     maxLineCnt = cnt;
  386 }
  387 
  388 void CodeGenerator::setFilesCnt ( unsigned int cnt )
  389 {
  390     inputFilesCnt = cnt;
  391 }
  392 
  393 bool CodeGenerator::formattingIsPossible()
  394 {
  395     return formattingPossible;
  396 }
  397 
  398 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
  399                                        unsigned int lineLength,
  400                                        int numberSpaces )
  401 {
  402     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
  403     bool replaceTabs = numberSpaces > 0;
  404 
  405     if ( enableWrap || replaceTabs ) {
  406         preFormatter.setWrap ( enableWrap );
  407         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
  408         preFormatter.setWrapLineLength ( lineLength );
  409         preFormatter.setReplaceTabs ( replaceTabs );
  410         preFormatter.setNumberSpaces ( numberSpaces );
  411     }
  412 }
  413 
  414 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
  415 {
  416     keywordCase = keyCase;
  417 }
  418 
  419 void CodeGenerator::setEOLDelimiter(char delim)
  420 {
  421     eolDelimiter = delim;
  422 }
  423 
  424 void CodeGenerator::reset()
  425 {
  426     lineIndex = 0;
  427     lineNumber = 0;
  428     line.clear();
  429     preFormatter.reset();
  430     inFile.clear();
  431     outFile.clear();
  432     embedLangDefPath.clear();
  433     printNewLines=true;
  434     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  435     startLineCntCurFile = startLineCnt;
  436     applySyntaxTestCase=lineContainedTestCase=false;
  437 }
  438 
  439 string CodeGenerator::getThemeInitError()
  440 {
  441     return  docStyle.getErrorMessage();
  442 }
  443 
  444 string CodeGenerator::getPluginScriptError()
  445 {
  446     return userScriptError;
  447 }
  448 
  449 string CodeGenerator::getSyntaxRegexError()
  450 {
  451     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
  452 }
  453 string CodeGenerator::getSyntaxLuaError()
  454 {
  455     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
  456 
  457 }
  458 string CodeGenerator::getSyntaxDescription()
  459 {
  460     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
  461 
  462 }
  463 string CodeGenerator::getThemeDescription()
  464 {
  465     return docStyle.getDescription();
  466 }
  467 
  468 string CodeGenerator::getSyntaxCatDescription(){
  469     return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
  470 }
  471 
  472 string CodeGenerator::getThemeCatDescription(){
  473     return docStyle.getCategoryDescription();
  474 }
  475 
  476 unsigned int CodeGenerator::getLineNumber()
  477 {
  478     return lineNumber;
  479 }
  480 
  481 bool CodeGenerator::readNewLine ( string &newLine )
  482 {
  483     bool eof=false;
  484     
  485     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
  486     
  487     while (!eof && startLineCntCurFile>0) {
  488         if ( formattingPossible && formattingEnabled ) {
  489             eof=!formatter->hasMoreLines();
  490             if ( !eof ) {
  491                 newLine = formatter->nextLine();
  492             }
  493         } else {
  494             eof = ! getline ( *in, newLine, eolDelimiter );
  495         }
  496         --startLineCntCurFile;
  497     }
  498     startLineCntCurFile=1;
  499 #ifndef _WIN32
  500     // drop CR of CRLF files
  501     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
  502         newLine.erase(newLine.size() - 1);
  503 #endif
  504 
  505     return eof || ( lineNumber == maxLineCnt );
  506 }
  507 
  508 void CodeGenerator::matchRegex ( const string &line, State skipState)
  509 {
  510     regexGroups.clear();
  511     int matchBegin=0;
  512     int groupID=0;
  513 
  514     // cycle through all regex, save the start and ending indices of matches to report them later
  515     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
  516         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
  517 
  518         if (regexElem->open == skipState) continue;
  519         
  520         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
  521         boost::xpressive::sregex_iterator end;
  522 
  523         for( ; cur != end; ++cur )  {
  524             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
  525             matchBegin =  cur->position(groupID);
  526             regexGroups.insert (
  527                 make_pair ( matchBegin + 1, ReGroup ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
  528         }
  529     }
  530 }
  531 
  532 unsigned char CodeGenerator::getInputChar()
  533 {
  534     // end of line?
  535     if ( lineIndex == line.length() ) {
  536         bool eof=false;
  537         if ( preFormatter.isEnabled() ) {
  538             if ( !preFormatter.hasMoreLines() ) {
  539                 eof=readNewLine ( line );
  540                 preFormatter.setLine ( line );
  541                 ++lineNumber;
  542                 numberCurrentLine = true;
  543             } else {
  544                 if(numberWrappedLines)
  545                     ++lineNumber;
  546                 numberCurrentLine = numberWrappedLines;
  547             }
  548 
  549             line = preFormatter.getNextLine();
  550         } else {
  551             eof=readNewLine ( line );
  552             ++lineNumber;
  553 
  554             numberCurrentLine = true;
  555         }
  556         lastLineLength=lineIndex;
  557         lineIndex=0;
  558         
  559         if (!lineContainedTestCase && applySyntaxTestCase){
  560             stateTraceTest = stateTraceCurrent;
  561             stateTraceCurrent.clear();
  562         } 
  563         
  564         lineContainedTestCase=false;
  565             
  566         matchRegex ( line );
  567         stateTrace.clear();
  568         return ( eof ) ?'\0':'\n';
  569     }
  570 
  571     return line[lineIndex++];
  572 }
  573 
  574 /** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS, Coffeescript with block regex, Pas + ASM) 
  575     especially nested syntax in one line
  576  */
  577 State CodeGenerator::getCurrentState (State oldState)
  578 {
  579     unsigned char c='\0';
  580 
  581     if ( token.length() ==0 ) {
  582         c=getInputChar();
  583     } else {
  584         lineIndex-= ( token.length()-1 );
  585         c=token[0];
  586     }
  587     if ( c=='\n' ) {
  588         return _EOL;   // End of line
  589     }
  590 
  591     if ( c=='\0' ) {
  592         return _EOF;   // End of file
  593     }
  594 
  595     if ( c==' ' || c=='\t' ) {
  596         token= c;
  597         return _WS;
  598     }
  599     
  600     //TODO add control flag
  601     if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
  602         token= c;
  603         return _TESTPOS;
  604     }
  605         
  606     // at this position the syntax change takes place
  607     if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
  608         loadEmbeddedLang(embedLangDefPath);  // load new syntax                     
  609         matchRegex(line);                    // recognize new patterns in the (remaining) line
  610         syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  611     }
  612 
  613 SKIP_EMBEDDED:
  614     
  615     // Test if a regular expression was found at the current position
  616     if ( !regexGroups.empty() ) {
  617         if ( regexGroups.count ( lineIndex ) ) {
  618             token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );
  619 
  620             unsigned int oldIndex= lineIndex;
  621             if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;
  622 
  623             if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
  624                 //do not handle a nested section if the syntax is marked as "sealed" 
  625                 if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
  626                     embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
  627                     //remember position 
  628                     syntaxChangeIndex = lineIndex+2;
  629                     syntaxChangeLineNo = lineNumber;
  630                 }
  631                 
  632                 // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
  633                 matchRegex(line, EMBEDDED_CODE_BEGIN);
  634                 lineIndex = oldIndex;
  635                 goto SKIP_EMBEDDED; // this is how it should be done
  636             }
  637 
  638             if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
  639                 string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
  640                 currentKeywordClass=currentSyntax->isKeyword ( reservedWord ); //check in lists (no regex)
  641                 
  642                 if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
  643                     currentKeywordClass = regexGroups[oldIndex].kwClass;
  644                 }
  645                 return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
  646             } else {
  647                 return validateState(regexGroups[oldIndex].state, oldState);
  648             }
  649         }
  650     }
  651 
  652     // Character not referring to any state
  653     token = c;
  654     return STANDARD;
  655 }
  656 
  657 State CodeGenerator::validateState(State newState, State oldState)
  658 {
  659 
  660     if (currentSyntax->getValidateStateChangeFct()) {
  661         Diluculum::LuaValueList params;
  662         params.push_back(Diluculum::LuaValue(oldState));
  663         params.push_back(Diluculum::LuaValue(newState));
  664         params.push_back(Diluculum::LuaValue(token));
  665         params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
  666         params.push_back(Diluculum::LuaValue(lineNumber) );
  667         params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
  668 
  669         Diluculum::LuaValueList res=
  670             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
  671                     params,"getValidateStateChangeFct call")  ;
  672 
  673         resultOfHook = res.size()>=1;
  674         if (resultOfHook) {
  675             State validatedState = (State)res[0].asInteger();
  676             if ( validatedState== _REJECT) {
  677                 // proceed using only the first character of the token
  678                 // TODO evaluate if token clear would be better
  679                 if (res.size()==1) { 
  680                     lineIndex -= (token.length() -1);
  681                     token=token.substr(0, 1);
  682                 }
  683                 
  684                 //experimental for slim.lang: evaluate second return arg after _REJECT
  685                 if (res.size()>=2) {
  686                     lineIndex -= (token.length() );
  687                     token.clear();
  688                     return (State)res[1].asInteger();
  689                 }
  690                 return oldState;
  691             }
  692             stateTrace.push_back(validatedState);
  693             if (stateTrace.size()>200) stateTrace.erase(stateTrace.begin(), stateTrace.begin() + 100 );
  694             return validatedState;
  695         }
  696     }
  697     resultOfHook  = false;
  698     stateTrace.push_back(newState);
  699     if (stateTrace.size()>200) stateTrace.erase(stateTrace.begin(), stateTrace.begin() + 100 );    
  700     return newState;
  701 }
  702 
  703 
  704 unsigned int CodeGenerator::getCurrentKeywordClassId(){
  705     unsigned int kwClassId=0;
  706 
  707     // this vector contains the defined keyword classes, and currentKeywordClass is its index:
  708     vector<string> kwClasses=currentSyntax->getKeywordClasses();
  709     if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
  710         string kwClassName=kwClasses[currentKeywordClass-1];
  711         if (kwClassName.size()==3)
  712             kwClassId = kwClassName[2] - 'a' + 1;
  713     }
  714     return kwClassId;
  715 }
  716 
  717 //it is faster to pass ostream reference
  718 void CodeGenerator::maskString ( ostream& ss, const string & s )
  719 {
  720     for ( unsigned int i=0; i< s.length(); i++ ) {
  721         ss << maskCharacter ( s[i] );
  722 
  723         if (applySyntaxTestCase) {
  724             PositionState ps(currentState, getCurrentKeywordClassId(), false);
  725             stateTraceCurrent.push_back(ps);
  726             
  727             if (stateTraceCurrent.size()>200) 
  728                 stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 ); 
  729         }
  730     }
  731 }
  732 
  733 
  734 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string&token)
  735 {
  736     Diluculum::LuaValueList params;
  737     params.push_back(Diluculum::LuaValue(token));
  738     params.push_back(Diluculum::LuaValue(currentState));
  739     params.push_back(Diluculum::LuaValue(currentKeywordClass));
  740     string trace(";");
  741     if (stateTrace.size()>1){
  742         for (size_t i=0; i<stateTrace.size()-1;i++){
  743             trace += std::to_string (stateTrace[i]);
  744             trace += ";";
  745         }
  746     }
  747     
  748     //std::cerr <<"TRC1: "<<trace<<"\n";
  749     
  750     params.push_back(Diluculum::LuaValue(trace));
  751 
  752     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
  753             params,"getDecorateFct call")  ;
  754 }
  755 
  756 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
  757 {
  758     if ( flushWhiteSpace )
  759         flushWs(1);
  760     string caseToken = StringTools::change_case ( token, tcase );
  761     if (currentSyntax->getDecorateFct()) {
  762 
  763         Diluculum::LuaValueList res=callDecorateFct(caseToken);
  764         if (res.size()==1) {
  765             *out<<res[0].asString();
  766         } else {
  767             maskString ( *out, caseToken );
  768         }
  769     } else {
  770         maskString ( *out, caseToken );
  771     }
  772 
  773     token.clear();
  774 }
  775 
  776 bool CodeGenerator::styleFound()
  777 {
  778     return docStyle.found();
  779 }
  780 
  781 bool CodeGenerator::printIndexFile ( const vector<string> &fileList,
  782                                      const string &outPath )
  783 {
  784     return true;
  785 }
  786 
  787 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
  788 {
  789 
  790     if ( formatter!=NULL ) {
  791         return true;
  792     }
  793 
  794     if ( !indentScheme.size() ) return false;
  795 
  796     formatter=new astyle::ASFormatter();
  797 
  798     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
  799         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
  800     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
  801         formatter->setFormattingStyle ( astyle::STYLE_KR );
  802     } else if ( indentScheme=="java" ) {
  803         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
  804     } else if ( indentScheme=="stroustrup" ) {
  805         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
  806     } else if ( indentScheme=="whitesmith" ) {
  807         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
  808     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
  809         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
  810     } else if ( indentScheme=="gnu" ) {
  811         formatter->setFormattingStyle ( astyle::STYLE_GNU );
  812     } else if ( indentScheme=="linux" ) {
  813         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
  814     } else if ( indentScheme=="horstmann" ) {
  815         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
  816     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
  817         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
  818     } else if ( indentScheme=="google") {
  819         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
  820     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
  821         formatter->setFormattingStyle ( astyle::STYLE_PICO );
  822     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
  823         formatter->setFormattingStyle ( astyle::STYLE_LISP );
  824     } else if ( indentScheme=="vtk") {
  825         formatter->setFormattingStyle ( astyle::STYLE_VTK );
  826     } else if ( indentScheme=="mozilla") {
  827         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
  828     } else if ( indentScheme=="webkit") {
  829         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
  830     } else if ( indentScheme!="user" ){
  831         return false;
  832     }
  833     return formattingEnabled=true;
  834 }
  835 
  836 
  837 /*Helper functions for astyle option parsing*/
  838 string CodeGenerator::getParam(const string& arg, const char* op)
  839 {
  840     return arg.substr(strlen(op));
  841 }
  842 
  843 string CodeGenerator::getParam(const string& arg, const char* op1, const char* op2)
  844 {
  845     return isParamOption(arg, op1) ? getParam(arg, op1) : getParam(arg, op2);
  846 }
  847 
  848 bool CodeGenerator::isOption(const string& arg, const char* op)
  849 {
  850     return arg.compare(op) == 0;
  851 }
  852 
  853 bool CodeGenerator::isOption(const string& arg, const char* op1, const char* op2)
  854 {
  855     return (isOption(arg, op1) || isOption(arg, op2));
  856 }
  857 
  858 bool CodeGenerator::isParamOption(const string& arg, const char* option)
  859 {
  860     bool retVal = arg.compare(0, strlen(option), option) == 0;
  861     // if comparing for short option, 2nd char of arg must be numeric
  862     if (retVal && strlen(option) == 1 && arg.length() > 1)
  863         if (!isdigit((unsigned char) arg[1]))
  864             retVal = false;
  865     return retVal;
  866 }
  867 
  868 bool CodeGenerator::isParamOption(const string& arg, const char* option1, const char* option2)
  869 {
  870     return isParamOption(arg, option1) || isParamOption(arg, option2);
  871 }
  872 
  873 //apply the same options as astyle
  874 void CodeGenerator::setIndentationOptions (const vector<string>& options){
  875     if (formatter) {
  876         string arg;
  877         for (unsigned int i=0; i<options.size(); i++) {
  878             arg=options[i];
  879             
  880             if (isOption(arg, "mode=cs"))
  881             {
  882                 formatter->setSharpStyle();
  883                 formatter->setModeManuallySet(true);
  884             }
  885             else if (isOption(arg, "mode=c"))
  886             {
  887                 formatter->setCStyle();
  888                 formatter->setModeManuallySet(true);
  889             }
  890             else if (isOption(arg, "mode=java"))
  891             {
  892                 formatter->setJavaStyle();
  893                 formatter->setModeManuallySet(true);
  894             }
  895             else if (isParamOption(arg, "t", "indent=tab="))
  896             {
  897                 int spaceNum = 4;
  898                 string spaceNumParam = getParam(arg, "t", "indent=tab=");
  899                 if (spaceNumParam.length() > 0)
  900                     spaceNum = atoi(spaceNumParam.c_str());
  901                 if (spaceNum >= 2 && spaceNum <= 20)
  902                     formatter->setTabIndentation(spaceNum, false);
  903             }
  904             else if (isOption(arg, "indent=tab"))
  905             {
  906                 formatter->setTabIndentation(4);
  907             }
  908             else if (isParamOption(arg, "T", "indent=force-tab="))
  909             {
  910                 int spaceNum = 4;
  911                 string spaceNumParam = getParam(arg, "T", "indent=force-tab=");
  912                 if (spaceNumParam.length() > 0)
  913                     spaceNum = atoi(spaceNumParam.c_str());
  914                 if (spaceNum >= 2 && spaceNum <= 20)
  915                     formatter->setTabIndentation(spaceNum, true);
  916             }
  917             else if (isOption(arg, "indent=force-tab"))
  918             {
  919                 formatter->setTabIndentation(4, true);
  920             }
  921             else if (isParamOption(arg, "xT", "indent=force-tab-x="))
  922             {
  923                 int tabNum = 8;
  924                 string tabNumParam = getParam(arg, "xT", "indent=force-tab-x=");
  925                 if (tabNumParam.length() > 0)
  926                     tabNum = atoi(tabNumParam.c_str());
  927                 if (tabNum >= 2 && tabNum <= 20)
  928                     formatter->setForceTabXIndentation(tabNum);
  929                 
  930             }
  931             else if (isOption(arg, "indent=force-tab-x"))
  932             {
  933                 formatter->setForceTabXIndentation(8);
  934             }
  935             else if (isParamOption(arg, "s", "indent=spaces="))
  936             {
  937                 int spaceNum = 4;
  938                 string spaceNumParam = getParam(arg, "s", "indent=spaces=");
  939                 if (spaceNumParam.length() > 0)
  940                     spaceNum = atoi(spaceNumParam.c_str());
  941                 if (spaceNum >= 2 && spaceNum <= 20)
  942                     formatter->setSpaceIndentation(spaceNum);
  943             }
  944             else if (isOption(arg, "indent=spaces"))
  945             {
  946                 formatter->setSpaceIndentation(4);
  947             }
  948             else if (isParamOption(arg, "xt", "indent-continuation="))
  949             {
  950                 int contIndent = 1;
  951                 string contIndentParam = getParam(arg, "xt", "indent-continuation=");
  952                 if (contIndentParam.length() > 0)
  953                     contIndent = atoi(contIndentParam.c_str());
  954                 if (contIndent > 0 && contIndent < 5)
  955                     formatter->setContinuationIndentation(contIndent);
  956             }
  957             else if (isParamOption(arg, "m", "min-conditional-indent="))
  958             {
  959                 int minIndent = astyle::MINCOND_TWO;
  960                 string minIndentParam = getParam(arg, "m", "min-conditional-indent=");
  961                 if (minIndentParam.length() > 0)
  962                     minIndent = atoi(minIndentParam.c_str());
  963                 if (minIndent < astyle::MINCOND_END)
  964                     formatter->setMinConditionalIndentOption(minIndent);
  965             }
  966             else if (isParamOption(arg, "M", "max-continuation-indent="))
  967             {
  968                 int maxIndent = 40;
  969                 string maxIndentParam = getParam(arg, "M", "max-continuation-indent=");
  970                 if (maxIndentParam.length() > 0)
  971                     maxIndent = atoi(maxIndentParam.c_str());
  972                 if (maxIndent >= 40 && maxIndent <= 120)
  973                     formatter->setMaxContinuationIndentLength(maxIndent);
  974             }
  975             else if (isOption(arg, "N", "indent-namespaces"))
  976             {
  977                 formatter->setNamespaceIndent(true);
  978             }
  979             else if (isOption(arg, "C", "indent-classes"))
  980             {
  981                 formatter->setClassIndent(true);
  982             }
  983             else if (isOption(arg, "xG", "indent-modifiers"))
  984             {
  985                 formatter->setModifierIndent(true);
  986             }
  987             else if (isOption(arg, "S", "indent-switches"))
  988             {
  989                 formatter->setSwitchIndent(true);
  990             }
  991             else if (isOption(arg, "K", "indent-cases"))
  992             {
  993                 formatter->setCaseIndent(true);
  994             }
  995             else if (isOption(arg, "xU", "indent-after-parens"))
  996             {
  997                 formatter->setAfterParenIndent(true);
  998             }
  999             else if (isOption(arg, "L", "indent-labels"))
 1000             {
 1001                 formatter->setLabelIndent(true);
 1002             }
 1003             else if (isOption(arg, "xW", "indent-preproc-block"))
 1004             {
 1005                 formatter->setPreprocBlockIndent(true);
 1006             }
 1007             else if (isOption(arg, "w", "indent-preproc-define"))
 1008             {
 1009                 formatter->setPreprocDefineIndent(true);
 1010             }
 1011             else if (isOption(arg, "xw", "indent-preproc-cond"))
 1012             {
 1013                 formatter->setPreprocConditionalIndent(true);
 1014             }
 1015             else if (isOption(arg, "y", "break-closing-braces"))
 1016             {
 1017                 formatter->setBreakClosingHeaderBracesMode(true);
 1018             }
 1019             else if (isOption(arg, "O", "keep-one-line-blocks"))
 1020             {
 1021                 formatter->setBreakOneLineBlocksMode(false);
 1022             }
 1023             else if (isOption(arg, "o", "keep-one-line-statements"))
 1024             {
 1025                 formatter->setBreakOneLineStatementsMode(false);
 1026             }
 1027             else if (isOption(arg, "P", "pad-paren"))
 1028             {
 1029                 formatter->setParensOutsidePaddingMode(true);
 1030                 formatter->setParensInsidePaddingMode(true);
 1031             }
 1032             else if (isOption(arg, "d", "pad-paren-out"))
 1033             {
 1034                 formatter->setParensOutsidePaddingMode(true);
 1035             }
 1036             else if (isOption(arg, "xd", "pad-first-paren-out"))
 1037             {
 1038                 formatter->setParensFirstPaddingMode(true);
 1039             }
 1040             else if (isOption(arg, "D", "pad-paren-in"))
 1041             {
 1042                 formatter->setParensInsidePaddingMode(true);
 1043             }
 1044             else if (isOption(arg, "H", "pad-header"))
 1045             {
 1046                 formatter->setParensHeaderPaddingMode(true);
 1047             }
 1048             else if (isOption(arg, "U", "unpad-paren"))
 1049             {
 1050                 formatter->setParensUnPaddingMode(true);
 1051             }
 1052             else if (isOption(arg, "p", "pad-oper"))
 1053             {
 1054                 formatter->setOperatorPaddingMode(true);
 1055             }
 1056             else if (isOption(arg, "xg", "pad-comma"))
 1057             {
 1058                 formatter->setCommaPaddingMode(true);
 1059             }
 1060             else if (isOption(arg, "xe", "delete-empty-lines"))
 1061             {
 1062                 formatter->setDeleteEmptyLinesMode(true);
 1063             }
 1064             else if (isOption(arg, "E", "fill-empty-lines"))
 1065             {
 1066                 formatter->setEmptyLineFill(true);
 1067             }
 1068             else if (isOption(arg, "c", "convert-tabs"))
 1069             {
 1070                 formatter->setTabSpaceConversionMode(true);
 1071             }
 1072             else if (isOption(arg, "xy", "close-templates"))
 1073             {
 1074                 formatter->setCloseTemplatesMode(true);
 1075             }
 1076             else if (isOption(arg, "F", "break-blocks=all"))
 1077             {
 1078                 formatter->setBreakBlocksMode(true);
 1079                 formatter->setBreakClosingHeaderBlocksMode(true);
 1080             }
 1081             else if (isOption(arg, "f", "break-blocks"))
 1082             {
 1083                 formatter->setBreakBlocksMode(true);
 1084             }
 1085             else if (isOption(arg, "e", "break-elseifs"))
 1086             {
 1087                 formatter->setBreakElseIfsMode(true);
 1088             }
 1089             else if (isOption(arg, "xb", "break-one-line-headers"))
 1090             {
 1091                 formatter->setBreakOneLineHeadersMode(true);
 1092             }
 1093             else if (isOption(arg, "j", "add-braces"))
 1094             {
 1095                 formatter->setAddBracesMode(true);
 1096             }
 1097             else if (isOption(arg, "J", "add-one-line-braces"))
 1098             {
 1099                 formatter->setAddOneLineBracesMode(true);
 1100             }
 1101             else if (isOption(arg, "xj", "remove-braces"))
 1102             {
 1103                 formatter->setRemoveBracesMode(true);
 1104             }
 1105             else if (isOption(arg, "Y", "indent-col1-comments"))
 1106             {
 1107                 formatter->setIndentCol1CommentsMode(true);
 1108             }
 1109             else if (isOption(arg, "align-pointer=type"))
 1110             {
 1111                 formatter->setPointerAlignment(astyle::PTR_ALIGN_TYPE);
 1112             }
 1113             else if (isOption(arg, "align-pointer=middle"))
 1114             {
 1115                 formatter->setPointerAlignment(astyle::PTR_ALIGN_MIDDLE);
 1116             }
 1117             else if (isOption(arg, "align-pointer=name"))
 1118             {
 1119                 formatter->setPointerAlignment(astyle::PTR_ALIGN_NAME);
 1120             }
 1121             else if (isParamOption(arg, "k"))
 1122             {
 1123                 int align = 0;
 1124                 string styleParam = getParam(arg, "k");
 1125                 if (styleParam.length() > 0)
 1126                     align = atoi(styleParam.c_str());
 1127                 if (align == 1)
 1128                     formatter->setPointerAlignment(astyle::PTR_ALIGN_TYPE);
 1129                 else if (align == 2)
 1130                     formatter->setPointerAlignment(astyle::PTR_ALIGN_MIDDLE);
 1131                 else if (align == 3)
 1132                     formatter->setPointerAlignment(astyle::PTR_ALIGN_NAME);
 1133             }
 1134             else if (isOption(arg, "align-reference=none"))
 1135             {
 1136                 formatter->setReferenceAlignment(astyle::REF_ALIGN_NONE);
 1137             }
 1138             else if (isOption(arg, "align-reference=type"))
 1139             {
 1140                 formatter->setReferenceAlignment(astyle::REF_ALIGN_TYPE);
 1141             }
 1142             else if (isOption(arg, "align-reference=middle"))
 1143             {
 1144                 formatter->setReferenceAlignment(astyle::REF_ALIGN_MIDDLE);
 1145             }
 1146             else if (isOption(arg, "align-reference=name"))
 1147             {
 1148                 formatter->setReferenceAlignment(astyle::REF_ALIGN_NAME);
 1149             }
 1150             else if (isParamOption(arg, "W"))
 1151             {
 1152                 int align = 0;
 1153                 string styleParam = getParam(arg, "W");
 1154                 if (styleParam.length() > 0)
 1155                     align = atoi(styleParam.c_str());
 1156                 if (align == 0)
 1157                     formatter->setReferenceAlignment(astyle::REF_ALIGN_NONE);
 1158                 else if (align == 1)
 1159                     formatter->setReferenceAlignment(astyle::REF_ALIGN_TYPE);
 1160                 else if (align == 2)
 1161                     formatter->setReferenceAlignment(astyle::REF_ALIGN_MIDDLE);
 1162                 else if (align == 3)
 1163                     formatter->setReferenceAlignment(astyle::REF_ALIGN_NAME);
 1164             }
 1165             else if (isParamOption(arg, "max-code-length="))
 1166             {
 1167                 int maxLength = 50;
 1168                 string maxLengthParam = getParam(arg, "max-code-length=");
 1169                 if (maxLengthParam.length() > 0)
 1170                     maxLength = atoi(maxLengthParam.c_str());
 1171                 if (maxLength >= 50 && maxLength<= 200)
 1172                     formatter->setMaxCodeLength(maxLength);
 1173             }
 1174             else if (isParamOption(arg, "xC"))
 1175             {
 1176                 int maxLength = 50;
 1177                 string maxLengthParam = getParam(arg, "xC");
 1178                 if (maxLengthParam.length() > 0)
 1179                     maxLength = atoi(maxLengthParam.c_str());
 1180                 if (maxLength > 0 && maxLength<= 200)
 1181                     formatter->setMaxCodeLength(maxLength);
 1182             }
 1183             else if (isOption(arg, "xL", "break-after-logical"))
 1184             {
 1185                 formatter->setBreakAfterMode(true);
 1186             }
 1187             else if (isOption(arg, "xc", "attach-classes"))
 1188             {
 1189                 formatter->setAttachClass(true);
 1190             }
 1191             else if (isOption(arg, "xV", "attach-closing-while"))
 1192             {
 1193                 formatter->setAttachClosingWhile(true);
 1194             }
 1195             else if (isOption(arg, "xk", "attach-extern-c"))
 1196             {
 1197                 formatter->setAttachExternC(true);
 1198             }
 1199             else if (isOption(arg, "xn", "attach-namespaces"))
 1200             {
 1201                 formatter->setAttachNamespace(true);
 1202             }
 1203             else if (isOption(arg, "xl", "attach-inlines"))
 1204             {
 1205                 formatter->setAttachInline(true);
 1206             }
 1207             else if (isOption(arg, "xp", "remove-comment-prefix"))
 1208             {
 1209                 formatter->setStripCommentPrefix(true);
 1210             }
 1211             else if (isOption(arg, "xB", "break-return-type"))
 1212             {
 1213                 formatter->setBreakReturnType(true);
 1214             }
 1215             else if (isOption(arg, "xD", "break-return-type-decl"))
 1216             {
 1217                 formatter->setBreakReturnTypeDecl(true);
 1218             }
 1219             else if (isOption(arg, "xf", "attach-return-type"))
 1220             {
 1221                 formatter->setAttachReturnType(true);
 1222             }
 1223             else if (isOption(arg, "xh", "attach-return-type-decl"))
 1224             {
 1225                 formatter->setAttachReturnTypeDecl(true);
 1226             }
 1227             // Objective-C options
 1228             else if (isOption(arg, "xQ", "pad-method-prefix"))
 1229             {
 1230                 formatter->setMethodPrefixPaddingMode(true);
 1231             }
 1232             else if (isOption(arg, "xR", "unpad-method-prefix"))
 1233             {
 1234                 formatter->setMethodPrefixUnPaddingMode(true);
 1235             }
 1236             else if (isOption(arg, "xq", "pad-return-type"))
 1237             {
 1238                 formatter->setReturnTypePaddingMode(true);
 1239             }
 1240             else if (isOption(arg, "xr", "unpad-return-type"))
 1241             {
 1242                 formatter->setReturnTypeUnPaddingMode(true);
 1243             }
 1244             else if (isOption(arg, "xS", "pad-param-type"))
 1245             {
 1246                 formatter->setParamTypePaddingMode(true);
 1247             }
 1248             else if (isOption(arg, "xs", "unpad-param-type"))
 1249             {
 1250                 formatter->setParamTypeUnPaddingMode(true);
 1251             }
 1252             else if (isOption(arg, "xM", "align-method-colon"))
 1253             {
 1254                 formatter->setAlignMethodColon(true);
 1255             }
 1256             else if (isOption(arg, "xP0", "pad-method-colon=none"))
 1257             {
 1258                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_NONE);
 1259             }
 1260             else if (isOption(arg, "xP1", "pad-method-colon=all"))
 1261             {
 1262                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_ALL);
 1263             }
 1264             else if (isOption(arg, "xP2", "pad-method-colon=after"))
 1265             {
 1266                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_AFTER);
 1267             }
 1268             else if (isOption(arg, "xP3", "pad-method-colon=before"))
 1269             {
 1270                 formatter->setObjCColonPaddingMode(astyle::COLON_PAD_BEFORE);
 1271             }
 1272         }
 1273     }
 1274 }
 1275 
 1276 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
 1277 {
 1278 
 1279     if (!embedded) {
 1280         while (!nestedLangs.empty()) {
 1281             nestedLangs.pop();
 1282         }   
 1283     }
 1284     
 1285     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
 1286     LoadResult result=LOAD_OK;
 1287 
 1288     if ( reloadNecessary ) {
 1289         if (syntaxReaders.count(langDefPath)) {
 1290             currentSyntax=syntaxReaders[langDefPath];
 1291             result=LOAD_OK;
 1292         } else {
 1293             currentSyntax=new SyntaxReader();
 1294             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
 1295             syntaxReaders[langDefPath]=currentSyntax;
 1296         }
 1297 
 1298         if ( result==LOAD_OK ) {
 1299             formattingPossible=currentSyntax->enableReformatting();
 1300 
 1301             if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
 1302                 // remove dynamic keyword tag delimiters of the old language definition
 1303                 vector<string>::iterator keyStyleOpenBegin =
 1304                     openTags.begin() + NUMBER_BUILTIN_STATES;
 1305                 vector<string>::iterator keyStyleCloseBegin =
 1306                     closeTags.begin() + NUMBER_BUILTIN_STATES;
 1307                 openTags.erase ( keyStyleOpenBegin, openTags.end() );
 1308                 closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
 1309             }
 1310             // add new keyword tag delimiters
 1311             for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
 1312                 openTags.push_back ( getKeywordOpenTag ( i ) );
 1313                 closeTags.push_back ( getKeywordCloseTag ( i ) );
 1314             }
 1315             
 1316             //test balloon
 1317             string overrideSpacer(currentSyntax->getOverrideConfigVal("spacer"));
 1318             if (!overrideSpacer.empty()) {
 1319                 spacer = overrideSpacer;
 1320             }
 1321             string overrideMaskWS(currentSyntax->getOverrideConfigVal("maskws"));
 1322             if (!overrideMaskWS.empty()) {
 1323                 maskWs = overrideMaskWS=="true";
 1324             }
 1325             
 1326         }
 1327     }
 1328     return result;
 1329 }
 1330 
 1331 bool CodeGenerator::validateInputStream()
 1332 {
 1333     if ( !in ) return false;
 1334 
 1335     // it is not possible to move stream pointer back with stdin
 1336     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
 1337         return true;
 1338 
 1339     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
 1340     // Magic configuration of "file"
 1341     // This is intended for web plugins - only check filetypes often found in the net
 1342     char magic_gif[]    = {'G','I','F','8', 0};
 1343     char magic_png[]    = {'\x89','P','N','G', 0};
 1344     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
 1345     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
 1346     char magic_bmp[]    = {'B','M', 0};
 1347     char magic_pdf[]    = {'%','P','D','F', 0};
 1348     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
 1349     char magic_rar[]    = {'R','a','r','!', 0};
 1350     char magic_zip[]    = {'P','K','\x03','\x04', 0};
 1351     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
 1352     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
 1353     char magic_bzip[]   = {'B','Z', 0};
 1354 
 1355     char* magic_table[] = {magic_utf8,
 1356                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
 1357                            magic_java,
 1358                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
 1359                            0
 1360                           };
 1361 
 1362     char buffer [10]= {0};
 1363     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
 1364 
 1365     int magic_index=0;
 1366     while ( magic_table[magic_index] ) {
 1367         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
 1368             break;
 1369         }
 1370         magic_index++;
 1371     }
 1372     int streamReadPos=0;
 1373     if ( magic_table[magic_index] == magic_utf8 ) {
 1374         //setEncoding("utf-8");
 1375         streamReadPos=3; // remove UTF-8 magic number from output
 1376     }
 1377 
 1378     in -> seekg ( streamReadPos, ios::beg );
 1379     in-> clear();  // clear fail bit to continue reading
 1380 
 1381     return !magic_table[magic_index] // points to 0 if no pattern was found
 1382            || magic_table[magic_index] == magic_utf8;
 1383 }
 1384 
 1385 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
 1386     
 1387     if ( pluginChunks.size()) {
 1388     
 1389         Diluculum::LuaState luaState;
 1390 
 1391         Diluculum::LuaValueList chunkParams;
 1392         chunkParams.push_back(currentSyntax->getDescription());
 1393         for (unsigned int i=0; i<pluginChunks.size(); i++) {
 1394             luaState.call(*pluginChunks[i], chunkParams, "format user function");
 1395         }
 1396         
 1397         if (luaState.globals().count(fctName)) {
 1398             Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
 1399         
 1400             luaState["HL_INPUT_FILE"] = luaState["HL_PLUGIN_PARAM"] = pluginParameter;
 1401             luaState["HL_OUTPUT"] = outputType;
 1402             luaState["HL_FORMAT_HTML"]=HTML;
 1403             luaState["HL_FORMAT_XHTML"]=XHTML;
 1404             luaState["HL_FORMAT_TEX"]=TEX;
 1405             luaState["HL_FORMAT_LATEX"]=LATEX;
 1406             luaState["HL_FORMAT_RTF"]=RTF;
 1407             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
 1408             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
 1409             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
 1410             luaState["HL_FORMAT_SVG"]=SVG;
 1411             luaState["HL_FORMAT_BBCODE"]=BBCODE;
 1412             luaState["HL_FORMAT_PANGO"]=PANGO;
 1413             luaState["HL_FORMAT_ODT"]=ODTFLAT;
 1414             
 1415             Diluculum::LuaValueList params;
 1416             Diluculum::LuaValueMap options;
 1417             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );   
 1418             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);   
 1419             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);   
 1420             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());   
 1421             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());   
 1422 
 1423             params.push_back(inputFilesCnt);
 1424             params.push_back(processedFilesCnt);
 1425             params.push_back(options);
 1426             
 1427             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call")  ;
 1428             if (res.size()>=1) {
 1429                 *keepDefault=false;
 1430                 *result = res[0].asString();
 1431                 if (res.size()==2)
 1432                     *keepDefault = res[1].asBoolean();
 1433             }
 1434             delete documentFct;
 1435         }
 1436     }
 1437 }
 1438 
 1439 void CodeGenerator::printHeader()
 1440 {
 1441     bool keepDefaultHeader=true;
 1442     string pluginHeader;
 1443     
 1444     processedFilesCnt++;
 1445     
 1446     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
 1447 
 1448     if ( ! fragmentOutput && keepDefaultHeader)
 1449         *out << getHeader();
 1450     
 1451     *out << pluginHeader; 
 1452    
 1453     if ( !fragmentOutput || keepInjections)
 1454         *out << currentSyntax->getHeaderInjection();
 1455 }
 1456 
 1457 void CodeGenerator::printFooter()
 1458 {
 1459     
 1460     bool keepDefaultFooter=true;
 1461     string pluginFooter;
 1462     
 1463     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
 1464     
 1465     if ( !fragmentOutput || keepInjections)
 1466         *out << currentSyntax->getFooterInjection();
 1467 
 1468     *out << pluginFooter; 
 1469     
 1470     if ( ! fragmentOutput && keepDefaultFooter )
 1471         *out << getFooter();
 1472 }
 1473 
 1474 ParseError CodeGenerator::generateFile ( const string &inFileName,
 1475         const string &outFileName )
 1476 {
 1477     if ( !docStyle.found() ) {
 1478         return BAD_STYLE;
 1479     }
 1480 
 1481     reset();
 1482 
 1483     ParseError error=PARSE_OK;
 1484 
 1485     inFile=inFileName;
 1486     outFile=outFileName;
 1487         
 1488     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
 1489 
 1490     if ( validateInput )
 1491         if ( !validateInputStream() ) error= BAD_INPUT;
 1492 
 1493     if ( !in->fail() && error==PARSE_OK ) {
 1494         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
 1495         if ( out->fail() ) {
 1496             error=BAD_OUTPUT;
 1497         }
 1498     }
 1499 
 1500     if ( in->fail() ) {
 1501         error=BAD_INPUT;
 1502     }
 1503 
 1504     if ( error==PARSE_OK ) {
 1505         if ( formatter != NULL ) {
 1506             formatter->init ( new astyle::ASStreamIterator ( in ) );
 1507         }
 1508         printHeader();
 1509         printBody();
 1510         printFooter();
 1511     }
 1512 
 1513     if ( !outFileName.empty() ) {
 1514         delete out;
 1515         out=NULL;
 1516     }
 1517     if ( !inFileName.empty() ) {
 1518         delete in;
 1519         in=NULL;
 1520     }
 1521     return error;
 1522 }
 1523 
 1524 string CodeGenerator::generateString ( const string &input )
 1525 {
 1526 
 1527     if ( !docStyle.found() ) {
 1528         return "";
 1529     }
 1530 
 1531     reset();
 1532 
 1533     in = new istringstream ( input );
 1534     out = new ostringstream ();
 1535 
 1536     if ( in->fail() || out->fail() ) {
 1537         return "";
 1538     }
 1539 
 1540     if ( formatter != NULL ) {
 1541         formatter->init ( new astyle::ASStreamIterator ( in ) );
 1542     }
 1543     printHeader();
 1544     printBody();
 1545     printFooter();
 1546 
 1547     string result = static_cast<ostringstream*> ( out )->str();
 1548 
 1549     delete out;
 1550     out=NULL;
 1551     delete in;
 1552     in=NULL;
 1553 
 1554     return result;
 1555 }
 1556 
 1557 string CodeGenerator::generateStringFromFile ( const string &inFileName )
 1558 {
 1559 
 1560     if ( !docStyle.found() ) {
 1561         return "";
 1562     }
 1563 
 1564     reset();
 1565 
 1566     inFile = inFileName;
 1567     
 1568     in = new ifstream ( inFileName.c_str() );
 1569     out = new ostringstream ();
 1570 
 1571     if ( in->fail() || out->fail() ) {
 1572         return "";
 1573     }
 1574 
 1575     if ( validateInput && !validateInputStream() ) {
 1576         return "ERROR: detected binary input";
 1577     }
 1578 
 1579     if ( formatter != NULL ) {
 1580         formatter->init ( new astyle::ASStreamIterator ( in ) );
 1581     }
 1582     printHeader();
 1583     printBody();
 1584     printFooter();
 1585 
 1586     string result = static_cast<ostringstream*> ( out )->str();
 1587 
 1588     delete out;
 1589     out=NULL;
 1590     delete in;
 1591     in=NULL;
 1592 
 1593     return result;
 1594 }
 1595 
 1596 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
 1597 {
 1598     if ( s==KEYWORD && kwClassID ) {
 1599         return NUMBER_BUILTIN_STATES + kwClassID-1;
 1600     }
 1601     return ( unsigned int ) s ;
 1602 }
 1603 
 1604 void CodeGenerator::openTag ( State s )
 1605 {
 1606     *out << openTags[ ( unsigned int ) s];
 1607     currentState=s;
 1608 
 1609 }
 1610 
 1611 void CodeGenerator::closeTag ( State s )
 1612 {
 1613     *out << closeTags[ ( unsigned int ) s];
 1614     flushWs(2);
 1615     currentState=_UNKNOWN;
 1616 }
 1617 
 1618 void CodeGenerator::openKWTag ( unsigned int kwClassID )
 1619 {
 1620     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1621     currentState=KEYWORD;
 1622 }
 1623 
 1624 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
 1625 {
 1626     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1627     flushWs(3);
 1628     currentState=_UNKNOWN;
 1629 }
 1630 
 1631 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
 1632 {
 1633     if (nestedLangs.empty()) {
 1634         nestedLangs.push(currentSyntax->getCurrentPath() );
 1635     }
 1636     if (nestedLangs.top() != embedLangDefPath) {
 1637         nestedLangs.push(embedLangDefPath);
 1638     }
 1639     LoadResult res = loadLanguage(embedLangDefPath, true);
 1640     //pass end delimiter regex to syntax description
 1641     currentSyntax->restoreLangEndDelim(embedLangDefPath);
 1642     return res == LOAD_OK;
 1643 }
 1644 
 1645 ///////////////////////////////////////////////////////////////////////////////
 1646 
 1647 void CodeGenerator::processRootState()
 1648 {
 1649     bool eof=false,
 1650          firstLine=true; // avoid newline before printing the first output line
 1651 
 1652     applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
 1653     
 1654     if ( currentSyntax->highlightingDisabled() ) {
 1655         string line;
 1656         while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
 1657             ++lineNumber;
 1658             insertLineNumber ( !firstLine );
 1659             flushWs(4);
 1660             firstLine=false;
 1661             if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
 1662                 maskString ( *out, line );
 1663         }
 1664         *out << flush;
 1665         return;
 1666     }
 1667 
 1668     if (!embedLangStart.empty()) {
 1669         if (!loadEmbeddedLang(currentSyntax->getNewPath(embedLangStart))) return;
 1670     }
 1671 
 1672     State state=STANDARD;
 1673 
 1674     openTag ( STANDARD );
 1675     do {
 1676         // determine next state
 1677         state= getCurrentState(STANDARD);
 1678 
 1679         // handle current state
 1680         switch ( state ) {
 1681         case KEYWORD:
 1682             closeTag ( STANDARD );
 1683             eof=processKeywordState ( state );
 1684             openTag ( STANDARD );
 1685             break;
 1686         case NUMBER:
 1687             closeTag ( STANDARD );
 1688             eof=processNumberState();
 1689             openTag ( STANDARD );
 1690             break;
 1691         case ML_COMMENT:
 1692             closeTag ( STANDARD );
 1693             eof=processMultiLineCommentState();
 1694             openTag ( STANDARD );
 1695             break;
 1696         case SL_COMMENT:
 1697             closeTag ( STANDARD );
 1698             eof=processSingleLineCommentState();
 1699             openTag ( STANDARD );
 1700             break;
 1701         case STRING:
 1702             closeTag ( STANDARD );
 1703             eof=processStringState ( STANDARD );
 1704             openTag ( STANDARD );
 1705             break;
 1706         case DIRECTIVE:
 1707             closeTag ( STANDARD );
 1708             eof=processDirectiveState();
 1709             openTag ( STANDARD );
 1710             break;
 1711         case ESC_CHAR:
 1712             closeTag ( STANDARD );
 1713             eof=processEscapeCharState();
 1714             openTag ( STANDARD );
 1715             break;
 1716         case SYMBOL:
 1717             closeTag ( STANDARD );
 1718             eof=processSymbolState();
 1719             openTag ( STANDARD );
 1720             break;
 1721 
 1722         case EMBEDDED_CODE_END:
 1723             closeTag ( STANDARD );
 1724             eof=processSyntaxChangeState(state);
 1725             openTag ( STANDARD );
 1726             break;
 1727         case _EOL:
 1728 
 1729             // XTERM256 fix (issue with less cmd)
 1730             if  (!firstLine || showLineNumbers) {
 1731                 closeTag ( STANDARD );
 1732             }
 1733             insertLineNumber ( !firstLine );
 1734             if (!firstLine || showLineNumbers) {
 1735                 flushWs(5);
 1736                 stateTraceCurrent.clear();
 1737                 openTag ( STANDARD );
 1738             }
 1739             firstLine=false;
 1740             break;
 1741         case _EOF:
 1742             eof=true;
 1743             break;
 1744         case _WS:
 1745             processWsState();
 1746             break;
 1747         default:
 1748             printMaskedToken ();
 1749             break;
 1750         }
 1751     } while ( !eof );
 1752     closeTag ( STANDARD );
 1753 
 1754     if (currentSyntax->getDecorateLineEndFct()) {
 1755         Diluculum::LuaValueList res=callDecorateLineFct(false);
 1756         if (res.size()==1) {
 1757             *out << res[0].asString();
 1758         }
 1759     }
 1760 
 1761     printNewLines = !noTrailingNewLine;
 1762     *out << getNewLine();
 1763     *out << flush;
 1764 }
 1765 
 1766 bool CodeGenerator::processSyntaxChangeState(State myState)
 1767 {
 1768     State newState=STANDARD;
 1769     bool eof=false,
 1770          exitState=false;
 1771 
 1772     openTag ( KEYWORD );
 1773     do {
 1774 
 1775         if (myState==EMBEDDED_CODE_END) {
 1776             if (!nestedLangs.empty()) {
 1777                 nestedLangs.pop();
 1778             }
 1779             // load host language syntax
 1780             if (!nestedLangs.empty()) {
 1781                 loadLanguage(nestedLangs.top(), true);
 1782             }
 1783             matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
 1784         }
 1785         
 1786         printMaskedToken ( newState!=_WS );
 1787 
 1788         newState= getCurrentState(myState);
 1789 
 1790         switch ( newState ) {
 1791         case _WS:
 1792             processWsState();
 1793             break;
 1794         case _EOL:
 1795             insertLineNumber();
 1796             exitState=true;
 1797             break;
 1798         case _EOF:
 1799             eof = true;
 1800             break;
 1801         default:
 1802             exitState=true;
 1803             break;
 1804         }
 1805     } while (  !exitState  &&  !eof );
 1806     closeTag ( KEYWORD );
 1807 
 1808     return eof;
 1809 }
 1810 
 1811 
 1812 bool CodeGenerator::processKeywordState ( State myState )
 1813 {
 1814     State newState=STANDARD;
 1815     unsigned int myClassID=currentKeywordClass;
 1816     bool eof=false,
 1817          exitState=false;
 1818 
 1819     openKWTag ( myClassID );
 1820     do {
 1821         printMaskedToken ( newState!=_WS,
 1822                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
 1823         newState= getCurrentState(myState);
 1824         switch ( newState ) {
 1825         case _WS:
 1826             processWsState();
 1827             exitState=isolateTags;
 1828             break;
 1829         case _EOL:
 1830             insertLineNumber();
 1831             exitState=true;
 1832             
 1833             break;
 1834         case _EOF:
 1835             eof = true;
 1836             break;
 1837         case KEYWORD_END:
 1838             exitState=true;
 1839             break;
 1840         default:
 1841             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
 1842             break;
 1843         }
 1844     } while ( !exitState  &&  !eof );
 1845 
 1846     closeKWTag ( myClassID );
 1847 
 1848     currentKeywordClass=0;
 1849     return eof;
 1850 }
 1851 
 1852 bool CodeGenerator::processNumberState()
 1853 {
 1854     State newState=STANDARD;
 1855     bool eof=false,
 1856          exitState=false;
 1857     openTag ( NUMBER );
 1858     do {
 1859         printMaskedToken ( newState!=_WS );
 1860         newState= getCurrentState(NUMBER);
 1861         switch ( newState ) {
 1862         case _WS:
 1863             processWsState();
 1864             exitState=isolateTags;
 1865             break;
 1866         case _EOL:
 1867             insertLineNumber();
 1868             exitState=true;
 1869             break;
 1870         case _EOF:
 1871             eof = true;
 1872             break;
 1873         default:
 1874             exitState=newState!=NUMBER;
 1875             break;
 1876         }
 1877     } while ( !exitState && !eof );
 1878 
 1879     closeTag ( NUMBER );
 1880     return eof;
 1881 }
 1882 
 1883 
 1884 bool CodeGenerator::processMultiLineCommentState()
 1885 {
 1886     int commentCount=1;
 1887     int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
 1888     State newState=STANDARD;
 1889     bool eof=false, exitState=false, containedTestCase=false;
 1890     unsigned int startColumn=lineIndex - token.size() ;
 1891     openTag ( ML_COMMENT );
 1892     do {
 1893         printMaskedToken (newState!=_WS );
 1894         newState= getCurrentState(ML_COMMENT);
 1895 
 1896         switch ( newState ) {
 1897         case _WS:
 1898             processWsState();
 1899             break;
 1900         case _EOL:
 1901             wsBuffer += closeTags[ML_COMMENT];
 1902             insertLineNumber();
 1903             wsBuffer += openTags[ML_COMMENT];
 1904             startColumn=0;
 1905             break;
 1906         case _EOF:
 1907             eof = true;
 1908             break;
 1909         case _TESTPOS:
 1910             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1911             printMaskedToken();
 1912             containedTestCase=true;
 1913             break;
 1914         case ML_COMMENT:
 1915 
 1916             if ( currentSyntax->allowNestedMLComments() ) {
 1917                 ++commentCount;
 1918             }
 1919             // if delimiters are equal, close the comment by continueing to
 1920             // ML_COMMENT_END section
 1921             if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;
 1922 
 1923         case ML_COMMENT_END:
 1924 
 1925             if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
 1926                 break;
 1927             }
 1928             commentCount--;
 1929             if ( !commentCount ) {
 1930                 printMaskedToken();
 1931                 exitState=true;
 1932             }
 1933             break;
 1934         default:
 1935             break;
 1936         }
 1937     } while ( !exitState  &&  !eof );
 1938 
 1939     closeTag ( ML_COMMENT );
 1940    
 1941     if (containedTestCase){
 1942         stateTraceCurrent.clear();
 1943     }
 1944     return eof;
 1945 }
 1946 
 1947 
 1948 bool CodeGenerator::processSingleLineCommentState()
 1949 {
 1950     State newState=STANDARD;
 1951     bool eof=false, exitState=false, containedTestCase=false;
 1952     unsigned int startColumn = lineIndex - token.size() ;
 1953 
 1954     openTag ( SL_COMMENT );
 1955     do {
 1956         printMaskedToken ( newState!=_WS );
 1957         newState= getCurrentState(SL_COMMENT);
 1958 
 1959         switch ( newState ) {
 1960         case _WS:
 1961             processWsState();
 1962             break;
 1963         case _EOL:
 1964             printMaskedToken();
 1965             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1966                 exitState=false;
 1967             } else {
 1968                 exitState=true;
 1969             }
 1970             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
 1971             insertLineNumber();
 1972             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
 1973 
 1974             break;
 1975         case _EOF:
 1976             eof = true;
 1977             break;
 1978         case _TESTPOS:
 1979             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1980             printMaskedToken();
 1981             containedTestCase=true;
 1982             break;
 1983      
 1984         default:
 1985             break;
 1986         }
 1987     } while ( !exitState  &&  !eof );
 1988 
 1989     closeTag ( SL_COMMENT );
 1990     
 1991     if (containedTestCase) {
 1992         stateTraceCurrent.clear();
 1993     }
 1994     
 1995     return eof;
 1996 }
 1997 
 1998 bool CodeGenerator::processDirectiveState()
 1999 {
 2000     State  newState=STANDARD;
 2001     bool eof=false, exitState=false;
 2002 
 2003     openTag ( DIRECTIVE );
 2004     do {
 2005         printMaskedToken ( newState!=_WS );
 2006         newState= getCurrentState(DIRECTIVE);
 2007         switch ( newState ) {
 2008         case _WS:
 2009             processWsState();
 2010             break;
 2011         case DIRECTIVE_END:
 2012             printMaskedToken();
 2013             exitState=true;
 2014             break;
 2015         case _EOL:
 2016             printMaskedToken();
 2017             
 2018             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 2019                 exitState=false;
 2020             } else {
 2021                 if (currentSyntax->getContinuationChar()!=0x13){
 2022                     exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
 2023                 } 
 2024             }
 2025             if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
 2026             insertLineNumber();
 2027             if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
 2028             break;
 2029         case ML_COMMENT:
 2030             closeTag ( DIRECTIVE );
 2031             eof= processMultiLineCommentState();
 2032             openTag ( DIRECTIVE );
 2033             break;
 2034         case SL_COMMENT:
 2035             closeTag ( DIRECTIVE );
 2036             eof= processSingleLineCommentState();
 2037             openTag ( DIRECTIVE );
 2038             exitState=true;
 2039             break;
 2040         case STRING:
 2041             closeTag ( DIRECTIVE );
 2042             eof=processStringState ( DIRECTIVE );
 2043             openTag ( DIRECTIVE );
 2044             break;
 2045         case _EOF:
 2046             eof = true;
 2047             break;
 2048         default:
 2049             break;
 2050         }
 2051     } while ( !exitState && !eof );
 2052 
 2053     closeTag ( DIRECTIVE );
 2054     return eof;
 2055 }
 2056 
 2057 bool CodeGenerator::processStringState ( State oldState )
 2058 {
 2059     State newState=STANDARD;
 2060     bool eof=false, exitState=false;
 2061     bool returnedFromOtherState=false;
 2062 
 2063     State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
 2064 
 2065     int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
 2066     string openDelim=token;
 2067 
 2068     //Raw String by definition:
 2069     bool isRawString=currentSyntax->delimiterIsRawString(openDelimID);
 2070 
 2071     // Test if character before string open delimiter token equals to the
 2072     // raw string prefix (Example: r" ", r""" """ in Python)
 2073 
 2074     //Raw String Prefix:
 2075     if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
 2076         isRawString=true;
 2077     }
 2078 
 2079     openTag ( myState );
 2080     do {
 2081         // true if last token was an escape char
 2082         if ( !returnedFromOtherState ) {
 2083             printMaskedToken (newState!=_WS );
 2084         }
 2085         returnedFromOtherState=false;
 2086         newState= getCurrentState(myState);
 2087 
 2088         switch ( newState ) {
 2089         case _WS:
 2090             processWsState();
 2091             break;
 2092         case _EOL:
 2093             wsBuffer += closeTags[myState];
 2094             insertLineNumber();
 2095             wsBuffer += openTags[myState];
 2096             break;
 2097         case STRING_END:
 2098             if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
 2099                 if (currentSyntax->assertDelimEqualLength()) {
 2100                     exitState= openDelim.length()==token.length();
 2101                 } else {
 2102                     exitState= true;
 2103                 }
 2104                 printMaskedToken();
 2105             }
 2106             break;
 2107         case STRING:
 2108             // if there exist multiple string delimiters, close string if
 2109             // current delimiter is equal to the opening delimiter
 2110             exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  ))&&token==openDelim;
 2111             printMaskedToken();
 2112             break;
 2113         case ESC_CHAR:
 2114             if ( !isRawString ) {
 2115                 closeTag ( myState );
 2116                 eof=processEscapeCharState();
 2117                 openTag ( myState );
 2118                 returnedFromOtherState=true;
 2119             } else {
 2120                 // FIXME not a fix for Python r"""\"""
 2121                 exitState=token.size()>1 && token[1] == openDelim[0];
 2122                 printMaskedToken();
 2123             }
 2124             break;
 2125         case STRING_INTERPOLATION:
 2126             closeTag ( myState );
 2127             eof=processInterpolationState();
 2128             openTag ( myState );
 2129             returnedFromOtherState=true;
 2130             break;
 2131     
 2132         case _EOF:
 2133             eof = true;
 2134             break;
 2135         default:
 2136             printMaskedToken();
 2137             break;
 2138         }
 2139     } while ( !exitState && !eof );
 2140 
 2141     closeTag ( myState );
 2142 
 2143     return eof;
 2144 }
 2145 
 2146 bool CodeGenerator::processSymbolState()
 2147 {
 2148 
 2149     State newState=STANDARD;
 2150     bool eof=false,
 2151          exitState=false;
 2152 
 2153     openTag ( SYMBOL );
 2154     do {
 2155         printMaskedToken ( newState!=_WS );
 2156         newState= getCurrentState(SYMBOL);
 2157         switch ( newState ) {
 2158         case _WS:
 2159             processWsState();
 2160             exitState=isolateTags;
 2161             break;
 2162         case _EOL:
 2163             insertLineNumber();
 2164             exitState=true;
 2165             break;
 2166         case _EOF:
 2167             eof = true;
 2168             break;
 2169         default:
 2170             exitState=newState!=SYMBOL;
 2171             break;
 2172         }
 2173     } while ( !exitState && !eof );
 2174 
 2175     closeTag ( SYMBOL );
 2176     return eof;
 2177 }
 2178 
 2179 bool CodeGenerator::processEscapeCharState()
 2180 {
 2181     State newState=STANDARD;
 2182     bool eof=false, exitState=false;
 2183     openTag ( ESC_CHAR );
 2184     do {
 2185         printMaskedToken (newState!=_WS );
 2186         newState= getCurrentState(ESC_CHAR);
 2187         switch ( newState ) {
 2188         case _EOL:
 2189             insertLineNumber();
 2190             exitState=true;
 2191             break;
 2192         case _WS:
 2193             processWsState();
 2194             exitState=isolateTags;
 2195             break;
 2196         case _EOF:
 2197             eof = true;
 2198             break;
 2199         default:
 2200             exitState=newState!=ESC_CHAR;
 2201             break;
 2202         }
 2203     } while ( !exitState && !eof );
 2204 
 2205     closeTag ( ESC_CHAR );
 2206     return eof;
 2207 }
 2208 
 2209 bool CodeGenerator::processInterpolationState()
 2210 {
 2211     State newState=STANDARD;
 2212     bool eof=false, exitState=false;
 2213     openTag ( STRING_INTERPOLATION );
 2214     do {
 2215         printMaskedToken (newState!=_WS );
 2216         newState= getCurrentState(STRING_INTERPOLATION);
 2217         switch ( newState ) {
 2218         case _EOL:
 2219             insertLineNumber();
 2220             exitState=true;
 2221             break;
 2222         case _WS:
 2223             processWsState();
 2224             exitState=isolateTags;
 2225             break;
 2226         case _EOF:
 2227             eof = true;
 2228             break;
 2229         default:
 2230             exitState=newState!=STRING_INTERPOLATION;
 2231             break;
 2232         }
 2233     } while ( !exitState && !eof );
 2234 
 2235     closeTag ( STRING_INTERPOLATION );
 2236     return eof;
 2237 }
 2238 
 2239 void CodeGenerator::processWsState()
 2240 {
 2241     if ( !maskWs ) {
 2242         wsBuffer += token;
 2243         token.clear();
 2244         return;
 2245     }
 2246 
 2247     flushWs(6);
 2248 
 2249     int cntWs=0;
 2250     lineIndex--;
 2251     PositionState ps(currentState, 0, true);
 2252             
 2253     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
 2254         ++cntWs;
 2255         ++lineIndex;
 2256     }
 2257     if ( cntWs>1 ) {
 2258 
 2259         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
 2260         if ( excludeWs && styleID!=_UNKNOWN ) {
 2261             *out << closeTags[styleID];
 2262         }
 2263         *out << maskWsBegin ;
 2264         for ( int i=0; i<cntWs; i++ ) {
 2265             *out <<  spacer;
 2266             if (applySyntaxTestCase){
 2267                 stateTraceCurrent.push_back(ps);
 2268             }
 2269         }
 2270         *out << maskWsEnd;
 2271         if ( excludeWs && styleID!=_UNKNOWN ) {
 2272             *out << openTags[styleID];
 2273         }
 2274     } else {
 2275     
 2276         *out << spacer; //Bugfix fehlender Space nach Strings
 2277         if (applySyntaxTestCase){
 2278             stateTraceCurrent.push_back(ps);            
 2279         }
 2280     }
 2281     token.clear();
 2282 }
 2283 
 2284 void CodeGenerator::flushWs(int arg)
 2285 {
 2286      PositionState ps(currentState, 0, true);
 2287      //workaround condition
 2288      for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
 2289         stateTraceCurrent.push_back(ps);
 2290         //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;           
 2291     }
 2292      
 2293      //fix canvas whitespace
 2294      if (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR){
 2295         *out<< maskWsBegin;
 2296      }
 2297     
 2298     *out<<wsBuffer;
 2299     wsBuffer.clear();
 2300 }
 2301 
 2302 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
 2303     switch (s) {
 2304         
 2305         case STANDARD:
 2306             return STY_NAME_STD;
 2307         case STRING:
 2308             return STY_NAME_STR;
 2309         case NUMBER:
 2310             return STY_NAME_NUM;
 2311         case SL_COMMENT:
 2312             return STY_NAME_SLC;
 2313         case ML_COMMENT:
 2314             return STY_NAME_COM;
 2315         case ESC_CHAR:
 2316             return STY_NAME_ESC;
 2317         case DIRECTIVE:
 2318             return STY_NAME_DIR;
 2319         case DIRECTIVE_STRING:
 2320             return STY_NAME_DST;
 2321         case SYMBOL:
 2322             return STY_NAME_SYM;
 2323         case STRING_INTERPOLATION:
 2324             return STY_NAME_IPL;
 2325         case _WS:
 2326             return "ws";
 2327         case KEYWORD: {
 2328             
 2329             if (!kwClass)
 2330                 return "ws";
 2331             
 2332             char kwName[5] = {0};
 2333             snprintf(kwName, sizeof(kwName), "kw%c", ('a'+kwClass-1));
 2334             return string(kwName);
 2335         }
 2336         default:
 2337             return "unknown_test";
 2338     }
 2339 }
 2340 
 2341 void CodeGenerator::printTrace(const string &s){
 2342     std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
 2343     for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
 2344         std::cout<<" "<<stateTraceCurrent[i].state;
 2345     }
 2346     std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
 2347     for (unsigned int i=0; i< stateTraceTest.size(); i++) {
 2348         std::cout<<" "<<stateTraceTest[i].state;
 2349     }
 2350     /*
 2351     for (unsigned int i=0; i< stateTrace.size(); i++) {
 2352         std::cout<<" "<<stateTrace[i];
 2353     }
 2354    */
 2355     std::cout<<"\n";
 2356 }
 2357 
 2358 void CodeGenerator::runSyntaxTestcases(unsigned int column){
 2359     
 2360     unsigned int assertGroup=0;
 2361     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
 2362     State assertState=_UNKNOWN;
 2363     
 2364     //printTrace("trace 2");
 2365     
 2366     if (typeDescPos!=string::npos) {
 2367     
 2368         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
 2369             assertState=NUMBER;
 2370         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos)
 2371             assertState=STRING;
 2372         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
 2373             assertState=ESC_CHAR;
 2374         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
 2375             assertState=STRING_INTERPOLATION;
 2376         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
 2377             assertState=SYMBOL;
 2378         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
 2379             assertState=DIRECTIVE;
 2380         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
 2381             assertState=SL_COMMENT;
 2382         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
 2383             assertState=ML_COMMENT;
 2384         else if (line.find("ws", typeDescPos)==typeDescPos)
 2385             assertState=_WS;
 2386         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos)
 2387             assertState=STANDARD;
 2388         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
 2389             assertState=DIRECTIVE_STRING;
 2390         
 2391         else if (line.find("kw", typeDescPos)==typeDescPos) {
 2392             assertState=KEYWORD;
 2393             if (isalpha(line[typeDescPos+2]))
 2394                 assertGroup=line[typeDescPos+2] - 'a' +1;
 2395         }
 2396     
 2397         if (   (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
 2398             || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
 2399             || assertGroup != stateTraceTest[column].kwClass) {
 2400             ostringstream err;
 2401             err << inFile << " line " << lineNumber << ", column "<< column 
 2402                 << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)  
 2403                 << " instead of " << getTestcaseName(assertState, assertGroup);
 2404             failedPosTests.push_back(err.str());
 2405         }
 2406         
 2407     }
 2408     
 2409     lineContainedTestCase=true; 
 2410 }
 2411 
 2412 
 2413 string CodeGenerator::getNewLine()
 2414 {
 2415     return (printNewLines) ? newLineTag : "";
 2416 }
 2417 
 2418 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
 2419 {
 2420 
 2421     Diluculum::LuaValueList params;
 2422     params.push_back(Diluculum::LuaValue(lineNumber));
 2423 
 2424     return currentSyntax->getLuaState()->call ( isLineStart ?
 2425             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
 2426             params,"getDecorateLineFct call")  ;
 2427 
 2428 }
 2429 
 2430 void CodeGenerator::insertLineNumber ( bool insertNewLine )
 2431 {
 2432     if ( insertNewLine ) {
 2433         if (currentSyntax->getDecorateLineEndFct()) {
 2434             Diluculum::LuaValueList res=callDecorateLineFct(false);
 2435             if (res.size()==1) {
 2436                 wsBuffer +=res[0].asString();
 2437             }
 2438         }
 2439 
 2440         wsBuffer += getNewLine();
 2441     }
 2442 
 2443     if (currentSyntax->getDecorateLineBeginFct()) {
 2444         Diluculum::LuaValueList res=callDecorateLineFct(true);
 2445         if (res.size()==1) {
 2446             wsBuffer +=res[0].asString();
 2447         }
 2448     }
 2449 
 2450     if ( showLineNumbers ) {
 2451         ostringstream os;
 2452         ostringstream numberPrefix;
 2453 
 2454         os << setw ( getLineNumberWidth() ) << right;
 2455         if( numberCurrentLine ) {
 2456             if ( lineNumberFillZeroes ) {
 2457                 os.fill ( '0' );
 2458             }
 2459             os << lineNumber+lineNumberOffset;
 2460         } else {
 2461             os << "";
 2462         }
 2463 
 2464         numberPrefix << openTags[LINENUMBER];
 2465         maskString ( numberPrefix, os.str() );
 2466         numberPrefix << spacer << closeTags[LINENUMBER];
 2467 
 2468         wsBuffer += numberPrefix.str();
 2469     }
 2470 }
 2471 
 2472 unsigned int CodeGenerator::getLineIndex()
 2473 {
 2474     return lineIndex;
 2475 }
 2476 unsigned int CodeGenerator::getLastLineLength()
 2477 {
 2478     return lastLineLength;
 2479 }
 2480 
 2481 bool CodeGenerator::printExternalStyle ( const string &outFile )
 2482 {
 2483     if ( !includeStyleDef ) {
 2484         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
 2485         if ( !cssOutFile->fail() ) {
 2486             if (!omitVersionComment) {
 2487                 *cssOutFile << styleCommentOpen
 2488                             <<" Style definition file generated by highlight "
 2489                             << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
 2490                             << " " << styleCommentClose << "\n";
 2491             }
 2492             *cssOutFile << getStyleDefinition()
 2493                         << "\n";
 2494             *cssOutFile << readUserStyleDef();
 2495             if ( !outFile.empty() ) delete cssOutFile;
 2496         } else {
 2497             return false;
 2498         }
 2499     }
 2500     return true;
 2501 }
 2502 
 2503 string CodeGenerator::readUserStyleDef()
 2504 {
 2505     ostringstream ostr;
 2506     if ( !styleInputPath.empty() ) {
 2507         ifstream userStyleDef ( styleInputPath.c_str() );
 2508         if ( userStyleDef ) {
 2509             ostr    << "\n" << styleCommentOpen
 2510                     << " Content of " << styleInputPath
 2511                     << ": " <<styleCommentClose << "\n";
 2512             string line;
 2513             while ( getline ( userStyleDef, line ) ) {
 2514                 ostr << line << "\n";
 2515             }
 2516             userStyleDef.close();
 2517         } else {
 2518             ostr    << styleCommentOpen
 2519                     << " ERROR: Could not include " << styleInputPath
 2520                     << "." << styleCommentClose << "\n";
 2521         }
 2522     }
 2523 
 2524     string injections=docStyle.getInjections();
 2525     if (!injections.empty()) {
 2526         ostr    << "\n" << styleCommentOpen
 2527                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
 2528         ostr << injections<<"\n";
 2529     }
 2530     return ostr.str();
 2531 }
 2532 
 2533 bool CodeGenerator::initPluginScript(const string& script)
 2534 {
 2535 
 2536     if (script.empty()) return true;
 2537 
 2538     try {
 2539 
 2540         userScriptError="";
 2541         Diluculum::LuaState ls;
 2542         
 2543         ls.doFile (script);
 2544         int listIdx=1;
 2545 
 2546         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
 2547 
 2548             // Theme plugins
 2549             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
 2550                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2551                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2552                 }
 2553             }
 2554             // Syntax plugins
 2555             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
 2556                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2557                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2558                 }
 2559             }
 2560             // Format plugins
 2561             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
 2562                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2563                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2564                 }
 2565             }
 2566             
 2567             listIdx++;
 2568         }
 2569     }  catch (Diluculum::LuaError &err) {
 2570         userScriptError=err.what();
 2571         return false;
 2572     }
 2573     return true;
 2574 }
 2575 
 2576 }