"Fossies" - the Fresh Open Source Software Archive

Member "highlight-4.1/src/core/codegenerator.cpp" (10 May 2021, 70288 Bytes) of package /linux/www/highlight-4.1.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "codegenerator.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.0.beta5_vs_4.0.beta6.

    1 /***************************************************************************
    2                           codegenerator.cpp  -  description
    3                              -------------------
    4     begin                : Die Jul 9 2002
    5     copyright            : (C) 2002-2021 by Andre Simon
    6     email                : a.simon@mailbox.org
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #include <climits>
   29 #include <memory>
   30 
   31 #include <chrono>
   32 #include <thread>
   33 
   34 
   35 #include <boost/xpressive/xpressive_dynamic.hpp>
   36 
   37 #include "codegenerator.h"
   38 
   39 #include "htmlgenerator.h"
   40 #include "xhtmlgenerator.h"
   41 #include "rtfgenerator.h"
   42 #include "latexgenerator.h"
   43 #include "texgenerator.h"
   44 #include "svggenerator.h"
   45 #include "bbcodegenerator.h"
   46 #include "pangogenerator.h"
   47 #include "odtgenerator.h"
   48 #include "astyle/astyle.h"
   49 
   50 #if !defined (QT)
   51 #include "ansigenerator.h"
   52 #include "xterm256generator.h"
   53 #endif
   54 
   55 namespace highlight
   56 {
   57 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
   58 
   59 // must not start with kw, st, sm prefixes
   60 const string CodeGenerator::STY_NAME_STD="def";
   61 const string CodeGenerator::STY_NAME_STR="sng";
   62 const string CodeGenerator::STY_NAME_NUM="num";
   63 const string CodeGenerator::STY_NAME_SLC="slc";
   64 const string CodeGenerator::STY_NAME_COM="com";
   65 const string CodeGenerator::STY_NAME_ESC="esc";
   66 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
   67 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
   68 const string CodeGenerator::STY_NAME_LIN="lin";
   69 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
   70 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
   71 
   72 const string CodeGenerator::STY_NAME_HVR="hvr";
   73 const string CodeGenerator::STY_NAME_ERR="err";
   74 const string CodeGenerator::STY_NAME_ERM="erm";
   75 
   76 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
   77 
   78 
   79 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
   80 {
   81     CodeGenerator* generator=NULL;
   82     switch ( type ) {
   83     case HTML:
   84         generator = new HtmlGenerator();
   85         break;
   86     case XHTML:
   87         generator = new XHtmlGenerator();
   88         break;
   89     case TEX:
   90         generator = new TexGenerator ();
   91         break;
   92     case LATEX:
   93         generator = new LatexGenerator();
   94         break;
   95     case RTF:
   96         generator = new RtfGenerator ();
   97         break;
   98     case SVG:
   99         generator = new SVGGenerator();
  100         break;
  101     case BBCODE:
  102         generator = new BBCodeGenerator();
  103         break;
  104     case PANGO:
  105         generator = new PangoGenerator();
  106         break;
  107     case ODTFLAT:
  108         generator = new ODTGenerator();
  109         break;
  110     case ESC_ANSI:
  111         generator = new AnsiGenerator();
  112         break;
  113     case ESC_XTERM256:
  114     case ESC_TRUECOLOR:
  115         generator = new Xterm256Generator();
  116         generator->setESCTrueColor(type==ESC_TRUECOLOR);
  117         break;
  118     default:
  119         break;
  120     }
  121     return generator;
  122 }
  123 
  124 
  125 CodeGenerator::CodeGenerator ( highlight::OutputType type )
  126     :currentSyntax(NULL),
  127      in ( NULL ),
  128      out ( NULL ),
  129      encoding ( "none" ),
  130      docTitle ( "Source file" ),
  131      maskWs ( false ),
  132      excludeWs ( false ),
  133      fragmentOutput ( false ),
  134      keepInjections( false ),
  135      showLineNumbers ( false ),
  136      lineNumberFillZeroes ( false ),
  137      printNewLines(true),
  138      omitVersionComment(false),
  139      isolateTags(false),
  140      disableStyleCache(false),
  141      baseFontSize("10"),
  142      lineNumber ( 0 ),
  143      lineNumberOffset ( 0 ),
  144      currentState ( _UNKNOWN ),
  145      currentKeywordClass ( 0 ),
  146      includeStyleDef ( false ),
  147      numberCurrentLine ( false ),
  148      lineIndex ( 0 ),
  149      lastLineLength( 0 ),
  150      syntaxChangeIndex(UINT_MAX),
  151      syntaxChangeLineNo(UINT_MAX),
  152      lineNumberWidth ( 5 ),
  153      startLineCnt( 1 ),
  154      startLineCntCurFile( 1 ),
  155      maxLineCnt ( UINT_MAX ),
  156      inputFilesCnt (0),
  157      processedFilesCnt (0),
  158      kwOffset(0),
  159      noTrailingNewLine(0),
  160 
  161      terminatingChar ( '\0' ),
  162      formatter ( NULL ),
  163      streamIterator ( NULL ),
  164      formattingEnabled ( false ),
  165      formattingPossible ( false ),
  166      validateInput ( false ),
  167      numberWrappedLines ( true ),
  168      resultOfHook(false),
  169      lineContainedTestCase(false),
  170      lineContainedStmt(false),
  171      applySyntaxTestCase(false),
  172      toggleDynRawString(false),
  173      lsEnableHoverRequests(false),
  174      lsCheckSemanticTokens(false),
  175      lsCheckSyntaxErrors(false),
  176 
  177      keywordCase ( StringTools::CASE_UNCHANGED ),
  178      eolDelimiter ('\n'),
  179      outputType ( type )
  180 {
  181 }
  182 
  183 
  184 CodeGenerator::~CodeGenerator()
  185 {
  186     delete formatter;
  187     delete streamIterator;
  188 
  189     resetSyntaxReaders();
  190 
  191     for (unsigned int i=0; i<pluginChunks.size(); i++) {
  192         delete pluginChunks[i];
  193     }
  194     pluginChunks.clear();
  195 }
  196 
  197 
  198 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
  199 {
  200     this->themePath=themePath;
  201     bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
  202     initOutputTags();
  203     return loadOK;
  204 }
  205 
  206 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
  207                                              const string& workspace, const string& syntax,
  208                                              int delay, int logLevel )
  209 {
  210     if (LSPClient.isInitialized()) {
  211         return LSResult::INIT_OK;
  212     }
  213 
  214     LSPClient.setLogging(logLevel>1);
  215 
  216     LSPClient.setExecutable(executable);
  217     LSPClient.setWorkspace(workspace);
  218     LSPClient.setOptions(options);
  219     LSPClient.setSyntax(syntax);
  220     LSPClient.setInitializeDelay(delay);
  221     if (!LSPClient.connect()){
  222         return LSResult::INIT_BAD_PIPE;
  223     }
  224 
  225     if (!LSPClient.runInitialize()){
  226         return LSResult::INIT_BAD_REQUEST;
  227     }
  228     for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
  229         currentSyntax->generateNewKWClass(i+1, "st");
  230     }
  231     LSPClient.runInitialized();
  232     updateKeywordClasses();
  233     return LSResult::INIT_OK;
  234 }
  235 
  236 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
  237     lsDocumentPath = fileName;
  238     return LSPClient.runDidOpen(fileName, suffix);
  239 }
  240 
  241 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
  242     lsDocumentPath.clear();
  243     return LSPClient.runDidClose(fileName, suffix);
  244 }
  245 
  246 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
  247     lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
  248     return lsCheckSemanticTokens;
  249 }
  250 
  251 bool CodeGenerator::isHoverProvider(){
  252     return LSPClient.isHoverProvider();
  253 }
  254 
  255 bool CodeGenerator::isSemanticTokensProvider(){
  256     return LSPClient.isSemanticTokensProvider();
  257 }
  258 
  259 void CodeGenerator::lsAddHoverInfo(bool hover){
  260     lsEnableHoverRequests = hover;
  261 }
  262 
  263 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
  264     lsCheckSyntaxErrors = error;;
  265 }
  266 
  267 
  268 void CodeGenerator::exitLanguageServer () {
  269     LSPClient.runShutdown();
  270     LSPClient.runExit();
  271 }
  272 
  273 const string& CodeGenerator::getStyleName()
  274 {
  275     return themePath;
  276 }
  277 
  278 void CodeGenerator::setLineNumberWidth ( int w )
  279 {
  280     lineNumberWidth=w;
  281 }
  282 
  283 int CodeGenerator::getLineNumberWidth()
  284 {
  285     return lineNumberWidth;
  286 }
  287 
  288 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
  289 {
  290     showLineNumbers=flag;
  291     lineNumberOffset = startCnt-1;
  292 }
  293 
  294 bool CodeGenerator::getPrintLineNumbers()
  295 {
  296     return showLineNumbers;
  297 }
  298 
  299 void CodeGenerator::setPrintZeroes ( bool flag )
  300 {
  301     lineNumberFillZeroes=flag;
  302 }
  303 
  304 bool CodeGenerator::getPrintZeroes()
  305 {
  306     return lineNumberFillZeroes;
  307 }
  308 
  309 void CodeGenerator::setIncludeStyle ( bool flag )
  310 {
  311     includeStyleDef = flag;
  312 }
  313 
  314 void CodeGenerator::disableTrailingNL ( int flag )
  315 {
  316     noTrailingNewLine = flag;
  317 }
  318 
  319 void CodeGenerator::setStyleInputPath ( const string& path )
  320 {
  321     styleInputPath = path;
  322 }
  323 
  324 void CodeGenerator::setStyleOutputPath ( const string& path )
  325 {
  326     styleOutputPath = path;
  327 }
  328 
  329 void CodeGenerator::setPluginParameter ( const string& param )
  330 {
  331     pluginParameter = param;
  332 }
  333 
  334 const string&  CodeGenerator::getStyleInputPath()
  335 {
  336     return styleInputPath;
  337 }
  338 
  339 const string&  CodeGenerator::getStyleOutputPath()
  340 {
  341     return styleOutputPath;
  342 }
  343 
  344 void CodeGenerator::setFragmentCode ( bool flag )
  345 {
  346     fragmentOutput=flag;
  347 }
  348 
  349 bool CodeGenerator::getFragmentCode()
  350 {
  351     return fragmentOutput;
  352 }
  353 void CodeGenerator::setKeepInjections ( bool flag )
  354 {
  355     keepInjections=flag;
  356 }
  357 
  358 bool CodeGenerator::getKeepInjections()
  359 {
  360     return keepInjections;
  361 }
  362 void CodeGenerator::setValidateInput ( bool flag )
  363 {
  364     validateInput=flag;
  365 }
  366 
  367 bool CodeGenerator::getValidateInput()
  368 {
  369     return validateInput;
  370 }
  371 
  372 void CodeGenerator::setNumberWrappedLines ( bool flag )
  373 {
  374     numberWrappedLines=flag;
  375 }
  376 
  377 bool CodeGenerator::getNumberWrappedLines()
  378 {
  379     return numberWrappedLines;
  380 }
  381 
  382 void CodeGenerator::setOmitVersionComment ( bool flag )
  383 {
  384     omitVersionComment=flag;
  385 }
  386 
  387 bool CodeGenerator::getOmitVersionComment ()
  388 {
  389     return omitVersionComment;
  390 }
  391 
  392 void CodeGenerator::setIsolateTags ( bool flag )
  393 {
  394     isolateTags=flag;
  395 }
  396 
  397 bool CodeGenerator::getIsolateTags ()
  398 {
  399     return isolateTags;
  400 }
  401 
  402 void CodeGenerator::setBaseFont ( const string& fontName )
  403 {
  404     baseFont = fontName;
  405 }
  406 
  407 void CodeGenerator::setBaseFontSize ( const string& fontSize)
  408 {
  409     baseFontSize = fontSize;
  410 }
  411 
  412 void CodeGenerator::setStyleCaching ( bool flag )
  413 {
  414     disableStyleCache=!flag;
  415 }
  416 
  417 const string CodeGenerator::getBaseFont() const
  418 {
  419     if ( !baseFont.empty() ) return baseFont;
  420     switch ( outputType ) {
  421     case HTML:
  422     case XHTML:
  423     case SVG:
  424         return "'Courier New',monospace";
  425         break;
  426     case LATEX:
  427         return "ttfamily";
  428         break;
  429     case TEX:
  430         return "tt";
  431         break;
  432     default:
  433         return "Courier New";
  434     }
  435 }
  436 
  437 const string CodeGenerator::getBaseFontSize()
  438 {
  439     return baseFontSize;
  440 }
  441 
  442 void CodeGenerator::setTitle ( const string & title )
  443 {
  444     if ( !title.empty() ) docTitle= title;
  445 }
  446 
  447 string CodeGenerator::getTitle()
  448 {
  449     return docTitle;
  450 }
  451 
  452 void CodeGenerator::setEncoding ( const string& encodingName )
  453 {
  454     encoding = encodingName;
  455 }
  456 
  457 bool CodeGenerator::formattingDisabled()
  458 {
  459     return !formattingEnabled;
  460 }
  461 
  462 void CodeGenerator::setStartingInputLine ( unsigned int begin )
  463 {
  464     startLineCnt = startLineCntCurFile = begin;
  465 }
  466 
  467 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
  468 {
  469     maxLineCnt = cnt;
  470 }
  471 
  472 void CodeGenerator::setFilesCnt ( unsigned int cnt )
  473 {
  474     inputFilesCnt = cnt;
  475     processedFilesCnt = 0;
  476 }
  477 
  478 bool CodeGenerator::formattingIsPossible()
  479 {
  480     return formattingPossible;
  481 }
  482 
  483 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
  484                                        unsigned int lineLength,
  485                                        int numberSpaces )
  486 {
  487     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
  488     bool replaceTabs = numberSpaces > 0;
  489 
  490     if ( enableWrap || replaceTabs ) {
  491         preFormatter.setWrap ( enableWrap );
  492         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
  493         preFormatter.setWrapLineLength ( lineLength );
  494         preFormatter.setReplaceTabs ( replaceTabs );
  495         preFormatter.setNumberSpaces ( numberSpaces );
  496     }
  497 }
  498 
  499 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
  500 {
  501     keywordCase = keyCase;
  502 }
  503 
  504 void CodeGenerator::setEOLDelimiter(char delim)
  505 {
  506     eolDelimiter = delim;
  507 }
  508 
  509 void CodeGenerator::reset()
  510 {
  511     lineIndex = 0;
  512     lineNumber = 0;
  513     line.clear();
  514     preFormatter.reset();
  515     inFile.clear();
  516     outFile.clear();
  517     embedLangDefPath.clear();
  518     printNewLines=true;
  519     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  520     startLineCntCurFile = startLineCnt;
  521     applySyntaxTestCase=lineContainedTestCase=false;
  522     if (currentSyntax){
  523         vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
  524         docStyle.overrideAttributes(overrideStyleAttrs);
  525         if (overrideStyleAttrs.size())
  526             disableStyleCache = true;
  527     }
  528 }
  529 
  530 string CodeGenerator::getThemeInitError()
  531 {
  532     return docStyle.getErrorMessage();
  533 }
  534 
  535 string CodeGenerator::getPluginScriptError()
  536 {
  537     return userScriptError;
  538 }
  539 
  540 string CodeGenerator::getSyntaxRegexError()
  541 {
  542     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
  543 }
  544 string CodeGenerator::getSyntaxLuaError()
  545 {
  546     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
  547 
  548 }
  549 string CodeGenerator::getSyntaxDescription()
  550 {
  551     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
  552 
  553 }
  554 string CodeGenerator::getSyntaxEncodingHint()
  555 {
  556     return (currentSyntax)? currentSyntax->getEncodingHint(): "";
  557 
  558 }
  559 string CodeGenerator::getThemeDescription()
  560 {
  561     return docStyle.getDescription();
  562 }
  563 
  564 string CodeGenerator::getSyntaxCatDescription(){
  565     return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
  566 }
  567 
  568 string CodeGenerator::getThemeCatDescription()
  569 {
  570     return docStyle.getCategoryDescription();
  571 }
  572 
  573 float CodeGenerator::getThemeContrast()
  574 {
  575     return docStyle.getContrast();
  576 }
  577 
  578 unsigned int CodeGenerator::getLineNumber()
  579 {
  580     return lineNumber;
  581 }
  582 
  583 bool CodeGenerator::readNewLine ( string &newLine )
  584 {
  585     bool eof=false;
  586 
  587     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
  588 
  589     while (!eof && startLineCntCurFile>0) {
  590         if ( formattingPossible && formattingEnabled ) {
  591             eof=!formatter->hasMoreLines();
  592             if ( !eof ) {
  593                 newLine = formatter->nextLine();
  594             }
  595         } else {
  596             eof = ! getline ( *in, newLine, eolDelimiter );
  597         }
  598         --startLineCntCurFile;
  599     }
  600     startLineCntCurFile=1;
  601 #ifndef _WIN32
  602     // drop CR of CRLF files
  603     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
  604         newLine.erase(newLine.size() - 1);
  605 #endif
  606 
  607     return eof || ( lineNumber == maxLineCnt );
  608 }
  609 
  610 void CodeGenerator::matchRegex ( const string &line, State skipState)
  611 {
  612     regexGroups.clear();
  613     int matchBegin=0;
  614     int groupID=0;
  615 
  616     // cycle through all regex, save the start and ending indices of matches to report them later
  617     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
  618         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
  619 
  620         if (regexElem->open == skipState) continue;
  621 
  622         if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
  623             continue;
  624         }
  625 
  626         if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
  627             continue;
  628         }
  629 
  630         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
  631         boost::xpressive::sregex_iterator end;
  632 
  633         for( ; cur != end; ++cur )  {
  634             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
  635             matchBegin = cur->position(groupID);
  636 
  637             regexGroups.insert (
  638                 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
  639 
  640             // priority regex (match required)
  641             if (regexElem->priority) {
  642                 return;
  643             }
  644         }
  645     }
  646 }
  647 
  648 unsigned char CodeGenerator::getInputChar()
  649 {
  650     // end of line?
  651     if ( lineIndex == line.length() ) {
  652 
  653         //more testing required:
  654         if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
  655             lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
  656 
  657         bool eof=false;
  658         if ( preFormatter.isEnabled() ) {
  659             if ( !preFormatter.hasMoreLines() ) {
  660                 eof=readNewLine ( line );
  661                 preFormatter.setLine ( line );
  662                 ++lineNumber;
  663                 numberCurrentLine = true;
  664             } else {
  665                 if (numberWrappedLines)
  666                     ++lineNumber;
  667                 numberCurrentLine = numberWrappedLines;
  668             }
  669 
  670             line = preFormatter.getNextLine();
  671         } else {
  672             eof=readNewLine ( line );
  673             ++lineNumber;
  674 
  675             numberCurrentLine = true;
  676         }
  677         lineIndex=0;
  678 
  679         if (!lineContainedTestCase && applySyntaxTestCase){
  680             stateTraceTest = stateTraceCurrent;
  681             stateTraceCurrent.clear();
  682         }
  683 
  684         lineContainedTestCase=false;
  685         lineContainedStmt=false;
  686         matchRegex ( line );
  687 
  688         return ( eof ) ?'\0':'\n';
  689     }
  690 
  691     return line[lineIndex++];
  692 }
  693 
/** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS,
 *  Coffeescript with block regex, Pas + ASM)
 *  especially nested syntax in one line
 *
 *  Determines the state of the input at the current position and stores the
 *  matching text in the member "token".
 *
 *  Order of evaluation:
 *   1. end of line, end of file and whitespace are reported directly
 *   2. syntax test markers ('^' or '<') within comments, if test cases are applied
 *   3. a pending switch to an embedded syntax is executed
 *   4. syntax errors reported by the language server
 *   5. semantic tokens reported by the language server
 *   6. regex matches of the current syntax (result is passed to validateState())
 *   7. otherwise the single character is returned as STANDARD
 *
 *  @param oldState state before this call; used for the test marker check
 *                  and handed to validateState()
 *  @return the state recognized at the current position
 */
State CodeGenerator::getCurrentState (State oldState)
{
    unsigned char c='\0';

    if ( token.length() ==0 ) {
        c=getInputChar();
    } else {
        // a token is still pending: move lineIndex back to the position after
        // its first character and evaluate that character again
        lineIndex-= ( token.length()-1 );
        c=token[0];
    }
    if ( c=='\n' ) {
        return _EOL;   // End of line
    }

    if ( c=='\0' ) {
        return _EOF;   // End of file
    }

    if ( c==' ' || c=='\t' ) {
        token= c;
        return _WS;    // White space
    }

    // syntax test markers are only recognized if the previous state was a comment
    if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
        token= c;
        return _TESTPOS;
    }

    // at this position the syntax change takes place
    if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
        loadEmbeddedLang(embedLangDefPath);  // load new syntax
        matchRegex(line);                    // recognize new patterns in the (remaining) line
        syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX; // no syntax change pending any more
    }

// jump target used after an embedded code delimiter was found (see below)
SKIP_EMBEDDED:

    // syntax error reported by the language server at this position
    if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
        highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
        // lineIndex already points behind the character c, so the token starts at lineIndex-1
        token = line.substr ( lineIndex-1, errorToken.length);
        lineIndex += errorToken.length-1;
        lsSyntaxErrorDesc = errorToken.id; // printed later by printSyntaxError()

        //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
        return SYNTAX_ERROR;
    }

    // semantic token reported by the language server at this position
    if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
        highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
        int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
        // a style id of 0 is ignored; the regex checks below are applied instead
        if (semStyleKwId) {
            token = line.substr ( lineIndex-1, semToken.length);
            lineIndex += semToken.length-1;

            currentKeywordClass = semStyleKwId + kwOffset;  // +offset of missing kw groups in the theme
            //std::cerr <<"l "<<lineNumber<<  "t "<<token<< " semStyleKwId "<< semStyleKwId << "  off "<<kwOffset<<" -> "  << semToken.id <<"\n";
            return KEYWORD;
        }
    }

    // Test if a regular expression was found at the current position
    if ( !regexGroups.empty() ) {
        if ( regexGroups.count ( lineIndex ) ) {
            token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );

            // remember the match position, lineIndex is moved to the end of the match
            unsigned int oldIndex= lineIndex;
            if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;

            if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
                //do not handle a nested section if the syntax is marked as "sealed"
                if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
                    embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
                    //remember position
                    syntaxChangeIndex = lineIndex+2;
                    syntaxChangeLineNo = lineNumber;
                }

                // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
                matchRegex(line, EMBEDDED_CODE_BEGIN);
                lineIndex = oldIndex;
                goto SKIP_EMBEDDED; // this is how it should be done
            }

            if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
                // keyword lists are looked up with the case-changed token if the syntax ignores case
                string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
                currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)

                // not found in a keyword list: use the keyword class of the regex
                if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
                    currentKeywordClass = regexGroups[oldIndex].kwClass;
                }
                return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
            } else {
                return validateState(regexGroups[oldIndex].state, oldState);
            }
        }
    }

    // Character not referring to any state
    token = c;
    return STANDARD;
}
  799 
  800 State CodeGenerator::validateState(State newState, State oldState)
  801 {
  802 
  803     if (currentSyntax->getValidateStateChangeFct()) {
  804         Diluculum::LuaValueList params;
  805         params.push_back(Diluculum::LuaValue(oldState));
  806         params.push_back(Diluculum::LuaValue(newState));
  807         params.push_back(Diluculum::LuaValue(token));
  808         params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
  809         params.push_back(Diluculum::LuaValue(lineNumber) );
  810         params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
  811 
  812         Diluculum::LuaValueList res=
  813             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
  814                     params,"getValidateStateChangeFct call")  ;
  815 
  816         resultOfHook = res.size()>=1;
  817         if (resultOfHook) {
  818 
  819             setOverrideParams();
  820 
  821             State validatedState = (State)res[0].asInteger();
  822             if ( validatedState== _REJECT) {
  823 
  824                 // proceed using only the first character of the token
  825                 if (res.size()==1) {
  826                     lineIndex -= (token.length() -1);
  827                     token=token.substr(0, 1);
  828                 }
  829 
  830                 //experimental for slim.lang: evaluate second return arg after _REJECT
  831                 if (res.size()>=2) {
  832                     lineIndex -= (token.length() );
  833                     token.clear();
  834                     return (State)res[1].asInteger();
  835                 }
  836                 return oldState;
  837             }
  838 
  839             return validatedState;
  840         }
  841     }
  842     resultOfHook  = false;
  843 
  844     return newState;
  845 }
  846 
  847 unsigned int CodeGenerator::getCurrentKeywordClassId(){
  848     unsigned int kwClassId=0;
  849 
  850     // this vector contains the defined keyword classes, and currentKeywordClass is its index:
  851     vector<string> kwClasses=currentSyntax->getKeywordClasses();
  852 
  853     if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
  854         string kwClassName=kwClasses[currentKeywordClass-1];
  855         if (kwClassName.size()==3)
  856             kwClassId = kwClassName[2] - 'a' + 1;
  857     }
  858     return kwClassId;
  859 }
  860 
  861 //it is faster to pass ostream reference
  862 void CodeGenerator::maskString ( ostream& ss, const string & s )
  863 {
  864     string escHoverText;
  865 
  866     if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
  867 
  868         string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
  869 
  870         for(const auto &c : hoverText)
  871         {
  872             if (isascii(c))
  873                 escHoverText.append(maskCharacter(c));
  874         }
  875     }
  876 
  877     if (escHoverText.size()) {
  878         ss << getHoverTagOpen(escHoverText);
  879     }
  880 
  881     for (const auto &c : s)
  882     {
  883         ss << maskCharacter ( c );
  884     }
  885 
  886     if (escHoverText.size()) {
  887         ss << getHoverTagClose();
  888     }
  889 
  890     // The test markers position should also be deternmined by calculating the code points
  891     if ( applySyntaxTestCase ) {
  892 
  893         PositionState ps(currentState, getCurrentKeywordClassId(), false);
  894 
  895         //TODO avoid repeated string comparison:
  896         int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
  897         for (int i=0; i< slen; i++ ) {
  898             stateTraceCurrent.push_back(ps);
  899         }
  900         if (stateTraceCurrent.size()>200)
  901             stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
  902     }
  903 }
  904 
  905 void CodeGenerator::printSyntaxError ( ostream& ss ) {
  906     if ( !lsSyntaxErrorDesc.empty()) {
  907         ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
  908 
  909         for(const auto &c : lsSyntaxErrorDesc)
  910         {
  911             ss << maskCharacter ( c );
  912         }
  913 
  914         ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
  915         lsSyntaxErrorDesc.clear();
  916     }
  917 }
  918 
  919 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
  920 {
  921 
  922     Diluculum::LuaValueList params;
  923     params.push_back(Diluculum::LuaValue(token));
  924     params.push_back(Diluculum::LuaValue(currentState));
  925     params.push_back(Diluculum::LuaValue(currentKeywordClass));
  926     params.push_back(Diluculum::LuaValue(lineContainedStmt));
  927     params.push_back(Diluculum::LuaValue(lineNumber) );
  928     params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
  929 
  930     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
  931             params,"getDecorateFct call")  ;
  932 }
  933 
  934 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
  935 {
  936     if ( flushWhiteSpace )
  937         flushWs(1);
  938     string caseToken = StringTools::change_case ( token, tcase );
  939     if (currentSyntax->getDecorateFct()) {
  940 
  941         Diluculum::LuaValueList res=callDecorateFct(caseToken);
  942         if (res.size()==1) {
  943             *out<<res[0].asString();
  944         } else {
  945             maskString ( *out, caseToken );
  946         }
  947     } else {
  948         maskString ( *out, caseToken );
  949     }
  950 
  951     // check this *after* the decorate call
  952     if (   currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
  953         || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
  954         lineContainedStmt = true;
  955     }
  956     token.clear();
  957 }
  958 
  959 bool CodeGenerator::styleFound()
  960 {
  961     return docStyle.found();
  962 }
  963 
  964 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
  965 {
  966     return true;
  967 }
  968 
  969 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
  970 {
  971 
  972     if ( formatter!=NULL ) {
  973         return true;
  974     }
  975 
  976     if ( !indentScheme.size() ) return false;
  977 
  978     formatter=new astyle::ASFormatter();
  979 
  980     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
  981         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
  982     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
  983         formatter->setFormattingStyle ( astyle::STYLE_KR );
  984     } else if ( indentScheme=="java" ) {
  985         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
  986     } else if ( indentScheme=="stroustrup" ) {
  987         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
  988     } else if ( indentScheme=="whitesmith" ) {
  989         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
  990     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
  991         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
  992     } else if ( indentScheme=="gnu" ) {
  993         formatter->setFormattingStyle ( astyle::STYLE_GNU );
  994     } else if ( indentScheme=="linux" ) {
  995         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
  996     } else if ( indentScheme=="horstmann" ) {
  997         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
  998     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
  999         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
 1000     } else if ( indentScheme=="google") {
 1001         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
 1002     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
 1003         formatter->setFormattingStyle ( astyle::STYLE_PICO );
 1004     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
 1005         formatter->setFormattingStyle ( astyle::STYLE_LISP );
 1006     } else if ( indentScheme=="vtk") {
 1007         formatter->setFormattingStyle ( astyle::STYLE_VTK );
 1008     } else if ( indentScheme=="mozilla") {
 1009         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
 1010     } else if ( indentScheme=="webkit") {
 1011         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
 1012     } else if ( indentScheme!="user" ){
 1013         return false;
 1014     }
 1015     return formattingEnabled=true;
 1016 }
 1017 
 1018 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
 1019 {
 1020 
 1021     if (!embedded) {
 1022         while (!nestedLangs.empty()) {
 1023             nestedLangs.pop();
 1024         }
 1025     }
 1026 
 1027     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
 1028     LoadResult result=LOAD_OK;
 1029     if ( reloadNecessary ) {
 1030         if (syntaxReaders.count(langDefPath)) {
 1031             currentSyntax=syntaxReaders[langDefPath];
 1032             result=LOAD_OK;
 1033         } else {
 1034 
 1035             currentSyntax=new SyntaxReader();
 1036             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
 1037             syntaxReaders[langDefPath]=currentSyntax;
 1038         }
 1039 
 1040         if ( result==LOAD_OK ) {
 1041             formattingPossible=currentSyntax->enableReformatting();
 1042             updateKeywordClasses();
 1043         }
 1044     }
 1045 
 1046     kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
 1047 
 1048     return result;
 1049 }
 1050 
 1051 bool CodeGenerator::validateInputStream()
 1052 {
 1053     if ( !in ) return false;
 1054 
 1055     // it is not possible to move stream pointer back with stdin
 1056     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
 1057         return true;
 1058 
 1059     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
 1060     // Magic configuration of "file"
 1061     // This is intended for web plugins - only check filetypes often found in the net
 1062     char magic_gif[]    = {'G','I','F','8', 0};
 1063     char magic_png[]    = {'\x89','P','N','G', 0};
 1064     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
 1065     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
 1066     char magic_bmp[]    = {'B','M', 0};
 1067     char magic_pdf[]    = {'%','P','D','F', 0};
 1068     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
 1069     char magic_rar[]    = {'R','a','r','!', 0};
 1070     char magic_zip[]    = {'P','K','\x03','\x04', 0};
 1071     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
 1072     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
 1073     char magic_bzip[]   = {'B','Z', 0};
 1074 
 1075     char* magic_table[] = {magic_utf8,
 1076                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
 1077                            magic_java,
 1078                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
 1079                            0
 1080                           };
 1081 
 1082     char buffer [10]= {0};
 1083     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
 1084 
 1085     int magic_index=0;
 1086     while ( magic_table[magic_index] ) {
 1087         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
 1088             break;
 1089         }
 1090         magic_index++;
 1091     }
 1092     int streamReadPos=0;
 1093     if ( magic_table[magic_index] == magic_utf8 ) {
 1094         //setEncoding("utf-8");
 1095         streamReadPos=3; // remove UTF-8 magic number from output
 1096     }
 1097 
 1098     in -> seekg ( streamReadPos, ios::beg );
 1099     in-> clear();  // clear fail bit to continue reading
 1100 
 1101     return !magic_table[magic_index] // points to 0 if no pattern was found
 1102            || magic_table[magic_index] == magic_utf8;
 1103 }
 1104 
 1105 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
 1106 
 1107     if (currentSyntax && pluginChunks.size()) {
 1108 
 1109         Diluculum::LuaState luaState;
 1110 
 1111         Diluculum::LuaValueList chunkParams;
 1112         chunkParams.push_back(currentSyntax->getDescription());
 1113         for (unsigned int i=0; i<pluginChunks.size(); i++) {
 1114             luaState.call(*pluginChunks[i], chunkParams, "format user function");
 1115         }
 1116 
 1117         if (luaState.globals().count(fctName)) {
 1118             Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
 1119 
 1120             luaState["HL_PLUGIN_PARAM"] = pluginParameter;
 1121             luaState["HL_OUTPUT"] = outputType;
 1122             luaState["HL_FORMAT_HTML"]=HTML;
 1123             luaState["HL_FORMAT_XHTML"]=XHTML;
 1124             luaState["HL_FORMAT_TEX"]=TEX;
 1125             luaState["HL_FORMAT_LATEX"]=LATEX;
 1126             luaState["HL_FORMAT_RTF"]=RTF;
 1127             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
 1128             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
 1129             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
 1130             luaState["HL_FORMAT_SVG"]=SVG;
 1131             luaState["HL_FORMAT_BBCODE"]=BBCODE;
 1132             luaState["HL_FORMAT_PANGO"]=PANGO;
 1133             luaState["HL_FORMAT_ODT"]=ODTFLAT;
 1134 
 1135             Diluculum::LuaValueList params;
 1136             Diluculum::LuaValueMap options;
 1137             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );
 1138             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);
 1139             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);
 1140             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());
 1141             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());
 1142 
 1143             params.push_back(inputFilesCnt);
 1144             params.push_back(processedFilesCnt);
 1145             params.push_back(options);
 1146 
 1147             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
 1148             if (res.size()>=1) {
 1149                 *keepDefault=false;
 1150                 *result = res[0].asString();
 1151                 if (res.size()==2)
 1152                     *keepDefault = res[1].asBoolean();
 1153             }
 1154             delete documentFct;
 1155         }
 1156     }
 1157 }
 1158 
 1159 void CodeGenerator::printHeader()
 1160 {
 1161     bool keepDefaultHeader=true;
 1162     string pluginHeader;
 1163 
 1164     processedFilesCnt++;
 1165 
 1166     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
 1167 
 1168     if ( ! fragmentOutput && keepDefaultHeader)
 1169         *out << getHeader();
 1170 
 1171     *out << pluginHeader;
 1172 
 1173     if ( !fragmentOutput || keepInjections)
 1174         *out << currentSyntax->getHeaderInjection();
 1175 }
 1176 
 1177 void CodeGenerator::printFooter()
 1178 {
 1179 
 1180     bool keepDefaultFooter=true;
 1181     string pluginFooter;
 1182 
 1183     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
 1184 
 1185     if ( !fragmentOutput || keepInjections)
 1186         *out << currentSyntax->getFooterInjection();
 1187 
 1188     *out << pluginFooter;
 1189 
 1190     if ( ! fragmentOutput && keepDefaultFooter )
 1191         *out << getFooter();
 1192 }
 1193 
 1194 ParseError CodeGenerator::generateFile ( const string &inFileName,
 1195         const string &outFileName )
 1196 {
 1197     if ( !docStyle.found() ) {
 1198         return BAD_STYLE;
 1199     }
 1200 
 1201     reset();
 1202 
 1203     ParseError error=PARSE_OK;
 1204 
 1205     inFile=inFileName;
 1206     outFile=outFileName;
 1207 
 1208     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
 1209 
 1210     if ( validateInput )
 1211         if ( !validateInputStream() ) error= BAD_INPUT;
 1212 
 1213     if ( !in->fail() && error==PARSE_OK ) {
 1214         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
 1215         if ( out->fail() ) {
 1216             error=BAD_OUTPUT;
 1217         }
 1218     }
 1219 
 1220     if ( in->fail() ) {
 1221         error=BAD_INPUT;
 1222     }
 1223 
 1224     if ( error==PARSE_OK ) {
 1225         initASStream();
 1226         currentSyntax->setInputFileName(inFile);
 1227         printHeader();
 1228         printBody();
 1229         printFooter();
 1230     }
 1231 
 1232     if ( !outFileName.empty() ) {
 1233         delete out;
 1234         out=NULL;
 1235     }
 1236     if ( !inFileName.empty() ) {
 1237         delete in;
 1238         in=NULL;
 1239     }
 1240     return error;
 1241 }
 1242 
 1243 string CodeGenerator::generateString ( const string &input )
 1244 {
 1245 
 1246     if ( !docStyle.found() ) {
 1247         return "";
 1248     }
 1249 
 1250     reset();
 1251 
 1252     in = new istringstream ( input );
 1253     out = new ostringstream ();
 1254 
 1255     if ( in->fail() || out->fail() ) {
 1256         return "";
 1257     }
 1258 
 1259     initASStream();
 1260 
 1261     printHeader();
 1262     printBody();
 1263     printFooter();
 1264 
 1265     string result = static_cast<ostringstream*> ( out )->str();
 1266 
 1267     delete out;
 1268     out=NULL;
 1269     delete in;
 1270     in=NULL;
 1271 
 1272     return result;
 1273 }
 1274 
 1275 void CodeGenerator::initASStream() {
 1276     if ( formatter != NULL ) {
 1277         if (streamIterator) delete streamIterator;
 1278         streamIterator =  new astyle::ASStreamIterator ( in );
 1279         formatter->init ( streamIterator );
 1280     }
 1281 }
 1282 
 1283 string CodeGenerator::generateStringFromFile ( const string &inFileName )
 1284 {
 1285 
 1286     if ( !docStyle.found() ) {
 1287         return "";
 1288     }
 1289 
 1290     reset();
 1291 
 1292     inFile = inFileName;
 1293 
 1294     in = new ifstream ( inFileName.c_str() );
 1295     out = new ostringstream ();
 1296 
 1297     if ( in->fail() || out->fail() ) {
 1298         return "";
 1299     }
 1300 
 1301     if ( validateInput && !validateInputStream() ) {
 1302         return "ERROR: detected binary input";
 1303     }
 1304 
 1305     initASStream();
 1306 
 1307     currentSyntax->setInputFileName(inFile);
 1308 
 1309     printHeader();
 1310     printBody();
 1311     printFooter();
 1312 
 1313     string result = static_cast<ostringstream*> ( out )->str();
 1314 
 1315     delete out;
 1316     out=NULL;
 1317     delete in;
 1318     in=NULL;
 1319 
 1320     return result;
 1321 }
 1322 
 1323 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
 1324 {
 1325     if ( s==KEYWORD && kwClassID ) {
 1326         return NUMBER_BUILTIN_STATES + kwClassID-1;
 1327     }
 1328     return ( unsigned int ) s ;
 1329 }
 1330 
 1331 void CodeGenerator::openTag ( State s )
 1332 {
 1333     *out << openTags[ ( unsigned int ) s];
 1334     currentState=s;
 1335 }
 1336 
 1337 void CodeGenerator::closeTag ( State s )
 1338 {
 1339     *out << closeTags[ ( unsigned int ) s];
 1340     flushWs(2);
 1341     currentState=_UNKNOWN;
 1342 }
 1343 
 1344 void CodeGenerator::openKWTag ( unsigned int kwClassID )
 1345 {
 1346     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1347     currentState=KEYWORD;
 1348 }
 1349 
 1350 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
 1351 {
 1352     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1353     flushWs(3);
 1354     currentState=_UNKNOWN;
 1355 }
 1356 
 1357 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
 1358 {
 1359     if (nestedLangs.empty()) {
 1360         nestedLangs.push(currentSyntax->getCurrentPath() );
 1361     }
 1362     if (nestedLangs.top() != embedLangDefPath) {
 1363         nestedLangs.push(embedLangDefPath);
 1364     }
 1365     LoadResult res = loadLanguage(embedLangDefPath, true);
 1366     //pass end delimiter regex to syntax description
 1367     currentSyntax->restoreLangEndDelim(embedLangDefPath);
 1368     return res == LOAD_OK;
 1369 }
 1370 
 1371 ///////////////////////////////////////////////////////////////////////////////
 1372 
 1373 void CodeGenerator::processRootState()
 1374 {
 1375     bool eof=false,
 1376          firstLine=true; // avoid newline before printing the first output line
 1377 
 1378     applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
 1379 
 1380     if ( currentSyntax->highlightingDisabled() ) {
 1381         string line;
 1382         while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
 1383             ++lineNumber;
 1384             insertLineNumber ( !firstLine );
 1385             flushWs(4);
 1386             firstLine=false;
 1387             if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
 1388                 maskString ( *out, line );
 1389         }
 1390         *out << flush;
 1391         return;
 1392     }
 1393 
 1394     State state=STANDARD;
 1395     openTag ( STANDARD );
 1396 
 1397     do {
 1398         // determine next state
 1399         state= getCurrentState(STANDARD);
 1400 
 1401         // handle current state
 1402         switch ( state ) {
 1403         case KEYWORD:
 1404             closeTag ( STANDARD );
 1405             eof=processKeywordState ( state );
 1406             openTag ( STANDARD );
 1407             break;
 1408         case NUMBER:
 1409             closeTag ( STANDARD );
 1410             eof=processNumberState();
 1411             openTag ( STANDARD );
 1412             break;
 1413         case ML_COMMENT:
 1414             closeTag ( STANDARD );
 1415             eof=processMultiLineCommentState();
 1416             openTag ( STANDARD );
 1417             break;
 1418         case SL_COMMENT:
 1419             closeTag ( STANDARD );
 1420             eof=processSingleLineCommentState();
 1421             openTag ( STANDARD );
 1422             break;
 1423         case STRING:
 1424             closeTag ( STANDARD );
 1425             eof=processStringState ( STANDARD );
 1426             openTag ( STANDARD );
 1427             break;
 1428         case DIRECTIVE:
 1429             closeTag ( STANDARD );
 1430             eof=processDirectiveState();
 1431             openTag ( STANDARD );
 1432             break;
 1433         case ESC_CHAR:
 1434             closeTag ( STANDARD );
 1435             eof=processEscapeCharState();
 1436             openTag ( STANDARD );
 1437             break;
 1438         case SYMBOL:
 1439             closeTag ( STANDARD );
 1440             eof=processSymbolState();
 1441             openTag ( STANDARD );
 1442             break;
 1443         case EMBEDDED_CODE_END:
 1444             closeTag ( STANDARD );
 1445             eof=processSyntaxChangeState(state);
 1446             openTag ( STANDARD );
 1447             break;
 1448         case SYNTAX_ERROR:
 1449             closeTag ( STANDARD );
 1450             eof=processSyntaxErrorState();
 1451             openTag ( STANDARD );
 1452             break;
 1453 
 1454         case _EOL:
 1455             // XTERM256 fix (issue with less cmd)
 1456             if  (!firstLine || showLineNumbers) {
 1457                 closeTag ( STANDARD );
 1458             }
 1459             insertLineNumber(!firstLine);
 1460             if (!firstLine || showLineNumbers) {
 1461                 flushWs(5);
 1462                 stateTraceCurrent.clear();
 1463                 openTag ( STANDARD );
 1464             }
 1465             firstLine=false;
 1466             break;
 1467         case _EOF:
 1468             eof=true;
 1469             break;
 1470         case _WS:
 1471             processWsState();
 1472             break;
 1473         default:
 1474             printMaskedToken();
 1475             break;
 1476         }
 1477     } while ( !eof );
 1478 
 1479     if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
 1480         closeTag ( STANDARD );
 1481 
 1482     if (currentSyntax->getDecorateLineEndFct()) {
 1483         Diluculum::LuaValueList res=callDecorateLineFct(false);
 1484         if (res.size()==1) {
 1485             *out << res[0].asString();
 1486         }
 1487     }
 1488 
 1489     printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
 1490     *out << getNewLine();
 1491     *out << flush;
 1492 }
 1493 
 1494 bool CodeGenerator::processSyntaxChangeState(State myState)
 1495 {
 1496     State newState=STANDARD;
 1497     bool eof=false,
 1498          exitState=false;
 1499 
 1500     openTag ( KEYWORD );
 1501     do {
 1502 
 1503         if (myState==EMBEDDED_CODE_END) {
 1504             if (!nestedLangs.empty()) {
 1505                 nestedLangs.pop();
 1506             }
 1507             // load host language syntax
 1508             if (!nestedLangs.empty()) {
 1509                 loadLanguage(nestedLangs.top(), true);
 1510             }
 1511             matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
 1512         }
 1513 
 1514         printMaskedToken ( newState!=_WS );
 1515 
 1516         newState= getCurrentState(myState);
 1517 
 1518         switch ( newState ) {
 1519         case _WS:
 1520             processWsState();
 1521             break;
 1522         case _EOL:
 1523             insertLineNumber();
 1524             exitState=true;
 1525             break;
 1526         case _EOF:
 1527             eof = true;
 1528             break;
 1529         default:
 1530             exitState=true;
 1531             break;
 1532         }
 1533     } while (  !exitState  &&  !eof );
 1534     closeTag ( KEYWORD );
 1535 
 1536     return eof;
 1537 }
 1538 
 1539 
 1540 bool CodeGenerator::processKeywordState ( State myState )
 1541 {
 1542     State newState=STANDARD;
 1543     unsigned int myClassID=currentKeywordClass;
 1544     bool eof=false,
 1545          exitState=false;
 1546 
 1547     openKWTag ( myClassID );
 1548     do {
 1549         printMaskedToken ( newState!=_WS,
 1550                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
 1551         newState= getCurrentState(myState);
 1552         switch ( newState ) {
 1553         case _WS:
 1554             processWsState();
 1555             exitState=isolateTags;
 1556             break;
 1557         case _EOL:
 1558             insertLineNumber();
 1559             exitState=true;
 1560 
 1561             break;
 1562         case _EOF:
 1563             eof = true;
 1564             break;
 1565         case KEYWORD_END:
 1566             exitState=true;
 1567             break;
 1568         default:
 1569             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
 1570             break;
 1571         }
 1572     } while ( !exitState  &&  !eof );
 1573 
 1574     closeKWTag ( myClassID );
 1575 
 1576     currentKeywordClass=0;
 1577     return eof;
 1578 }
 1579 
 1580 bool CodeGenerator::processNumberState()
 1581 {
 1582     State newState=STANDARD;
 1583     bool eof=false,
 1584          exitState=false;
 1585     openTag ( NUMBER );
 1586     do {
 1587         printMaskedToken ( newState!=_WS );
 1588         newState= getCurrentState(NUMBER);
 1589         switch ( newState ) {
 1590         case _WS:
 1591             processWsState();
 1592             exitState=isolateTags;
 1593             break;
 1594         case _EOL:
 1595             insertLineNumber();
 1596             exitState=true;
 1597             break;
 1598         case _EOF:
 1599             eof = true;
 1600             break;
 1601         default:
 1602             exitState=newState!=NUMBER;
 1603             break;
 1604         }
 1605     } while ( !exitState && !eof );
 1606 
 1607     closeTag ( NUMBER );
 1608     return eof;
 1609 }
 1610 
 1611 
 1612 bool CodeGenerator::processMultiLineCommentState()
 1613 {
 1614     int commentCount=1;
 1615     int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
 1616     State newState=STANDARD;
 1617     bool eof=false, exitState=false, containedTestCase=false;
 1618     unsigned int startColumn=lineIndex - token.size() ;
 1619     openTag ( ML_COMMENT );
 1620     do {
 1621         printMaskedToken (newState!=_WS );
 1622         newState= getCurrentState(ML_COMMENT);
 1623 
 1624         switch ( newState ) {
 1625         case _WS:
 1626             processWsState();
 1627             break;
 1628         case _EOL:
 1629             wsBuffer += closeTags[ML_COMMENT];
 1630             insertLineNumber();
 1631             wsBuffer += openTags[ML_COMMENT];
 1632             startColumn=0;
 1633             break;
 1634         case _EOF:
 1635             eof = true;
 1636             break;
 1637         case _TESTPOS:
 1638             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1639             printMaskedToken();
 1640             containedTestCase=true;
 1641             break;
 1642         case ML_COMMENT:
 1643 
 1644             if ( currentSyntax->allowNestedMLComments() ) {
 1645                 ++commentCount;
 1646             }
 1647             // if delimiters are equal, close the comment by continuing to
 1648             // ML_COMMENT_END section
 1649             if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;
 1650 
 1651         case ML_COMMENT_END:
 1652 
 1653             if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
 1654                 break;
 1655             }
 1656             commentCount--;
 1657             if ( !commentCount ) {
 1658                 printMaskedToken();
 1659                 exitState=true;
 1660             }
 1661             break;
 1662         default:
 1663             break;
 1664         }
 1665     } while ( !exitState  &&  !eof );
 1666 
 1667     closeTag ( ML_COMMENT );
 1668 
 1669     if (containedTestCase){
 1670         stateTraceCurrent.clear();
 1671     }
 1672     return eof;
 1673 }
 1674 
 1675 
 1676 bool CodeGenerator::processSingleLineCommentState()
 1677 {
 1678     State newState=STANDARD;
 1679     bool eof=false, exitState=false, containedTestCase=false;
 1680     unsigned int startColumn = lineIndex - token.size() ;
 1681 
 1682     openTag ( SL_COMMENT );
 1683     do {
 1684         printMaskedToken ( newState!=_WS );
 1685         newState= getCurrentState(SL_COMMENT);
 1686 
 1687         switch ( newState ) {
 1688         case _WS:
 1689             processWsState();
 1690             break;
 1691         case _EOL:
 1692             printMaskedToken();
 1693             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1694                 exitState=false;
 1695             } else {
 1696                 exitState=true;
 1697             }
 1698             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
 1699             insertLineNumber();
 1700             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
 1701 
 1702             break;
 1703         case _EOF:
 1704             eof = true;
 1705             break;
 1706         case _TESTPOS:
 1707             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1708             printMaskedToken();
 1709             containedTestCase=true;
 1710             break;
 1711 
 1712         default:
 1713             break;
 1714         }
 1715     } while ( !exitState  &&  !eof );
 1716 
 1717     closeTag ( SL_COMMENT );
 1718 
 1719     if (containedTestCase) {
 1720         stateTraceCurrent.clear();
 1721     }
 1722 
 1723     return eof;
 1724 }
 1725 
 1726 bool CodeGenerator::processDirectiveState()
 1727 {
 1728     State  newState=STANDARD;
 1729     bool eof=false, exitState=false;
 1730 
 1731     openTag ( DIRECTIVE );
 1732     do {
 1733         printMaskedToken ( newState!=_WS );
 1734         newState= getCurrentState(DIRECTIVE);
 1735         switch ( newState ) {
 1736         case _WS:
 1737             processWsState();
 1738             break;
 1739         case DIRECTIVE_END:
 1740             printMaskedToken();
 1741             exitState=true;
 1742             break;
 1743         case _EOL:
 1744             printMaskedToken();
 1745 
 1746             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1747                 exitState=false;
 1748             } else {
 1749                 if (currentSyntax->getContinuationChar()!=0x13){
 1750                     exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
 1751                 }
 1752             }
 1753             if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
 1754             insertLineNumber();
 1755             if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
 1756             break;
 1757         case ML_COMMENT:
 1758             closeTag ( DIRECTIVE );
 1759             eof= processMultiLineCommentState();
 1760             openTag ( DIRECTIVE );
 1761             break;
 1762         case SL_COMMENT:
 1763             closeTag ( DIRECTIVE );
 1764             eof= processSingleLineCommentState();
 1765             openTag ( DIRECTIVE );
 1766             exitState=true;
 1767             break;
 1768         case STRING:
 1769             closeTag ( DIRECTIVE );
 1770             eof=processStringState ( DIRECTIVE );
 1771             openTag ( DIRECTIVE );
 1772             break;
 1773         case _EOF:
 1774             eof = true;
 1775             break;
 1776         default:
 1777             break;
 1778         }
 1779     } while ( !exitState && !eof );
 1780 
 1781     closeTag ( DIRECTIVE );
 1782     return eof;
 1783 }
 1784 
 1785 bool CodeGenerator::processStringState ( State oldState )
 1786 {
 1787     State newState=STANDARD;
 1788     bool eof=false, exitState=false;
 1789     bool returnedFromOtherState=false;
 1790 
 1791     State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
 1792 
 1793     int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
 1794     string openDelim=token;
 1795 
 1796     //Raw String by definition:
 1797     bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;
 1798 
 1799     // Test if character before string open delimiter token equals to the
 1800     // raw string prefix (Example: r" ", r""" """ in Python)
 1801 
 1802     //Raw String Prefix:
 1803     if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
 1804         isRawString=true;
 1805     }
 1806 
 1807     openTag ( myState );
 1808     do {
 1809         // true if last token was an escape char
 1810         if ( !returnedFromOtherState ) {
 1811             printMaskedToken (newState!=_WS );
 1812         }
 1813         returnedFromOtherState=false;
 1814         newState= getCurrentState(myState);
 1815 
 1816         switch ( newState ) {
 1817         case _WS:
 1818             processWsState();
 1819             break;
 1820         case _EOL:
 1821             wsBuffer += closeTags[myState];
 1822             insertLineNumber();
 1823             wsBuffer += openTags[myState];
 1824             break;
 1825         case STRING_END:
 1826             if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
 1827                 if (currentSyntax->assertDelimEqualLength()) {
 1828                     exitState= openDelim.length()==token.length();
 1829                 } else {
 1830                     exitState= true;
 1831                 }
 1832                 printMaskedToken();
 1833             }
 1834             break;
 1835         case STRING:
 1836             // if there exist multiple string delimiters, close string if
 1837             // current delimiter is equal to the opening delimiter
 1838             exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  )) && token==openDelim;
 1839             printMaskedToken();
 1840             break;
 1841         case ESC_CHAR:
 1842             if ( !isRawString ) {
 1843                 closeTag ( myState );
 1844                 eof=processEscapeCharState();
 1845                 openTag ( myState );
 1846                 returnedFromOtherState=true;
 1847             } else {
 1848                 // FIXME not a fix for Python r"""\"""
 1849                 exitState=token.size()>1 && token[1] == openDelim[0];
 1850                 printMaskedToken();
 1851             }
 1852             break;
 1853         case STRING_INTERPOLATION:
 1854             closeTag ( myState );
 1855             eof=processInterpolationState();
 1856             openTag ( myState );
 1857             returnedFromOtherState=true;
 1858             break;
 1859 
 1860         case _EOF:
 1861             eof = true;
 1862             break;
 1863         default:
 1864             printMaskedToken();
 1865             break;
 1866         }
 1867     } while ( !exitState && !eof );
 1868 
 1869     closeTag ( myState );
 1870 
 1871     toggleDynRawString = false;
 1872 
 1873     return eof;
 1874 }
 1875 
 1876 bool CodeGenerator::processSymbolState()
 1877 {
 1878     State newState=STANDARD;
 1879     bool eof=false,
 1880          exitState=false;
 1881 
 1882     openTag ( SYMBOL );
 1883     do {
 1884         printMaskedToken ( newState!=_WS );
 1885         newState= getCurrentState(SYMBOL);
 1886         switch ( newState ) {
 1887         case _WS:
 1888             processWsState();
 1889             exitState=isolateTags;
 1890             break;
 1891         case _EOL:
 1892             insertLineNumber();
 1893             exitState=true;
 1894             break;
 1895         case _EOF:
 1896             eof = true;
 1897             break;
 1898         default:
 1899             exitState=newState!=SYMBOL;
 1900             break;
 1901         }
 1902     } while ( !exitState && !eof );
 1903 
 1904     closeTag ( SYMBOL );
 1905     return eof;
 1906 }
 1907 
 1908 bool CodeGenerator::processSyntaxErrorState()
 1909 {
 1910     State newState=STANDARD;
 1911     bool eof=false,
 1912     exitState=false;
 1913 
 1914     openTag ( SYNTAX_ERROR );
 1915     do {
 1916         printMaskedToken ( newState!=_WS );
 1917         newState= getCurrentState(SYNTAX_ERROR);
 1918         switch ( newState ) {
 1919             case _WS:
 1920                 processWsState();
 1921                 exitState=isolateTags;
 1922                 break;
 1923             case _EOL:
 1924                 insertLineNumber();
 1925                 exitState=true;
 1926                 break;
 1927             case _EOF:
 1928                 eof = true;
 1929                 break;
 1930             default:
 1931                 exitState=newState!=SYMBOL;
 1932                 break;
 1933         }
 1934     } while ( !exitState && !eof );
 1935 
 1936     closeTag ( SYNTAX_ERROR );
 1937     return eof;
 1938 }
 1939 
 1940 bool CodeGenerator::processEscapeCharState()
 1941 {
 1942     State newState=STANDARD;
 1943     bool eof=false, exitState=false;
 1944     openTag ( ESC_CHAR );
 1945     do {
 1946         printMaskedToken (newState!=_WS );
 1947         newState= getCurrentState(ESC_CHAR);
 1948         switch ( newState ) {
 1949         case _EOL:
 1950             insertLineNumber();
 1951             exitState=true;
 1952             break;
 1953         case _WS:
 1954             processWsState();
 1955             exitState=isolateTags;
 1956             break;
 1957         case _EOF:
 1958             eof = true;
 1959             break;
 1960         default:
 1961             exitState=newState!=ESC_CHAR;
 1962             break;
 1963         }
 1964     } while ( !exitState && !eof );
 1965 
 1966     closeTag ( ESC_CHAR );
 1967     return eof;
 1968 }
 1969 
 1970 bool CodeGenerator::processInterpolationState()
 1971 {
 1972     State newState=STANDARD;
 1973     bool eof=false, exitState=false;
 1974     openTag ( STRING_INTERPOLATION );
 1975     do {
 1976         printMaskedToken (newState!=_WS );
 1977         newState= getCurrentState(STRING_INTERPOLATION);
 1978         switch ( newState ) {
 1979         case _EOL:
 1980             insertLineNumber();
 1981             exitState=true;
 1982             break;
 1983         case _WS:
 1984             processWsState();
 1985             exitState=isolateTags;
 1986             break;
 1987         case _EOF:
 1988             eof = true;
 1989             break;
 1990         default:
 1991             exitState=newState!=STRING_INTERPOLATION;
 1992             break;
 1993         }
 1994     } while ( !exitState && !eof );
 1995 
 1996     closeTag ( STRING_INTERPOLATION );
 1997     return eof;
 1998 }
 1999 
 2000 void CodeGenerator::processWsState()
 2001 {
 2002 
 2003     if ( !maskWs ) {
 2004         wsBuffer += token;
 2005         token.clear();
 2006         return;
 2007     }
 2008 
 2009     flushWs(6);
 2010 
 2011     int cntWs=0;
 2012     lineIndex--;
 2013     PositionState ps(currentState, 0, true);
 2014 
 2015     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
 2016         ++cntWs;
 2017         ++lineIndex;
 2018     }
 2019 
 2020     if ( cntWs>1 ) {
 2021 
 2022         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
 2023         if ( excludeWs && styleID!=_UNKNOWN ) {
 2024             *out << closeTags[styleID];
 2025         }
 2026 
 2027         *out << maskWsBegin;
 2028         for ( int i=0; i<cntWs; i++ ) {
 2029             *out << spacer;
 2030             if (applySyntaxTestCase){
 2031                 stateTraceCurrent.push_back(ps);
 2032             }
 2033         }
 2034         *out << maskWsEnd;
 2035         if ( excludeWs && styleID!=_UNKNOWN ) {
 2036             *out << openTags[styleID];
 2037         }
 2038     } else {
 2039 
 2040         *out << spacer; //Bugfix fehlender Space nach Strings
 2041         if (applySyntaxTestCase){
 2042             stateTraceCurrent.push_back(ps);
 2043         }
 2044     }
 2045 
 2046     spacer = initialSpacer;
 2047 
 2048     token.clear();
 2049 }
 2050 
 2051 void CodeGenerator::flushWs(int arg)
 2052 {
 2053     PositionState ps(currentState, 0, true);
 2054     //workaround condition
 2055     for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
 2056         stateTraceCurrent.push_back(ps);
 2057         //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
 2058     }
 2059 
 2060     //fix canvas whitespace
 2061     if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
 2062         *out<<maskWsBegin;
 2063     }
 2064 
 2065     *out << wsBuffer;
 2066     wsBuffer.clear();
 2067 }
 2068 
 2069 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
 2070     switch (s) {
 2071 
 2072         case STANDARD:
 2073             return STY_NAME_STD;
 2074         case STRING:
 2075             return STY_NAME_STR;
 2076         case NUMBER:
 2077             return STY_NAME_NUM;
 2078         case SL_COMMENT:
 2079             return STY_NAME_SLC;
 2080         case ML_COMMENT:
 2081             return STY_NAME_COM;
 2082         case ESC_CHAR:
 2083             return STY_NAME_ESC;
 2084         case DIRECTIVE:
 2085             return STY_NAME_DIR;
 2086         case DIRECTIVE_STRING:
 2087             return STY_NAME_DST;
 2088         case SYMBOL:
 2089             return STY_NAME_SYM;
 2090         case STRING_INTERPOLATION:
 2091             return STY_NAME_IPL;
 2092         case SYNTAX_ERROR:
 2093             return STY_NAME_ERR;
 2094         case _WS:
 2095             return "ws";
 2096         case KEYWORD: {
 2097 
 2098             if (!kwClass)
 2099                 return "ws";
 2100 
 2101             char kwName[20] = {0};
 2102             snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
 2103 
 2104             return string(kwName);
 2105         }
 2106         default:
 2107             return "unknown_test";
 2108     }
 2109 }
 2110 
 2111 void CodeGenerator::printTrace(const string &s){
 2112     std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
 2113     for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
 2114         std::cout<<" "<<stateTraceCurrent[i].state;
 2115     }
 2116     std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
 2117     for (unsigned int i=0; i< stateTraceTest.size(); i++) {
 2118         std::cout<<" "<<stateTraceTest[i].state;
 2119     }
 2120     std::cout<<"\n";
 2121 }
 2122 
 2123 //column: lineIndex (not a UTF-8 validated string position)
 2124 void CodeGenerator::runSyntaxTestcases(unsigned int column){
 2125 
 2126     if (encoding=="utf-8")
 2127         column = StringTools::utf8_strlen(line.substr(0, column));
 2128 
 2129     unsigned int assertGroup=0;
 2130     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
 2131     State assertState=_UNKNOWN;
 2132     bool negation=false;
 2133     bool testFailed=false;
 2134 
 2135     ostringstream errMsg;
 2136     string prefix;
 2137     //printTrace("trace 2");
 2138 
 2139     if (typeDescPos!=string::npos) {
 2140 
 2141         if (line[typeDescPos]=='~') {
 2142 
 2143             negation=true;
 2144             prefix="~";
 2145             ++typeDescPos;
 2146         }
 2147 
 2148         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
 2149             assertState=NUMBER;
 2150         //TODO temp. fix to allow old and new string classes
 2151         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
 2152             assertState=STRING;
 2153         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
 2154             assertState=ESC_CHAR;
 2155         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
 2156             assertState=STRING_INTERPOLATION;
 2157         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
 2158             assertState=SYMBOL;
 2159         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
 2160             assertState=DIRECTIVE;
 2161         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
 2162             assertState=SL_COMMENT;
 2163         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
 2164             assertState=ML_COMMENT;
 2165         else if (line.find("ws", typeDescPos)==typeDescPos)
 2166             assertState=_WS;
 2167         //TODO temp. fix to allow old and new default classes
 2168         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos  || line.find("std", typeDescPos)==typeDescPos)
 2169             assertState=STANDARD;
 2170         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
 2171             assertState=DIRECTIVE_STRING;
 2172 
 2173         else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
 2174             assertState=KEYWORD;
 2175             if (isalpha(line[typeDescPos+2]))
 2176                 assertGroup=line[typeDescPos+2] - 'a' +1;
 2177         }
 2178 
 2179        if (   (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
 2180             || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
 2181             || assertGroup != stateTraceTest[column].kwClass) {
 2182 
 2183             testFailed=!negation;
 2184 
 2185         } else if (negation ) {
 2186 
 2187             //TODO Fix ~ws
 2188             if (assertState!=_WS  && !stateTraceTest[column].isWhiteSpace )
 2189                 testFailed=true;
 2190         }
 2191 
 2192         if (testFailed) {
 2193             errMsg << inFile << " line " << lineNumber << ", column "<< column
 2194                     << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
 2195                     << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
 2196 
 2197             failedPosTests.push_back(errMsg.str());
 2198         }
 2199 
 2200     }
 2201 
 2202     lineContainedTestCase=true;
 2203 }
 2204 
 2205 string CodeGenerator::getNewLine()
 2206 {
 2207     ostringstream ss;
 2208     printSyntaxError(ss);
 2209     if (printNewLines)
 2210         ss << newLineTag;
 2211     return ss.str();
 2212 }
 2213 
 2214 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
 2215 {
 2216 
 2217     Diluculum::LuaValueList params;
 2218     params.push_back(Diluculum::LuaValue(lineNumber));
 2219 
 2220     return currentSyntax->getLuaState()->call ( isLineStart ?
 2221             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
 2222             params,"getDecorateLineFct call");
 2223 }
 2224 
 2225 void CodeGenerator::setOverrideParams() {
 2226     if (currentSyntax->requiresParamUpdate()) {
 2227         if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
 2228             toggleDynRawString=true; // reset to false in string state fct
 2229         }
 2230         if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
 2231             maskWs=true;
 2232         }
 2233         if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
 2234             spacer=currentSyntax->getOverrideConfigVal("format.spacer");
 2235         }
 2236     }
 2237 }
 2238 
 2239 void CodeGenerator::insertLineNumber ( bool insertNewLine )
 2240 {
 2241     if ( insertNewLine ) {
 2242         if (currentSyntax->getDecorateLineEndFct()) {
 2243             Diluculum::LuaValueList res=callDecorateLineFct(false);
 2244             if (res.size()==1) {
 2245                 setOverrideParams();
 2246                 wsBuffer +=res[0].asString();
 2247             }
 2248         }
 2249         wsBuffer += getNewLine();
 2250     }
 2251 
 2252     if (currentSyntax->getDecorateLineBeginFct()) {
 2253         Diluculum::LuaValueList res=callDecorateLineFct(true);
 2254         if (res.size()==1) {
 2255             setOverrideParams();
 2256             wsBuffer += res[0].asString();
 2257         }
 2258     }
 2259 
 2260     if ( showLineNumbers ) {
 2261         ostringstream os;
 2262         ostringstream numberPrefix;
 2263 
 2264         os << setw ( getLineNumberWidth() ) << right;
 2265         if( numberCurrentLine ) {
 2266             if ( lineNumberFillZeroes ) {
 2267                 os.fill ( '0' );
 2268             }
 2269             os << lineNumber+lineNumberOffset;
 2270         } else {
 2271             os << "";
 2272         }
 2273 
 2274         numberPrefix << openTags[LINENUMBER];
 2275         maskString ( numberPrefix, os.str() );
 2276 
 2277         //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
 2278         numberPrefix << initialSpacer << closeTags[LINENUMBER];
 2279         wsBuffer += numberPrefix.str();
 2280     }
 2281 }
 2282 
 2283 unsigned int CodeGenerator::getLineIndex()
 2284 {
 2285     return lineIndex;
 2286 }
 2287 unsigned int CodeGenerator::getLastLineLength()
 2288 {
 2289     return lastLineLength;
 2290 }
 2291 
 2292 bool CodeGenerator::requiresTwoPassParsing() const {
 2293     if (!currentSyntax) return false;
 2294     return currentSyntax->getPersistentSnippetsNum()>0;
 2295 }
 2296 
 2297 
 2298 bool CodeGenerator::printExternalStyle ( const string &outFile )
 2299 {
 2300     if ( !includeStyleDef ) {
 2301         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
 2302         if ( !cssOutFile->fail() ) {
 2303             if (!omitVersionComment) {
 2304                 *cssOutFile << styleCommentOpen
 2305                             <<" Style definition file generated by highlight "
 2306                             << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
 2307                             << " " << styleCommentClose << "\n";
 2308             }
 2309             *cssOutFile << getStyleDefinition()
 2310                         << "\n";
 2311             *cssOutFile << readUserStyleDef();
 2312             if ( !outFile.empty() ) delete cssOutFile;
 2313         } else {
 2314             return false;
 2315         }
 2316     }
 2317     return true;
 2318 }
 2319 
 2320 bool CodeGenerator::printPersistentState ( const string &outFile )
 2321 {
 2322     if (!currentSyntax) return false;
 2323 
 2324     ofstream pluginOutFile( outFile.c_str());
 2325     if ( !pluginOutFile.fail() ) {
 2326 
 2327         pluginOutFile   <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
 2328                         <<"Categories = {\"two-pass\" }\n\n"
 2329                         <<"function syntaxUpdate(desc)\n\n";
 2330 
 2331         pluginOutFile << currentSyntax->getPersistentHookConditions();
 2332 
 2333         for (auto snippet: currentSyntax->getPersistentSnippets())
 2334         {
 2335             pluginOutFile << snippet <<"\n\n";
 2336         }
 2337 
 2338         pluginOutFile<<"end\n\n"
 2339                      <<"Plugins={\n"
 2340                      <<"  { Type=\"lang\", Chunk=syntaxUpdate }\n"
 2341                      <<"}\n";
 2342     } else {
 2343         return false;
 2344     }
 2345 
 2346     return true;
 2347 }
 2348 
 2349 string CodeGenerator::readUserStyleDef()
 2350 {
 2351     ostringstream ostr;
 2352     if ( !styleInputPath.empty() ) {
 2353         ifstream userStyleDef ( styleInputPath.c_str() );
 2354         if ( userStyleDef ) {
 2355             ostr << "\n" << styleCommentOpen
 2356                 << " Content of " << styleInputPath
 2357                 << ": " <<styleCommentClose << "\n";
 2358             string line;
 2359             while ( getline ( userStyleDef, line ) ) {
 2360                 ostr << line << "\n";
 2361             }
 2362             userStyleDef.close();
 2363         } else {
 2364             ostr << styleCommentOpen
 2365                 << " ERROR: Could not include " << styleInputPath
 2366                 << "." << styleCommentClose << "\n";
 2367         }
 2368     }
 2369 
 2370     string injections=docStyle.getInjections();
 2371     if (!injections.empty()) {
 2372         ostr    << "\n" << styleCommentOpen
 2373                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
 2374         ostr << injections<<"\n";
 2375     }
 2376     return ostr.str();
 2377 }
 2378 
 2379 bool CodeGenerator::initPluginScript(const string& script)
 2380 {
 2381 
 2382     if (script.empty()) return true;
 2383 
 2384     try {
 2385 
 2386         userScriptError="";
 2387         Diluculum::LuaState ls;
 2388 
 2389         ls.doFile (script);
 2390         int listIdx=1;
 2391 
 2392         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
 2393 
 2394             // Theme plugins
 2395             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
 2396                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2397                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2398                 }
 2399             }
 2400             // Syntax plugins
 2401             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
 2402                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2403                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2404                 }
 2405             }
 2406             // Format plugins
 2407             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
 2408                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2409                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2410                 }
 2411             }
 2412 
 2413             listIdx++;
 2414         }
 2415     }  catch (Diluculum::LuaError &err) {
 2416         userScriptError=err.what();
 2417         return false;
 2418     }
 2419     return true;
 2420 }
 2421 
 2422 void CodeGenerator::resetSyntaxReaders() {
 2423     for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
 2424         delete it->second;
 2425     }
 2426     currentSyntax=NULL;
 2427     syntaxReaders.clear();
 2428 }
 2429 
 2430 bool CodeGenerator::syntaxRequiresTwoPassRun() {
 2431     if (!currentSyntax) return false;
 2432     return currentSyntax->requiresTwoPassRun();
 2433 }
 2434 
 2435 void CodeGenerator::clearPersistentSnippets(){
 2436     if (currentSyntax) {
 2437         currentSyntax->clearPersistentSnippets();
 2438     }
 2439 }
 2440 
 2441 void CodeGenerator::updateKeywordClasses(){
 2442 
 2443     if (openTags.size()) {
 2444         if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
 2445             // remove dynamic keyword tag delimiters of the old language definition
 2446             vector<string>::iterator keyStyleOpenBegin =
 2447             openTags.begin() + NUMBER_BUILTIN_STATES;
 2448             vector<string>::iterator keyStyleCloseBegin =
 2449             closeTags.begin() + NUMBER_BUILTIN_STATES;
 2450             openTags.erase ( keyStyleOpenBegin, openTags.end() );
 2451             closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
 2452         }
 2453         // add new keyword tag delimiters
 2454 
 2455         for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
 2456             openTags.push_back ( getKeywordOpenTag ( i ) );
 2457             closeTags.push_back ( getKeywordCloseTag ( i ) );
 2458         }
 2459     }
 2460 }
 2461 
 2462 
 2463 }