"Fossies" - the Fresh Open Source Software Archive

Member "highlight-4.6/src/core/codegenerator.cpp" (19 May 2023, 71381 Bytes) of package /linux/www/highlight-4.6.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "codegenerator.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.5_vs_4.6.

    1 /***************************************************************************
    2                           codegenerator.cpp  -  description
    3                              -------------------
    4     begin                : Die Jul 9 2002
    5     copyright            : (C) 2002-2023 by Andre Simon
    6     email                : a.simon@mailbox.org
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #include <climits>
   29 #include <memory>
   30 
   31 #include <chrono>
   32 #include <thread>
   33 
   34 
   35 #include <boost/xpressive/xpressive_dynamic.hpp>
   36 
   37 #include "codegenerator.h"
   38 
   39 #include "htmlgenerator.h"
   40 #include "xhtmlgenerator.h"
   41 #include "rtfgenerator.h"
   42 #include "latexgenerator.h"
   43 #include "texgenerator.h"
   44 #include "svggenerator.h"
   45 #include "bbcodegenerator.h"
   46 #include "pangogenerator.h"
   47 #include "odtgenerator.h"
   48 #include "astyle/astyle.h"
   49 
   50 #if !defined (QT)
   51 #include "ansigenerator.h"
   52 #include "xterm256generator.h"
   53 #endif
   54 
   55 namespace highlight
   56 {
   57 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
   58 
   59 // must not start with kw, st, sm prefixes
   60 const string CodeGenerator::STY_NAME_STD="def";
   61 const string CodeGenerator::STY_NAME_STR="sng";
   62 const string CodeGenerator::STY_NAME_NUM="num";
   63 const string CodeGenerator::STY_NAME_SLC="slc";
   64 const string CodeGenerator::STY_NAME_COM="com";
   65 const string CodeGenerator::STY_NAME_ESC="esc";
   66 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
   67 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
   68 const string CodeGenerator::STY_NAME_LIN="lin";
   69 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
   70 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
   71 
   72 const string CodeGenerator::STY_NAME_HVR="hvr";
   73 const string CodeGenerator::STY_NAME_ERR="err";
   74 const string CodeGenerator::STY_NAME_ERM="erm";
   75 
   76 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
   77 
   78 
   79 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
   80 {
   81     CodeGenerator* generator=NULL;
   82     switch ( type ) {
   83     case HTML:
   84         generator = new HtmlGenerator();
   85         break;
   86     case XHTML:
   87         generator = new XHtmlGenerator();
   88         break;
   89     case TEX:
   90         generator = new TexGenerator ();
   91         break;
   92     case LATEX:
   93         generator = new LatexGenerator();
   94         break;
   95     case RTF:
   96         generator = new RtfGenerator ();
   97         break;
   98     case SVG:
   99         generator = new SVGGenerator();
  100         break;
  101     case BBCODE:
  102         generator = new BBCodeGenerator();
  103         break;
  104     case PANGO:
  105         generator = new PangoGenerator();
  106         break;
  107     case ODTFLAT:
  108         generator = new ODTGenerator();
  109         break;
  110     case ESC_ANSI:
  111         generator = new AnsiGenerator();
  112         break;
  113     case ESC_XTERM256:
  114     case ESC_TRUECOLOR:
  115         generator = new Xterm256Generator();
  116         generator->setESCTrueColor(type==ESC_TRUECOLOR);
  117         break;
  118     default:
  119         break;
  120     }
  121     return generator;
  122 }
  123 
  124 
  125 CodeGenerator::CodeGenerator ( highlight::OutputType type )
  126     :currentSyntax(NULL),
  127      in ( NULL ),
  128      out ( NULL ),
  129      encoding ( "none" ),
  130      docTitle ( "Source file" ),
  131      maskWs ( false ),
  132      excludeWs ( false ),
  133      fragmentOutput ( false ),
  134      keepInjections( false ),
  135      showLineNumbers ( false ),
  136      lineNumberFillZeroes ( false ),
  137      printNewLines(true),
  138      omitVersionComment(false),
  139      isolateTags(false),
  140      disableStyleCache(false),
  141      baseFontSize("10"),
  142      lineNumber ( 0 ),
  143      lineNumberOffset ( 0 ),
  144      currentState ( _UNKNOWN ),
  145      currentKeywordClass ( 0 ),
  146      includeStyleDef ( false ),
  147      numberCurrentLine ( false ),
  148      lineIndex ( 0 ),
  149      lastLineLength( 0 ),
  150      syntaxChangeIndex(UINT_MAX),
  151      syntaxChangeLineNo(UINT_MAX),
  152      lineNumberWidth ( 5 ),
  153      startLineCnt( 1 ),
  154      startLineCntCurFile( 1 ),
  155      maxLineCnt ( UINT_MAX ),
  156      inputFilesCnt (0),
  157      processedFilesCnt (0),
  158      kwOffset(0),
  159      noTrailingNewLine(0),
  160 
  161      terminatingChar ( '\0' ),
  162      formatter ( NULL ),
  163      streamIterator ( NULL ),
  164      formattingEnabled ( false ),
  165      formattingPossible ( false ),
  166      validateInput ( false ),
  167      numberWrappedLines ( true ),
  168      resultOfHook(false),
  169      lineContainedTestCase(false),
  170      lineContainedStmt(false),
  171      applySyntaxTestCase(false),
  172      toggleDynRawString(false),
  173      lsEnableHoverRequests(false),
  174      lsCheckSemanticTokens(false),
  175      lsCheckSyntaxErrors(false),
  176 
  177      keywordCase ( StringTools::CASE_UNCHANGED ),
  178      eolDelimiter ('\n'),
  179      outputType ( type )
  180 {
  181 }
  182 
  183 
  184 CodeGenerator::~CodeGenerator()
  185 {
  186     delete formatter;
  187     delete streamIterator;
  188 
  189     resetSyntaxReaders();
  190 
  191     for (unsigned int i=0; i<pluginChunks.size(); i++) {
  192         delete pluginChunks[i];
  193     }
  194     pluginChunks.clear();
  195 }
  196 
  197 
  198 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
  199 {
  200     this->themePath=themePath;
  201     bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
  202     initOutputTags();
  203     return loadOK;
  204 }
  205 
  206 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
  207                                              const string& workspace, const string& syntax,
  208                                              int delay, int logLevel, bool legacy )
  209 {
  210     if (LSPClient.isInitialized()) {
  211         return LSResult::INIT_OK;
  212     }
  213 
  214     LSPClient.setLogging(logLevel>1);
  215 
  216     LSPClient.setExecutable(executable);
  217     LSPClient.setWorkspace(workspace);
  218     LSPClient.setOptions(options);
  219     LSPClient.setSyntax(syntax);
  220     LSPClient.setInitializeDelay(delay);
  221     LSPClient.setLegacyProtocol(legacy);
  222     if (!LSPClient.connect()){
  223         return LSResult::INIT_BAD_PIPE;
  224     }
  225 
  226     if (!LSPClient.runInitialize()){
  227         return LSResult::INIT_BAD_REQUEST;
  228     }
  229     for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
  230         currentSyntax->generateNewKWClass(i+1, "st");
  231     }
  232     LSPClient.runInitialized();
  233     updateKeywordClasses();
  234     return LSResult::INIT_OK;
  235 }
  236 
  237 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
  238     lsDocumentPath = fileName;
  239     return LSPClient.runDidOpen(fileName, suffix);
  240 }
  241 
  242 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
  243     lsDocumentPath.clear();
  244     return LSPClient.runDidClose(fileName, suffix);
  245 }
  246 
  247 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
  248     lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
  249     return lsCheckSemanticTokens;
  250 }
  251 
  252 bool CodeGenerator::isHoverProvider(){
  253     return LSPClient.isHoverProvider();
  254 }
  255 
  256 bool CodeGenerator::isSemanticTokensProvider(){
  257     return LSPClient.isSemanticTokensProvider();
  258 }
  259 
  260 void CodeGenerator::lsAddHoverInfo(bool hover){
  261     lsEnableHoverRequests = hover;
  262 }
  263 
  264 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
  265     lsCheckSyntaxErrors = error;;
  266 }
  267 
  268 
  269 void CodeGenerator::exitLanguageServer () {
  270     LSPClient.runShutdown();
  271     LSPClient.runExit();
  272 }
  273 
  274 const string& CodeGenerator::getStyleName()
  275 {
  276     return themePath;
  277 }
  278 
  279 void CodeGenerator::setLineNumberWidth ( int w )
  280 {
  281     lineNumberWidth=w;
  282 }
  283 
  284 int CodeGenerator::getLineNumberWidth()
  285 {
  286     return lineNumberWidth;
  287 }
  288 
  289 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
  290 {
  291     showLineNumbers=flag;
  292     lineNumberOffset = startCnt-1;
  293 }
  294 
  295 bool CodeGenerator::getPrintLineNumbers()
  296 {
  297     return showLineNumbers;
  298 }
  299 
  300 void CodeGenerator::setPrintZeroes ( bool flag )
  301 {
  302     lineNumberFillZeroes=flag;
  303 }
  304 
  305 bool CodeGenerator::getPrintZeroes()
  306 {
  307     return lineNumberFillZeroes;
  308 }
  309 
  310 void CodeGenerator::setIncludeStyle ( bool flag )
  311 {
  312     includeStyleDef = flag;
  313 }
  314 
  315 void CodeGenerator::disableTrailingNL ( int flag )
  316 {
  317     noTrailingNewLine = flag;
  318 }
  319 
  320 void CodeGenerator::setStyleInputPath ( const string& path )
  321 {
  322     styleInputPath = path;
  323 }
  324 
  325 void CodeGenerator::setStyleOutputPath ( const string& path )
  326 {
  327     styleOutputPath = path;
  328 }
  329 
  330 void CodeGenerator::setPluginParameter ( const string& param )
  331 {
  332     pluginParameter = param;
  333 }
  334 
  335 const string&  CodeGenerator::getStyleInputPath()
  336 {
  337     return styleInputPath;
  338 }
  339 
  340 const string&  CodeGenerator::getStyleOutputPath()
  341 {
  342     return styleOutputPath;
  343 }
  344 
  345 void CodeGenerator::setFragmentCode ( bool flag )
  346 {
  347     fragmentOutput=flag;
  348 }
  349 
  350 bool CodeGenerator::getFragmentCode()
  351 {
  352     return fragmentOutput;
  353 }
  354 void CodeGenerator::setKeepInjections ( bool flag )
  355 {
  356     keepInjections=flag;
  357 }
  358 
  359 bool CodeGenerator::getKeepInjections()
  360 {
  361     return keepInjections;
  362 }
  363 void CodeGenerator::setValidateInput ( bool flag )
  364 {
  365     validateInput=flag;
  366 }
  367 
  368 bool CodeGenerator::getValidateInput()
  369 {
  370     return validateInput;
  371 }
  372 
  373 void CodeGenerator::setNumberWrappedLines ( bool flag )
  374 {
  375     numberWrappedLines=flag;
  376 }
  377 
  378 bool CodeGenerator::getNumberWrappedLines()
  379 {
  380     return numberWrappedLines;
  381 }
  382 
  383 void CodeGenerator::setOmitVersionComment ( bool flag )
  384 {
  385     omitVersionComment=flag;
  386 }
  387 
  388 bool CodeGenerator::getOmitVersionComment ()
  389 {
  390     return omitVersionComment;
  391 }
  392 
  393 void CodeGenerator::setIsolateTags ( bool flag )
  394 {
  395     isolateTags=flag;
  396 }
  397 
  398 bool CodeGenerator::getIsolateTags ()
  399 {
  400     return isolateTags;
  401 }
  402 
  403 void CodeGenerator::setBaseFont ( const string& fontName )
  404 {
  405     baseFont = fontName;
  406 }
  407 
  408 void CodeGenerator::setBaseFontSize ( const string& fontSize)
  409 {
  410     baseFontSize = fontSize;
  411 }
  412 
  413 void CodeGenerator::setStyleCaching ( bool flag )
  414 {
  415     disableStyleCache=!flag;
  416 }
  417 
  418 const string CodeGenerator::getBaseFont() const
  419 {
  420     if ( !baseFont.empty() ) return baseFont;
  421     switch ( outputType ) {
  422     case HTML:
  423     case XHTML:
  424     case SVG:
  425         return "'Courier New',monospace";
  426         break;
  427     case LATEX:
  428         return "ttfamily";
  429         break;
  430     case TEX:
  431         return "tt";
  432         break;
  433     default:
  434         return "Courier New";
  435     }
  436 }
  437 
  438 const string CodeGenerator::getBaseFontSize()
  439 {
  440     return baseFontSize;
  441 }
  442 
  443 void CodeGenerator::setTitle ( const string & title )
  444 {
  445     if ( !title.empty() ) docTitle= title;
  446 }
  447 
  448 string CodeGenerator::getTitle()
  449 {
  450     return docTitle;
  451 }
  452 
  453 void CodeGenerator::setEncoding ( const string& encodingName )
  454 {
  455     encoding = encodingName;
  456 }
  457 
  458 bool CodeGenerator::formattingDisabled()
  459 {
  460     return !formattingEnabled;
  461 }
  462 
  463 void CodeGenerator::setStartingInputLine ( unsigned int begin )
  464 {
  465     startLineCnt = startLineCntCurFile = begin;
  466 }
  467 
  468 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
  469 {
  470     maxLineCnt = cnt;
  471 }
  472 
  473 void CodeGenerator::setFilesCnt ( unsigned int cnt )
  474 {
  475     inputFilesCnt = cnt;
  476     processedFilesCnt = 0;
  477 }
  478 
  479 bool CodeGenerator::formattingIsPossible()
  480 {
  481     return formattingPossible;
  482 }
  483 unsigned char CodeGenerator::getAdditionalEOFChar()
  484 {
  485     return extraEOFChar;
  486 }
  487 void CodeGenerator::setAdditionalEOFChar ( unsigned char eofChar )
  488 {
  489     extraEOFChar = eofChar;
  490 }
  491 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
  492                                        unsigned int lineLength,
  493                                        int numberSpaces )
  494 {
  495     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
  496     bool replaceTabs = numberSpaces > 0;
  497 
  498     if ( enableWrap || replaceTabs ) {
  499         preFormatter.setWrap ( enableWrap );
  500         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
  501         preFormatter.setWrapLineLength ( lineLength );
  502         preFormatter.setReplaceTabs ( replaceTabs );
  503         preFormatter.setNumberSpaces ( numberSpaces );
  504     }
  505 }
  506 
  507 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
  508 {
  509     keywordCase = keyCase;
  510 }
  511 
  512 void CodeGenerator::setEOLDelimiter(char delim)
  513 {
  514     eolDelimiter = delim;
  515 }
  516 
  517 void CodeGenerator::reset()
  518 {
  519     lineIndex = 0;
  520     lineNumber = 0;
  521     line.clear();
  522     preFormatter.reset();
  523     inFile.clear();
  524     outFile.clear();
  525     embedLangDefPath.clear();
  526     printNewLines=true;
  527     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
  528     startLineCntCurFile = startLineCnt;
  529     applySyntaxTestCase=lineContainedTestCase=false;
  530     if (currentSyntax){
  531         vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
  532         docStyle.overrideAttributes(overrideStyleAttrs);
  533         if (overrideStyleAttrs.size())
  534             disableStyleCache = true;
  535     }
  536 }
  537 
  538 string CodeGenerator::getThemeInitError()
  539 {
  540     return docStyle.getErrorMessage();
  541 }
  542 
  543 string CodeGenerator::getPluginScriptError()
  544 {
  545     return userScriptError;
  546 }
  547 
  548 string CodeGenerator::getSyntaxRegexError()
  549 {
  550     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
  551 }
  552 string CodeGenerator::getSyntaxLuaError()
  553 {
  554     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
  555 
  556 }
  557 string CodeGenerator::getSyntaxDescription()
  558 {
  559     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
  560 
  561 }
  562 string CodeGenerator::getSyntaxEncodingHint()
  563 {
  564     return (currentSyntax)? currentSyntax->getEncodingHint(): "";
  565 
  566 }
  567 string CodeGenerator::getThemeDescription()
  568 {
  569     return docStyle.getDescription();
  570 }
  571 
  572 string CodeGenerator::getSyntaxCatDescription(){
  573     return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
  574 }
  575 
  576 string CodeGenerator::getThemeCatDescription()
  577 {
  578     return docStyle.getCategoryDescription();
  579 }
  580 
  581 float CodeGenerator::getThemeContrast()
  582 {
  583     return docStyle.getContrast();
  584 }
  585 
  586 unsigned int CodeGenerator::getLineNumber()
  587 {
  588     return lineNumber;
  589 }
  590 bool CodeGenerator::AtEnd(char c) const {
  591     bool instream_eof = in->eof();
  592     if (extraEOFChar == 255)
  593         return instream_eof;
  594 
  595     bool c_null = c == extraEOFChar;
  596     bool instream_peek_null = false;
  597     if (instream_eof == false && c_null == false)
  598         instream_peek_null = in->peek() == extraEOFChar;
  599     bool ret = instream_eof || c_null || instream_peek_null;
  600     return ret;
  601 }
  602 bool CodeGenerator::readNewLine ( string &newLine )
  603 {
  604     bool eof=false;
  605 
  606     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
  607 
  608     while (!eof && startLineCntCurFile>0) {
  609         if ( formattingPossible && formattingEnabled ) {
  610             eof=!formatter->hasMoreLines();
  611             if ( !eof ) {
  612                 newLine = formatter->nextLine();
  613             }
  614         } else {
  615             eof = AtEnd() || ! getline ( *in, newLine, eolDelimiter );
  616         }
  617         --startLineCntCurFile;
  618     }
  619 
  620     startLineCntCurFile=1;
  621 #ifndef _WIN32
  622     // drop CR of CRLF files
  623     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
  624         newLine.erase(newLine.size() - 1);
  625 #endif
  626 
  627     return eof || ( lineNumber == maxLineCnt );
  628 }
  629 
  630 void CodeGenerator::matchRegex ( const string &line, State skipState)
  631 {
  632     regexGroups.clear();
  633     int matchBegin=0;
  634     int groupID=0;
  635 
  636     // cycle through all regex, save the start and ending indices of matches to report them later
  637     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
  638         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
  639 
  640         if (regexElem->open == skipState) continue;
  641 
  642         if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
  643             continue;
  644         }
  645 
  646         if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
  647             continue;
  648         }
  649 
  650         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
  651         boost::xpressive::sregex_iterator end;
  652 
  653         for( ; cur != end; ++cur )  {
  654             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
  655             matchBegin = cur->position(groupID);
  656 
  657             regexGroups.insert (
  658                 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
  659 
  660             // priority regex (match required)
  661             if (regexElem->priority) {
  662                 return;
  663             }
  664         }
  665     }
  666 }
  667 
  668 unsigned char CodeGenerator::getInputChar()
  669 {
  670     // end of line?
  671     if ( lineIndex == line.length() ) {
  672 
  673         //more testing required:
  674         if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
  675             lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
  676 
  677         bool eof=false;
  678         if ( preFormatter.isEnabled() ) {
  679             if ( !preFormatter.hasMoreLines() ) {
  680                 eof=readNewLine ( line );
  681                 preFormatter.setLine ( line );
  682                 ++lineNumber;
  683                 numberCurrentLine = true;
  684             } else {
  685                 if (numberWrappedLines)
  686                     ++lineNumber;
  687                 numberCurrentLine = numberWrappedLines;
  688             }
  689 
  690             line = preFormatter.getNextLine();
  691         } else {
  692             eof=readNewLine ( line );
  693             ++lineNumber;
  694 
  695             numberCurrentLine = true;
  696         }
  697         lineIndex=0;
  698 
  699         if (!lineContainedTestCase && applySyntaxTestCase){
  700             stateTraceTest = stateTraceCurrent;
  701             stateTraceCurrent.clear();
  702         }
  703 
  704         lineContainedTestCase=false;
  705         lineContainedStmt=false;
  706         matchRegex ( line );
  707 
  708         return ( eof ) ?'\0':'\n';
  709     }
  710 
  711     return line[lineIndex++];
  712 }
  713 
/** Determines the state of the text at the current input position and stores
 *  the corresponding text in token.
 *
 *  changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS,
 *  Coffeescript with block regex, Pas + ASM)
 *  especially nested syntax in one line
 *
 *  \param oldState state which was active before this call; used for the test
 *         case markers and passed on to validateState
 *  \return _EOL, _EOF, _WS, _TESTPOS, SYNTAX_ERROR, KEYWORD, STANDARD or the
 *          state returned by validateState for a regex match
 */
State CodeGenerator::getCurrentState (State oldState)
{
    unsigned char c='\0';

    if ( token.length() ==0 ) {
        c=getInputChar();
    } else {
        // a token is still pending: move lineIndex back to the position behind
        // its first character and continue with that character
        lineIndex-= ( token.length()-1 );
        c=token[0];
    }
    if ( c=='\n' ) {
        return _EOL;   // End of line
    }

    if ( c=='\0' ) {
        return _EOF;   // End of file
    }

    if ( c==' ' || c=='\t' ) {
        token= c;
        return _WS;    // White space
    }

    // test case markers are only recognized within comments
    if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
        token= c;
        return _TESTPOS;
    }

    // at this position the syntax change takes place
    if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
        loadEmbeddedLang(embedLangDefPath);  // load new syntax
        matchRegex(line);                    // recognize new patterns in the (remaining) line
        syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
    }

// re-entry point after an embedded code delimiter was found (see goto below)
SKIP_EMBEDDED:

    // syntax errors reported by the language server come first
    if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
        highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
        token = line.substr ( lineIndex-1, errorToken.length);
        lineIndex += errorToken.length-1;
        lsSyntaxErrorDesc = errorToken.id;

        //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
        return SYNTAX_ERROR;
    }

    // semantic tokens of the language server; only used if the theme has a style for the token id
    if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
        highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
        int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
        if (semStyleKwId) {
            token = line.substr ( lineIndex-1, semToken.length);
            lineIndex += semToken.length-1;

            currentKeywordClass = semStyleKwId + kwOffset;  // +offset of missing kw groups in the theme
            //std::cerr <<"l "<<lineNumber<<  "t "<<token<< " semStyleKwId "<< semStyleKwId << "  off "<<kwOffset<<" -> "  << semToken.id <<"\n";
            return KEYWORD;
        }
    }

    // Test if a regular expression was found at the current position
    if ( !regexGroups.empty() ) {
        if ( regexGroups.count ( lineIndex ) ) {
            token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );

            // move lineIndex behind the match; oldIndex keeps the key of the match in regexGroups
            unsigned int oldIndex= lineIndex;
            if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;

            if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
                //do not handle a nested section if the syntax is marked as "sealed"
                if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
                    embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
                    //remember position
                    syntaxChangeIndex = lineIndex+2;
                    syntaxChangeLineNo = lineNumber;
                }

                // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
                matchRegex(line, EMBEDDED_CODE_BEGIN);
                lineIndex = oldIndex;
                goto SKIP_EMBEDDED; // this is how it should be done
            }

            if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
                string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
                currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)

                // not found in the keyword lists: use the class of the keyword regex
                if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
                    currentKeywordClass = regexGroups[oldIndex].kwClass;
                }
                return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
            } else {
                return validateState(regexGroups[oldIndex].state, oldState);
            }
        }
    }

    // Character not referring to any state
    token = c;
    return STANDARD;
}
  819 
  820 State CodeGenerator::validateState(State newState, State oldState)
  821 {
  822 
  823     if (currentSyntax->getValidateStateChangeFct()) {
  824         Diluculum::LuaValueList params;
  825         params.push_back(Diluculum::LuaValue(oldState));
  826         params.push_back(Diluculum::LuaValue(newState));
  827         params.push_back(Diluculum::LuaValue(token));
  828         params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
  829         params.push_back(Diluculum::LuaValue(lineNumber) );
  830         params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
  831 
  832         Diluculum::LuaValueList res=
  833             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
  834                     params,"getValidateStateChangeFct call")  ;
  835 
  836         resultOfHook = res.size()>=1;
  837         if (resultOfHook) {
  838 
  839             setOverrideParams();
  840 
  841             State validatedState = (State)res[0].asInteger();
  842             if ( validatedState== _REJECT) {
  843 
  844                 // proceed using only the first character of the token
  845                 if (res.size()==1) {
  846                     lineIndex -= (token.length() -1);
  847                     token=token.substr(0, 1);
  848                 }
  849 
  850                 //experimental for slim.lang: evaluate second return arg after _REJECT
  851                 if (res.size()>=2) {
  852                     lineIndex -= (token.length() );
  853                     token.clear();
  854                     return (State)res[1].asInteger();
  855                 }
  856                 return oldState;
  857             }
  858 
  859             return validatedState;
  860         }
  861     }
  862     resultOfHook  = false;
  863 
  864     return newState;
  865 }
  866 
  867 unsigned int CodeGenerator::getCurrentKeywordClassId(){
  868     unsigned int kwClassId=0;
  869 
  870     // this vector contains the defined keyword classes, and currentKeywordClass is its index:
  871     vector<string> kwClasses=currentSyntax->getKeywordClasses();
  872 
  873     if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
  874         string kwClassName=kwClasses[currentKeywordClass-1];
  875         if (kwClassName.size()==3)
  876             kwClassId = kwClassName[2] - 'a' + 1;
  877     }
  878     return kwClassId;
  879 }
  880 
  881 //it is faster to pass ostream reference
  882 void CodeGenerator::maskString ( ostream& ss, const string & s )
  883 {
  884     string escHoverText;
  885 
  886     if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
  887 
  888         string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
  889 
  890         for(const auto &c : hoverText)
  891         {
  892             if (isascii(c))
  893                 escHoverText.append(maskCharacter(c));
  894         }
  895     }
  896 
  897     if (escHoverText.size()) {
  898         ss << getHoverTagOpen(escHoverText);
  899     }
  900 
  901     for (const auto &c : s)
  902     {
  903         ss << maskCharacter ( c );
  904     }
  905 
  906     if (escHoverText.size()) {
  907         ss << getHoverTagClose();
  908     }
  909 
  910     // The test markers position should also be deternmined by calculating the code points
  911     if ( applySyntaxTestCase ) {
  912 
  913         PositionState ps(currentState, getCurrentKeywordClassId(), false);
  914 
  915         //TODO avoid repeated string comparison:
  916         int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
  917         for (int i=0; i< slen; i++ ) {
  918             stateTraceCurrent.push_back(ps);
  919         }
  920         if (stateTraceCurrent.size()>200)
  921             stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
  922     }
  923 }
  924 
  925 void CodeGenerator::printSyntaxError ( ostream& ss ) {
  926     if ( !lsSyntaxErrorDesc.empty()) {
  927         ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
  928 
  929         for(const auto &c : lsSyntaxErrorDesc)
  930         {
  931             ss << maskCharacter ( c );
  932         }
  933 
  934         ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
  935         lsSyntaxErrorDesc.clear();
  936     }
  937 }
  938 
  939 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
  940 {
  941 
  942     Diluculum::LuaValueList params;
  943     params.push_back(Diluculum::LuaValue(token));
  944     params.push_back(Diluculum::LuaValue(currentState));
  945     params.push_back(Diluculum::LuaValue(currentKeywordClass));
  946     params.push_back(Diluculum::LuaValue(lineContainedStmt));
  947     params.push_back(Diluculum::LuaValue(lineNumber) );
  948     params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
  949 
  950     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
  951             params,"getDecorateFct call")  ;
  952 }
  953 
  954 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
  955 {
  956     if ( flushWhiteSpace )
  957         flushWs(1);
  958     string caseToken = StringTools::change_case ( token, tcase );
  959     if (currentSyntax->getDecorateFct()) {
  960 
  961         Diluculum::LuaValueList res=callDecorateFct(caseToken);
  962         if (res.size()==1) {
  963             *out<<res[0].asString();
  964         } else {
  965             maskString ( *out, caseToken );
  966         }
  967     } else {
  968         maskString ( *out, caseToken );
  969     }
  970 
  971     // check this *after* the decorate call
  972     if (   currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
  973         || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
  974         lineContainedStmt = true;
  975     }
  976     token.clear();
  977 }
  978 
  979 bool CodeGenerator::styleFound()
  980 {
  981     return docStyle.found();
  982 }
  983 
  984 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
  985 {
  986     return true;
  987 }
  988 
  989 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
  990 {
  991 
  992     if ( formatter!=NULL ) {
  993         return true;
  994     }
  995 
  996     if ( !indentScheme.size() ) return false;
  997 
  998     formatter=new astyle::ASFormatter();
  999 
 1000     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
 1001         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
 1002     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
 1003         formatter->setFormattingStyle ( astyle::STYLE_KR );
 1004     } else if ( indentScheme=="java" ) {
 1005         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
 1006     } else if ( indentScheme=="stroustrup" ) {
 1007         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
 1008     } else if ( indentScheme=="whitesmith" ) {
 1009         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
 1010     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
 1011         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
 1012     } else if ( indentScheme=="gnu" ) {
 1013         formatter->setFormattingStyle ( astyle::STYLE_GNU );
 1014     } else if ( indentScheme=="linux" ) {
 1015         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
 1016     } else if ( indentScheme=="horstmann" ) {
 1017         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
 1018     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
 1019         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
 1020     } else if ( indentScheme=="google") {
 1021         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
 1022     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
 1023         formatter->setFormattingStyle ( astyle::STYLE_PICO );
 1024     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
 1025         formatter->setFormattingStyle ( astyle::STYLE_LISP );
 1026     } else if ( indentScheme=="vtk") {
 1027         formatter->setFormattingStyle ( astyle::STYLE_VTK );
 1028     } else if ( indentScheme=="mozilla") {
 1029         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
 1030     } else if ( indentScheme=="webkit") {
 1031         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
 1032     } else if ( indentScheme!="user" ){
 1033         return false;
 1034     }
 1035     return formattingEnabled=true;
 1036 }
 1037 
 1038 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
 1039 {
 1040 
 1041     if (!embedded) {
 1042         while (!nestedLangs.empty()) {
 1043             nestedLangs.pop();
 1044         }
 1045     }
 1046 
 1047     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
 1048     LoadResult result=LOAD_OK;
 1049     if ( reloadNecessary ) {
 1050         if (syntaxReaders.count(langDefPath)) {
 1051             currentSyntax=syntaxReaders[langDefPath];
 1052             result=LOAD_OK;
 1053         } else {
 1054 
 1055             currentSyntax=new SyntaxReader();
 1056             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
 1057             syntaxReaders[langDefPath]=currentSyntax;
 1058         }
 1059 
 1060         if ( result==LOAD_OK ) {
 1061             formattingPossible=currentSyntax->enableReformatting();
 1062             updateKeywordClasses();
 1063         }
 1064     }
 1065 
 1066     kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
 1067 
 1068     return result;
 1069 }
 1070 
 1071 bool CodeGenerator::validateInputStream()
 1072 {
 1073     if ( !in ) return false;
 1074 
 1075     // it is not possible to move stream pointer back with stdin
 1076     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
 1077         return true;
 1078 
 1079     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
 1080     // Magic configuration of "file"
 1081     // This is intended for web plugins - only check filetypes often found in the net
 1082     char magic_gif[]    = {'G','I','F','8', 0};
 1083     char magic_png[]    = {'\x89','P','N','G', 0};
 1084     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
 1085     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
 1086     char magic_bmp[]    = {'B','M', 0};
 1087     char magic_pdf[]    = {'%','P','D','F', 0};
 1088     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
 1089     char magic_rar[]    = {'R','a','r','!', 0};
 1090     char magic_zip[]    = {'P','K','\x03','\x04', 0};
 1091     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
 1092     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
 1093     char magic_bzip[]   = {'B','Z', 0};
 1094 
 1095     char* magic_table[] = {magic_utf8,
 1096                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
 1097                            magic_java,
 1098                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
 1099                            0
 1100                           };
 1101 
 1102     char buffer [10]= {0};
 1103     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
 1104 
 1105     int magic_index=0;
 1106     while ( magic_table[magic_index] ) {
 1107         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
 1108             break;
 1109         }
 1110         magic_index++;
 1111     }
 1112     int streamReadPos=0;
 1113     if ( magic_table[magic_index] == magic_utf8 ) {
 1114         //setEncoding("utf-8");
 1115         streamReadPos=3; // remove UTF-8 magic number from output
 1116     }
 1117 
 1118     in -> seekg ( streamReadPos, ios::beg );
 1119     in-> clear();  // clear fail bit to continue reading
 1120 
 1121     return !magic_table[magic_index] // points to 0 if no pattern was found
 1122            || magic_table[magic_index] == magic_utf8;
 1123 }
 1124 
 1125 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
 1126 
 1127     if (currentSyntax && pluginChunks.size()) {
 1128 
 1129         Diluculum::LuaState luaState;
 1130 
 1131         Diluculum::LuaValueList chunkParams;
 1132         chunkParams.push_back(currentSyntax->getDescription());
 1133         for (unsigned int i=0; i<pluginChunks.size(); i++) {
 1134             luaState.call(*pluginChunks[i], chunkParams, "format user function");
 1135         }
 1136 
 1137         if (luaState.globals().count(fctName)) {
 1138             Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
 1139 
 1140             luaState["HL_PLUGIN_PARAM"] = pluginParameter;
 1141             luaState["HL_OUTPUT"] = outputType;
 1142             luaState["HL_FORMAT_HTML"]=HTML;
 1143             luaState["HL_FORMAT_XHTML"]=XHTML;
 1144             luaState["HL_FORMAT_TEX"]=TEX;
 1145             luaState["HL_FORMAT_LATEX"]=LATEX;
 1146             luaState["HL_FORMAT_RTF"]=RTF;
 1147             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
 1148             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
 1149             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
 1150             luaState["HL_FORMAT_SVG"]=SVG;
 1151             luaState["HL_FORMAT_BBCODE"]=BBCODE;
 1152             luaState["HL_FORMAT_PANGO"]=PANGO;
 1153             luaState["HL_FORMAT_ODT"]=ODTFLAT;
 1154 
 1155             Diluculum::LuaValueList params;
 1156             Diluculum::LuaValueMap options;
 1157             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );
 1158             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);
 1159             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);
 1160             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());
 1161             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());
 1162 
 1163             params.push_back(inputFilesCnt);
 1164             params.push_back(processedFilesCnt);
 1165             params.push_back(options);
 1166 
 1167             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
 1168             if (res.size()>=1) {
 1169                 *keepDefault=false;
 1170                 *result = res[0].asString();
 1171                 if (res.size()==2)
 1172                     *keepDefault = res[1].asBoolean();
 1173             }
 1174             delete documentFct;
 1175         }
 1176     }
 1177 }
 1178 
 1179 void CodeGenerator::printHeader()
 1180 {
 1181     bool keepDefaultHeader=true;
 1182     string pluginHeader;
 1183 
 1184     processedFilesCnt++;
 1185 
 1186     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
 1187 
 1188     if ( ! fragmentOutput && keepDefaultHeader)
 1189         *out << getHeader();
 1190 
 1191     *out << pluginHeader;
 1192 
 1193     if ( !fragmentOutput || keepInjections)
 1194         *out << currentSyntax->getHeaderInjection();
 1195 }
 1196 
 1197 void CodeGenerator::printFooter()
 1198 {
 1199 
 1200     bool keepDefaultFooter=true;
 1201     string pluginFooter;
 1202 
 1203     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
 1204 
 1205     if ( !fragmentOutput || keepInjections)
 1206         *out << currentSyntax->getFooterInjection();
 1207 
 1208     *out << pluginFooter;
 1209 
 1210     if ( ! fragmentOutput && keepDefaultFooter )
 1211         *out << getFooter();
 1212 }
 1213 
 1214 ParseError CodeGenerator::generateFile ( const string &inFileName,
 1215         const string &outFileName )
 1216 {
 1217     if ( !docStyle.found() ) {
 1218         return BAD_STYLE;
 1219     }
 1220 
 1221     reset();
 1222 
 1223     ParseError error=PARSE_OK;
 1224 
 1225     inFile=inFileName;
 1226     outFile=outFileName;
 1227 
 1228     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
 1229 
 1230     if ( validateInput )
 1231         if ( !validateInputStream() ) error= BAD_INPUT;
 1232 
 1233     if ( !in->fail() && error==PARSE_OK ) {
 1234         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
 1235         if ( out->fail() ) {
 1236             error=BAD_OUTPUT;
 1237         }
 1238     }
 1239 
 1240     if ( in->fail() ) {
 1241         error=BAD_INPUT;
 1242     }
 1243 
 1244     if ( error==PARSE_OK ) {
 1245         initASStream();
 1246         currentSyntax->setInputFileName(inFile);
 1247         printHeader();
 1248         printBody();
 1249         printFooter();
 1250     }
 1251 
 1252     if ( !outFileName.empty() ) {
 1253         delete out;
 1254         out=NULL;
 1255     }
 1256     if ( !inFileName.empty() ) {
 1257         delete in;
 1258         in=NULL;
 1259     }
 1260     return error;
 1261 }
 1262 
 1263 string CodeGenerator::generateString ( const string &input )
 1264 {
 1265 
 1266     if ( !docStyle.found() ) {
 1267         return "";
 1268     }
 1269 
 1270     reset();
 1271 
 1272     in = new istringstream ( input );
 1273     out = new ostringstream ();
 1274 
 1275     if ( in->fail() || out->fail() ) {
 1276         return "";
 1277     }
 1278 
 1279     initASStream();
 1280 
 1281     printHeader();
 1282     printBody();
 1283     printFooter();
 1284 
 1285     string result = static_cast<ostringstream*> ( out )->str();
 1286 
 1287     delete out;
 1288     out=NULL;
 1289     delete in;
 1290     in=NULL;
 1291 
 1292     return result;
 1293 }
 1294 
 1295 void CodeGenerator::initASStream() {
 1296     if ( formatter != NULL ) {
 1297         if (streamIterator) delete streamIterator;
 1298         streamIterator =  new astyle::ASStreamIterator ( in, extraEOFChar );
 1299         formatter->init ( streamIterator );
 1300 
 1301         if (currentSyntax->getDescription()=="C#") {
 1302             formatter->setSharpStyle();
 1303         } else if (currentSyntax->getDescription()=="Java") {
 1304             formatter->setJavaStyle();
 1305         } else if (currentSyntax->getDescription()=="Javascript") {
 1306             formatter->setJSStyle();
 1307         } else if (currentSyntax->getDescription()=="Objective C") {
 1308             formatter->setObjCStyle();
 1309         } else {
 1310             formatter->setCStyle();
 1311         }
 1312 
 1313     }
 1314 }
 1315 
 1316 string CodeGenerator::generateStringFromFile ( const string &inFileName )
 1317 {
 1318 
 1319     if ( !docStyle.found() ) {
 1320         return "";
 1321     }
 1322 
 1323     reset();
 1324 
 1325     inFile = inFileName;
 1326 
 1327     in = new ifstream ( inFileName.c_str() );
 1328     out = new ostringstream ();
 1329 
 1330     if ( in->fail() || out->fail() ) {
 1331         return "";
 1332     }
 1333 
 1334     if ( validateInput && !validateInputStream() ) {
 1335         return "ERROR: detected binary input";
 1336     }
 1337 
 1338     initASStream();
 1339 
 1340     currentSyntax->setInputFileName(inFile);
 1341 
 1342     printHeader();
 1343     printBody();
 1344     printFooter();
 1345 
 1346     string result = static_cast<ostringstream*> ( out )->str();
 1347 
 1348     delete out;
 1349     out=NULL;
 1350     delete in;
 1351     in=NULL;
 1352 
 1353     return result;
 1354 }
 1355 
 1356 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
 1357 {
 1358     if ( s==KEYWORD && kwClassID ) {
 1359         return NUMBER_BUILTIN_STATES + kwClassID-1;
 1360     }
 1361     return ( unsigned int ) s ;
 1362 }
 1363 
 1364 void CodeGenerator::openTag ( State s )
 1365 {
 1366     *out << openTags[ ( unsigned int ) s];
 1367     currentState=s;
 1368 }
 1369 
 1370 void CodeGenerator::closeTag ( State s )
 1371 {
 1372     *out << closeTags[ ( unsigned int ) s];
 1373     flushWs(2);
 1374     currentState=_UNKNOWN;
 1375 }
 1376 
 1377 void CodeGenerator::openKWTag ( unsigned int kwClassID )
 1378 {
 1379     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1380     currentState=KEYWORD;
 1381 }
 1382 
 1383 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
 1384 {
 1385     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
 1386     flushWs(3);
 1387     currentState=_UNKNOWN;
 1388 }
 1389 
 1390 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
 1391 {
 1392     if (nestedLangs.empty()) {
 1393         nestedLangs.push(currentSyntax->getCurrentPath() );
 1394     }
 1395     if (nestedLangs.top() != embedLangDefPath) {
 1396         nestedLangs.push(embedLangDefPath);
 1397     }
 1398     LoadResult res = loadLanguage(embedLangDefPath, true);
 1399     //pass end delimiter regex to syntax description
 1400     currentSyntax->restoreLangEndDelim(embedLangDefPath);
 1401     return res == LOAD_OK;
 1402 }
 1403 
 1404 ///////////////////////////////////////////////////////////////////////////////
 1405 
/**
 * Main loop of the highlighter: processes the input in the STANDARD state
 * and dispatches to the state specific process...State() methods until the
 * end of the input is reached.
 *
 * If highlighting is disabled for the current syntax, the input lines are
 * only escaped and printed (with line numbers) and the method returns.
 */
void CodeGenerator::processRootState()
{
    bool eof=false,
         firstLine=true; // avoid newline before printing the first output line

    // syntax test mode is switched on by the input file name
    applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;

    if ( currentSyntax->highlightingDisabled() ) {
        // plain copy mode: no state detection, only masking of each line
        string line;
        while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
            ++lineNumber;
            insertLineNumber ( !firstLine );
            flushWs(4);
            firstLine=false;
            // only lines within the configured line range are printed
            if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
                maskString ( *out, line );
        }
        *out << flush;
        return;
    }

    State state=STANDARD;
    openTag ( STANDARD );

    do {
        // determine next state
        state= getCurrentState(STANDARD);

        // handle current state
        // Each sub state is wrapped the same way: the STANDARD tag is closed,
        // the sub state is processed (it reports whether EOF was reached),
        // and the STANDARD tag is opened again.
        switch ( state ) {
        case KEYWORD:
            closeTag ( STANDARD );
            eof=processKeywordState ( state );
            openTag ( STANDARD );
            break;
        case NUMBER:
            closeTag ( STANDARD );
            eof=processNumberState();
            openTag ( STANDARD );
            break;
        case ML_COMMENT:
            closeTag ( STANDARD );
            eof=processMultiLineCommentState();
            openTag ( STANDARD );
            break;
        case SL_COMMENT:
            closeTag ( STANDARD );
            eof=processSingleLineCommentState();
            openTag ( STANDARD );
            break;
        case STRING:
            closeTag ( STANDARD );
            eof=processStringState ( STANDARD );
            openTag ( STANDARD );
            break;
        case DIRECTIVE:
            closeTag ( STANDARD );
            eof=processDirectiveState();
            openTag ( STANDARD );
            break;
        case ESC_CHAR:
            closeTag ( STANDARD );
            eof=processEscapeCharState();
            openTag ( STANDARD );
            break;
        case SYMBOL:
            closeTag ( STANDARD );
            eof=processSymbolState();
            openTag ( STANDARD );
            break;
        case EMBEDDED_CODE_END:
            closeTag ( STANDARD );
            eof=processSyntaxChangeState(state);
            openTag ( STANDARD );
            break;
        case SYNTAX_ERROR:
            closeTag ( STANDARD );
            eof=processSyntaxErrorState();
            openTag ( STANDARD );
            break;

        case _EOL:
            // XTERM256 fix (issue with less cmd)
            // on the first line the STANDARD tag is only closed and reopened
            // if line numbers are shown
            if  (!firstLine || showLineNumbers) {
                closeTag ( STANDARD );
            }
            insertLineNumber(!firstLine);
            if (!firstLine || showLineNumbers) {
                flushWs(5);
                // the state trace is kept per line
                stateTraceCurrent.clear();
                openTag ( STANDARD );
            }
            firstLine=false;
            break;
        case _EOF:
            eof=true;
            break;
        case _WS:
            processWsState();
            break;
        default:
            // any other state: print the token within the STANDARD tag
            printMaskedToken();
            break;
        }
    } while ( !eof );

    // for the two escape sequence output types the final close tag is only
    // written if a token is pending or more than one line was processed
    if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
        closeTag ( STANDARD );

    // let the syntax decorate the end of the last line; a single return
    // value is written to the output
    if (currentSyntax->getDecorateLineEndFct()) {
        Diluculum::LuaValueList res=callDecorateLineFct(false);
        if (res.size()==1) {
            *out << res[0].asString();
        }
    }

    // printNewLines is set from noTrailingNewLine (0: always true; 2: true only
    // if a token is pending or more than one line was processed) before the
    // final getNewLine() call
    printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
    *out << getNewLine();
    *out << flush;
}
 1526 
 1527 bool CodeGenerator::processSyntaxChangeState(State myState)
 1528 {
 1529     State newState=STANDARD;
 1530     bool eof=false,
 1531          exitState=false;
 1532 
 1533     openTag ( KEYWORD );
 1534     do {
 1535 
 1536         if (myState==EMBEDDED_CODE_END) {
 1537             if (!nestedLangs.empty()) {
 1538                 nestedLangs.pop();
 1539             }
 1540             // load host language syntax
 1541             if (!nestedLangs.empty()) {
 1542                 loadLanguage(nestedLangs.top(), true);
 1543             }
 1544             matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
 1545         }
 1546 
 1547         printMaskedToken ( newState!=_WS );
 1548 
 1549         newState= getCurrentState(myState);
 1550 
 1551         switch ( newState ) {
 1552         case _WS:
 1553             processWsState();
 1554             break;
 1555         case _EOL:
 1556             insertLineNumber();
 1557             exitState=true;
 1558             break;
 1559         case _EOF:
 1560             eof = true;
 1561             break;
 1562         default:
 1563             exitState=true;
 1564             break;
 1565         }
 1566     } while (  !exitState  &&  !eof );
 1567     closeTag ( KEYWORD );
 1568 
 1569     return eof;
 1570 }
 1571 
 1572 
 1573 bool CodeGenerator::processKeywordState ( State myState )
 1574 {
 1575     State newState=STANDARD;
 1576     unsigned int myClassID=currentKeywordClass;
 1577     bool eof=false,
 1578          exitState=false;
 1579 
 1580     openKWTag ( myClassID );
 1581     do {
 1582         printMaskedToken ( newState!=_WS,
 1583                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
 1584         newState= getCurrentState(myState);
 1585         switch ( newState ) {
 1586         case _WS:
 1587             processWsState();
 1588             exitState=isolateTags;
 1589             break;
 1590         case _EOL:
 1591             insertLineNumber();
 1592             exitState=true;
 1593 
 1594             break;
 1595         case _EOF:
 1596             eof = true;
 1597             break;
 1598         case KEYWORD_END:
 1599             exitState=true;
 1600             break;
 1601         default:
 1602             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
 1603             break;
 1604         }
 1605     } while ( !exitState  &&  !eof );
 1606 
 1607     closeKWTag ( myClassID );
 1608 
 1609     currentKeywordClass=0;
 1610     return eof;
 1611 }
 1612 
 1613 bool CodeGenerator::processNumberState()
 1614 {
 1615     State newState=STANDARD;
 1616     bool eof=false,
 1617          exitState=false;
 1618     openTag ( NUMBER );
 1619     do {
 1620         printMaskedToken ( newState!=_WS );
 1621         newState= getCurrentState(NUMBER);
 1622         switch ( newState ) {
 1623         case _WS:
 1624             processWsState();
 1625             exitState=isolateTags;
 1626             break;
 1627         case _EOL:
 1628             insertLineNumber();
 1629             exitState=true;
 1630             break;
 1631         case _EOF:
 1632             eof = true;
 1633             break;
 1634         default:
 1635             exitState=newState!=NUMBER;
 1636             break;
 1637         }
 1638     } while ( !exitState && !eof );
 1639 
 1640     closeTag ( NUMBER );
 1641     return eof;
 1642 }
 1643 
 1644 
/**
 * Prints a multi line comment within ML_COMMENT tags.
 *
 * Nested comments are counted if the syntax allows them; the state is left
 * when the counter reaches zero. Syntax test markers found inside the
 * comment are evaluated.
 *
 * \return true if the end of the input was reached
 */
bool CodeGenerator::processMultiLineCommentState()
{
    int commentCount=1;
    // remember which delimiter opened the comment to find the matching end
    int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
    State newState=STANDARD;
    bool eof=false, exitState=false, containedTestCase=false;
    // column where the comment delimiter starts; set to 0 at each line break
    unsigned int startColumn=lineIndex - token.size() ;
    openTag ( ML_COMMENT );
    do {
        printMaskedToken (newState!=_WS );
        newState= getCurrentState(ML_COMMENT);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            // close and reopen the comment tag around the line break
            wsBuffer += closeTags[ML_COMMENT];
            insertLineNumber();
            wsBuffer += openTags[ML_COMMENT];
            startColumn=0;
            break;
        case _EOF:
            eof = true;
            break;
        case _TESTPOS:
            // a "<" marker refers to the comment start column, any other
            // marker to the column before the current line index
            runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
            printMaskedToken();
            containedTestCase=true;
            break;
        case ML_COMMENT:

            if ( currentSyntax->allowNestedMLComments() ) {
                ++commentCount;
            }
            // if delimiters are equal, close the comment by continuing to
            // ML_COMMENT_END section
            if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;

            // intentional fall through to ML_COMMENT_END
        case ML_COMMENT_END:

            // ignore end delimiters which do not belong to the open delimiter
            if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
                break;
            }
            commentCount--;
            if ( !commentCount ) {
                printMaskedToken();
                exitState=true;
            }
            break;
        default:
            break;
        }
    } while ( !exitState  &&  !eof );

    closeTag ( ML_COMMENT );

    // discard the state trace if this comment contained a test case
    if (containedTestCase){
        stateTraceCurrent.clear();
    }
    return eof;
}
 1707 
 1708 
 1709 bool CodeGenerator::processSingleLineCommentState()
 1710 {
 1711     State newState=STANDARD;
 1712     bool eof=false, exitState=false, containedTestCase=false;
 1713     unsigned int startColumn = lineIndex - token.size() ;
 1714 
 1715     openTag ( SL_COMMENT );
 1716     do {
 1717         printMaskedToken ( newState!=_WS );
 1718         newState= getCurrentState(SL_COMMENT);
 1719 
 1720         switch ( newState ) {
 1721         case _WS:
 1722             processWsState();
 1723             break;
 1724         case _EOL:
 1725             printMaskedToken();
 1726             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
 1727                 exitState=false;
 1728             } else {
 1729                 exitState=true;
 1730             }
 1731             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
 1732             insertLineNumber();
 1733             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
 1734 
 1735             break;
 1736         case _EOF:
 1737             eof = true;
 1738             break;
 1739         case _TESTPOS:
 1740             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
 1741             printMaskedToken();
 1742             containedTestCase=true;
 1743             break;
 1744 
 1745         default:
 1746             break;
 1747         }
 1748     } while ( !exitState  &&  !eof );
 1749 
 1750     closeTag ( SL_COMMENT );
 1751 
 1752     if (containedTestCase) {
 1753         stateTraceCurrent.clear();
 1754     }
 1755 
 1756     return eof;
 1757 }
 1758 
/**
 * Prints a directive within DIRECTIVE tags.
 *
 * Comments and strings inside the directive are handled by their own state
 * methods; the DIRECTIVE tag is closed before and reopened after them.
 *
 * \return true if the end of the input was reached
 */
bool CodeGenerator::processDirectiveState()
{
    State  newState=STANDARD;
    bool eof=false, exitState=false;

    openTag ( DIRECTIVE );
    do {
        printMaskedToken ( newState!=_WS );
        newState= getCurrentState(DIRECTIVE);
        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case DIRECTIVE_END:
            printMaskedToken();
            exitState=true;
            break;
        case _EOL:
            printMaskedToken();

            if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
                // a line wrapped by the preformatter continues the directive
                exitState=false;
            } else {
                // NOTE(review): 0x13 appears to mean "no continuation character";
                // in that case the directive is not ended by a line break - confirm
                if (currentSyntax->getContinuationChar()!=0x13){
                    // the directive ends unless the line ended with the continuation character
                    exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
                }
            }
            // if the directive continues, close and reopen its tag around the line break
            if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
            insertLineNumber();
            if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
            break;
        case ML_COMMENT:
            closeTag ( DIRECTIVE );
            eof= processMultiLineCommentState();
            openTag ( DIRECTIVE );
            break;
        case SL_COMMENT:
            closeTag ( DIRECTIVE );
            eof= processSingleLineCommentState();
            openTag ( DIRECTIVE );
            // a single line comment also ends the directive
            exitState=true;
            break;
        case STRING:
            closeTag ( DIRECTIVE );
            eof=processStringState ( DIRECTIVE );
            openTag ( DIRECTIVE );
            break;
        case _EOF:
            eof = true;
            break;
        default:
            break;
        }
    } while ( !exitState && !eof );

    closeTag ( DIRECTIVE );
    return eof;
}
 1817 
 1818 bool CodeGenerator::processStringState ( State oldState )
 1819 {
 1820     State newState=STANDARD;
 1821     bool eof=false, exitState=false;
 1822     bool returnedFromOtherState=false;
 1823 
 1824     State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
 1825 
 1826     int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
 1827     string openDelim=token;
 1828 
 1829     //Raw String by definition:
 1830     bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;
 1831 
 1832     // Test if character before string open delimiter token equals to the
 1833     // raw string prefix (Example: r" ", r""" """ in Python)
 1834 
 1835     //Raw String Prefix:
 1836     if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
 1837         isRawString=true;
 1838     }
 1839 
 1840     openTag ( myState );
 1841     do {
 1842         // true if last token was an escape char
 1843         if ( !returnedFromOtherState ) {
 1844             printMaskedToken (newState!=_WS );
 1845         }
 1846         returnedFromOtherState=false;
 1847         newState= getCurrentState(myState);
 1848 
 1849         switch ( newState ) {
 1850         case _WS:
 1851             processWsState();
 1852             break;
 1853         case _EOL:
 1854             wsBuffer += closeTags[myState];
 1855             insertLineNumber();
 1856             wsBuffer += openTags[myState];
 1857             break;
 1858         case STRING_END:
 1859             if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
 1860                 if (currentSyntax->assertDelimEqualLength()) {
 1861                     exitState= openDelim.length()==token.length();
 1862                 } else {
 1863                     exitState= true;
 1864                 }
 1865                 printMaskedToken();
 1866             }
 1867             break;
 1868         case STRING:
 1869             // if there exist multiple string delimiters, close string if
 1870             // current delimiter is equal to the opening delimiter
 1871             exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  )) && token==openDelim;
 1872             printMaskedToken();
 1873             break;
 1874         case ESC_CHAR:
 1875             if ( !isRawString ) {
 1876                 closeTag ( myState );
 1877                 eof=processEscapeCharState();
 1878                 openTag ( myState );
 1879                 returnedFromOtherState=true;
 1880             } else {
 1881                 // FIXME not a fix for Python r"""\"""
 1882                 exitState=token.size()>1 && token[1] == openDelim[0];
 1883                 printMaskedToken();
 1884             }
 1885             break;
 1886         case STRING_INTERPOLATION:
 1887             closeTag ( myState );
 1888             eof=processInterpolationState();
 1889             openTag ( myState );
 1890             returnedFromOtherState=true;
 1891             break;
 1892 
 1893         case _EOF:
 1894             eof = true;
 1895             break;
 1896         default:
 1897             printMaskedToken();
 1898             break;
 1899         }
 1900     } while ( !exitState && !eof );
 1901 
 1902     closeTag ( myState );
 1903 
 1904     toggleDynRawString = false;
 1905 
 1906     return eof;
 1907 }
 1908 
 1909 bool CodeGenerator::processSymbolState()
 1910 {
 1911     State newState=STANDARD;
 1912     bool eof=false,
 1913          exitState=false;
 1914 
 1915     openTag ( SYMBOL );
 1916     do {
 1917         printMaskedToken ( newState!=_WS );
 1918         newState= getCurrentState(SYMBOL);
 1919         switch ( newState ) {
 1920         case _WS:
 1921             processWsState();
 1922             exitState=isolateTags;
 1923             break;
 1924         case _EOL:
 1925             insertLineNumber();
 1926             exitState=true;
 1927             break;
 1928         case _EOF:
 1929             eof = true;
 1930             break;
 1931         default:
 1932             exitState=newState!=SYMBOL;
 1933             break;
 1934         }
 1935     } while ( !exitState && !eof );
 1936 
 1937     closeTag ( SYMBOL );
 1938     return eof;
 1939 }
 1940 
 1941 bool CodeGenerator::processSyntaxErrorState()
 1942 {
 1943     State newState=STANDARD;
 1944     bool eof=false,
 1945     exitState=false;
 1946 
 1947     openTag ( SYNTAX_ERROR );
 1948     do {
 1949         printMaskedToken ( newState!=_WS );
 1950         newState= getCurrentState(SYNTAX_ERROR);
 1951         switch ( newState ) {
 1952             case _WS:
 1953                 processWsState();
 1954                 exitState=isolateTags;
 1955                 break;
 1956             case _EOL:
 1957                 insertLineNumber();
 1958                 exitState=true;
 1959                 break;
 1960             case _EOF:
 1961                 eof = true;
 1962                 break;
 1963             default:
 1964                 exitState=newState!=SYMBOL;
 1965                 break;
 1966         }
 1967     } while ( !exitState && !eof );
 1968 
 1969     closeTag ( SYNTAX_ERROR );
 1970     return eof;
 1971 }
 1972 
 1973 bool CodeGenerator::processEscapeCharState()
 1974 {
 1975     State newState=STANDARD;
 1976     bool eof=false, exitState=false;
 1977     openTag ( ESC_CHAR );
 1978     do {
 1979         printMaskedToken (newState!=_WS );
 1980         newState= getCurrentState(ESC_CHAR);
 1981         switch ( newState ) {
 1982         case _EOL:
 1983             insertLineNumber();
 1984             exitState=true;
 1985             break;
 1986         case _WS:
 1987             processWsState();
 1988             exitState=isolateTags;
 1989             break;
 1990         case _EOF:
 1991             eof = true;
 1992             break;
 1993         default:
 1994             exitState=newState!=ESC_CHAR;
 1995             break;
 1996         }
 1997     } while ( !exitState && !eof );
 1998 
 1999     closeTag ( ESC_CHAR );
 2000     return eof;
 2001 }
 2002 
 2003 bool CodeGenerator::processInterpolationState()
 2004 {
 2005     State newState=STANDARD;
 2006     bool eof=false, exitState=false;
 2007     openTag ( STRING_INTERPOLATION );
 2008     do {
 2009         printMaskedToken (newState!=_WS );
 2010         newState= getCurrentState(STRING_INTERPOLATION);
 2011         switch ( newState ) {
 2012         case _EOL:
 2013             insertLineNumber();
 2014             exitState=true;
 2015             break;
 2016         case _WS:
 2017             processWsState();
 2018             exitState=isolateTags;
 2019             break;
 2020         case _EOF:
 2021             eof = true;
 2022             break;
 2023         default:
 2024             exitState=newState!=STRING_INTERPOLATION;
 2025             break;
 2026         }
 2027     } while ( !exitState && !eof );
 2028 
 2029     closeTag ( STRING_INTERPOLATION );
 2030     return eof;
 2031 }
 2032 
 2033 void CodeGenerator::processWsState()
 2034 {
 2035 
 2036     if ( !maskWs ) {
 2037         wsBuffer += token;
 2038         token.clear();
 2039         return;
 2040     }
 2041 
 2042     flushWs(6);
 2043 
 2044     int cntWs=0;
 2045     lineIndex--;
 2046     PositionState ps(currentState, 0, true);
 2047 
 2048     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
 2049         ++cntWs;
 2050         ++lineIndex;
 2051     }
 2052 
 2053     if ( cntWs>1 ) {
 2054 
 2055         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
 2056         if ( excludeWs && styleID!=_UNKNOWN ) {
 2057             *out << closeTags[styleID];
 2058         }
 2059 
 2060         *out << maskWsBegin;
 2061         for ( int i=0; i<cntWs; i++ ) {
 2062             *out << spacer;
 2063             if (applySyntaxTestCase){
 2064                 stateTraceCurrent.push_back(ps);
 2065             }
 2066         }
 2067         *out << maskWsEnd;
 2068         if ( excludeWs && styleID!=_UNKNOWN ) {
 2069             *out << openTags[styleID];
 2070         }
 2071     } else {
 2072 
 2073         *out << spacer; //Bugfix fehlender Space nach Strings
 2074         if (applySyntaxTestCase){
 2075             stateTraceCurrent.push_back(ps);
 2076         }
 2077     }
 2078 
 2079     spacer = initialSpacer;
 2080 
 2081     token.clear();
 2082 }
 2083 
 2084 void CodeGenerator::flushWs(int arg)
 2085 {
 2086     PositionState ps(currentState, 0, true);
 2087     //workaround condition
 2088     for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
 2089         stateTraceCurrent.push_back(ps);
 2090         //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
 2091     }
 2092 
 2093     //fix canvas whitespace
 2094     if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
 2095         *out<<maskWsBegin;
 2096     }
 2097 
 2098     *out << wsBuffer;
 2099     wsBuffer.clear();
 2100 }
 2101 
 2102 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
 2103     switch (s) {
 2104 
 2105         case STANDARD:
 2106             return STY_NAME_STD;
 2107         case STRING:
 2108             return STY_NAME_STR;
 2109         case NUMBER:
 2110             return STY_NAME_NUM;
 2111         case SL_COMMENT:
 2112             return STY_NAME_SLC;
 2113         case ML_COMMENT:
 2114             return STY_NAME_COM;
 2115         case ESC_CHAR:
 2116             return STY_NAME_ESC;
 2117         case DIRECTIVE:
 2118             return STY_NAME_DIR;
 2119         case DIRECTIVE_STRING:
 2120             return STY_NAME_DST;
 2121         case SYMBOL:
 2122             return STY_NAME_SYM;
 2123         case STRING_INTERPOLATION:
 2124             return STY_NAME_IPL;
 2125         case SYNTAX_ERROR:
 2126             return STY_NAME_ERR;
 2127         case _WS:
 2128             return "ws";
 2129         case KEYWORD: {
 2130 
 2131             if (!kwClass)
 2132                 return "ws";
 2133 
 2134             char kwName[20] = {0};
 2135             snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
 2136 
 2137             return string(kwName);
 2138         }
 2139         default:
 2140             return "unknown_test";
 2141     }
 2142 }
 2143 
 2144 void CodeGenerator::printTrace(const string &s){
 2145     std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
 2146     for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
 2147         std::cout<<" "<<stateTraceCurrent[i].state;
 2148     }
 2149     std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
 2150     for (unsigned int i=0; i< stateTraceTest.size(); i++) {
 2151         std::cout<<" "<<stateTraceTest[i].state;
 2152     }
 2153     std::cout<<"\n";
 2154 }
 2155 
 2156 //column: lineIndex (not a UTF-8 validated string position)
 2157 void CodeGenerator::runSyntaxTestcases(unsigned int column){
 2158 
 2159     if (encoding=="utf-8")
 2160         column = StringTools::utf8_strlen(line.substr(0, column));
 2161 
 2162     unsigned int assertGroup=0;
 2163     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
 2164     State assertState=_UNKNOWN;
 2165     bool negation=false;
 2166     bool testFailed=false;
 2167 
 2168     ostringstream errMsg;
 2169     string prefix;
 2170     //printTrace("trace 2");
 2171 
 2172     if (typeDescPos!=string::npos) {
 2173 
 2174         if (line[typeDescPos]=='~') {
 2175 
 2176             negation=true;
 2177             prefix="~";
 2178             ++typeDescPos;
 2179         }
 2180 
 2181         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
 2182             assertState=NUMBER;
 2183         //TODO temp. fix to allow old and new string classes
 2184         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
 2185             assertState=STRING;
 2186         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
 2187             assertState=ESC_CHAR;
 2188         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
 2189             assertState=STRING_INTERPOLATION;
 2190         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
 2191             assertState=SYMBOL;
 2192         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
 2193             assertState=DIRECTIVE;
 2194         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
 2195             assertState=SL_COMMENT;
 2196         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
 2197             assertState=ML_COMMENT;
 2198         else if (line.find("ws", typeDescPos)==typeDescPos)
 2199             assertState=_WS;
 2200         //TODO temp. fix to allow old and new default classes
 2201         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos  || line.find("std", typeDescPos)==typeDescPos)
 2202             assertState=STANDARD;
 2203         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
 2204             assertState=DIRECTIVE_STRING;
 2205 
 2206         else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
 2207             assertState=KEYWORD;
 2208             if (isalpha(line[typeDescPos+2]))
 2209                 assertGroup=line[typeDescPos+2] - 'a' +1;
 2210         }
 2211 
 2212        if (   (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
 2213             || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
 2214             || assertGroup != stateTraceTest[column].kwClass) {
 2215 
 2216             testFailed=!negation;
 2217 
 2218         } else if (negation ) {
 2219 
 2220             //TODO Fix ~ws
 2221             if (assertState!=_WS  && !stateTraceTest[column].isWhiteSpace )
 2222                 testFailed=true;
 2223         }
 2224 
 2225         if (testFailed) {
 2226             errMsg << inFile << " line " << lineNumber << ", column "<< column
 2227                     << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
 2228                     << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
 2229 
 2230             failedPosTests.push_back(errMsg.str());
 2231         }
 2232 
 2233     }
 2234 
 2235     lineContainedTestCase=true;
 2236 }
 2237 
 2238 string CodeGenerator::getNewLine()
 2239 {
 2240     ostringstream ss;
 2241     printSyntaxError(ss);
 2242     if (printNewLines)
 2243         ss << newLineTag;
 2244     return ss.str();
 2245 }
 2246 
 2247 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
 2248 {
 2249 
 2250     Diluculum::LuaValueList params;
 2251     params.push_back(Diluculum::LuaValue(lineNumber));
 2252 
 2253     return currentSyntax->getLuaState()->call ( isLineStart ?
 2254             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
 2255             params,"getDecorateLineFct call");
 2256 }
 2257 
 2258 void CodeGenerator::setOverrideParams() {
 2259     if (currentSyntax->requiresParamUpdate()) {
 2260         if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
 2261             toggleDynRawString=true; // reset to false in string state fct
 2262         }
 2263         if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
 2264             maskWs=true;
 2265         }
 2266         if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
 2267             spacer=currentSyntax->getOverrideConfigVal("format.spacer");
 2268         }
 2269     }
 2270 }
 2271 
 2272 void CodeGenerator::insertLineNumber ( bool insertNewLine )
 2273 {
 2274     if ( insertNewLine ) {
 2275         if (currentSyntax->getDecorateLineEndFct()) {
 2276             Diluculum::LuaValueList res=callDecorateLineFct(false);
 2277             if (res.size()==1) {
 2278                 setOverrideParams();
 2279                 wsBuffer +=res[0].asString();
 2280             }
 2281         }
 2282         wsBuffer += getNewLine();
 2283     }
 2284 
 2285     if (currentSyntax->getDecorateLineBeginFct()) {
 2286         Diluculum::LuaValueList res=callDecorateLineFct(true);
 2287         if (res.size()==1) {
 2288             setOverrideParams();
 2289             wsBuffer += res[0].asString();
 2290         }
 2291     }
 2292 
 2293     if ( showLineNumbers ) {
 2294         ostringstream os;
 2295         ostringstream numberPrefix;
 2296 
 2297         os << setw ( getLineNumberWidth() ) << right;
 2298         if( numberCurrentLine ) {
 2299             if ( lineNumberFillZeroes ) {
 2300                 os.fill ( '0' );
 2301             }
 2302             os << lineNumber+lineNumberOffset;
 2303         } else {
 2304             os << "";
 2305         }
 2306 
 2307         numberPrefix << openTags[LINENUMBER];
 2308         maskString ( numberPrefix, os.str() );
 2309 
 2310         //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
 2311         numberPrefix << initialSpacer << closeTags[LINENUMBER];
 2312         wsBuffer += numberPrefix.str();
 2313     }
 2314 }
 2315 
 2316 unsigned int CodeGenerator::getLineIndex()
 2317 {
 2318     return lineIndex;
 2319 }
 2320 unsigned int CodeGenerator::getLastLineLength()
 2321 {
 2322     return lastLineLength;
 2323 }
 2324 
 2325 bool CodeGenerator::requiresTwoPassParsing() const {
 2326     if (!currentSyntax) return false;
 2327     return currentSyntax->getPersistentSnippetsNum()>0;
 2328 }
 2329 
 2330 
 2331 bool CodeGenerator::printExternalStyle ( const string &outFile )
 2332 {
 2333     if ( !includeStyleDef ) {
 2334         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
 2335         if ( !cssOutFile->fail() ) {
 2336             if (!omitVersionComment) {
 2337                 *cssOutFile << styleCommentOpen
 2338                             <<" Style definition file generated by highlight "
 2339                             << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
 2340                             << " " << styleCommentClose << "\n";
 2341             }
 2342             *cssOutFile << getStyleDefinition()
 2343                         << "\n";
 2344             *cssOutFile << readUserStyleDef();
 2345             if ( !outFile.empty() ) delete cssOutFile;
 2346         } else {
 2347             return false;
 2348         }
 2349     }
 2350     return true;
 2351 }
 2352 
 2353 bool CodeGenerator::printPersistentState ( const string &outFile )
 2354 {
 2355     if (!currentSyntax) return false;
 2356 
 2357     ofstream pluginOutFile( outFile.c_str());
 2358     if ( !pluginOutFile.fail() ) {
 2359 
 2360         pluginOutFile   <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
 2361                         <<"Categories = {\"two-pass\" }\n\n"
 2362                         <<"function syntaxUpdate(desc)\n\n";
 2363 
 2364         pluginOutFile << currentSyntax->getPersistentHookConditions();
 2365 
 2366         for (auto snippet: currentSyntax->getPersistentSnippets())
 2367         {
 2368             pluginOutFile << snippet <<"\n\n";
 2369         }
 2370 
 2371         pluginOutFile<<"end\n\n"
 2372                      <<"Plugins={\n"
 2373                      <<"  { Type=\"lang\", Chunk=syntaxUpdate }\n"
 2374                      <<"}\n";
 2375     } else {
 2376         return false;
 2377     }
 2378 
 2379     return true;
 2380 }
 2381 
 2382 string CodeGenerator::readUserStyleDef()
 2383 {
 2384     ostringstream ostr;
 2385     if ( !styleInputPath.empty() ) {
 2386         ifstream userStyleDef ( styleInputPath.c_str() );
 2387         if ( userStyleDef ) {
 2388             ostr << "\n" << styleCommentOpen
 2389                 << " Content of " << styleInputPath
 2390                 << ": " <<styleCommentClose << "\n";
 2391             string line;
 2392             while ( getline ( userStyleDef, line ) ) {
 2393                 ostr << line << "\n";
 2394             }
 2395             userStyleDef.close();
 2396         } else {
 2397             ostr << styleCommentOpen
 2398                 << " ERROR: Could not include " << styleInputPath
 2399                 << "." << styleCommentClose << "\n";
 2400         }
 2401     }
 2402 
 2403     string injections=docStyle.getInjections();
 2404     if (!injections.empty()) {
 2405         ostr    << "\n" << styleCommentOpen
 2406                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
 2407         ostr << injections<<"\n";
 2408     }
 2409     return ostr.str();
 2410 }
 2411 
 2412 bool CodeGenerator::initPluginScript(const string& script)
 2413 {
 2414 
 2415     if (script.empty()) return true;
 2416 
 2417     try {
 2418 
 2419         userScriptError="";
 2420         Diluculum::LuaState ls;
 2421 
 2422         ls.doFile (script);
 2423         int listIdx=1;
 2424 
 2425         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
 2426 
 2427             // Theme plugins
 2428             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
 2429                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2430                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2431                 }
 2432             }
 2433             // Syntax plugins
 2434             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
 2435                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2436                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2437                 }
 2438             }
 2439             // Format plugins
 2440             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
 2441                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
 2442                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
 2443                 }
 2444             }
 2445 
 2446             listIdx++;
 2447         }
 2448     }  catch (Diluculum::LuaError &err) {
 2449         userScriptError=err.what();
 2450         return false;
 2451     }
 2452     return true;
 2453 }
 2454 
 2455 void CodeGenerator::resetSyntaxReaders() {
 2456     for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
 2457         delete it->second;
 2458     }
 2459     currentSyntax=NULL;
 2460     syntaxReaders.clear();
 2461 }
 2462 
 2463 bool CodeGenerator::syntaxRequiresTwoPassRun() {
 2464     if (!currentSyntax) return false;
 2465     return currentSyntax->requiresTwoPassRun();
 2466 }
 2467 
 2468 void CodeGenerator::clearPersistentSnippets(){
 2469     if (currentSyntax) {
 2470         currentSyntax->clearPersistentSnippets();
 2471     }
 2472 }
 2473 
 2474 void CodeGenerator::updateKeywordClasses(){
 2475 
 2476     if (openTags.size()) {
 2477         if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
 2478             // remove dynamic keyword tag delimiters of the old language definition
 2479             vector<string>::iterator keyStyleOpenBegin =
 2480             openTags.begin() + NUMBER_BUILTIN_STATES;
 2481             vector<string>::iterator keyStyleCloseBegin =
 2482             closeTags.begin() + NUMBER_BUILTIN_STATES;
 2483             openTags.erase ( keyStyleOpenBegin, openTags.end() );
 2484             closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
 2485         }
 2486         // add new keyword tag delimiters
 2487 
 2488         for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
 2489             openTags.push_back ( getKeywordOpenTag ( i ) );
 2490             closeTags.push_back ( getKeywordCloseTag ( i ) );
 2491         }
 2492     }
 2493 }
 2494 
 2495 
 2496 }