"Fossies" - the Fresh Open Source Software Archive 
Member "highlight-4.6/src/core/codegenerator.cpp" (19 May 2023, 71381 Bytes) of package /linux/www/highlight-4.6.tar.bz2:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "codegenerator.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.5_vs_4.6.
1 /***************************************************************************
2 codegenerator.cpp - description
3 -------------------
4 begin : Die Jul 9 2002
5 copyright : (C) 2002-2023 by Andre Simon
6 email : a.simon@mailbox.org
7 ***************************************************************************/
8
9
10 /*
11 This file is part of Highlight.
12
13 Highlight is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 Highlight is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with Highlight. If not, see <http://www.gnu.org/licenses/>.
25 */
26
27
28 #include <climits>
29 #include <memory>
30
31 #include <chrono>
32 #include <thread>
33
34
35 #include <boost/xpressive/xpressive_dynamic.hpp>
36
37 #include "codegenerator.h"
38
39 #include "htmlgenerator.h"
40 #include "xhtmlgenerator.h"
41 #include "rtfgenerator.h"
42 #include "latexgenerator.h"
43 #include "texgenerator.h"
44 #include "svggenerator.h"
45 #include "bbcodegenerator.h"
46 #include "pangogenerator.h"
47 #include "odtgenerator.h"
48 #include "astyle/astyle.h"
49
50 #if !defined (QT)
51 #include "ansigenerator.h"
52 #include "xterm256generator.h"
53 #endif
54
55 namespace highlight
56 {
57 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
58
59 // must not start with kw, st, sm prefixes
60 const string CodeGenerator::STY_NAME_STD="def";
61 const string CodeGenerator::STY_NAME_STR="sng";
62 const string CodeGenerator::STY_NAME_NUM="num";
63 const string CodeGenerator::STY_NAME_SLC="slc";
64 const string CodeGenerator::STY_NAME_COM="com";
65 const string CodeGenerator::STY_NAME_ESC="esc";
66 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
67 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
68 const string CodeGenerator::STY_NAME_LIN="lin";
69 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
70 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
71
72 const string CodeGenerator::STY_NAME_HVR="hvr";
73 const string CodeGenerator::STY_NAME_ERR="err";
74 const string CodeGenerator::STY_NAME_ERM="erm";
75
76 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
77
78
79 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
80 {
81 CodeGenerator* generator=NULL;
82 switch ( type ) {
83 case HTML:
84 generator = new HtmlGenerator();
85 break;
86 case XHTML:
87 generator = new XHtmlGenerator();
88 break;
89 case TEX:
90 generator = new TexGenerator ();
91 break;
92 case LATEX:
93 generator = new LatexGenerator();
94 break;
95 case RTF:
96 generator = new RtfGenerator ();
97 break;
98 case SVG:
99 generator = new SVGGenerator();
100 break;
101 case BBCODE:
102 generator = new BBCodeGenerator();
103 break;
104 case PANGO:
105 generator = new PangoGenerator();
106 break;
107 case ODTFLAT:
108 generator = new ODTGenerator();
109 break;
110 case ESC_ANSI:
111 generator = new AnsiGenerator();
112 break;
113 case ESC_XTERM256:
114 case ESC_TRUECOLOR:
115 generator = new Xterm256Generator();
116 generator->setESCTrueColor(type==ESC_TRUECOLOR);
117 break;
118 default:
119 break;
120 }
121 return generator;
122 }
123
124
125 CodeGenerator::CodeGenerator ( highlight::OutputType type )
126 :currentSyntax(NULL),
127 in ( NULL ),
128 out ( NULL ),
129 encoding ( "none" ),
130 docTitle ( "Source file" ),
131 maskWs ( false ),
132 excludeWs ( false ),
133 fragmentOutput ( false ),
134 keepInjections( false ),
135 showLineNumbers ( false ),
136 lineNumberFillZeroes ( false ),
137 printNewLines(true),
138 omitVersionComment(false),
139 isolateTags(false),
140 disableStyleCache(false),
141 baseFontSize("10"),
142 lineNumber ( 0 ),
143 lineNumberOffset ( 0 ),
144 currentState ( _UNKNOWN ),
145 currentKeywordClass ( 0 ),
146 includeStyleDef ( false ),
147 numberCurrentLine ( false ),
148 lineIndex ( 0 ),
149 lastLineLength( 0 ),
150 syntaxChangeIndex(UINT_MAX),
151 syntaxChangeLineNo(UINT_MAX),
152 lineNumberWidth ( 5 ),
153 startLineCnt( 1 ),
154 startLineCntCurFile( 1 ),
155 maxLineCnt ( UINT_MAX ),
156 inputFilesCnt (0),
157 processedFilesCnt (0),
158 kwOffset(0),
159 noTrailingNewLine(0),
160
161 terminatingChar ( '\0' ),
162 formatter ( NULL ),
163 streamIterator ( NULL ),
164 formattingEnabled ( false ),
165 formattingPossible ( false ),
166 validateInput ( false ),
167 numberWrappedLines ( true ),
168 resultOfHook(false),
169 lineContainedTestCase(false),
170 lineContainedStmt(false),
171 applySyntaxTestCase(false),
172 toggleDynRawString(false),
173 lsEnableHoverRequests(false),
174 lsCheckSemanticTokens(false),
175 lsCheckSyntaxErrors(false),
176
177 keywordCase ( StringTools::CASE_UNCHANGED ),
178 eolDelimiter ('\n'),
179 outputType ( type )
180 {
181 }
182
183
184 CodeGenerator::~CodeGenerator()
185 {
186 delete formatter;
187 delete streamIterator;
188
189 resetSyntaxReaders();
190
191 for (unsigned int i=0; i<pluginChunks.size(); i++) {
192 delete pluginChunks[i];
193 }
194 pluginChunks.clear();
195 }
196
197
198 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
199 {
200 this->themePath=themePath;
201 bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
202 initOutputTags();
203 return loadOK;
204 }
205
206 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
207 const string& workspace, const string& syntax,
208 int delay, int logLevel, bool legacy )
209 {
210 if (LSPClient.isInitialized()) {
211 return LSResult::INIT_OK;
212 }
213
214 LSPClient.setLogging(logLevel>1);
215
216 LSPClient.setExecutable(executable);
217 LSPClient.setWorkspace(workspace);
218 LSPClient.setOptions(options);
219 LSPClient.setSyntax(syntax);
220 LSPClient.setInitializeDelay(delay);
221 LSPClient.setLegacyProtocol(legacy);
222 if (!LSPClient.connect()){
223 return LSResult::INIT_BAD_PIPE;
224 }
225
226 if (!LSPClient.runInitialize()){
227 return LSResult::INIT_BAD_REQUEST;
228 }
229 for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
230 currentSyntax->generateNewKWClass(i+1, "st");
231 }
232 LSPClient.runInitialized();
233 updateKeywordClasses();
234 return LSResult::INIT_OK;
235 }
236
237 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
238 lsDocumentPath = fileName;
239 return LSPClient.runDidOpen(fileName, suffix);
240 }
241
242 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
243 lsDocumentPath.clear();
244 return LSPClient.runDidClose(fileName, suffix);
245 }
246
247 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
248 lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
249 return lsCheckSemanticTokens;
250 }
251
252 bool CodeGenerator::isHoverProvider(){
253 return LSPClient.isHoverProvider();
254 }
255
256 bool CodeGenerator::isSemanticTokensProvider(){
257 return LSPClient.isSemanticTokensProvider();
258 }
259
260 void CodeGenerator::lsAddHoverInfo(bool hover){
261 lsEnableHoverRequests = hover;
262 }
263
264 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
265 lsCheckSyntaxErrors = error;;
266 }
267
268
269 void CodeGenerator::exitLanguageServer () {
270 LSPClient.runShutdown();
271 LSPClient.runExit();
272 }
273
274 const string& CodeGenerator::getStyleName()
275 {
276 return themePath;
277 }
278
279 void CodeGenerator::setLineNumberWidth ( int w )
280 {
281 lineNumberWidth=w;
282 }
283
284 int CodeGenerator::getLineNumberWidth()
285 {
286 return lineNumberWidth;
287 }
288
289 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
290 {
291 showLineNumbers=flag;
292 lineNumberOffset = startCnt-1;
293 }
294
295 bool CodeGenerator::getPrintLineNumbers()
296 {
297 return showLineNumbers;
298 }
299
300 void CodeGenerator::setPrintZeroes ( bool flag )
301 {
302 lineNumberFillZeroes=flag;
303 }
304
305 bool CodeGenerator::getPrintZeroes()
306 {
307 return lineNumberFillZeroes;
308 }
309
310 void CodeGenerator::setIncludeStyle ( bool flag )
311 {
312 includeStyleDef = flag;
313 }
314
315 void CodeGenerator::disableTrailingNL ( int flag )
316 {
317 noTrailingNewLine = flag;
318 }
319
320 void CodeGenerator::setStyleInputPath ( const string& path )
321 {
322 styleInputPath = path;
323 }
324
325 void CodeGenerator::setStyleOutputPath ( const string& path )
326 {
327 styleOutputPath = path;
328 }
329
330 void CodeGenerator::setPluginParameter ( const string& param )
331 {
332 pluginParameter = param;
333 }
334
335 const string& CodeGenerator::getStyleInputPath()
336 {
337 return styleInputPath;
338 }
339
340 const string& CodeGenerator::getStyleOutputPath()
341 {
342 return styleOutputPath;
343 }
344
345 void CodeGenerator::setFragmentCode ( bool flag )
346 {
347 fragmentOutput=flag;
348 }
349
350 bool CodeGenerator::getFragmentCode()
351 {
352 return fragmentOutput;
353 }
354 void CodeGenerator::setKeepInjections ( bool flag )
355 {
356 keepInjections=flag;
357 }
358
359 bool CodeGenerator::getKeepInjections()
360 {
361 return keepInjections;
362 }
363 void CodeGenerator::setValidateInput ( bool flag )
364 {
365 validateInput=flag;
366 }
367
368 bool CodeGenerator::getValidateInput()
369 {
370 return validateInput;
371 }
372
373 void CodeGenerator::setNumberWrappedLines ( bool flag )
374 {
375 numberWrappedLines=flag;
376 }
377
378 bool CodeGenerator::getNumberWrappedLines()
379 {
380 return numberWrappedLines;
381 }
382
383 void CodeGenerator::setOmitVersionComment ( bool flag )
384 {
385 omitVersionComment=flag;
386 }
387
388 bool CodeGenerator::getOmitVersionComment ()
389 {
390 return omitVersionComment;
391 }
392
393 void CodeGenerator::setIsolateTags ( bool flag )
394 {
395 isolateTags=flag;
396 }
397
398 bool CodeGenerator::getIsolateTags ()
399 {
400 return isolateTags;
401 }
402
403 void CodeGenerator::setBaseFont ( const string& fontName )
404 {
405 baseFont = fontName;
406 }
407
408 void CodeGenerator::setBaseFontSize ( const string& fontSize)
409 {
410 baseFontSize = fontSize;
411 }
412
413 void CodeGenerator::setStyleCaching ( bool flag )
414 {
415 disableStyleCache=!flag;
416 }
417
418 const string CodeGenerator::getBaseFont() const
419 {
420 if ( !baseFont.empty() ) return baseFont;
421 switch ( outputType ) {
422 case HTML:
423 case XHTML:
424 case SVG:
425 return "'Courier New',monospace";
426 break;
427 case LATEX:
428 return "ttfamily";
429 break;
430 case TEX:
431 return "tt";
432 break;
433 default:
434 return "Courier New";
435 }
436 }
437
438 const string CodeGenerator::getBaseFontSize()
439 {
440 return baseFontSize;
441 }
442
443 void CodeGenerator::setTitle ( const string & title )
444 {
445 if ( !title.empty() ) docTitle= title;
446 }
447
448 string CodeGenerator::getTitle()
449 {
450 return docTitle;
451 }
452
453 void CodeGenerator::setEncoding ( const string& encodingName )
454 {
455 encoding = encodingName;
456 }
457
458 bool CodeGenerator::formattingDisabled()
459 {
460 return !formattingEnabled;
461 }
462
463 void CodeGenerator::setStartingInputLine ( unsigned int begin )
464 {
465 startLineCnt = startLineCntCurFile = begin;
466 }
467
468 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
469 {
470 maxLineCnt = cnt;
471 }
472
473 void CodeGenerator::setFilesCnt ( unsigned int cnt )
474 {
475 inputFilesCnt = cnt;
476 processedFilesCnt = 0;
477 }
478
479 bool CodeGenerator::formattingIsPossible()
480 {
481 return formattingPossible;
482 }
483 unsigned char CodeGenerator::getAdditionalEOFChar()
484 {
485 return extraEOFChar;
486 }
487 void CodeGenerator::setAdditionalEOFChar ( unsigned char eofChar )
488 {
489 extraEOFChar = eofChar;
490 }
491 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
492 unsigned int lineLength,
493 int numberSpaces )
494 {
495 bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
496 bool replaceTabs = numberSpaces > 0;
497
498 if ( enableWrap || replaceTabs ) {
499 preFormatter.setWrap ( enableWrap );
500 preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
501 preFormatter.setWrapLineLength ( lineLength );
502 preFormatter.setReplaceTabs ( replaceTabs );
503 preFormatter.setNumberSpaces ( numberSpaces );
504 }
505 }
506
507 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
508 {
509 keywordCase = keyCase;
510 }
511
512 void CodeGenerator::setEOLDelimiter(char delim)
513 {
514 eolDelimiter = delim;
515 }
516
517 void CodeGenerator::reset()
518 {
519 lineIndex = 0;
520 lineNumber = 0;
521 line.clear();
522 preFormatter.reset();
523 inFile.clear();
524 outFile.clear();
525 embedLangDefPath.clear();
526 printNewLines=true;
527 syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
528 startLineCntCurFile = startLineCnt;
529 applySyntaxTestCase=lineContainedTestCase=false;
530 if (currentSyntax){
531 vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
532 docStyle.overrideAttributes(overrideStyleAttrs);
533 if (overrideStyleAttrs.size())
534 disableStyleCache = true;
535 }
536 }
537
538 string CodeGenerator::getThemeInitError()
539 {
540 return docStyle.getErrorMessage();
541 }
542
543 string CodeGenerator::getPluginScriptError()
544 {
545 return userScriptError;
546 }
547
548 string CodeGenerator::getSyntaxRegexError()
549 {
550 return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
551 }
552 string CodeGenerator::getSyntaxLuaError()
553 {
554 return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
555
556 }
557 string CodeGenerator::getSyntaxDescription()
558 {
559 return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
560
561 }
562 string CodeGenerator::getSyntaxEncodingHint()
563 {
564 return (currentSyntax)? currentSyntax->getEncodingHint(): "";
565
566 }
567 string CodeGenerator::getThemeDescription()
568 {
569 return docStyle.getDescription();
570 }
571
572 string CodeGenerator::getSyntaxCatDescription(){
573 return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
574 }
575
576 string CodeGenerator::getThemeCatDescription()
577 {
578 return docStyle.getCategoryDescription();
579 }
580
581 float CodeGenerator::getThemeContrast()
582 {
583 return docStyle.getContrast();
584 }
585
586 unsigned int CodeGenerator::getLineNumber()
587 {
588 return lineNumber;
589 }
590 bool CodeGenerator::AtEnd(char c) const {
591 bool instream_eof = in->eof();
592 if (extraEOFChar == 255)
593 return instream_eof;
594
595 bool c_null = c == extraEOFChar;
596 bool instream_peek_null = false;
597 if (instream_eof == false && c_null == false)
598 instream_peek_null = in->peek() == extraEOFChar;
599 bool ret = instream_eof || c_null || instream_peek_null;
600 return ret;
601 }
602 bool CodeGenerator::readNewLine ( string &newLine )
603 {
604 bool eof=false;
605
606 if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
607
608 while (!eof && startLineCntCurFile>0) {
609 if ( formattingPossible && formattingEnabled ) {
610 eof=!formatter->hasMoreLines();
611 if ( !eof ) {
612 newLine = formatter->nextLine();
613 }
614 } else {
615 eof = AtEnd() || ! getline ( *in, newLine, eolDelimiter );
616 }
617 --startLineCntCurFile;
618 }
619
620 startLineCntCurFile=1;
621 #ifndef _WIN32
622 // drop CR of CRLF files
623 if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
624 newLine.erase(newLine.size() - 1);
625 #endif
626
627 return eof || ( lineNumber == maxLineCnt );
628 }
629
630 void CodeGenerator::matchRegex ( const string &line, State skipState)
631 {
632 regexGroups.clear();
633 int matchBegin=0;
634 int groupID=0;
635
636 // cycle through all regex, save the start and ending indices of matches to report them later
637 for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
638 RegexElement *regexElem = currentSyntax->getRegexElements() [i];
639
640 if (regexElem->open == skipState) continue;
641
642 if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
643 continue;
644 }
645
646 if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
647 continue;
648 }
649
650 boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
651 boost::xpressive::sregex_iterator end;
652
653 for( ; cur != end; ++cur ) {
654 groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
655 matchBegin = cur->position(groupID);
656
657 regexGroups.insert (
658 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
659
660 // priority regex (match required)
661 if (regexElem->priority) {
662 return;
663 }
664 }
665 }
666 }
667
668 unsigned char CodeGenerator::getInputChar()
669 {
670 // end of line?
671 if ( lineIndex == line.length() ) {
672
673 //more testing required:
674 if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
675 lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
676
677 bool eof=false;
678 if ( preFormatter.isEnabled() ) {
679 if ( !preFormatter.hasMoreLines() ) {
680 eof=readNewLine ( line );
681 preFormatter.setLine ( line );
682 ++lineNumber;
683 numberCurrentLine = true;
684 } else {
685 if (numberWrappedLines)
686 ++lineNumber;
687 numberCurrentLine = numberWrappedLines;
688 }
689
690 line = preFormatter.getNextLine();
691 } else {
692 eof=readNewLine ( line );
693 ++lineNumber;
694
695 numberCurrentLine = true;
696 }
697 lineIndex=0;
698
699 if (!lineContainedTestCase && applySyntaxTestCase){
700 stateTraceTest = stateTraceCurrent;
701 stateTraceCurrent.clear();
702 }
703
704 lineContainedTestCase=false;
705 lineContainedStmt=false;
706 matchRegex ( line );
707
708 return ( eof ) ?'\0':'\n';
709 }
710
711 return line[lineIndex++];
712 }
713
714 /** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS,
715 * Coffeescript with block regex, Pas + ASM)
716 * especially nested syntax in one line
717 */
// Determines the state of the token at the current input position.
// Parameter oldState: the state before this call; it is used for the
// test-position check and is handed to validateState().
// Returns _EOL, _EOF, _WS, _TESTPOS, SYNTAX_ERROR, KEYWORD, the (validated)
// state of a regex match at this position, or STANDARD if nothing matches.
// Side effects visible here: token, lineIndex, currentKeywordClass,
// lsSyntaxErrorDesc, embedLangDefPath and the syntax change markers may be modified.
718 State CodeGenerator::getCurrentState (State oldState)
719 {
720 unsigned char c='\0';
721
// No pending token: read a new character. Otherwise rewind lineIndex to just
// behind the first character of the pending token and re-examine that character.
722 if ( token.length() ==0 ) {
723 c=getInputChar();
724 } else {
725 lineIndex-= ( token.length()-1 );
726 c=token[0];
727 }
728 if ( c=='\n' ) {
729 return _EOL; // End of line
730 }
731
732 if ( c=='\0' ) {
733 return _EOF; // End of file
734 }
735
736 if ( c==' ' || c=='\t' ) {
737 token= c;
738 return _WS; // White space
739 }
740
// Syntax test mode only: '^' or '<' following a comment state is a test position marker.
741 if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT) ) {
742 token= c;
743 return _TESTPOS;
744 }
745
746 // at this position the syntax change takes place
// (the markers hold UINT_MAX while no syntax change is pending)
747 if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
748 loadEmbeddedLang(embedLangDefPath); // load new syntax
749 matchRegex(line); // recognize new patterns in the (remaining) line
750 syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
751 }
752
// Jump target used after an embedded code opening delimiter was found below.
753 SKIP_EMBEDDED:
754
// Language server: an error reported for this position yields SYNTAX_ERROR;
// the token covers the reported length and the error id is kept in lsSyntaxErrorDesc.
755 if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
756 highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
757 token = line.substr ( lineIndex-1, errorToken.length);
758 lineIndex += errorToken.length-1;
759 lsSyntaxErrorDesc = errorToken.id;
760
761 //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
762 return SYNTAX_ERROR;
763 }
764
// Language server: a semantic token at this position becomes a KEYWORD, but only
// if docStyle.getSemanticStyle returns a non-zero id for it.
765 if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
766 highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
767 int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
768 if (semStyleKwId) {
769 token = line.substr ( lineIndex-1, semToken.length);
770 lineIndex += semToken.length-1;
771
772 currentKeywordClass = semStyleKwId + kwOffset; // +offset of missing kw groups in the theme
773 //std::cerr <<"l "<<lineNumber<< "t "<<token<< " semStyleKwId "<< semStyleKwId << " off "<<kwOffset<<" -> " << semToken.id <<"\n";
774 return KEYWORD;
775 }
776 }
777
778 // Test if a regular expression was found at the current position
// (regexGroups is keyed by the 1-based match position stored by matchRegex)
779 if ( !regexGroups.empty() ) {
780 if ( regexGroups.count ( lineIndex ) ) {
781 token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );
782
// oldIndex keeps the lookup key because lineIndex is advanced to the end of the match.
783 unsigned int oldIndex= lineIndex;
784 if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;
785
786 if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
787 //do not handle a nested section if the syntax is marked as "sealed"
788 if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
789 embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
790 //remember position
791 syntaxChangeIndex = lineIndex+2;
792 syntaxChangeLineNo = lineNumber;
793 }
794
795 // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
796 matchRegex(line, EMBEDDED_CODE_BEGIN);
797 lineIndex = oldIndex;
798 goto SKIP_EMBEDDED; // this is how it should be done
799 }
800
// Identifiers and keywords are looked up in the keyword lists first; a KEYWORD
// regex without a list entry falls back to the keyword class of the regex itself.
801 if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
802 string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
803 currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)
804
805 if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
806 currentKeywordClass = regexGroups[oldIndex].kwClass;
807 }
808 return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
809 } else {
810 return validateState(regexGroups[oldIndex].state, oldState);
811 }
812 }
813 }
814
815 // Character not referring to any state
816 token = c;
817 return STANDARD;
818 }
819
820 State CodeGenerator::validateState(State newState, State oldState)
821 {
822
823 if (currentSyntax->getValidateStateChangeFct()) {
824 Diluculum::LuaValueList params;
825 params.push_back(Diluculum::LuaValue(oldState));
826 params.push_back(Diluculum::LuaValue(newState));
827 params.push_back(Diluculum::LuaValue(token));
828 params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
829 params.push_back(Diluculum::LuaValue(lineNumber) );
830 params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
831
832 Diluculum::LuaValueList res=
833 currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
834 params,"getValidateStateChangeFct call") ;
835
836 resultOfHook = res.size()>=1;
837 if (resultOfHook) {
838
839 setOverrideParams();
840
841 State validatedState = (State)res[0].asInteger();
842 if ( validatedState== _REJECT) {
843
844 // proceed using only the first character of the token
845 if (res.size()==1) {
846 lineIndex -= (token.length() -1);
847 token=token.substr(0, 1);
848 }
849
850 //experimental for slim.lang: evaluate second return arg after _REJECT
851 if (res.size()>=2) {
852 lineIndex -= (token.length() );
853 token.clear();
854 return (State)res[1].asInteger();
855 }
856 return oldState;
857 }
858
859 return validatedState;
860 }
861 }
862 resultOfHook = false;
863
864 return newState;
865 }
866
867 unsigned int CodeGenerator::getCurrentKeywordClassId(){
868 unsigned int kwClassId=0;
869
870 // this vector contains the defined keyword classes, and currentKeywordClass is its index:
871 vector<string> kwClasses=currentSyntax->getKeywordClasses();
872
873 if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
874 string kwClassName=kwClasses[currentKeywordClass-1];
875 if (kwClassName.size()==3)
876 kwClassId = kwClassName[2] - 'a' + 1;
877 }
878 return kwClassId;
879 }
880
881 //it is faster to pass ostream reference
882 void CodeGenerator::maskString ( ostream& ss, const string & s )
883 {
884 string escHoverText;
885
886 if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
887
888 string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
889
890 for(const auto &c : hoverText)
891 {
892 if (isascii(c))
893 escHoverText.append(maskCharacter(c));
894 }
895 }
896
897 if (escHoverText.size()) {
898 ss << getHoverTagOpen(escHoverText);
899 }
900
901 for (const auto &c : s)
902 {
903 ss << maskCharacter ( c );
904 }
905
906 if (escHoverText.size()) {
907 ss << getHoverTagClose();
908 }
909
910 // The test markers position should also be deternmined by calculating the code points
911 if ( applySyntaxTestCase ) {
912
913 PositionState ps(currentState, getCurrentKeywordClassId(), false);
914
915 //TODO avoid repeated string comparison:
916 int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
917 for (int i=0; i< slen; i++ ) {
918 stateTraceCurrent.push_back(ps);
919 }
920 if (stateTraceCurrent.size()>200)
921 stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
922 }
923 }
924
925 void CodeGenerator::printSyntaxError ( ostream& ss ) {
926 if ( !lsSyntaxErrorDesc.empty()) {
927 ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
928
929 for(const auto &c : lsSyntaxErrorDesc)
930 {
931 ss << maskCharacter ( c );
932 }
933
934 ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
935 lsSyntaxErrorDesc.clear();
936 }
937 }
938
939 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
940 {
941
942 Diluculum::LuaValueList params;
943 params.push_back(Diluculum::LuaValue(token));
944 params.push_back(Diluculum::LuaValue(currentState));
945 params.push_back(Diluculum::LuaValue(currentKeywordClass));
946 params.push_back(Diluculum::LuaValue(lineContainedStmt));
947 params.push_back(Diluculum::LuaValue(lineNumber) );
948 params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
949
950 return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
951 params,"getDecorateFct call") ;
952 }
953
954 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
955 {
956 if ( flushWhiteSpace )
957 flushWs(1);
958 string caseToken = StringTools::change_case ( token, tcase );
959 if (currentSyntax->getDecorateFct()) {
960
961 Diluculum::LuaValueList res=callDecorateFct(caseToken);
962 if (res.size()==1) {
963 *out<<res[0].asString();
964 } else {
965 maskString ( *out, caseToken );
966 }
967 } else {
968 maskString ( *out, caseToken );
969 }
970
971 // check this *after* the decorate call
972 if ( currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
973 || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
974 lineContainedStmt = true;
975 }
976 token.clear();
977 }
978
979 bool CodeGenerator::styleFound()
980 {
981 return docStyle.found();
982 }
983
984 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
985 {
986 return true;
987 }
988
989 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
990 {
991
992 if ( formatter!=NULL ) {
993 return true;
994 }
995
996 if ( !indentScheme.size() ) return false;
997
998 formatter=new astyle::ASFormatter();
999
1000 if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
1001 formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
1002 } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
1003 formatter->setFormattingStyle ( astyle::STYLE_KR );
1004 } else if ( indentScheme=="java" ) {
1005 formatter->setFormattingStyle ( astyle::STYLE_JAVA );
1006 } else if ( indentScheme=="stroustrup" ) {
1007 formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
1008 } else if ( indentScheme=="whitesmith" ) {
1009 formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
1010 } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
1011 formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
1012 } else if ( indentScheme=="gnu" ) {
1013 formatter->setFormattingStyle ( astyle::STYLE_GNU );
1014 } else if ( indentScheme=="linux" ) {
1015 formatter->setFormattingStyle ( astyle::STYLE_LINUX );
1016 } else if ( indentScheme=="horstmann" ) {
1017 formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
1018 } else if ( indentScheme=="otbs" || indentScheme=="1tbs") {
1019 formatter->setFormattingStyle ( astyle::STYLE_1TBS );
1020 } else if ( indentScheme=="google") {
1021 formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
1022 } else if ( indentScheme=="pico" || indentScheme=="a11") {
1023 formatter->setFormattingStyle ( astyle::STYLE_PICO );
1024 } else if ( indentScheme=="lisp" || indentScheme=="python"|| indentScheme=="a12") {
1025 formatter->setFormattingStyle ( astyle::STYLE_LISP );
1026 } else if ( indentScheme=="vtk") {
1027 formatter->setFormattingStyle ( astyle::STYLE_VTK );
1028 } else if ( indentScheme=="mozilla") {
1029 formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
1030 } else if ( indentScheme=="webkit") {
1031 formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
1032 } else if ( indentScheme!="user" ){
1033 return false;
1034 }
1035 return formattingEnabled=true;
1036 }
1037
1038 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
1039 {
1040
1041 if (!embedded) {
1042 while (!nestedLangs.empty()) {
1043 nestedLangs.pop();
1044 }
1045 }
1046
1047 bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
1048 LoadResult result=LOAD_OK;
1049 if ( reloadNecessary ) {
1050 if (syntaxReaders.count(langDefPath)) {
1051 currentSyntax=syntaxReaders[langDefPath];
1052 result=LOAD_OK;
1053 } else {
1054
1055 currentSyntax=new SyntaxReader();
1056 result=currentSyntax->load(langDefPath, pluginParameter, outputType);
1057 syntaxReaders[langDefPath]=currentSyntax;
1058 }
1059
1060 if ( result==LOAD_OK ) {
1061 formattingPossible=currentSyntax->enableReformatting();
1062 updateKeywordClasses();
1063 }
1064 }
1065
1066 kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
1067
1068 return result;
1069 }
1070
1071 bool CodeGenerator::validateInputStream()
1072 {
1073 if ( !in ) return false;
1074
1075 // it is not possible to move stream pointer back with stdin
1076 if ( ( int ) in->tellg() == -1 ) // -1 : stdin
1077 return true;
1078
1079 // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
1080 // Magic configuration of "file"
1081 // This is intended for web plugins - only check filetypes often found in the net
1082 char magic_gif[] = {'G','I','F','8', 0};
1083 char magic_png[] = {'\x89','P','N','G', 0};
1084 char magic_java[] = {'\xCA','\xFE','\xBA','\xBE', 0};
1085 char magic_jpeg[] = {'\xFF','\xD8','\xFF', 0};
1086 char magic_bmp[] = {'B','M', 0};
1087 char magic_pdf[] = {'%','P','D','F', 0};
1088 char magic_utf8[] = {'\xEF','\xBB','\xBF',0};
1089 char magic_rar[] = {'R','a','r','!', 0};
1090 char magic_zip[] = {'P','K','\x03','\x04', 0};
1091 char magic_ace[] = {'*','*','A','C','E','*','*', 0};
1092 char magic_tgz[] = {'\x8b','\x1f', '\x00', '\x08', 0};
1093 char magic_bzip[] = {'B','Z', 0};
1094
1095 char* magic_table[] = {magic_utf8,
1096 magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
1097 magic_java,
1098 magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
1099 0
1100 };
1101
1102 char buffer [10]= {0};
1103 in->read ( buffer,8 ); //only read the first 8 bytes of input stream
1104
1105 int magic_index=0;
1106 while ( magic_table[magic_index] ) {
1107 if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
1108 break;
1109 }
1110 magic_index++;
1111 }
1112 int streamReadPos=0;
1113 if ( magic_table[magic_index] == magic_utf8 ) {
1114 //setEncoding("utf-8");
1115 streamReadPos=3; // remove UTF-8 magic number from output
1116 }
1117
1118 in -> seekg ( streamReadPos, ios::beg );
1119 in-> clear(); // clear fail bit to continue reading
1120
1121 return !magic_table[magic_index] // points to 0 if no pattern was found
1122 || magic_table[magic_index] == magic_utf8;
1123 }
1124
1125 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
1126
1127 if (currentSyntax && pluginChunks.size()) {
1128
1129 Diluculum::LuaState luaState;
1130
1131 Diluculum::LuaValueList chunkParams;
1132 chunkParams.push_back(currentSyntax->getDescription());
1133 for (unsigned int i=0; i<pluginChunks.size(); i++) {
1134 luaState.call(*pluginChunks[i], chunkParams, "format user function");
1135 }
1136
1137 if (luaState.globals().count(fctName)) {
1138 Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
1139
1140 luaState["HL_PLUGIN_PARAM"] = pluginParameter;
1141 luaState["HL_OUTPUT"] = outputType;
1142 luaState["HL_FORMAT_HTML"]=HTML;
1143 luaState["HL_FORMAT_XHTML"]=XHTML;
1144 luaState["HL_FORMAT_TEX"]=TEX;
1145 luaState["HL_FORMAT_LATEX"]=LATEX;
1146 luaState["HL_FORMAT_RTF"]=RTF;
1147 luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
1148 luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
1149 luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
1150 luaState["HL_FORMAT_SVG"]=SVG;
1151 luaState["HL_FORMAT_BBCODE"]=BBCODE;
1152 luaState["HL_FORMAT_PANGO"]=PANGO;
1153 luaState["HL_FORMAT_ODT"]=ODTFLAT;
1154
1155 Diluculum::LuaValueList params;
1156 Diluculum::LuaValueMap options;
1157 options[Diluculum::LuaValue("title")] = Diluculum::LuaValue( docTitle );
1158 options[Diluculum::LuaValue("encoding")] = Diluculum::LuaValue(encoding);
1159 options[Diluculum::LuaValue("fragment")] = Diluculum::LuaValue(fragmentOutput);
1160 options[Diluculum::LuaValue("font")] = Diluculum::LuaValue(getBaseFont());
1161 options[Diluculum::LuaValue("fontsize")] = Diluculum::LuaValue(getBaseFontSize());
1162
1163 params.push_back(inputFilesCnt);
1164 params.push_back(processedFilesCnt);
1165 params.push_back(options);
1166
1167 Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
1168 if (res.size()>=1) {
1169 *keepDefault=false;
1170 *result = res[0].asString();
1171 if (res.size()==2)
1172 *keepDefault = res[1].asBoolean();
1173 }
1174 delete documentFct;
1175 }
1176 }
1177 }
1178
1179 void CodeGenerator::printHeader()
1180 {
1181 bool keepDefaultHeader=true;
1182 string pluginHeader;
1183
1184 processedFilesCnt++;
1185
1186 applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
1187
1188 if ( ! fragmentOutput && keepDefaultHeader)
1189 *out << getHeader();
1190
1191 *out << pluginHeader;
1192
1193 if ( !fragmentOutput || keepInjections)
1194 *out << currentSyntax->getHeaderInjection();
1195 }
1196
1197 void CodeGenerator::printFooter()
1198 {
1199
1200 bool keepDefaultFooter=true;
1201 string pluginFooter;
1202
1203 applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
1204
1205 if ( !fragmentOutput || keepInjections)
1206 *out << currentSyntax->getFooterInjection();
1207
1208 *out << pluginFooter;
1209
1210 if ( ! fragmentOutput && keepDefaultFooter )
1211 *out << getFooter();
1212 }
1213
1214 ParseError CodeGenerator::generateFile ( const string &inFileName,
1215 const string &outFileName )
1216 {
1217 if ( !docStyle.found() ) {
1218 return BAD_STYLE;
1219 }
1220
1221 reset();
1222
1223 ParseError error=PARSE_OK;
1224
1225 inFile=inFileName;
1226 outFile=outFileName;
1227
1228 in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
1229
1230 if ( validateInput )
1231 if ( !validateInputStream() ) error= BAD_INPUT;
1232
1233 if ( !in->fail() && error==PARSE_OK ) {
1234 out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
1235 if ( out->fail() ) {
1236 error=BAD_OUTPUT;
1237 }
1238 }
1239
1240 if ( in->fail() ) {
1241 error=BAD_INPUT;
1242 }
1243
1244 if ( error==PARSE_OK ) {
1245 initASStream();
1246 currentSyntax->setInputFileName(inFile);
1247 printHeader();
1248 printBody();
1249 printFooter();
1250 }
1251
1252 if ( !outFileName.empty() ) {
1253 delete out;
1254 out=NULL;
1255 }
1256 if ( !inFileName.empty() ) {
1257 delete in;
1258 in=NULL;
1259 }
1260 return error;
1261 }
1262
1263 string CodeGenerator::generateString ( const string &input )
1264 {
1265
1266 if ( !docStyle.found() ) {
1267 return "";
1268 }
1269
1270 reset();
1271
1272 in = new istringstream ( input );
1273 out = new ostringstream ();
1274
1275 if ( in->fail() || out->fail() ) {
1276 return "";
1277 }
1278
1279 initASStream();
1280
1281 printHeader();
1282 printBody();
1283 printFooter();
1284
1285 string result = static_cast<ostringstream*> ( out )->str();
1286
1287 delete out;
1288 out=NULL;
1289 delete in;
1290 in=NULL;
1291
1292 return result;
1293 }
1294
1295 void CodeGenerator::initASStream() {
1296 if ( formatter != NULL ) {
1297 if (streamIterator) delete streamIterator;
1298 streamIterator = new astyle::ASStreamIterator ( in, extraEOFChar );
1299 formatter->init ( streamIterator );
1300
1301 if (currentSyntax->getDescription()=="C#") {
1302 formatter->setSharpStyle();
1303 } else if (currentSyntax->getDescription()=="Java") {
1304 formatter->setJavaStyle();
1305 } else if (currentSyntax->getDescription()=="Javascript") {
1306 formatter->setJSStyle();
1307 } else if (currentSyntax->getDescription()=="Objective C") {
1308 formatter->setObjCStyle();
1309 } else {
1310 formatter->setCStyle();
1311 }
1312
1313 }
1314 }
1315
1316 string CodeGenerator::generateStringFromFile ( const string &inFileName )
1317 {
1318
1319 if ( !docStyle.found() ) {
1320 return "";
1321 }
1322
1323 reset();
1324
1325 inFile = inFileName;
1326
1327 in = new ifstream ( inFileName.c_str() );
1328 out = new ostringstream ();
1329
1330 if ( in->fail() || out->fail() ) {
1331 return "";
1332 }
1333
1334 if ( validateInput && !validateInputStream() ) {
1335 return "ERROR: detected binary input";
1336 }
1337
1338 initASStream();
1339
1340 currentSyntax->setInputFileName(inFile);
1341
1342 printHeader();
1343 printBody();
1344 printFooter();
1345
1346 string result = static_cast<ostringstream*> ( out )->str();
1347
1348 delete out;
1349 out=NULL;
1350 delete in;
1351 in=NULL;
1352
1353 return result;
1354 }
1355
1356 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
1357 {
1358 if ( s==KEYWORD && kwClassID ) {
1359 return NUMBER_BUILTIN_STATES + kwClassID-1;
1360 }
1361 return ( unsigned int ) s ;
1362 }
1363
1364 void CodeGenerator::openTag ( State s )
1365 {
1366 *out << openTags[ ( unsigned int ) s];
1367 currentState=s;
1368 }
1369
1370 void CodeGenerator::closeTag ( State s )
1371 {
1372 *out << closeTags[ ( unsigned int ) s];
1373 flushWs(2);
1374 currentState=_UNKNOWN;
1375 }
1376
1377 void CodeGenerator::openKWTag ( unsigned int kwClassID )
1378 {
1379 *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
1380 currentState=KEYWORD;
1381 }
1382
1383 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
1384 {
1385 *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
1386 flushWs(3);
1387 currentState=_UNKNOWN;
1388 }
1389
1390 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
1391 {
1392 if (nestedLangs.empty()) {
1393 nestedLangs.push(currentSyntax->getCurrentPath() );
1394 }
1395 if (nestedLangs.top() != embedLangDefPath) {
1396 nestedLangs.push(embedLangDefPath);
1397 }
1398 LoadResult res = loadLanguage(embedLangDefPath, true);
1399 //pass end delimiter regex to syntax description
1400 currentSyntax->restoreLangEndDelim(embedLangDefPath);
1401 return res == LOAD_OK;
1402 }
1403
1404 ///////////////////////////////////////////////////////////////////////////////
1405
1406 void CodeGenerator::processRootState()
1407 {
1408 bool eof=false,
1409 firstLine=true; // avoid newline before printing the first output line
1410
1411 applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
1412
1413 if ( currentSyntax->highlightingDisabled() ) {
1414 string line;
1415 while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
1416 ++lineNumber;
1417 insertLineNumber ( !firstLine );
1418 flushWs(4);
1419 firstLine=false;
1420 if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
1421 maskString ( *out, line );
1422 }
1423 *out << flush;
1424 return;
1425 }
1426
1427 State state=STANDARD;
1428 openTag ( STANDARD );
1429
1430 do {
1431 // determine next state
1432 state= getCurrentState(STANDARD);
1433
1434 // handle current state
1435 switch ( state ) {
1436 case KEYWORD:
1437 closeTag ( STANDARD );
1438 eof=processKeywordState ( state );
1439 openTag ( STANDARD );
1440 break;
1441 case NUMBER:
1442 closeTag ( STANDARD );
1443 eof=processNumberState();
1444 openTag ( STANDARD );
1445 break;
1446 case ML_COMMENT:
1447 closeTag ( STANDARD );
1448 eof=processMultiLineCommentState();
1449 openTag ( STANDARD );
1450 break;
1451 case SL_COMMENT:
1452 closeTag ( STANDARD );
1453 eof=processSingleLineCommentState();
1454 openTag ( STANDARD );
1455 break;
1456 case STRING:
1457 closeTag ( STANDARD );
1458 eof=processStringState ( STANDARD );
1459 openTag ( STANDARD );
1460 break;
1461 case DIRECTIVE:
1462 closeTag ( STANDARD );
1463 eof=processDirectiveState();
1464 openTag ( STANDARD );
1465 break;
1466 case ESC_CHAR:
1467 closeTag ( STANDARD );
1468 eof=processEscapeCharState();
1469 openTag ( STANDARD );
1470 break;
1471 case SYMBOL:
1472 closeTag ( STANDARD );
1473 eof=processSymbolState();
1474 openTag ( STANDARD );
1475 break;
1476 case EMBEDDED_CODE_END:
1477 closeTag ( STANDARD );
1478 eof=processSyntaxChangeState(state);
1479 openTag ( STANDARD );
1480 break;
1481 case SYNTAX_ERROR:
1482 closeTag ( STANDARD );
1483 eof=processSyntaxErrorState();
1484 openTag ( STANDARD );
1485 break;
1486
1487 case _EOL:
1488 // XTERM256 fix (issue with less cmd)
1489 if (!firstLine || showLineNumbers) {
1490 closeTag ( STANDARD );
1491 }
1492 insertLineNumber(!firstLine);
1493 if (!firstLine || showLineNumbers) {
1494 flushWs(5);
1495 stateTraceCurrent.clear();
1496 openTag ( STANDARD );
1497 }
1498 firstLine=false;
1499 break;
1500 case _EOF:
1501 eof=true;
1502 break;
1503 case _WS:
1504 processWsState();
1505 break;
1506 default:
1507 printMaskedToken();
1508 break;
1509 }
1510 } while ( !eof );
1511
1512 if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
1513 closeTag ( STANDARD );
1514
1515 if (currentSyntax->getDecorateLineEndFct()) {
1516 Diluculum::LuaValueList res=callDecorateLineFct(false);
1517 if (res.size()==1) {
1518 *out << res[0].asString();
1519 }
1520 }
1521
1522 printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
1523 *out << getNewLine();
1524 *out << flush;
1525 }
1526
1527 bool CodeGenerator::processSyntaxChangeState(State myState)
1528 {
1529 State newState=STANDARD;
1530 bool eof=false,
1531 exitState=false;
1532
1533 openTag ( KEYWORD );
1534 do {
1535
1536 if (myState==EMBEDDED_CODE_END) {
1537 if (!nestedLangs.empty()) {
1538 nestedLangs.pop();
1539 }
1540 // load host language syntax
1541 if (!nestedLangs.empty()) {
1542 loadLanguage(nestedLangs.top(), true);
1543 }
1544 matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
1545 }
1546
1547 printMaskedToken ( newState!=_WS );
1548
1549 newState= getCurrentState(myState);
1550
1551 switch ( newState ) {
1552 case _WS:
1553 processWsState();
1554 break;
1555 case _EOL:
1556 insertLineNumber();
1557 exitState=true;
1558 break;
1559 case _EOF:
1560 eof = true;
1561 break;
1562 default:
1563 exitState=true;
1564 break;
1565 }
1566 } while ( !exitState && !eof );
1567 closeTag ( KEYWORD );
1568
1569 return eof;
1570 }
1571
1572
1573 bool CodeGenerator::processKeywordState ( State myState )
1574 {
1575 State newState=STANDARD;
1576 unsigned int myClassID=currentKeywordClass;
1577 bool eof=false,
1578 exitState=false;
1579
1580 openKWTag ( myClassID );
1581 do {
1582 printMaskedToken ( newState!=_WS,
1583 ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
1584 newState= getCurrentState(myState);
1585 switch ( newState ) {
1586 case _WS:
1587 processWsState();
1588 exitState=isolateTags;
1589 break;
1590 case _EOL:
1591 insertLineNumber();
1592 exitState=true;
1593
1594 break;
1595 case _EOF:
1596 eof = true;
1597 break;
1598 case KEYWORD_END:
1599 exitState=true;
1600 break;
1601 default:
1602 exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
1603 break;
1604 }
1605 } while ( !exitState && !eof );
1606
1607 closeKWTag ( myClassID );
1608
1609 currentKeywordClass=0;
1610 return eof;
1611 }
1612
1613 bool CodeGenerator::processNumberState()
1614 {
1615 State newState=STANDARD;
1616 bool eof=false,
1617 exitState=false;
1618 openTag ( NUMBER );
1619 do {
1620 printMaskedToken ( newState!=_WS );
1621 newState= getCurrentState(NUMBER);
1622 switch ( newState ) {
1623 case _WS:
1624 processWsState();
1625 exitState=isolateTags;
1626 break;
1627 case _EOL:
1628 insertLineNumber();
1629 exitState=true;
1630 break;
1631 case _EOF:
1632 eof = true;
1633 break;
1634 default:
1635 exitState=newState!=NUMBER;
1636 break;
1637 }
1638 } while ( !exitState && !eof );
1639
1640 closeTag ( NUMBER );
1641 return eof;
1642 }
1643
1644
1645 bool CodeGenerator::processMultiLineCommentState()
1646 {
1647 int commentCount=1;
1648 int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
1649 State newState=STANDARD;
1650 bool eof=false, exitState=false, containedTestCase=false;
1651 unsigned int startColumn=lineIndex - token.size() ;
1652 openTag ( ML_COMMENT );
1653 do {
1654 printMaskedToken (newState!=_WS );
1655 newState= getCurrentState(ML_COMMENT);
1656
1657 switch ( newState ) {
1658 case _WS:
1659 processWsState();
1660 break;
1661 case _EOL:
1662 wsBuffer += closeTags[ML_COMMENT];
1663 insertLineNumber();
1664 wsBuffer += openTags[ML_COMMENT];
1665 startColumn=0;
1666 break;
1667 case _EOF:
1668 eof = true;
1669 break;
1670 case _TESTPOS:
1671 runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
1672 printMaskedToken();
1673 containedTestCase=true;
1674 break;
1675 case ML_COMMENT:
1676
1677 if ( currentSyntax->allowNestedMLComments() ) {
1678 ++commentCount;
1679 }
1680 // if delimiters are equal, close the comment by continuing to
1681 // ML_COMMENT_END section
1682 if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT ))) break;
1683
1684 case ML_COMMENT_END:
1685
1686 if (!currentSyntax->matchesOpenDelimiter (token, ML_COMMENT_END, openDelimID)) {
1687 break;
1688 }
1689 commentCount--;
1690 if ( !commentCount ) {
1691 printMaskedToken();
1692 exitState=true;
1693 }
1694 break;
1695 default:
1696 break;
1697 }
1698 } while ( !exitState && !eof );
1699
1700 closeTag ( ML_COMMENT );
1701
1702 if (containedTestCase){
1703 stateTraceCurrent.clear();
1704 }
1705 return eof;
1706 }
1707
1708
1709 bool CodeGenerator::processSingleLineCommentState()
1710 {
1711 State newState=STANDARD;
1712 bool eof=false, exitState=false, containedTestCase=false;
1713 unsigned int startColumn = lineIndex - token.size() ;
1714
1715 openTag ( SL_COMMENT );
1716 do {
1717 printMaskedToken ( newState!=_WS );
1718 newState= getCurrentState(SL_COMMENT);
1719
1720 switch ( newState ) {
1721 case _WS:
1722 processWsState();
1723 break;
1724 case _EOL:
1725 printMaskedToken();
1726 if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
1727 exitState=false;
1728 } else {
1729 exitState=true;
1730 }
1731 if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
1732 insertLineNumber();
1733 if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
1734
1735 break;
1736 case _EOF:
1737 eof = true;
1738 break;
1739 case _TESTPOS:
1740 runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
1741 printMaskedToken();
1742 containedTestCase=true;
1743 break;
1744
1745 default:
1746 break;
1747 }
1748 } while ( !exitState && !eof );
1749
1750 closeTag ( SL_COMMENT );
1751
1752 if (containedTestCase) {
1753 stateTraceCurrent.clear();
1754 }
1755
1756 return eof;
1757 }
1758
1759 bool CodeGenerator::processDirectiveState()
1760 {
1761 State newState=STANDARD;
1762 bool eof=false, exitState=false;
1763
1764 openTag ( DIRECTIVE );
1765 do {
1766 printMaskedToken ( newState!=_WS );
1767 newState= getCurrentState(DIRECTIVE);
1768 switch ( newState ) {
1769 case _WS:
1770 processWsState();
1771 break;
1772 case DIRECTIVE_END:
1773 printMaskedToken();
1774 exitState=true;
1775 break;
1776 case _EOL:
1777 printMaskedToken();
1778
1779 if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
1780 exitState=false;
1781 } else {
1782 if (currentSyntax->getContinuationChar()!=0x13){
1783 exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
1784 }
1785 }
1786 if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
1787 insertLineNumber();
1788 if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
1789 break;
1790 case ML_COMMENT:
1791 closeTag ( DIRECTIVE );
1792 eof= processMultiLineCommentState();
1793 openTag ( DIRECTIVE );
1794 break;
1795 case SL_COMMENT:
1796 closeTag ( DIRECTIVE );
1797 eof= processSingleLineCommentState();
1798 openTag ( DIRECTIVE );
1799 exitState=true;
1800 break;
1801 case STRING:
1802 closeTag ( DIRECTIVE );
1803 eof=processStringState ( DIRECTIVE );
1804 openTag ( DIRECTIVE );
1805 break;
1806 case _EOF:
1807 eof = true;
1808 break;
1809 default:
1810 break;
1811 }
1812 } while ( !exitState && !eof );
1813
1814 closeTag ( DIRECTIVE );
1815 return eof;
1816 }
1817
1818 bool CodeGenerator::processStringState ( State oldState )
1819 {
1820 State newState=STANDARD;
1821 bool eof=false, exitState=false;
1822 bool returnedFromOtherState=false;
1823
1824 State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;
1825
1826 int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
1827 string openDelim=token;
1828
1829 //Raw String by definition:
1830 bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;
1831
1832 // Test if character before string open delimiter token equals to the
1833 // raw string prefix (Example: r" ", r""" """ in Python)
1834
1835 //Raw String Prefix:
1836 if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
1837 isRawString=true;
1838 }
1839
1840 openTag ( myState );
1841 do {
1842 // true if last token was an escape char
1843 if ( !returnedFromOtherState ) {
1844 printMaskedToken (newState!=_WS );
1845 }
1846 returnedFromOtherState=false;
1847 newState= getCurrentState(myState);
1848
1849 switch ( newState ) {
1850 case _WS:
1851 processWsState();
1852 break;
1853 case _EOL:
1854 wsBuffer += closeTags[myState];
1855 insertLineNumber();
1856 wsBuffer += openTags[myState];
1857 break;
1858 case STRING_END:
1859 if (resultOfHook || currentSyntax->matchesOpenDelimiter (token, STRING_END, openDelimID)) {
1860 if (currentSyntax->assertDelimEqualLength()) {
1861 exitState= openDelim.length()==token.length();
1862 } else {
1863 exitState= true;
1864 }
1865 printMaskedToken();
1866 }
1867 break;
1868 case STRING:
1869 // if there exist multiple string delimiters, close string if
1870 // current delimiter is equal to the opening delimiter
1871 exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING )) && token==openDelim;
1872 printMaskedToken();
1873 break;
1874 case ESC_CHAR:
1875 if ( !isRawString ) {
1876 closeTag ( myState );
1877 eof=processEscapeCharState();
1878 openTag ( myState );
1879 returnedFromOtherState=true;
1880 } else {
1881 // FIXME not a fix for Python r"""\"""
1882 exitState=token.size()>1 && token[1] == openDelim[0];
1883 printMaskedToken();
1884 }
1885 break;
1886 case STRING_INTERPOLATION:
1887 closeTag ( myState );
1888 eof=processInterpolationState();
1889 openTag ( myState );
1890 returnedFromOtherState=true;
1891 break;
1892
1893 case _EOF:
1894 eof = true;
1895 break;
1896 default:
1897 printMaskedToken();
1898 break;
1899 }
1900 } while ( !exitState && !eof );
1901
1902 closeTag ( myState );
1903
1904 toggleDynRawString = false;
1905
1906 return eof;
1907 }
1908
1909 bool CodeGenerator::processSymbolState()
1910 {
1911 State newState=STANDARD;
1912 bool eof=false,
1913 exitState=false;
1914
1915 openTag ( SYMBOL );
1916 do {
1917 printMaskedToken ( newState!=_WS );
1918 newState= getCurrentState(SYMBOL);
1919 switch ( newState ) {
1920 case _WS:
1921 processWsState();
1922 exitState=isolateTags;
1923 break;
1924 case _EOL:
1925 insertLineNumber();
1926 exitState=true;
1927 break;
1928 case _EOF:
1929 eof = true;
1930 break;
1931 default:
1932 exitState=newState!=SYMBOL;
1933 break;
1934 }
1935 } while ( !exitState && !eof );
1936
1937 closeTag ( SYMBOL );
1938 return eof;
1939 }
1940
1941 bool CodeGenerator::processSyntaxErrorState()
1942 {
1943 State newState=STANDARD;
1944 bool eof=false,
1945 exitState=false;
1946
1947 openTag ( SYNTAX_ERROR );
1948 do {
1949 printMaskedToken ( newState!=_WS );
1950 newState= getCurrentState(SYNTAX_ERROR);
1951 switch ( newState ) {
1952 case _WS:
1953 processWsState();
1954 exitState=isolateTags;
1955 break;
1956 case _EOL:
1957 insertLineNumber();
1958 exitState=true;
1959 break;
1960 case _EOF:
1961 eof = true;
1962 break;
1963 default:
1964 exitState=newState!=SYMBOL;
1965 break;
1966 }
1967 } while ( !exitState && !eof );
1968
1969 closeTag ( SYNTAX_ERROR );
1970 return eof;
1971 }
1972
1973 bool CodeGenerator::processEscapeCharState()
1974 {
1975 State newState=STANDARD;
1976 bool eof=false, exitState=false;
1977 openTag ( ESC_CHAR );
1978 do {
1979 printMaskedToken (newState!=_WS );
1980 newState= getCurrentState(ESC_CHAR);
1981 switch ( newState ) {
1982 case _EOL:
1983 insertLineNumber();
1984 exitState=true;
1985 break;
1986 case _WS:
1987 processWsState();
1988 exitState=isolateTags;
1989 break;
1990 case _EOF:
1991 eof = true;
1992 break;
1993 default:
1994 exitState=newState!=ESC_CHAR;
1995 break;
1996 }
1997 } while ( !exitState && !eof );
1998
1999 closeTag ( ESC_CHAR );
2000 return eof;
2001 }
2002
2003 bool CodeGenerator::processInterpolationState()
2004 {
2005 State newState=STANDARD;
2006 bool eof=false, exitState=false;
2007 openTag ( STRING_INTERPOLATION );
2008 do {
2009 printMaskedToken (newState!=_WS );
2010 newState= getCurrentState(STRING_INTERPOLATION);
2011 switch ( newState ) {
2012 case _EOL:
2013 insertLineNumber();
2014 exitState=true;
2015 break;
2016 case _WS:
2017 processWsState();
2018 exitState=isolateTags;
2019 break;
2020 case _EOF:
2021 eof = true;
2022 break;
2023 default:
2024 exitState=newState!=STRING_INTERPOLATION;
2025 break;
2026 }
2027 } while ( !exitState && !eof );
2028
2029 closeTag ( STRING_INTERPOLATION );
2030 return eof;
2031 }
2032
2033 void CodeGenerator::processWsState()
2034 {
2035
2036 if ( !maskWs ) {
2037 wsBuffer += token;
2038 token.clear();
2039 return;
2040 }
2041
2042 flushWs(6);
2043
2044 int cntWs=0;
2045 lineIndex--;
2046 PositionState ps(currentState, 0, true);
2047
2048 while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
2049 ++cntWs;
2050 ++lineIndex;
2051 }
2052
2053 if ( cntWs>1 ) {
2054
2055 unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
2056 if ( excludeWs && styleID!=_UNKNOWN ) {
2057 *out << closeTags[styleID];
2058 }
2059
2060 *out << maskWsBegin;
2061 for ( int i=0; i<cntWs; i++ ) {
2062 *out << spacer;
2063 if (applySyntaxTestCase){
2064 stateTraceCurrent.push_back(ps);
2065 }
2066 }
2067 *out << maskWsEnd;
2068 if ( excludeWs && styleID!=_UNKNOWN ) {
2069 *out << openTags[styleID];
2070 }
2071 } else {
2072
2073 *out << spacer; //Bugfix fehlender Space nach Strings
2074 if (applySyntaxTestCase){
2075 stateTraceCurrent.push_back(ps);
2076 }
2077 }
2078
2079 spacer = initialSpacer;
2080
2081 token.clear();
2082 }
2083
2084 void CodeGenerator::flushWs(int arg)
2085 {
2086 PositionState ps(currentState, 0, true);
2087 //workaround condition
2088 for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
2089 stateTraceCurrent.push_back(ps);
2090 //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
2091 }
2092
2093 //fix canvas whitespace
2094 if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
2095 *out<<maskWsBegin;
2096 }
2097
2098 *out << wsBuffer;
2099 wsBuffer.clear();
2100 }
2101
2102 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
2103 switch (s) {
2104
2105 case STANDARD:
2106 return STY_NAME_STD;
2107 case STRING:
2108 return STY_NAME_STR;
2109 case NUMBER:
2110 return STY_NAME_NUM;
2111 case SL_COMMENT:
2112 return STY_NAME_SLC;
2113 case ML_COMMENT:
2114 return STY_NAME_COM;
2115 case ESC_CHAR:
2116 return STY_NAME_ESC;
2117 case DIRECTIVE:
2118 return STY_NAME_DIR;
2119 case DIRECTIVE_STRING:
2120 return STY_NAME_DST;
2121 case SYMBOL:
2122 return STY_NAME_SYM;
2123 case STRING_INTERPOLATION:
2124 return STY_NAME_IPL;
2125 case SYNTAX_ERROR:
2126 return STY_NAME_ERR;
2127 case _WS:
2128 return "ws";
2129 case KEYWORD: {
2130
2131 if (!kwClass)
2132 return "ws";
2133
2134 char kwName[20] = {0};
2135 snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
2136
2137 return string(kwName);
2138 }
2139 default:
2140 return "unknown_test";
2141 }
2142 }
2143
2144 void CodeGenerator::printTrace(const string &s){
2145 std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
2146 for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
2147 std::cout<<" "<<stateTraceCurrent[i].state;
2148 }
2149 std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
2150 for (unsigned int i=0; i< stateTraceTest.size(); i++) {
2151 std::cout<<" "<<stateTraceTest[i].state;
2152 }
2153 std::cout<<"\n";
2154 }
2155
2156 //column: lineIndex (not a UTF-8 validated string position)
2157 void CodeGenerator::runSyntaxTestcases(unsigned int column){
2158
2159 if (encoding=="utf-8")
2160 column = StringTools::utf8_strlen(line.substr(0, column));
2161
2162 unsigned int assertGroup=0;
2163 size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
2164 State assertState=_UNKNOWN;
2165 bool negation=false;
2166 bool testFailed=false;
2167
2168 ostringstream errMsg;
2169 string prefix;
2170 //printTrace("trace 2");
2171
2172 if (typeDescPos!=string::npos) {
2173
2174 if (line[typeDescPos]=='~') {
2175
2176 negation=true;
2177 prefix="~";
2178 ++typeDescPos;
2179 }
2180
2181 if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
2182 assertState=NUMBER;
2183 //TODO temp. fix to allow old and new string classes
2184 else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
2185 assertState=STRING;
2186 else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
2187 assertState=ESC_CHAR;
2188 else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
2189 assertState=STRING_INTERPOLATION;
2190 else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
2191 assertState=SYMBOL;
2192 else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
2193 assertState=DIRECTIVE;
2194 else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
2195 assertState=SL_COMMENT;
2196 else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
2197 assertState=ML_COMMENT;
2198 else if (line.find("ws", typeDescPos)==typeDescPos)
2199 assertState=_WS;
2200 //TODO temp. fix to allow old and new default classes
2201 else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos || line.find("std", typeDescPos)==typeDescPos)
2202 assertState=STANDARD;
2203 else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
2204 assertState=DIRECTIVE_STRING;
2205
2206 else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
2207 assertState=KEYWORD;
2208 if (isalpha(line[typeDescPos+2]))
2209 assertGroup=line[typeDescPos+2] - 'a' +1;
2210 }
2211
2212 if ( (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
2213 || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
2214 || assertGroup != stateTraceTest[column].kwClass) {
2215
2216 testFailed=!negation;
2217
2218 } else if (negation ) {
2219
2220 //TODO Fix ~ws
2221 if (assertState!=_WS && !stateTraceTest[column].isWhiteSpace )
2222 testFailed=true;
2223 }
2224
2225 if (testFailed) {
2226 errMsg << inFile << " line " << lineNumber << ", column "<< column
2227 << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
2228 << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
2229
2230 failedPosTests.push_back(errMsg.str());
2231 }
2232
2233 }
2234
2235 lineContainedTestCase=true;
2236 }
2237
2238 string CodeGenerator::getNewLine()
2239 {
2240 ostringstream ss;
2241 printSyntaxError(ss);
2242 if (printNewLines)
2243 ss << newLineTag;
2244 return ss.str();
2245 }
2246
2247 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
2248 {
2249
2250 Diluculum::LuaValueList params;
2251 params.push_back(Diluculum::LuaValue(lineNumber));
2252
2253 return currentSyntax->getLuaState()->call ( isLineStart ?
2254 *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
2255 params,"getDecorateLineFct call");
2256 }
2257
2258 void CodeGenerator::setOverrideParams() {
2259 if (currentSyntax->requiresParamUpdate()) {
2260 if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
2261 toggleDynRawString=true; // reset to false in string state fct
2262 }
2263 if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
2264 maskWs=true;
2265 }
2266 if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
2267 spacer=currentSyntax->getOverrideConfigVal("format.spacer");
2268 }
2269 }
2270 }
2271
2272 void CodeGenerator::insertLineNumber ( bool insertNewLine )
2273 {
2274 if ( insertNewLine ) {
2275 if (currentSyntax->getDecorateLineEndFct()) {
2276 Diluculum::LuaValueList res=callDecorateLineFct(false);
2277 if (res.size()==1) {
2278 setOverrideParams();
2279 wsBuffer +=res[0].asString();
2280 }
2281 }
2282 wsBuffer += getNewLine();
2283 }
2284
2285 if (currentSyntax->getDecorateLineBeginFct()) {
2286 Diluculum::LuaValueList res=callDecorateLineFct(true);
2287 if (res.size()==1) {
2288 setOverrideParams();
2289 wsBuffer += res[0].asString();
2290 }
2291 }
2292
2293 if ( showLineNumbers ) {
2294 ostringstream os;
2295 ostringstream numberPrefix;
2296
2297 os << setw ( getLineNumberWidth() ) << right;
2298 if( numberCurrentLine ) {
2299 if ( lineNumberFillZeroes ) {
2300 os.fill ( '0' );
2301 }
2302 os << lineNumber+lineNumberOffset;
2303 } else {
2304 os << "";
2305 }
2306
2307 numberPrefix << openTags[LINENUMBER];
2308 maskString ( numberPrefix, os.str() );
2309
2310 //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
2311 numberPrefix << initialSpacer << closeTags[LINENUMBER];
2312 wsBuffer += numberPrefix.str();
2313 }
2314 }
2315
2316 unsigned int CodeGenerator::getLineIndex()
2317 {
2318 return lineIndex;
2319 }
2320 unsigned int CodeGenerator::getLastLineLength()
2321 {
2322 return lastLineLength;
2323 }
2324
2325 bool CodeGenerator::requiresTwoPassParsing() const {
2326 if (!currentSyntax) return false;
2327 return currentSyntax->getPersistentSnippetsNum()>0;
2328 }
2329
2330
2331 bool CodeGenerator::printExternalStyle ( const string &outFile )
2332 {
2333 if ( !includeStyleDef ) {
2334 ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
2335 if ( !cssOutFile->fail() ) {
2336 if (!omitVersionComment) {
2337 *cssOutFile << styleCommentOpen
2338 <<" Style definition file generated by highlight "
2339 << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
2340 << " " << styleCommentClose << "\n";
2341 }
2342 *cssOutFile << getStyleDefinition()
2343 << "\n";
2344 *cssOutFile << readUserStyleDef();
2345 if ( !outFile.empty() ) delete cssOutFile;
2346 } else {
2347 return false;
2348 }
2349 }
2350 return true;
2351 }
2352
2353 bool CodeGenerator::printPersistentState ( const string &outFile )
2354 {
2355 if (!currentSyntax) return false;
2356
2357 ofstream pluginOutFile( outFile.c_str());
2358 if ( !pluginOutFile.fail() ) {
2359
2360 pluginOutFile <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
2361 <<"Categories = {\"two-pass\" }\n\n"
2362 <<"function syntaxUpdate(desc)\n\n";
2363
2364 pluginOutFile << currentSyntax->getPersistentHookConditions();
2365
2366 for (auto snippet: currentSyntax->getPersistentSnippets())
2367 {
2368 pluginOutFile << snippet <<"\n\n";
2369 }
2370
2371 pluginOutFile<<"end\n\n"
2372 <<"Plugins={\n"
2373 <<" { Type=\"lang\", Chunk=syntaxUpdate }\n"
2374 <<"}\n";
2375 } else {
2376 return false;
2377 }
2378
2379 return true;
2380 }
2381
2382 string CodeGenerator::readUserStyleDef()
2383 {
2384 ostringstream ostr;
2385 if ( !styleInputPath.empty() ) {
2386 ifstream userStyleDef ( styleInputPath.c_str() );
2387 if ( userStyleDef ) {
2388 ostr << "\n" << styleCommentOpen
2389 << " Content of " << styleInputPath
2390 << ": " <<styleCommentClose << "\n";
2391 string line;
2392 while ( getline ( userStyleDef, line ) ) {
2393 ostr << line << "\n";
2394 }
2395 userStyleDef.close();
2396 } else {
2397 ostr << styleCommentOpen
2398 << " ERROR: Could not include " << styleInputPath
2399 << "." << styleCommentClose << "\n";
2400 }
2401 }
2402
2403 string injections=docStyle.getInjections();
2404 if (!injections.empty()) {
2405 ostr << "\n" << styleCommentOpen
2406 << " Plug-in theme injections: " <<styleCommentClose << "\n";
2407 ostr << injections<<"\n";
2408 }
2409 return ostr.str();
2410 }
2411
2412 bool CodeGenerator::initPluginScript(const string& script)
2413 {
2414
2415 if (script.empty()) return true;
2416
2417 try {
2418
2419 userScriptError="";
2420 Diluculum::LuaState ls;
2421
2422 ls.doFile (script);
2423 int listIdx=1;
2424
2425 while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
2426
2427 // Theme plugins
2428 if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
2429 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2430 docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2431 }
2432 }
2433 // Syntax plugins
2434 else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
2435 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2436 currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2437 }
2438 }
2439 // Format plugins
2440 else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
2441 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2442 addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2443 }
2444 }
2445
2446 listIdx++;
2447 }
2448 } catch (Diluculum::LuaError &err) {
2449 userScriptError=err.what();
2450 return false;
2451 }
2452 return true;
2453 }
2454
2455 void CodeGenerator::resetSyntaxReaders() {
2456 for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
2457 delete it->second;
2458 }
2459 currentSyntax=NULL;
2460 syntaxReaders.clear();
2461 }
2462
2463 bool CodeGenerator::syntaxRequiresTwoPassRun() {
2464 if (!currentSyntax) return false;
2465 return currentSyntax->requiresTwoPassRun();
2466 }
2467
2468 void CodeGenerator::clearPersistentSnippets(){
2469 if (currentSyntax) {
2470 currentSyntax->clearPersistentSnippets();
2471 }
2472 }
2473
2474 void CodeGenerator::updateKeywordClasses(){
2475
2476 if (openTags.size()) {
2477 if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
2478 // remove dynamic keyword tag delimiters of the old language definition
2479 vector<string>::iterator keyStyleOpenBegin =
2480 openTags.begin() + NUMBER_BUILTIN_STATES;
2481 vector<string>::iterator keyStyleCloseBegin =
2482 closeTags.begin() + NUMBER_BUILTIN_STATES;
2483 openTags.erase ( keyStyleOpenBegin, openTags.end() );
2484 closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
2485 }
2486 // add new keyword tag delimiters
2487
2488 for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
2489 openTags.push_back ( getKeywordOpenTag ( i ) );
2490 closeTags.push_back ( getKeywordCloseTag ( i ) );
2491 }
2492 }
2493 }
2494
2495
2496 }