"Fossies" - the Fresh Open Source Software Archive

Member "highlight-3.57-x64/src/include/syntaxreader.h" (12 May 2020, 14883 Bytes) of package /windows/www/highlight-3.57-x64.zip:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the last Fossies "Diffs" side-by-side code changes report for "syntaxreader.h": 3.56_vs_3.57.

    1 /***************************************************************************
    2                           syntaxreader.h  -  description
    3                              -------------------
    4     begin                : Wed Nov 28 2001
    5     copyright            : (C) 2001-2020 by Andre Simon
    6     email                : a.simon@mailbox.org
    7  ***************************************************************************/
    8 
    9 
   10 /*
   11 This file is part of Highlight.
   12 
   13 Highlight is free software: you can redistribute it and/or modify
   14 it under the terms of the GNU General Public License as published by
   15 the Free Software Foundation, either version 3 of the License, or
   16 (at your option) any later version.
   17 
   18 Highlight is distributed in the hope that it will be useful,
   19 but WITHOUT ANY WARRANTY; without even the implied warranty of
   20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   21 GNU General Public License for more details.
   22 
   23 You should have received a copy of the GNU General Public License
   24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
   25 */
   26 
   27 
   28 #ifndef SYNTAXREADER_H
   29 #define SYNTAXREADER_H
   30 
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include <boost/xpressive/xpressive_dynamic.hpp>

#include <Diluculum/LuaState.hpp>
#include <Diluculum/LuaVariable.hpp>
#include <Diluculum/LuaFunction.hpp>

#include "platform_fs.h"
#include "enums.h"
   47 
   48 #define GLOBAL_SR_INSTANCE_NAME "HL_SRInstance"
   49 
   50 using namespace std;
   51 
   52 
   53 namespace highlight
   54 {
   55 class RegexElement;
   56 
   57 /** maps keywords and the corresponding class IDs*/
   58 typedef map <string, int> KeywordMap;
   59 
   60 /** maps embedded language names to exit delimiter regexes*/
   61 typedef map <string, string> DelimiterMap;
   62 
   63 typedef map <string, bool> AllowInnerSectionsMap;
   64 
   65 
   66 /**\brief Contains specific data of the programming language being processed.
   67 
   68 * @author Andre  Simon
   69 */
   70 class SyntaxReader
   71 {
   72 
   73 public:
   74 
   75     SyntaxReader();
   76 
   77     ~SyntaxReader();
   78     
   79     
   80     /** Load new language definition
   81         Will only read a new language definition if the given
   82           file path is not equal to the path of the current language definition.
   83         \param langDefPath Path of language definition
   84         \param pluginReadFilePath path to file which is read by plugin
   85         \param outputType output format
   86         \return LoadResult  */
   87     LoadResult load( const string& langDefPath, const string& pluginReadFilePath,  OutputType outputType );
   88 
   89     /** \return True if the next load() call would load a new language definition
   90         \param  langDefPath Path to language definition  */
   91     bool needsReload ( const string &langDefPath ) const
   92     {
   93         return currentPath!=langDefPath;
   94     }
   95     
   96     /** \return Failed regular expression */
   97     string getFailedRegex() const
   98     {
   99         return regexErrorMsg;
  100     }
  101 
  102     /** \return Failed Lua exception description */
  103     string getLuaErrorText() const
  104     {
  105         return luaErrorMsg;
  106     }
  107 
  108     /** \return Prefix of raw strings */
  109     unsigned char getRawStringPrefix() const
  110     {
  111         return rawStringPrefix;
  112     }
  113 
  114     /** \return Continuation Character */
  115     unsigned char getContinuationChar() const
  116     {
  117         return continuationChar;
  118     }
  119 
  120     /** \return true if syntax highlighting is enabled*/
  121     bool highlightingEnabled() const
  122     {
  123         return !disableHighlighting;
  124     }
  125 
  126     /** \return True if language is case sensitive */
  127     bool isIgnoreCase() const
  128     {
  129         return ignoreCase;
  130     }
  131 
  132     /** \param s String
  133          \return true if s is not a known keyword */
  134     bool isKeyword ( const string &s ) ;
  135     
  136     /** \param s String
  137          \return keyword list group id */
  138     int getKeywordListGroup ( const string &s );
  139     
  140     /** \return True if multi line comments may be nested */
  141     bool allowNestedMLComments() const
  142     {
  143         return allowNestedComments;
  144     }
  145 
  146     /** \return True if highlighting is disabled
  147                             TODO remove method */
  148     bool highlightingDisabled() const
  149     {
  150         return disableHighlighting;
  151     }
  152 
  153     /** \return True if current language may be reformatted (c, c++, c#, java) */
  154     bool enableReformatting() const
  155     {
  156         return reformatCode;
  157     }
  158     
  159     bool assertDelimEqualLength() const
  160     {
  161       return assertEqualLength;
  162     }
  163 
  164     /** \return keywords*/
  165     const KeywordMap& getKeywords() const
  166     {
  167         return keywords;
  168     }
  169 
  170     /** \return keyword classes*/
  171     const vector<string>& getKeywordClasses() const
  172     {
  173         return keywordClasses;
  174     }
  175 
  176     /** \return regular expressions */
  177     const vector<RegexElement*>& getRegexElements() const
  178     {
  179         return regex;
  180     }
  181 
  182     /** \return list of Lua code snippets to be stored on disk */
  183     const vector<string>& getPersistentSnippets() const
  184     {
  185         return persistentSnippets;
  186     }
  187 
  188     /** \return number of Lua code snippets to be stored on disk */
  189     int getPersistentSnippetsNum() const
  190     {
  191         return persistentSnippets.size();
  192     }
  193     
  194     /** \return list of format override flags defined in syntax definitions */
  195     vector<int>& getOverrideStyleAttributes()
  196     {
  197         return overrideStyles;
  198     }
  199     
  200     /** \return description of the programming language */
  201     const string & getDescription () const
  202     {
  203         return langDesc;
  204     }
  205     
  206     const string & getCategoryDescription() const
  207     {
  208         return categories;
  209     }
  210 
  211 
  212     /** \return header string defined by a plug-in */
  213     const string & getHeaderInjection () const
  214     {
  215         return headerInjection;
  216     }
  217 
  218     /** \return footer string defined by a plug-in */
  219     const string & getFooterInjection () const
  220     {
  221         return footerInjection;
  222     }
  223 
  224     /**  \param delimID delimiter id
  225          \return true,  if no closing delimiter exists (open and close delimiters are equal)
  226      */
  227     bool delimiterIsDistinct ( int delimID )
  228     {
  229         return delimiterDistinct[delimID];
  230     }
  231 
  232     /**  \param delimID delimiter id
  233          \return true,  if delimiter indicates a raw string
  234      */
  235     bool delimiterIsRawString ( int delimID )
  236     {
  237         return rawStringOpenDelims[delimID];
  238     }
  239 
  240     /**  Pairs of open/close delimiters have a unique ID to test if two tokens act as delimiters
  241          \param token delimiter token
  242                      \param s State of delimiter
  243          \return delimiter ID
  244      */
  245     int getOpenDelimiterID ( const string& token, State s);
  246 
  247     /**  Pairs of open/close delimiters have a unique ID to test if two tokens act as delimiters
  248          \param token delimiter token
  249                      \param s State of delimiter
  250                      \param openDelimId opening delimiter retrieved with getOpenDelimiterID
  251          \return true if delimiter id of token matches openDelimID
  252      */
  253     bool matchesOpenDelimiter ( const string& token, State s, int openDelimId);
  254 
  255     /** initializes end delimiter regex to switch back to host language
  256         \param langPath path of embedded language definition
  257     */
  258     void restoreLangEndDelim(const string&langPath);
  259     
  260     bool allowsInnerSection(const string& langPath);
  261 
  262     bool requiresTwoPassRun();
  263 
  264     bool requiresParamUpdate();
  265 
  266     
  267     string getPersistentHookConditions();
  268 
  269     void clearPersistentSnippets();
  270     
  271     /**
  272         \param lang language definition name  (no path, no ".lang" extension)
  273         \return absolute path based on the previously loaded definition
  274     */
  275     string getNewPath(const string& lang);
  276 
  277     /**
  278         \return absolute path of currently loaded definition
  279     */
  280     string getCurrentPath() const
  281     {
  282         return currentPath;
  283     }
  284 
  285     /**
  286         \return encoding which is normally used for input files of this syntax
  287     */
  288     string getEncodingHint() const
  289     {
  290         return encodingHint;
  291     }
  292 
  293      /**
  294         \return test function
  295     */
  296     string getOverrideConfigVal(const string& name) const
  297     {
  298         return pluginConfigOverride.count(name) ? pluginConfigOverride[name] : "";
  299     }
  300     
  301     /**
  302         \return pointer to state validation function
  303     */
  304     Diluculum::LuaFunction* getValidateStateChangeFct() const
  305     {
  306         return validateStateChangeFct;
  307     }
  308     /**
  309         \return pointer to state decorate function
  310     */
  311     Diluculum::LuaFunction* getDecorateFct() const
  312     {
  313         return decorateFct;
  314     }
  315 
  316     /**
  317         \return pointer to line begin decorate function
  318     */
  319     Diluculum::LuaFunction* getDecorateLineBeginFct() const
  320     {
  321         return decorateLineBeginFct;
  322     }
  323     
  324     /**
  325         \return pointer to line end decorate function
  326     */
  327     Diluculum::LuaFunction* getDecorateLineEndFct() const
  328     {
  329         return decorateLineEndFct;
  330     }
  331     
  332     /**
  333         \return pointer to Lua state
  334     */
  335     Diluculum::LuaState* getLuaState() const
  336     {
  337         return luaState;
  338     }
  339 
  340     /**
  341         \param chunk Lua function to be added to the function list
  342     */
  343     void addUserChunk(const Diluculum::LuaFunction& chunk)
  344     {
  345         pluginChunks.push_back(new Diluculum::LuaFunction(chunk));
  346     }
  347 
  348     /**
  349         \param fn name of the processed input file
  350     */
  351     void setInputFileName(const string& fn) { currentInputFile=fn; }
  352     
  353     /**
  354         \return name of the processed input file
  355     */
  356     
  357     string getInputFileName() const { return currentInputFile; }
  358     
  359     /**
  360         \param groupID keyword group to be stored on disk
  361         \param kw keyword token to be stored on disk
  362     */
  363     
  364     void addPersistentKeyword(unsigned int groupID, const string& kw);
  365     
  366     /**
  367         \param groupID keyword group to be stored on disk
  368         \param column start of range within line
  369         \param length length of range
  370         \param lineNumber line number
  371         \param fileName file name of processed file containing the line
  372         */
  373     void addPersistentStateRange(unsigned int groupID, unsigned int column,unsigned int length, unsigned int lineNumber, const string& fileName);
  374     
  375     /**
  376         \param ls Lua state to be initialized with constants
  377         \param langDefPath absolute path of language definition
  378         \param pluginReadFilePath absolute path of plugin input file
  379     */
  380     static void initLuaState(Diluculum::LuaState& ls, const string& langDefPath, const string& pluginReadFilePath, OutputType outputType=HTML );
  381        
  382 private:
  383 
  384     static const string REGEX_IDENTIFIER;
  385     static const string REGEX_NUMBER;
  386     static const string REGEX_ESCSEQ;
  387 
  388     // path to loaded language definition
  389     string currentPath;
  390 
  391     // name of file being processed
  392     string currentInputFile;
  393     
  394     // Language description
  395     string langDesc, categories, encodingHint;
  396 
  397     string headerInjection, footerInjection;
  398 
  399     string regexErrorMsg, luaErrorMsg;
  400 
  401     KeywordMap keywords;
  402 
  403     vector <string> keywordClasses;
  404     static vector <string> persistentSnippets;
  405     static set <string> persistentSyntaxDescriptions;
  406 
  407     vector <RegexElement*> regex;
  408     
  409     vector <int>overrideStyles;
  410 
  411     // collect delimiters or get current delimiter in CodeGenerator::loadEmbeddedLang
  412     static DelimiterMap nestedStateEndDelimiters;
  413     
  414     static DelimiterMap pluginConfigOverride;
  415 
  416     static AllowInnerSectionsMap allowInnerSections;
  417     
  418     // saves if delimiter pair consists of the same delimiter symbol
  419     map <int, bool> delimiterDistinct;
  420 
  421     map <int, bool> rawStringOpenDelims;
  422 
  423     map <int, int> matchingDelimiters;
  424 
  425     // keywords are not case sensitive if set
  426     bool ignoreCase,
  427 
  428          // highlighting is disabled
  429          disableHighlighting,
  430 
  431          // allow nested multi line comment blocks
  432          allowNestedComments,
  433 
  434          // code formatting is enabled if set
  435          reformatCode,
  436          
  437          // string open and close delimiters must have the same length
  438          assertEqualLength,
  439          
  440          paramsNeedUpdate;
  441 
  442     // character which is prefix of raw string (c#)
  443     unsigned char rawStringPrefix;
  444 
  445     //character which continues curreent style on next line
  446     unsigned char continuationChar;
  447 
  448     bool readFlag(const Diluculum::LuaVariable& var) ;
  449 
  450     // interface for plug-ins: add keywords dynamically
  451     static int luaAddKeyword (lua_State *L);
  452 
  453     // interface for plug-ins: remove keywords dynamically
  454     static int luaRemoveKeyword (lua_State *L);
  455     
  456     static int luaAddPersistentState (lua_State *L);
  457     
  458     static int luaOverrideParam (lua_State *L);
  459     
  460     // generate a keyword class 
  461     unsigned int generateNewKWClass ( int classID );
  462     
  463     void addKeyword(unsigned int groupID, const string& kw);
  464 
  465     void removeKeyword(const string& kw);
  466     
  467     void overrideParam(const string& name, const string& val);
  468 
  469 
  470     // Functions accessible in Lua State
  471     Diluculum::LuaFunction* validateStateChangeFct;
  472     Diluculum::LuaFunction* decorateFct, *decorateLineBeginFct, *decorateLineEndFct;
  473 
  474     Diluculum::LuaState* luaState; // make member to allow interaction with codeparser instance
  475 
  476     static vector<Diluculum::LuaFunction*> pluginChunks;
  477 };
  478 
  479 
  480 /**\brief Association of a regex with a state description
  481 
  482   A RegexElement associates a regular expression with the state information
  483   (opening and closing state, pattern, keyword class, keyword group id, language name)
  484 */
  485 class RegexElement
  486 {
  487 public:
  488     RegexElement()
  489         :open ( STANDARD ), end ( STANDARD ), kwClass ( 0 ), capturingGroup ( -1 ), 
  490         langName(), instanceId(instanceCnt++),
  491         priority(0), constraintLineNum (0) 
  492     {
  493     }
  494 
  495     RegexElement ( State oState, State eState, const string&rePattern, unsigned int cID=0, int group=-1, const string& name="", 
  496                    unsigned int prio=0, unsigned int cLineNum=0,  const string &cFilename="" /*, const string &sDesc=""*/) :
  497         open ( oState ), end ( eState ), kwClass ( cID ), capturingGroup ( group ), langName(name),instanceId(instanceCnt++),
  498         priority(prio), constraintLineNum (cLineNum), constraintFilename (cFilename) //, semantics(sDesc)
  499     {
  500         pattern=rePattern;
  501         rex=boost::xpressive::sregex::compile(rePattern);
  502     }
  503 
  504     ~RegexElement()
  505     {
  506         instanceCnt--;
  507     }
  508 
  509     State open, ///< opening state
  510     end;  ///< closing state
  511     boost::xpressive::sregex rex;
  512     unsigned int kwClass;        ///< keyword class
  513     int capturingGroup;          ///< capturing group ID
  514     string langName;             ///< language name
  515     string pattern;              ///< RE pattern
  516     static int instanceCnt;
  517     int instanceId;
  518     unsigned int priority;          ///< if set and matched, no other other regular expression will be evaluated 
  519     unsigned int constraintLineNum; ///< restrict this regex to this source line number
  520     //int constraintColumn;         ///< restrict this regex to this source column
  521     string constraintFilename;      ///< restrict this regex to this source filename
  522     //string semantics;
  523 };
  524 
  525 }
  526 #endif