"Fossies" - the Fresh Open Source Software Archive

Member "ncc-2.8/lex.C" (14 Sep 2006, 9530 Bytes) of package /linux/privat/old/ncc-2.8.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 /******************************************************************************
    2 
    3     C/C++ lexical analyser on preprocessed source
    4 
    5 ******************************************************************************/
    6 
    7 #include <string.h>
    8 #include <stdlib.h>
    9 #include <stdio.h>
   10 #include <ctype.h>
   11 
   12 #include "global.h"
   13 
   14 token CTok;
   15 int &line = CTok.at_line;
   16 
   17 static char *Cpp;
   18 static int Ci, Clen;
   19 
   20 /******************************************************************************
   21         Maybe this is faster than ctype.h macros
   22         The first 127 ASCII characters are a universal constant.
   23 ******************************************************************************/
   // Character class table, indexed by the character's unsigned value:
   //   0  anything else
   //   1  decimal digit
   //   2  letter or underscore (may start or continue an identifier)
   //   3  letter that can also end a numeric constant (F, L, U, f, l, u)
   static char ll_ctypes [256];

   // Fill in ll_ctypes.  Entries that are never written stay 0 because
   // the table has static storage.  Calling this more than once is
   // harmless: every call stores the same values.
   static void initctypes ()
   {
       static const char letters [] =
           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
           "abcdefghijklmnopqrstuvwxyz"
           "_";
       static const char suffixes [] = "FLUflu";
       static const char digits [] = "0123456789";
       const char *p;

       for (p = letters; *p; ++p)
           ll_ctypes [(unsigned char) *p] = 2;
       // the suffix letters are letters too; they are upgraded from 2 to 3
       for (p = suffixes; *p; ++p)
           ll_ctypes [(unsigned char) *p] = 3;
       for (p = digits; *p; ++p)
           ll_ctypes [(unsigned char) *p] = 1;
   }

   // The argument is converted to unsigned char before indexing, so a
   // plain char with the high bit set can never produce a negative index.
   #define ISNIEND(x) (ll_ctypes [(unsigned char)(x)] == 3)
   #define ISALPHA(x) (ll_ctypes [(unsigned char)(x)] >= 2)
   #define ISDIGIT(x) (ll_ctypes [(unsigned char)(x)] == 1)
   #define ISALNUM(x) (ll_ctypes [(unsigned char)(x)] != 0)
   46 /******************************************************************************
   47         Unwindable lex exceptional error conditions
   48 ******************************************************************************/
   49 
   50 class EOFC {
   51 public:
   52     EOFC(const char*);
   53 };
   54 
   55 EOFC::EOFC (const char *c)
   56 {
   57     fprintf (stderr, "Unterminated %s near token %i\n", c, Ci);
   58 }
   59 
   60 /***************************************************************************
   61         Start of Token Parser Routines
   62 ***************************************************************************/
   63 
   64 static inline void skip_ws ()
   65 {
   66     for (;;) {
   67         for (;;) {
   68             if (Cpp [Ci] == ' ' || Cpp [Ci] == '\t') {
   69                 if (++Ci >= Clen) return;
   70                 continue;
   71             }
   72             if (Cpp [Ci] == '\n') {
   73                 ++line;
   74                 if (++Ci >= Clen) return;
   75                 continue;
   76             }
   77             break;
   78         }
   79 #if 0
   80         if (Cpp [Ci] == '\\' && Cpp [Ci + 1] == '\n') {
   81             Ci += 2;
   82             ++line;
   83             continue;
   84         }
   85 #endif
   86         break;
   87     }
   88 }
   89 
   90 static inline void get_ident ()
   91 {
   92     CTok.type = IDENT_DUMMY;
   93     CTok.p = &Cpp [Ci];
   94 
   95     while (ISALNUM (Cpp [Ci]))
   96         if (++Ci >= Clen) break;
   97 
   98     CTok.len = &Cpp [Ci] - CTok.p;
   99 }
  100 
  101 static char EOFstring [] = " string literal";
  102 
  103 static void get_string ()
  104 {
  105     CTok.type = STRING;
  106     CTok.p = &Cpp [++Ci];
  107 
  108     for (;;) {
  109         while (Cpp [Ci] != '\\' && Cpp [Ci] != '"')
  110             if (++Ci >= Clen) throw EOFC (EOFstring);
  111         if (Cpp [Ci] == '\\') {
  112             Ci += 2;
  113             if (Ci >= Clen) throw EOFC (EOFstring);
  114             continue;
  115         }
  116         break;
  117     }
  118 
  119     CTok.len = &Cpp [Ci] - CTok.p;
  120     ++Ci;
  121 }
  122 
  123 static inline void get_exponent ()
  124 {
  125     ++Ci;
  126     if (Cpp [Ci] == '-' || Cpp [Ci] == '+') Ci++;
  127     while (ISDIGIT (Cpp [Ci]))
  128         if (++Ci >= Clen) break;
  129 }
  130 
  131 static inline void get_float_frac ()
  132 {
  133     // The token pointer and length are already set to
  134     // the decimal part, or this[char] && 0 if no decimal part
  135 
  136     ++Ci;
  137 
  138     while (ISDIGIT (Cpp [Ci]))
  139         if (++Ci >= Clen) break;
  140 }
  141 
  142 static char EOFchar [] = "character constant";
  143 
  144 static void get_char_const ()
  145 {
  146     ++Ci;
  147     CTok.type = CCONSTANT;
  148     CTok.p = &Cpp [Ci];
  149 
  150     for (;;) {
  151         while (Cpp [Ci] != '\\' && Cpp [Ci] != '\'')
  152             if (++Ci >= Clen) throw EOFC (EOFchar);
  153         if (Cpp [Ci] == '\\') {
  154             Ci += 2;
  155             if (Ci >= Clen) throw EOFC (EOFchar);
  156             continue;
  157         }
  158         break;
  159     }
  160 
  161     CTok.len = &Cpp [Ci] - CTok.p;
  162     if (CTok.len > 10) throw (EOFchar);
  163     ++Ci;
  164 }
  165 
  166 static inline void get_nconst ()
  167 {
  168     CTok.type = CONSTANT;
  169     CTok.p = &Cpp [Ci];
  170 
  171     while (isalnum (Cpp [Ci]))
  172         if (++Ci >= Clen) break;
  173 
  174     if (Cpp [Ci] == '.') {
  175         get_float_frac ();
  176         CTok.type = FCONSTANT;
  177     }
  178     if (Cpp [Ci] == 'e' || Cpp [Ci] == 'E' || Cpp [Ci] == 'p') {
  179         get_exponent ();
  180         CTok.type = FCONSTANT;
  181     }
  182 
  183     while (ISNIEND (Cpp [Ci]))
  184         if (++Ci >= Clen) break;
  185 
  186     CTok.len = &Cpp [Ci] - CTok.p;
  187 }
  188 
  189 /***************************************************************************
  190         Little utils
  191 ***************************************************************************/
  192 
  193 static void grle_morph ()
  194 {
  195     char gl = Cpp [Ci];
  196 
  197     CTok.p = &Cpp [Ci];
  198     ++Ci;
  199 
  200     if (Cpp [Ci] == gl) {
  201         ++Ci;
  202         if (Cpp [Ci] == '=') {
  203             ++Ci;
  204             CTok.type = (gl == '>') ? ASSIGNRS : ASSIGNLS;
  205         } else CTok.type = (gl == '>') ? RSH : LSH;
  206     } else if (Cpp [Ci] == '=' || Cpp [Ci] == '?') {
  207         ++Ci;
  208         CTok.type = (gl == '>') ? GEQCMP : LEQCMP;
  209     } else CTok.type = gl;
  210 }
  211 
  212 static void anor_morph ()
  213 {
  214     char ao = Cpp [Ci];
  215 
  216     ++Ci;
  217 
  218     if (Cpp [Ci] == ao) {
  219         ++Ci;
  220         CTok.type = (ao == '&') ? ANDAND : OROR;
  221     } else if (Cpp [Ci] == '=') {
  222         ++Ci;
  223         CTok.type = (ao == '&') ? ASSIGNBA : ASSIGNBO;
  224     } else CTok.type = ao;
  225 }
  226 
  227 /***************************************************************************
  228 ***************************************************************************/
  229 
  230 /******************************************************************************
  231         Interface entry functions
  232 ******************************************************************************/
  233 
  234 static void do_yylex ()
  235 {
  236 Again:
  237     if (Ci >= Clen) {
  238         CTok.type = THE_END;
  239         return;
  240     }
  241 
  242     skip_ws ();
  243     if (Ci >= Clen) {
  244         CTok.type = THE_END;
  245         return;
  246     }
  247 
  248     CTok.p = &Cpp [Ci];
  249     CTok.len = 0;
  250 
  251     if (ISDIGIT (Cpp [Ci]))
  252         get_nconst ();
  253     else if (ISALPHA (Cpp [Ci]) /*|| Cpp [Ci] == '_'*/)
  254         if (Cpp [Ci] == 'L' && (Cpp [Ci + 1] == '\'' || Cpp [Ci + 1] == '"')) {
  255             Ci++;
  256             goto Switch;
  257         } else
  258             get_ident ();
  259     else Switch: switch (Cpp [Ci]) {
  260         case '(':
  261         case ')':
  262         case ';':
  263         case ',':
  264             CTok.type = Cpp [Ci];
  265             CTok.p = &Cpp [Ci];
  266             ++Ci;
  267             break;
  268         case '*':
  269             CTok.type = Cpp [Ci];
  270             ++Ci;
  271             if (Cpp [Ci] == '=') {
  272                 CTok.type = ASSIGNM;
  273                 ++Ci;
  274                 break;
  275             }
  276             break;
  277         case '"':
  278             get_string ();
  279             return;
  280         case '\'':
  281             get_char_const ();
  282             return;
  283         case '/':
  284             ++Ci;
  285             if (Cpp [Ci] == '=') {
  286                 CTok.type = ASSIGND;
  287                 ++Ci;
  288                 break;
  289             }
  290             CTok.type = '/';
  291             break;
  292         case '.':
  293             if (ISDIGIT (Cpp [Ci + 1])) {
  294                 get_nconst ();
  295                 break;
  296             }
  297             ++Ci;
  298             if (Cpp [Ci] == '.' && Cpp [Ci + 1] == '.') {
  299                 CTok.type = ELLIPSIS;
  300                 Ci += 2;
  301             } else CTok.type = '.';
  302             break;
  303         case '-':
  304             ++Ci;
  305             if (Cpp [Ci] == '>') {
  306                 ++Ci;
  307                 CTok.type = POINTSAT;
  308                 break;
  309             }
  310             if (Cpp [Ci] == '-') {
  311                 CTok.type = MINUSMINUS;
  312                 ++Ci;
  313                 break;
  314             }
  315             if (Cpp [Ci] == '=') {
  316                 CTok.type = ASSIGNS;
  317                 ++Ci;
  318                 break;
  319             }
  320             CTok.type = '-';
  321             break;
  322         case '+':
  323             ++Ci;
  324             if (Cpp [Ci] == '+') {
  325                 CTok.type = PLUSPLUS;
  326                 ++Ci;
  327                 break;
  328             }
  329             if (Cpp [Ci] == '=') {
  330                 CTok.type = ASSIGNA;
  331                 ++Ci;
  332                 break;
  333             }
  334             CTok.type = '+';
  335             break;
  336         case '!':
  337         case '%':
  338         case '^':
  339             CTok.type = Cpp [Ci];
  340             ++Ci;
  341             if (Cpp [Ci] == '=') {
  342                 CTok.type = (CTok.type == '!') ? NEQCMP :
  343                     (CTok.type == '%') ? ASSIGNR : ASSIGNBX;
  344                 ++Ci;
  345                 break;
  346             }
  347             break;
  348         case '&':
  349         case '|':
  350             anor_morph ();
  351             break;
  352         case ':':
  353             ++Ci;
  354             CTok.type = ':';
  355             break;
  356         case '=':
  357             ++Ci;
  358             if (Cpp [Ci] == '=') {
  359                 CTok.type = EQCMP;
  360                 ++Ci;
  361                 break;
  362             }
  363             CTok.type = '=';
  364             break;
  365         case '>':
  366         case '<':
  367             grle_morph ();
  368             break;
  369         case '#':
  370             CTok.type = '#';
  371             if (Ci == 0 || Cpp [Ci - 1] == '\n'
  372             || Cpp [Ci - 1] == '\r')
  373                 CTok.type = CPP_DIRECTIVE;
  374             ++Ci;
  375             if (Ci < Clen && Cpp [Ci] == '#') {
  376                 CTok.type = CPP_CONCAT;
  377                 ++Ci;
  378             }
  379             break;
  380         case '[':
  381         case ']':
  382         case '~':
  383             CTok.type = Cpp [Ci];
  384             CTok.p = &Cpp [Ci];
  385             ++Ci;
  386             break;
  387         case '\r':
  388         case '\f':
  389             ++Ci;
  390             goto Again;
  391         default:
  392             // $
  393             CTok.type = Cpp [Ci];
  394             CTok.p = &Cpp [Ci];
  395             ++Ci;
  396     }
  397 
  398     CTok.len = &Cpp [Ci] - CTok.p;
  399 }
  400 
  401 static void enter_abspath_file (char *file)
  402 {
  403     char tmp [1024];
  404     if (!abs_paths || file [0] == '/') enter_file_indicator (file);
  405     else enter_file_indicator (strcat (strcpy (tmp, cwd), file));
  406 
  407 }
  408 
  409 static void skip_pp_line ()
  410 {
  411     // For preprocessed source, the only directive is:
  412     // # <line> "file"
  413     // send the file to enter_file_indicator ()
  414     // ... but it can also be pragma (thing)
  415     char tmp [512];
  416     tmp [0] = 0;
  417 
  418     if (Cpp [++Ci] == 'p') { // #pragma
  419         while (Ci < Clen && Cpp [Ci] != '\n')
  420             ++Ci;
  421         ++Ci;
  422         return;
  423     }
  424 
  425     // Assume, without verification, that the next token is
  426     // a line number.
  427 
  428     line = strtol (&Cpp [Ci], NULL, 10 );
  429 
  430     for(;;) {
  431         if (Ci >= Clen) {
  432                 CTok.type = THE_END;
  433                 return;
  434         }
  435 
  436         switch (Cpp [Ci]) {
  437         case '\n':
  438             if (tmp [0])
  439                 enter_abspath_file (tmp);
  440             ++Ci;       /* Scott */
  441             return;
  442         case '"':
  443             get_string ();
  444             strncpy (tmp, CTok.p, CTok.len);
  445             tmp [CTok.len] = 0;
  446             break;
  447         default:
  448                 ++Ci;
  449         }
  450     }
  451 }
  452 
  453 /******************************************************************************
  454         Main
  455 ******************************************************************************/
  456 
  457 extern bool quiet;
  458 
  459 void yynorm (char *c, int l)
  460 {
  461     initctypes ();
  462     Cpp = c;
  463     Clen = l;
  464     line = 1;
  465     Ci = 0;
  466 
  467     try {
  468         for (;;) {
  469             do_yylex ();
  470 
  471             if (CTok.type == THE_END) break;
  472             if (CTok.type == CPP_DIRECTIVE)
  473                 skip_pp_line (); else
  474 
  475             enter_token ();
  476         }
  477     } catch (EOFC) { }
  478     if (!quiet)
  479         fprintf (stderr, "%i lines\n", line);
  480 }