"Fossies" - the Fresh Open Source Software Archive

Member "seed7/src/scanner.c" (9 Oct 2018, 16081 Bytes) of package /linux/misc/seed7_05_20210223.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "scanner.c", see the Fossies "Dox" file reference documentation.

    1 /********************************************************************/
    2 /*                                                                  */
    3 /*  s7   Seed7 interpreter                                          */
    4 /*  Copyright (C) 1990 - 2014  Thomas Mertes                        */
    5 /*                                                                  */
    6 /*  This program is free software; you can redistribute it and/or   */
    7 /*  modify it under the terms of the GNU General Public License as  */
    8 /*  published by the Free Software Foundation; either version 2 of  */
    9 /*  the License, or (at your option) any later version.             */
   10 /*                                                                  */
   11 /*  This program is distributed in the hope that it will be useful, */
   12 /*  but WITHOUT ANY WARRANTY; without even the implied warranty of  */
   13 /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the   */
   14 /*  GNU General Public License for more details.                    */
   15 /*                                                                  */
   16 /*  You should have received a copy of the GNU General Public       */
   17 /*  License along with this program; if not, write to the           */
   18 /*  Free Software Foundation, Inc., 51 Franklin Street,             */
   19 /*  Fifth Floor, Boston, MA  02110-1301, USA.                       */
   20 /*                                                                  */
   21 /*  Module: Analyzer - Scanner                                      */
   22 /*  File: seed7/src/scanner.c                                       */
   23 /*  Changes: 1990, 1991, 1992, 1993, 1994, 2014  Thomas Mertes      */
   24 /*  Content: Read the next symbol from the source file.             */
   25 /*                                                                  */
   26 /*  The scan_symbol is tuned for maximum performance.               */
   27 /*                                                                  */
   28 /********************************************************************/
   29 
   30 #define LOG_FUNCTIONS 0
   31 #define VERBOSE_EXCEPTIONS 0
   32 
   33 #include "version.h"
   34 
   35 #include "stdlib.h"
   36 #include "stdio.h"
   37 #include "string.h"
   38 #include "limits.h"
   39 
   40 #include "common.h"
   41 #include "data.h"
   42 #include "heaputl.h"
   43 #include "chclsutl.h"
   44 #include "identutl.h"
   45 #include "striutl.h"
   46 #include "traceutl.h"
   47 #include "literal.h"
   48 #include "numlit.h"
   49 #include "info.h"
   50 #include "infile.h"
   51 #include "symbol.h"
   52 #include "error.h"
   53 #include "stat.h"
   54 #include "findid.h"
   55 #include "chr_rtl.h"
   56 
   57 #undef EXTERN
   58 #define EXTERN
   59 #include "scanner.h"
   60 
   61 
   62 
   63 static void scan_comment (void)
   64 
   65   {
   66     register int character;
   67     lineNumType start_line;
   68 
   69   /* scan_comment */
   70     logFunction(printf("scan_comment\n"););
   71     start_line = in_file.line;
   72     character = next_character();
   73     do {
   74       do {
   75         while (std_comment_char(character)) {
   76           character = next_character();
   77         } /* while */
   78         if (character == '(') {
   79           character = next_character();
   80           if (character == '*') {
   81             scan_comment();
   82             character = in_file.character;
   83           } /* if */
   84         } else if (character == '\n') {
   85 /*        character = next_line(); */
   86           SKIP_CR_SP(character);
   87           INCR_LINE_COUNT(in_file.line);
   88           symbol.syNumberInLine = 0;
   89         } /* if */
   90       } while (character != '*' && character != EOF);
   91       if (character != EOF) {
   92         character = next_character();
   93       } /* if */
   94     } while (character != ')' && character != EOF);
   95     if (character == EOF) {
   96       in_file.character = EOF;
   97       err_at_line(COMMENTOPEN, start_line);
   98     } else {
   99       in_file.character = next_character();
  100     } /* if */
  101 #if WITH_STATISTIC
  102     comment_count++;
  103 #endif
  104     logFunction(printf("scan_comment -->\n"););
  105   } /* scan_comment */
  106 
  107 
  108 
  109 static inline void scan_line_comment (void)
  110 
  111   {
  112     register int character;
  113 
  114   /* scan_line_comment */
  115     logFunction(printf("scan_line_comment\n"););
  116     SKIP_TO_NL(character);
  117     in_file.character = character;
  118 #if WITH_STATISTIC
  119     comment_count++;
  120 #endif
  121     logFunction(printf("scan_line_comment -->\n"););
  122   } /* scan_line_comment */
  123 
  124 
  125 
  126 /**
  127  *  Accept an UTF-8 byte order mark at the beginning of a file.
  128  *  UTF-16 byte order marks cause an error and the whole file is
  129  *  skipped.
  130  */
  131 void scan_byte_order_mark (void)
  132 
  133   {
  134     charType unicode_char;
  135 
  136   /* scan_byte_order_mark */
  137     logFunction(printf("scan_byte_order_mark\n"););
  138     if (in_file.character >= 0xC0 && in_file.character <= 0xFD) {
  139       /* character range 192 to 253 (leading bits 11......) */
  140       unicode_char = utf8_char(in_file.character);
  141       if (unicode_char != 0xFEFF /* Byte-order mark */) {
  142         err_char(CHAR_ILLEGAL, unicode_char);
  143         while (char_class(in_file.character) == ILLEGALCHAR) {
  144           in_file.character = next_character();
  145         } /* while */
  146       } /* if */
  147     } else if (in_file.character == 0xFE || in_file.character == 0xFF) {
  148       unicode_char = (charType) in_file.character << CHAR_BIT;
  149       in_file.character = next_character();
  150       unicode_char |= (charType) in_file.character;
  151       if (unicode_char == 0xFEFF || unicode_char == 0xFFFE) {
  152         err_char(UTF16_BYTE_ORDER_MARK_FOUND, unicode_char);
  153         while (char_class(in_file.character) != EOFCHAR) {
  154           in_file.character = next_character();
  155         } /* while */
  156       } else {
  157         err_char(CHAR_ILLEGAL, unicode_char >> CHAR_BIT);
  158         while (char_class(in_file.character) == ILLEGALCHAR) {
  159           in_file.character = next_character();
  160         } /* while */
  161       } /* if */
  162     } /* if */
  163     logFunction(printf("scan_byte_order_mark -->\n"););
  164   } /* scan_byte_order_mark */
  165 
  166 
  167 
  168 static void scan_illegal (void)
  169 
  170   {
  171     charType unicode_char;
  172 
  173   /* scan_illegal */
  174     logFunction(printf("scan_illegal\n"););
  175     if (in_file.character >= 0xC0 && in_file.character <= 0xFF) {
  176       /* character range 192 to 255 (leading bits 11......) */
  177       unicode_char = utf8_char(in_file.character);
  178       err_char(CHAR_ILLEGAL, unicode_char);
  179       while (char_class(in_file.character) == ILLEGALCHAR) {
  180         in_file.character = next_character();
  181       } /* while */
  182     } else {
  183       err_cchar(CHAR_ILLEGAL, in_file.character);
  184       do {
  185         in_file.character = next_character();
  186       } while (char_class(in_file.character) == ILLEGALCHAR &&
  187                (!symbol.unicodeNames || in_file.character < 0xC0));
  188     } /* if */
  189     logFunction(printf("scan_illegal -->\n"););
  190   } /* scan_illegal */
  191 
  192 
  193 
  194 static inline void scan_eof (void)
  195 
  196   { /* scan_eof */
  197     logFunction(printf("scan_eof\n"););
  198     if (symbol.sycategory == STOPSYMBOL) {
  199       err_warning(EOF_ENCOUNTERED);
  200     } else {
  201       next_file();
  202       if (in_file.end_of_file) {
  203         strcpy((cstriType) symbol.name, "END OF FILE");
  204         in_file.character = EOF;
  205         find_eof_ident();
  206         symbol.sycategory = STOPSYMBOL;
  207         symbol.syNumberInLine++;
  208       } else {
  209         scan_symbol();
  210       } /* if */
  211     } /* if */
  212     logFunction(printf("scan_eof -->\n"););
  213   } /* scan_eof */
  214 
  215 
  216 
  217 static void scan_symbol_utf8 (int character, register sySizeType position)
  218 
  219   {
  220     charType unicode_char;
  221     boolType reading_symbol = TRUE;
  222 
  223   /* scan_symbol_utf8 */
  224     logFunction(printf("scan_symbol_utf8\n"););
  225     unicode_char = utf8_char(character);
  226     if (chrIsLetter(unicode_char)) {
  227       check_symb_length_delta(position, symbol.utf8_length);
  228       memcpy(&symbol.name[position], symbol.utf8_repr, symbol.utf8_length);
  229       position += symbol.utf8_length;
  230       character = in_file.character;
  231       do {
  232         if (character >= 0xC0 && character <= 0xFF) {
  233           unicode_char = utf8_char(character);
  234           if (chrIsLetter(unicode_char)) {
  235             check_symb_length_delta(position, symbol.utf8_length);
  236             memcpy(&symbol.name[position], symbol.utf8_repr, symbol.utf8_length);
  237             position += symbol.utf8_length;
  238             character = in_file.character;
  239           } else {
  240             reading_symbol = FALSE;
  241           } /* if */
  242         } else {
  243           if (name_character(character)) {
  244             check_symb_length(position);
  245             symbol.name[position++] = (ucharType) character;
  246             character = next_character();
  247           } else {
  248             reading_symbol = FALSE;
  249             in_file.character = character;
  250             unicode_char = 0;
  251           } /* if */
  252         } /* if */
  253       } while (reading_symbol);
  254       symbol.name[position] = '\0';
  255       find_normal_ident(position);
  256       symbol.sycategory = NAMESYMBOL;
  257       symbol.syNumberInLine++;
  258     } else if (position != 0) {
  259       symbol.name[position] = '\0';
  260       find_normal_ident(position);
  261       in_file.character = character;
  262       symbol.sycategory = NAMESYMBOL;
  263       symbol.syNumberInLine++;
  264     } /* if */
  265     if (unicode_char != 0) {
  266       /* ILLEGALCHAR */
  267       err_char(CHAR_ILLEGAL, unicode_char);
  268       while (char_class(in_file.character) == ILLEGALCHAR &&
  269              character < 0xC0) {
  270         in_file.character = next_character();
  271       } /* while */
  272     } /* if */
  273     logFunction(printf("scan_symbol_utf8 -->\n"););
  274   } /* scan_symbol_utf8 */
  275 
  276 
  277 
  278 void scan_symbol (void)
  279 
  280   {                                                             /*  1.89%  1.94% */
  281     register int character;
  282     register sySizeType position;
  283 
  284   /* scan_symbol */
  285     logFunction(printf("scan_symbol\n"););
  286     character = in_file.character;                              /*  0.51%  0.22% */
  287     if (character == ' ' || character == '\t') {                /*  0.88%  0.73% */
  288       SKIP_SPACE(character);                                    /*  1.73%  1.93% */
  289     } else if (character == '\n') {                             /*  0.88%  0.44% */
  290 /*    character = next_line(); */
  291       SKIP_CR_SP(character);                                    /*  6.43%  8.02% */
  292       INCR_LINE_COUNT(in_file.line);                            /*  0.26%  0.28% */
  293       symbol.syNumberInLine = 0;
  294     } /* if */
  295     switch (character) {                                        /*  2.87%  2.55% */
  296       case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
  297       case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
  298       case 'K':  case 'L':  case 'M':  case 'N':  case 'O':
  299       case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
  300       case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
  301       case 'Z':
  302       case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
  303       case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
  304       case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
  305       case 'p':  case 'q':  case 'r':  case 's':  case 't':
  306       case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
  307       case 'z':
  308       case '_':
  309         /* LETTERCHAR, UNDERLINECHAR */
  310         symbol.name[0] = (ucharType) character;                 /*  0.24%  0.25% */
  311         position = 1;                                           /*  0.12%  0.12% */
  312 
  313         do {
  314           while (position != symbol.name_length &&
  315               name_character(character = next_character())) {   /*  2.87%  3.45% */
  316             symbol.name[position++] = (ucharType) character;    /*  2.31%  2.36% */
  317           } /* while */                                         /*  9.71% 11.78% */
  318           check_symb_length(position);
  319         } while (name_character(character));
  320 
  321         if (symbol.unicodeNames &&
  322             character >= 0xC0 && character <= 0xFF) {
  323           /* character range 192 to 255 (leading bits 11......) */
  324           scan_symbol_utf8(character, position);
  325         } else {
  326           symbol.name[position] = '\0';                         /*  0.36%  0.37% */
  327           find_normal_ident(position);                          /*  0.24%  0.25% */
  328           in_file.character = character;                        /*  0.12%  0.12% */
  329           symbol.sycategory = NAMESYMBOL;                       /*  0.24%  0.25% */
  330           symbol.syNumberInLine++;
  331         } /* if */
  332         break;                                                  /*  0.12%  0.12% */
  333       case ' ':  case '\t':  case '\r':
  334         /* SPACECHAR */
  335         in_file.character = next_character();                   /*  0.87%        */
  336         scan_symbol();
  337         break;
  338       case '\n':
  339         /* NEWLINECHAR */
  340         in_file.character = character;
  341         scan_symbol();
  342         break;
  343       case '!':  case '$':  case '%':  case '&':  case '*':
  344       case '+':  case ',':  case '-':  case '.':  case '/':
  345       case ':':  case ';':  case '<':  case '=':  case '>':
  346       case '?':  case '@':  case '\\': case '^':  case '`':
  347       case '|':  case '~':
  348         /* SPECIALCHAR */
  349         symbol.name[0] = (ucharType) character;                 /*  0.16%  0.16% */
  350         if (op_character(character = next_character())) {       /*  1.26%  1.46% */
  351           position = 1;
  352           do {
  353             check_symb_length(position);
  354             symbol.name[position++] = (ucharType) character;
  355           } while (op_character(character = next_character())); /*  0.07%  0.03% */
  356           symbol.name[position] = '\0';
  357           find_normal_ident(position);
  358         } else {
  359           symbol.name[1] = '\0';                                /*  0.10%  0.10% */
  360           find_1_ch_ident(symbol.name[0]);                      /*  0.49%  0.51% */
  361         } /* if */
  362         in_file.character = character;                          /*  0.05%  0.05% */
  363         symbol.sycategory = SPECIALSYMBOL;                      /*  0.10%  0.10% */
  364         symbol.syNumberInLine++;
  365         break;                                                  /*  0.05%  0.10% */
  366       case '(':
  367         /* LEFTPARENCHAR */
  368         if ((in_file.character = next_character()) == '*') {    /*  0.32%  0.32% */
  369           scan_comment();
  370           scan_symbol();
  371         } else {
  372           symbol.name[0] = '(';                                 /*  0.04%  0.04% */
  373 /*        symbol.name[1] = '\0';                                    0.03%  0.03% */
  374           find_1_ch_ident('(');                                 /*  0.04%  0.09% */
  375           symbol.sycategory = PARENSYMBOL;                      /*  0.03%  0.03% */
  376           symbol.syNumberInLine++;
  377         } /* if */
  378         break;                                                  /*         0.01% */
  379       case ')':
  380       case '[':  case ']':
  381       case '{':  case '}':
  382         /* PARENCHAR */
  383         symbol.name[0] = (ucharType) character;                 /*  0.05%  0.05% */
  384 /*      symbol.name[1] = '\0';                                      0.05%  0.05% */
  385         find_1_ch_ident(character);                             /*  0.08%  0.11% */
  386         in_file.character = next_character();                   /*  0.32%  0.34% */
  387         symbol.sycategory = PARENSYMBOL;                        /*  0.03%  0.03% */
  388         symbol.syNumberInLine++;
  389         break;                                                  /*         0.02% */
  390       case '0':  case '1':  case '2':  case '3':  case '4':
  391       case '5':  case '6':  case '7':  case '8':  case '9':
  392         /* DIGITCHAR */
  393         in_file.character = character;
  394         lit_number();
  395         break;
  396       case '\'':
  397         /* APOSTROPHECHAR */
  398         lit_char();
  399         break;
  400       case '\"':
  401         /* QUOTATIONCHAR */
  402         lit_string();                                           /*         0.02% */
  403         break;                                                  /*         0.02% */
  404       case '#':
  405         /* SHARPCHAR */
  406         scan_line_comment();
  407         scan_symbol();
  408         break;
  409       case EOF:
  410         /* EOFCHAR */
  411         scan_eof();
  412         break;
  413       default:
  414         if (symbol.unicodeNames &&
  415             character >= 0xC0 && character <= 0xFF) {
  416           /* character range 192 to 255 (leading bits 11......) */
  417           scan_symbol_utf8(character, 0);
  418         } else {
  419           /* ILLEGALCHAR */
  420           in_file.character = character;
  421           scan_illegal();
  422           scan_symbol();                                        /*  1.46%  1.28% */
  423         } /* if */
  424         break;
  425     } /* switch */
  426     logFunction(printf("scan_symbol -->\n"););
  427   } /* scan_symbol */