"Fossies" - the Fresh Open Source Software Archive

Member "ctok.c" (9 May 1995, 16712 Bytes) of package /linux/misc/old/cpost.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ctok.c", see the Fossies "Dox" file reference documentation.

    1 /*------------------------------------------------------------------
    2  * ctok : C language tokenizer
    3  *------------------------------------------------------------------
    4  * 10-01-91 Patrick J. Mueller
    5  *------------------------------------------------------------------*/
    6 
    7 #include <stdio.h>
    8 #include <stdlib.h>
    9 #include <string.h>
   10 #include <ctype.h>
   11 
   12 #include "ctok.h"
   13 
   14 /*------------------------------------------------------------------
   15  * is a character a valid character in a C identifier
   16  *------------------------------------------------------------------*/
   17 #define isCsymbol(c) (isalnum(c) || ('_' == c))
   18 
   19 /*------------------------------------------------------------------
   20  * typedefs
   21  *------------------------------------------------------------------*/
   22 typedef struct
   23    {
   24    int            eof;
   25    char          *buffer;
   26    long           bufferLen;
   27    long           bufferInd;
   28    long           fileOffs;
   29    long           line;
   30    int            unGetChar;
   31    int            unGetReady;
   32    long           tokOffs;
   33    long           tokLen;
   34    CTokRead       readFunc;
   35    void          *readInfo;
   36    char           ident[MAX_IDENT_LEN+1];
   37    } CTokInfo;
   38 
   39 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
   40 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
   41 
   42 /*------------------------------------------------------------------
   43  * get next char from file
   44  *------------------------------------------------------------------*/
   45 void GetNextChar(
   46    int         *c,
   47    CTokInfo    *cti
   48    )
   49    {
   50    cti->fileOffs++;
   51 
   52    /*---------------------------------------------------------------
   53     * check for end of file
   54     *---------------------------------------------------------------*/
   55    if (cti->eof)
   56       {
   57       *c = EOF;
   58       return;
   59       }
   60 
   61    /*---------------------------------------------------------------
   62     * check for a char in the unget holder
   63     *---------------------------------------------------------------*/
   64    if (cti->unGetReady)
   65       {
   66       cti->unGetReady = 0;
   67       *c = cti->unGetChar;
   68 
   69       if ('\n' == *c)
   70          cti->line++;
   71       return;
   72       }
   73 
   74    /*---------------------------------------------------------------
   75     * see if we need to read another buffer
   76     *---------------------------------------------------------------*/
   77    if (cti->bufferInd == cti->bufferLen)
   78       {
   79       cti->bufferLen = cti->readFunc(cti->readInfo,&(cti->buffer));
   80       cti->bufferInd = 0L;
   81 
   82       if (0L == cti->bufferLen)
   83          {
   84          *c = EOF;
   85          cti->eof = 1;
   86          return;
   87          }
   88       }
   89 
   90    /*---------------------------------------------------------------
   91     * read character from buffer
   92     *---------------------------------------------------------------*/
   93    *c = cti->buffer[cti->bufferInd++];
   94 
   95    if ('\n' == *c)
   96       cti->line++;
   97 
   98    return;
   99    }
  100 
  101 /*------------------------------------------------------------------
  102  * put back last char from file
  103  *------------------------------------------------------------------*/
  104 void UnGetNextChar(
  105    int          c,
  106    CTokInfo    *cti
  107    )
  108    {
  109    cti->fileOffs--;
  110 
  111    cti->unGetChar  = c;
  112    cti->unGetReady = 1;
  113 
  114    if ('\n' == c)
  115       cti->line--;
  116    }
  117 
  118 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  119 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  120 
  121 /*------------------------------------------------------------------
  122  * read a C character constant or string
  123  *------------------------------------------------------------------*/
  124 static void ReadString(
  125    CTokInfo    *cti,
  126    int          c
  127    )
  128    {
  129    int stop;
  130 
  131    /*---------------------------------------------------------------
  132     * the character passed in is ' or ", and it is the character that
  133     * signifies the end of the string
  134     *---------------------------------------------------------------*/
  135    stop = c;
  136 
  137    /*---------------------------------------------------------------
  138     * keep going until we hit our stop character
  139     *---------------------------------------------------------------*/
  140    GetNextChar(&c,cti);
  141    while (stop != c)
  142       {
  143       /*------------------------------------------------------------
  144        * for a \, inhale next character
  145        *------------------------------------------------------------*/
  146       if ('\\' == c)
  147          GetNextChar(&c,cti);
  148 
  149       /*------------------------------------------------------------
  150        * for EOF, break
  151        *------------------------------------------------------------*/
  152       if (EOF == c)
  153          break;
  154 
  155       GetNextChar(&c,cti);
  156       }
  157 
  158    return;
  159    }
  160 
  161 /*------------------------------------------------------------------
  162  * read a C comment
  163  *------------------------------------------------------------------*/
  164 static void ReadComment(
  165    CTokInfo    *cti
  166    )
  167    {
  168    int c;
  169 
  170    /*---------------------------------------------------------------
  171     * loop until end of file (or return in middle)
  172     *---------------------------------------------------------------*/
  173    GetNextChar(&c,cti);
  174    while (EOF != c)
  175       {
  176 
  177       /*------------------------------------------------------------
  178        * if not *, just get next character
  179        *------------------------------------------------------------*/
  180       if ('*' != c)
  181          GetNextChar(&c,cti);
  182 
  183       /*------------------------------------------------------------
  184        * got a * - see if next is /
  185        *------------------------------------------------------------*/
  186       else
  187          {
  188          /*---------------------------------------------------------
  189           * if next is /, return
  190           *---------------------------------------------------------*/
  191          GetNextChar(&c,cti);
  192          if ('/'  == c)
  193             return;
  194          }
  195 
  196       }
  197 
  198    return;
  199    }
  200 
  201 /*------------------------------------------------------------------
  202  * read a C++ style comment
  203  *------------------------------------------------------------------*/
  204 static void ReadCppComment(
  205    CTokInfo    *cti
  206    )
  207    {
  208    int c;
  209 
  210    /*---------------------------------------------------------------
  211     * loop until end of line or end of file
  212     *---------------------------------------------------------------*/
  213    GetNextChar(&c,cti);
  214 
  215    while ((EOF != c) && ('\n' != c))
  216       GetNextChar(&c,cti);
  217 
  218    UnGetNextChar(c,cti);
  219    return;
  220    }
  221 
  222 /*------------------------------------------------------------------
  223  * read an identifier
  224  *------------------------------------------------------------------*/
  225 static void ReadIdent(
  226    CTokInfo    *cti,
  227    int          c
  228    )
  229    {
  230    int identLen;
  231 
  232    /*---------------------------------------------------------------
  233     * initialize length and stick first char in
  234     *---------------------------------------------------------------*/
  235    identLen = 0;
  236    cti->ident[identLen++] = (char) c;
  237 
  238    /*---------------------------------------------------------------
  239     * while still a valid symbol character ...
  240     *---------------------------------------------------------------*/
  241    GetNextChar(&c,cti);
  242    while (isCsymbol(c))
  243       {
  244       /*------------------------------------------------------------
  245        * make sure we got enough room, then stick it in
  246        *------------------------------------------------------------*/
  247       if (identLen < MAX_IDENT_LEN)
  248          cti->ident[identLen++] = (char) c;
  249 
  250       GetNextChar(&c,cti);
  251       }
  252 
  253    /*---------------------------------------------------------------
  254     * finish up identifier, put last character back
  255     *---------------------------------------------------------------*/
  256    cti->ident[identLen] = '\0';
  257    UnGetNextChar(c,cti);
  258    }
  259 
  260 /*------------------------------------------------------------------
  261  * read a number
  262  *------------------------------------------------------------------*/
  263 static void ReadNumber(
  264    CTokInfo    *cti,
  265    int          c
  266    )
  267    {
  268 
  269    /*---------------------------------------------------------------
  270     * while still a valid number character ...
  271     *---------------------------------------------------------------*/
  272    GetNextChar(&c,cti);
  273    while (isalnum(c))
  274       GetNextChar(&c,cti);
  275 
  276    /*---------------------------------------------------------------
  277     * put last character back
  278     *---------------------------------------------------------------*/
  279    UnGetNextChar(c,cti);
  280    }
  281 
  282 /*------------------------------------------------------------------
  283  * read a preprocessor statement
  284  *------------------------------------------------------------------*/
  285 static void ReadPreprocessor(
  286    CTokInfo    *cti
  287    )
  288    {
  289    int c;
  290 
  291    /*---------------------------------------------------------------
  292     * loop until end of file (or return in middle)
  293     *---------------------------------------------------------------*/
  294    GetNextChar(&c,cti);
  295    while (EOF != c)
  296       {
  297       /*------------------------------------------------------------
  298        * if we found a newline, leave
  299        *------------------------------------------------------------*/
  300       if ('\n' == c)
  301          {
  302          UnGetNextChar(c,cti);
  303          return;
  304          }
  305 
  306       /*------------------------------------------------------------
  307        * if we got anything but a \, eat it
  308        *------------------------------------------------------------*/
  309       else if ('\\' != c)
  310          GetNextChar(&c,cti);
  311 
  312       /*------------------------------------------------------------
  313        * got a \ - see if next is \n
  314        *------------------------------------------------------------*/
  315       else
  316          {
  317          /*---------------------------------------------------------
  318           * if next isn't \n, start at top of loop
  319           *---------------------------------------------------------*/
  320          GetNextChar(&c,cti);
  321 
  322          /*---------------------------------------------------------
  323           * skip over white space first
  324           *---------------------------------------------------------*/
  325          while (isspace(c) && ('\n' != c))
  326             GetNextChar(&c,cti);
  327 
  328          if ('\n' != c)
  329             continue;
  330 
  331          /*---------------------------------------------------------
  332           * if it is a \n, read next char and continue
  333           *---------------------------------------------------------*/
  334          GetNextChar(&c,cti);
  335          continue;
  336          }
  337 
  338       }
  339 
  340    return;
  341    }
  342 
  343 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  344 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  345 
  346 /*------------------------------------------------------------------
  347  * tokenizer
  348  *------------------------------------------------------------------*/
  349 
  350 static int GetToken(
  351    CTokInfo    *cti
  352    )
  353    {
  354    int           c;
  355    int           type;
  356    unsigned long offsStart;
  357 
  358    /*---------------------------------------------------------------
  359     * read next character
  360     *---------------------------------------------------------------*/
  361    GetNextChar(&c,cti);
  362 
  363    /*---------------------------------------------------------------
  364     * skip white space
  365     *---------------------------------------------------------------*/
  366    while (isspace(c))
  367       GetNextChar(&c,cti);
  368 
  369    /*---------------------------------------------------------------
  370     * save starting offset
  371     *---------------------------------------------------------------*/
  372    offsStart = cti->fileOffs;
  373 
  374    /*---------------------------------------------------------------
  375     * empty identifier
  376     *---------------------------------------------------------------*/
  377    memset(cti->ident,'\0',sizeof(cti->ident));
  378 
  379    /*---------------------------------------------------------------
  380     * big switch on it's value
  381     *---------------------------------------------------------------*/
  382    switch(c)
  383       {
  384       /*------------------------------------------------------------
  385        * check for end of file
  386        *------------------------------------------------------------*/
  387       case EOF:
  388          type = TOKEN_EOF;
  389          break;
  390 
  391       /*------------------------------------------------------------
  392        * for pound sign, read preprocessor directive
  393        *------------------------------------------------------------*/
  394       case '#':
  395          ReadPreprocessor(cti);
  396          type = TOKEN_PREPROC;
  397          break;
  398 
  399       /*------------------------------------------------------------
  400        * single or double quote
  401        *------------------------------------------------------------*/
  402       case '\'':
  403       case '"':
  404          ReadString(cti,c);
  405          type = TOKEN_STRING;
  406          break;
  407 
  408       /*------------------------------------------------------------
  409        * start of comment?
  410        *------------------------------------------------------------*/
  411       case '/':
  412          /*---------------------------------------------------------
  413           * get next char - if *, read to end of comment
  414           *---------------------------------------------------------*/
  415          GetNextChar(&c,cti);
  416          if ('*' == c)
  417             {
  418             ReadComment(cti);
  419             type = TOKEN_COMMENT;
  420             }
  421 
  422          /*---------------------------------------------------------
  423           * see if it's a C++ style comment
  424           *---------------------------------------------------------*/
  425          else if ('/' == c)
  426             {
  427             ReadCppComment(cti);
  428             type = TOKEN_COMMENT;
  429             }
  430 
  431          /*---------------------------------------------------------
  432           * otherwise it's just a plain /
  433           *---------------------------------------------------------*/
  434          else
  435             {
  436             UnGetNextChar(c,cti);
  437             type = TOKEN_OPER;
  438             }
  439 
  440          break;
  441 
  442       /*------------------------------------------------------------
  443        * everything else - identifiers and punctuation
  444        *------------------------------------------------------------*/
  445       default:
  446          if (isCsymbol(c) && !isdigit(c))
  447             {
  448             ReadIdent(cti,c);
  449             type = TOKEN_IDENT;
  450             }
  451 
  452          else if (isdigit(c))
  453             {
  454             ReadNumber(cti,c);
  455             type = TOKEN_NUMBER;
  456             }
  457 
  458          /*---------------------------------------------------------
  459           * anything else
  460           *---------------------------------------------------------*/
  461          else
  462             {
  463             type = TOKEN_OPER;
  464             cti->ident[0] = (char) c;
  465             }
  466 
  467          break;
  468       }
  469 
  470    cti->tokOffs = offsStart;
  471    cti->tokLen  = cti->fileOffs - offsStart + 1;
  472    return(type);
  473    }
  474 
  475 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  476 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  477 
  478 
  479 /*------------------------------------------------------------------
  480  * Initializer
  481  *------------------------------------------------------------------*/
  482 void *CTokInit(
  483    CTokRead  readFunc,
  484    void     *readInfo
  485    )
  486    {
  487    CTokInfo *cti;
  488 
  489    /*---------------------------------------------------------------
  490     * allocate space for structure
  491     *---------------------------------------------------------------*/
  492    cti = malloc(sizeof(CTokInfo));
  493    if (NULL == cti)
  494       return NULL;
  495 
  496    /*---------------------------------------------------------------
  497     * initialize structure
  498     *---------------------------------------------------------------*/
  499    cti->eof         = 0;
  500    cti->buffer      = NULL;
  501    cti->bufferLen   = 0L;
  502    cti->bufferInd   = 0L;
  503    cti->fileOffs    = -1L;
  504    cti->line        = 1;
  505    cti->unGetChar   = '\0';
  506    cti->unGetReady  = 0;
  507    cti->tokOffs     = 0L;
  508    cti->tokLen      = 0L;
  509    cti->readFunc    = readFunc;
  510    cti->readInfo    = readInfo;
  511    memset(cti->ident,'\0',sizeof(cti->ident));
  512 
  513    return cti;
  514    }
  515 
  516 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  517 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  518 
  519 /*------------------------------------------------------------------
  520  * Terminator
  521  *------------------------------------------------------------------*/
  522 void CTokTerm(
  523    void *handle
  524    )
  525    {
  526    free(handle);
  527    }
  528 
  529 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
  530 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
  531 
  532 /*------------------------------------------------------------------
  533  * Tokenizer
  534  *------------------------------------------------------------------*/
  535 void CTokGet(
  536    void     *handle,
  537    Token    *token
  538    )
  539    {
  540    CTokInfo *cti;
  541 
  542    cti = handle;
  543 
  544    token->type  = GetToken(cti);
  545    token->offs  = cti->tokOffs;
  546    token->len   = cti->tokLen;
  547    token->ident = cti->ident;
  548    token->line  = cti->line;
  549    }