"Fossies" - the Fresh Open Source Software Archive

Member "memcached-1.6.15/vendor/lua/src/llex.c" (1 Oct 2021, 17095 Bytes) of package /linux/www/memcached-1.6.15.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /*
    2 ** $Id: llex.c $
    3 ** Lexical Analyzer
    4 ** See Copyright Notice in lua.h
    5 */
    6 
    7 #define llex_c
    8 #define LUA_CORE
    9 
   10 #include "lprefix.h"
   11 
   12 
   13 #include <locale.h>
   14 #include <string.h>
   15 
   16 #include "lua.h"
   17 
   18 #include "lctype.h"
   19 #include "ldebug.h"
   20 #include "ldo.h"
   21 #include "lgc.h"
   22 #include "llex.h"
   23 #include "lobject.h"
   24 #include "lparser.h"
   25 #include "lstate.h"
   26 #include "lstring.h"
   27 #include "ltable.h"
   28 #include "lzio.h"
   29 
   30 
   31 
   32 #define next(ls)    (ls->current = zgetc(ls->z))
   33 
   34 
   35 
   36 #define currIsNewline(ls)   (ls->current == '\n' || ls->current == '\r')
   37 
   38 
   39 /* ORDER RESERVED */
   40 static const char *const luaX_tokens [] = {
   41     "and", "break", "do", "else", "elseif",
   42     "end", "false", "for", "function", "goto", "if",
   43     "in", "local", "nil", "not", "or", "repeat",
   44     "return", "then", "true", "until", "while",
   45     "//", "..", "...", "==", ">=", "<=", "~=",
   46     "<<", ">>", "::", "<eof>",
   47     "<number>", "<integer>", "<name>", "<string>"
   48 };
   49 
   50 
   51 #define save_and_next(ls) (save(ls, ls->current), next(ls))
   52 
   53 
   54 static l_noret lexerror (LexState *ls, const char *msg, int token);
   55 
   56 
   57 static void save (LexState *ls, int c) {
   58   Mbuffer *b = ls->buff;
   59   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
   60     size_t newsize;
   61     if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
   62       lexerror(ls, "lexical element too long", 0);
   63     newsize = luaZ_sizebuffer(b) * 2;
   64     luaZ_resizebuffer(ls->L, b, newsize);
   65   }
   66   b->buffer[luaZ_bufflen(b)++] = cast_char(c);
   67 }
   68 
   69 
   70 void luaX_init (lua_State *L) {
   71   int i;
   72   TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
   73   luaC_fix(L, obj2gco(e));  /* never collect this name */
   74   for (i=0; i<NUM_RESERVED; i++) {
   75     TString *ts = luaS_new(L, luaX_tokens[i]);
   76     luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
   77     ts->extra = cast_byte(i+1);  /* reserved word */
   78   }
   79 }
   80 
   81 
   82 const char *luaX_token2str (LexState *ls, int token) {
   83   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
   84     if (lisprint(token))
   85       return luaO_pushfstring(ls->L, "'%c'", token);
   86     else  /* control character */
   87       return luaO_pushfstring(ls->L, "'<\\%d>'", token);
   88   }
   89   else {
   90     const char *s = luaX_tokens[token - FIRST_RESERVED];
   91     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
   92       return luaO_pushfstring(ls->L, "'%s'", s);
   93     else  /* names, strings, and numerals */
   94       return s;
   95   }
   96 }
   97 
   98 
   99 static const char *txtToken (LexState *ls, int token) {
  100   switch (token) {
  101     case TK_NAME: case TK_STRING:
  102     case TK_FLT: case TK_INT:
  103       save(ls, '\0');
  104       return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
  105     default:
  106       return luaX_token2str(ls, token);
  107   }
  108 }
  109 
  110 
  111 static l_noret lexerror (LexState *ls, const char *msg, int token) {
  112   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
  113   if (token)
  114     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
  115   luaD_throw(ls->L, LUA_ERRSYNTAX);
  116 }
  117 
  118 
  119 l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
  120   lexerror(ls, msg, ls->t.token);
  121 }
  122 
  123 
  124 /*
  125 ** Creates a new string and anchors it in scanner's table so that it
  126 ** will not be collected until the end of the compilation; by that time
  127 ** it should be anchored somewhere. It also internalizes long strings,
  128 ** ensuring there is only one copy of each unique string.  The table
  129 ** here is used as a set: the string enters as the key, while its value
  130 ** is irrelevant. We use the string itself as the value only because it
  131 ** is a TValue readly available. Later, the code generation can change
  132 ** this value.
  133 */
  134 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  135   lua_State *L = ls->L;
  136   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
  137   const TValue *o = luaH_getstr(ls->h, ts);
  138   if (!ttisnil(o))  /* string already present? */
  139     ts = keystrval(nodefromval(o));  /* get saved copy */
  140   else {  /* not in use yet */
  141     TValue *stv = s2v(L->top++);  /* reserve stack space for string */
  142     setsvalue(L, stv, ts);  /* temporarily anchor the string */
  143     luaH_finishset(L, ls->h, stv, o, stv);  /* t[string] = string */
  144     /* table is not a metatable, so it does not need to invalidate cache */
  145     luaC_checkGC(L);
  146     L->top--;  /* remove string from stack */
  147   }
  148   return ts;
  149 }
  150 
  151 
  152 /*
  153 ** increment line number and skips newline sequence (any of
  154 ** \n, \r, \n\r, or \r\n)
  155 */
  156 static void inclinenumber (LexState *ls) {
  157   int old = ls->current;
  158   lua_assert(currIsNewline(ls));
  159   next(ls);  /* skip '\n' or '\r' */
  160   if (currIsNewline(ls) && ls->current != old)
  161     next(ls);  /* skip '\n\r' or '\r\n' */
  162   if (++ls->linenumber >= MAX_INT)
  163     lexerror(ls, "chunk has too many lines", 0);
  164 }
  165 
  166 
  167 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
  168                     int firstchar) {
  169   ls->t.token = 0;
  170   ls->L = L;
  171   ls->current = firstchar;
  172   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  173   ls->z = z;
  174   ls->fs = NULL;
  175   ls->linenumber = 1;
  176   ls->lastline = 1;
  177   ls->source = source;
  178   ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
  179   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  180 }
  181 
  182 
  183 
  184 /*
  185 ** =======================================================
  186 ** LEXICAL ANALYZER
  187 ** =======================================================
  188 */
  189 
  190 
  191 static int check_next1 (LexState *ls, int c) {
  192   if (ls->current == c) {
  193     next(ls);
  194     return 1;
  195   }
  196   else return 0;
  197 }
  198 
  199 
  200 /*
  201 ** Check whether current char is in set 'set' (with two chars) and
  202 ** saves it
  203 */
  204 static int check_next2 (LexState *ls, const char *set) {
  205   lua_assert(set[2] == '\0');
  206   if (ls->current == set[0] || ls->current == set[1]) {
  207     save_and_next(ls);
  208     return 1;
  209   }
  210   else return 0;
  211 }
  212 
  213 
  214 /* LUA_NUMBER */
  215 /*
  216 ** This function is quite liberal in what it accepts, as 'luaO_str2num'
  217 ** will reject ill-formed numerals. Roughly, it accepts the following
  218 ** pattern:
  219 **
  220 **   %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
  221 **
  222 ** The only tricky part is to accept [+-] only after a valid exponent
  223 ** mark, to avoid reading '3-4' or '0xe+1' as a single number.
  224 **
  225 ** The caller might have already read an initial dot.
  226 */
  227 static int read_numeral (LexState *ls, SemInfo *seminfo) {
  228   TValue obj;
  229   const char *expo = "Ee";
  230   int first = ls->current;
  231   lua_assert(lisdigit(ls->current));
  232   save_and_next(ls);
  233   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
  234     expo = "Pp";
  235   for (;;) {
  236     if (check_next2(ls, expo))  /* exponent mark? */
  237       check_next2(ls, "-+");  /* optional exponent sign */
  238     else if (lisxdigit(ls->current) || ls->current == '.')  /* '%x|%.' */
  239       save_and_next(ls);
  240     else break;
  241   }
  242   if (lislalpha(ls->current))  /* is numeral touching a letter? */
  243     save_and_next(ls);  /* force an error */
  244   save(ls, '\0');
  245   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
  246     lexerror(ls, "malformed number", TK_FLT);
  247   if (ttisinteger(&obj)) {
  248     seminfo->i = ivalue(&obj);
  249     return TK_INT;
  250   }
  251   else {
  252     lua_assert(ttisfloat(&obj));
  253     seminfo->r = fltvalue(&obj);
  254     return TK_FLT;
  255   }
  256 }
  257 
  258 
  259 /*
  260 ** read a sequence '[=*[' or ']=*]', leaving the last bracket. If
  261 ** sequence is well formed, return its number of '='s + 2; otherwise,
  262 ** return 1 if it is a single bracket (no '='s and no 2nd bracket);
  263 ** otherwise (an unfinished '[==...') return 0.
  264 */
  265 static size_t skip_sep (LexState *ls) {
  266   size_t count = 0;
  267   int s = ls->current;
  268   lua_assert(s == '[' || s == ']');
  269   save_and_next(ls);
  270   while (ls->current == '=') {
  271     save_and_next(ls);
  272     count++;
  273   }
  274   return (ls->current == s) ? count + 2
  275          : (count == 0) ? 1
  276          : 0;
  277 }
  278 
  279 
  280 static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
  281   int line = ls->linenumber;  /* initial line (for error message) */
  282   save_and_next(ls);  /* skip 2nd '[' */
  283   if (currIsNewline(ls))  /* string starts with a newline? */
  284     inclinenumber(ls);  /* skip it */
  285   for (;;) {
  286     switch (ls->current) {
  287       case EOZ: {  /* error */
  288         const char *what = (seminfo ? "string" : "comment");
  289         const char *msg = luaO_pushfstring(ls->L,
  290                      "unfinished long %s (starting at line %d)", what, line);
  291         lexerror(ls, msg, TK_EOS);
  292         break;  /* to avoid warnings */
  293       }
  294       case ']': {
  295         if (skip_sep(ls) == sep) {
  296           save_and_next(ls);  /* skip 2nd ']' */
  297           goto endloop;
  298         }
  299         break;
  300       }
  301       case '\n': case '\r': {
  302         save(ls, '\n');
  303         inclinenumber(ls);
  304         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
  305         break;
  306       }
  307       default: {
  308         if (seminfo) save_and_next(ls);
  309         else next(ls);
  310       }
  311     }
  312   } endloop:
  313   if (seminfo)
  314     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
  315                                      luaZ_bufflen(ls->buff) - 2 * sep);
  316 }
  317 
  318 
  319 static void esccheck (LexState *ls, int c, const char *msg) {
  320   if (!c) {
  321     if (ls->current != EOZ)
  322       save_and_next(ls);  /* add current to buffer for error message */
  323     lexerror(ls, msg, TK_STRING);
  324   }
  325 }
  326 
  327 
  328 static int gethexa (LexState *ls) {
  329   save_and_next(ls);
  330   esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
  331   return luaO_hexavalue(ls->current);
  332 }
  333 
  334 
  335 static int readhexaesc (LexState *ls) {
  336   int r = gethexa(ls);
  337   r = (r << 4) + gethexa(ls);
  338   luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
  339   return r;
  340 }
  341 
  342 
  343 static unsigned long readutf8esc (LexState *ls) {
  344   unsigned long r;
  345   int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
  346   save_and_next(ls);  /* skip 'u' */
  347   esccheck(ls, ls->current == '{', "missing '{'");
  348   r = gethexa(ls);  /* must have at least one digit */
  349   while (cast_void(save_and_next(ls)), lisxdigit(ls->current)) {
  350     i++;
  351     esccheck(ls, r <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
  352     r = (r << 4) + luaO_hexavalue(ls->current);
  353   }
  354   esccheck(ls, ls->current == '}', "missing '}'");
  355   next(ls);  /* skip '}' */
  356   luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
  357   return r;
  358 }
  359 
  360 
  361 static void utf8esc (LexState *ls) {
  362   char buff[UTF8BUFFSZ];
  363   int n = luaO_utf8esc(buff, readutf8esc(ls));
  364   for (; n > 0; n--)  /* add 'buff' to string */
  365     save(ls, buff[UTF8BUFFSZ - n]);
  366 }
  367 
  368 
  369 static int readdecesc (LexState *ls) {
  370   int i;
  371   int r = 0;  /* result accumulator */
  372   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
  373     r = 10*r + ls->current - '0';
  374     save_and_next(ls);
  375   }
  376   esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
  377   luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
  378   return r;
  379 }
  380 
  381 
  382 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  383   save_and_next(ls);  /* keep delimiter (for error messages) */
  384   while (ls->current != del) {
  385     switch (ls->current) {
  386       case EOZ:
  387         lexerror(ls, "unfinished string", TK_EOS);
  388         break;  /* to avoid warnings */
  389       case '\n':
  390       case '\r':
  391         lexerror(ls, "unfinished string", TK_STRING);
  392         break;  /* to avoid warnings */
  393       case '\\': {  /* escape sequences */
  394         int c;  /* final character to be saved */
  395         save_and_next(ls);  /* keep '\\' for error messages */
  396         switch (ls->current) {
  397           case 'a': c = '\a'; goto read_save;
  398           case 'b': c = '\b'; goto read_save;
  399           case 'f': c = '\f'; goto read_save;
  400           case 'n': c = '\n'; goto read_save;
  401           case 'r': c = '\r'; goto read_save;
  402           case 't': c = '\t'; goto read_save;
  403           case 'v': c = '\v'; goto read_save;
  404           case 'x': c = readhexaesc(ls); goto read_save;
  405           case 'u': utf8esc(ls);  goto no_save;
  406           case '\n': case '\r':
  407             inclinenumber(ls); c = '\n'; goto only_save;
  408           case '\\': case '\"': case '\'':
  409             c = ls->current; goto read_save;
  410           case EOZ: goto no_save;  /* will raise an error next loop */
  411           case 'z': {  /* zap following span of spaces */
  412             luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
  413             next(ls);  /* skip the 'z' */
  414             while (lisspace(ls->current)) {
  415               if (currIsNewline(ls)) inclinenumber(ls);
  416               else next(ls);
  417             }
  418             goto no_save;
  419           }
  420           default: {
  421             esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
  422             c = readdecesc(ls);  /* digital escape '\ddd' */
  423             goto only_save;
  424           }
  425         }
  426        read_save:
  427          next(ls);
  428          /* go through */
  429        only_save:
  430          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
  431          save(ls, c);
  432          /* go through */
  433        no_save: break;
  434       }
  435       default:
  436         save_and_next(ls);
  437     }
  438   }
  439   save_and_next(ls);  /* skip delimiter */
  440   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
  441                                    luaZ_bufflen(ls->buff) - 2);
  442 }
  443 
  444 
  445 static int llex (LexState *ls, SemInfo *seminfo) {
  446   luaZ_resetbuffer(ls->buff);
  447   for (;;) {
  448     switch (ls->current) {
  449       case '\n': case '\r': {  /* line breaks */
  450         inclinenumber(ls);
  451         break;
  452       }
  453       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
  454         next(ls);
  455         break;
  456       }
  457       case '-': {  /* '-' or '--' (comment) */
  458         next(ls);
  459         if (ls->current != '-') return '-';
  460         /* else is a comment */
  461         next(ls);
  462         if (ls->current == '[') {  /* long comment? */
  463           size_t sep = skip_sep(ls);
  464           luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
  465           if (sep >= 2) {
  466             read_long_string(ls, NULL, sep);  /* skip long comment */
  467             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
  468             break;
  469           }
  470         }
  471         /* else short comment */
  472         while (!currIsNewline(ls) && ls->current != EOZ)
  473           next(ls);  /* skip until end of line (or end of file) */
  474         break;
  475       }
  476       case '[': {  /* long string or simply '[' */
  477         size_t sep = skip_sep(ls);
  478         if (sep >= 2) {
  479           read_long_string(ls, seminfo, sep);
  480           return TK_STRING;
  481         }
  482         else if (sep == 0)  /* '[=...' missing second bracket? */
  483           lexerror(ls, "invalid long string delimiter", TK_STRING);
  484         return '[';
  485       }
  486       case '=': {
  487         next(ls);
  488         if (check_next1(ls, '=')) return TK_EQ;  /* '==' */
  489         else return '=';
  490       }
  491       case '<': {
  492         next(ls);
  493         if (check_next1(ls, '=')) return TK_LE;  /* '<=' */
  494         else if (check_next1(ls, '<')) return TK_SHL;  /* '<<' */
  495         else return '<';
  496       }
  497       case '>': {
  498         next(ls);
  499         if (check_next1(ls, '=')) return TK_GE;  /* '>=' */
  500         else if (check_next1(ls, '>')) return TK_SHR;  /* '>>' */
  501         else return '>';
  502       }
  503       case '/': {
  504         next(ls);
  505         if (check_next1(ls, '/')) return TK_IDIV;  /* '//' */
  506         else return '/';
  507       }
  508       case '~': {
  509         next(ls);
  510         if (check_next1(ls, '=')) return TK_NE;  /* '~=' */
  511         else return '~';
  512       }
  513       case ':': {
  514         next(ls);
  515         if (check_next1(ls, ':')) return TK_DBCOLON;  /* '::' */
  516         else return ':';
  517       }
  518       case '"': case '\'': {  /* short literal strings */
  519         read_string(ls, ls->current, seminfo);
  520         return TK_STRING;
  521       }
  522       case '.': {  /* '.', '..', '...', or number */
  523         save_and_next(ls);
  524         if (check_next1(ls, '.')) {
  525           if (check_next1(ls, '.'))
  526             return TK_DOTS;   /* '...' */
  527           else return TK_CONCAT;   /* '..' */
  528         }
  529         else if (!lisdigit(ls->current)) return '.';
  530         else return read_numeral(ls, seminfo);
  531       }
  532       case '0': case '1': case '2': case '3': case '4':
  533       case '5': case '6': case '7': case '8': case '9': {
  534         return read_numeral(ls, seminfo);
  535       }
  536       case EOZ: {
  537         return TK_EOS;
  538       }
  539       default: {
  540         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
  541           TString *ts;
  542           do {
  543             save_and_next(ls);
  544           } while (lislalnum(ls->current));
  545           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
  546                                   luaZ_bufflen(ls->buff));
  547           seminfo->ts = ts;
  548           if (isreserved(ts))  /* reserved word? */
  549             return ts->extra - 1 + FIRST_RESERVED;
  550           else {
  551             return TK_NAME;
  552           }
  553         }
  554         else {  /* single-char tokens ('+', '*', '%', '{', '}', ...) */
  555           int c = ls->current;
  556           next(ls);
  557           return c;
  558         }
  559       }
  560     }
  561   }
  562 }
  563 
  564 
  565 void luaX_next (LexState *ls) {
  566   ls->lastline = ls->linenumber;
  567   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
  568     ls->t = ls->lookahead;  /* use this one */
  569     ls->lookahead.token = TK_EOS;  /* and discharge it */
  570   }
  571   else
  572     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
  573 }
  574 
  575 
  576 int luaX_lookahead (LexState *ls) {
  577   lua_assert(ls->lookahead.token == TK_EOS);
  578   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
  579   return ls->lookahead.token;
  580 }
  581