"Fossies" - the Fresh Open Source Software Archive

Member "tnftp-20200705/libedit/tokenizer.c" (4 Jul 2020, 10501 Bytes) of package /linux/privat/tnftp-20200705.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "tokenizer.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20151004_vs_20200705.

    1 /*  $NetBSD: tokenizer.c,v 1.8 2020/07/04 13:43:21 lukem Exp $  */
    2 /*  from    NetBSD: tokenizer.c,v 1.28 2016/04/11 18:56:31 christos Exp */
    3 
    4 /*-
    5  * Copyright (c) 1992, 1993
    6  *  The Regents of the University of California.  All rights reserved.
    7  *
    8  * This code is derived from software contributed to Berkeley by
    9  * Christos Zoulas of Cornell University.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  */
   35 
   36 #include "config.h"
   37 
   38 #if 0 /* tnftp */
   39 #if !defined(lint) && !defined(SCCSID)
   40 #if 0
   41 static char sccsid[] = "@(#)tokenizer.c 8.1 (Berkeley) 6/4/93";
   42 #else
   43 __RCSID(" NetBSD: tokenizer.c,v 1.28 2016/04/11 18:56:31 christos Exp  ");
   44 #endif
   45 #endif /* not lint && not SCCSID */
   46 #endif /* tnftp */
   47 
   48 /* We build this file twice, once as NARROW, once as WIDE. */
   49 /*
   50  * tokenize.c: Bourne shell like tokenizer
   51  */
   52 #if 0 /* tnftp */
   53 #include <stdlib.h>
   54 #include <string.h>
   55 #endif /* tnftp */
   56 
   57 #include "histedit.h"
   58 
   /*
    * Quoting state tracked by the tokenizer while scanning a line.
    */
   typedef enum {
       Q_none,         /* not inside any quoting construct */
       Q_single,       /* inside '...' */
       Q_double,       /* inside "..." */
       Q_one,          /* backslash seen while in Q_none */
       Q_doubleone     /* backslash seen while in Q_double */
   } quote_t;
   62 
   /* Bits kept in the tokenizer's `flags` field. */
   #define TOK_KEEP    1   /* emit the pending word even when it is empty */
   #define TOK_EAT     2   /* a backslash-newline was just swallowed */

   /* Initial size and growth step, in elements. */
   #define WINCR       20  /* word buffer (Char units) */
   #define AINCR       10  /* argv array (pointer slots) */

   /* Separators used when FUN(tok,init)() is handed a NULL ifs. */
   #define IFS     STR("\t \n")

   /* Allocator hooks. */
   #define tok_malloc(a)       malloc(a)
   #define tok_realloc(a, b)   realloc(a, b)
   #define tok_free(a)     free(a)

   /*
    * Character-width abstraction: this file is built twice, once with
    * NARROWCHAR defined (char) and once without (wchar_t).
    */
   #ifndef NARROWCHAR
   #define Char            wchar_t
   #define FUN(prefix, rest)   prefix ## _w ## rest
   #define TYPE(type)      type ## W
   #define STR(x)          L ## x
   #define Strchr(s, c)        wcschr(s, c)
   #define tok_strdup(s)       wcsdup(s)
   #else
   #define Char            char
   #define FUN(prefix, rest)   prefix ## _ ## rest
   #define TYPE(type)      type
   #define STR(x)          x
   #define Strchr(s, c)        strchr(s, c)
   #define tok_strdup(s)       strdup(s)
   #endif
   90 
   91 struct TYPE(tokenizer) {
   92     Char    *ifs;       /* In field separator            */
   93     size_t   argc, amax;    /* Current and maximum number of args    */
   94     Char   **argv;      /* Argument list             */
   95     Char    *wptr, *wmax;   /* Space and limit on the word buffer    */
   96     Char    *wstart;    /* Beginning of next word        */
   97     Char    *wspace;    /* Space of word buffer          */
   98     quote_t  quote;     /* Quoting state             */
   99     int  flags;     /* flags;                */
  100 };
  101 
  102 
  103 static void FUN(tok,finish)(TYPE(Tokenizer) *);
  104 
  105 
  106 /* FUN(tok,finish)():
  107  *  Finish a word in the tokenizer.
  108  */
  109 static void
  110 FUN(tok,finish)(TYPE(Tokenizer) *tok)
  111 {
  112 
  113     *tok->wptr = '\0';
  114     if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) {
  115         tok->argv[tok->argc++] = tok->wstart;
  116         tok->argv[tok->argc] = NULL;
  117         tok->wstart = ++tok->wptr;
  118     }
  119     tok->flags &= ~TOK_KEEP;
  120 }
  121 
  122 
  123 /* FUN(tok,init)():
  124  *  Initialize the tokenizer
  125  */
  126 TYPE(Tokenizer) *
  127 FUN(tok,init)(const Char *ifs)
  128 {
  129     TYPE(Tokenizer) *tok = tok_malloc(sizeof(*tok));
  130 
  131     if (tok == NULL)
  132         return NULL;
  133     tok->ifs = tok_strdup(ifs ? ifs : IFS);
  134     if (tok->ifs == NULL) {
  135         tok_free(tok);
  136         return NULL;
  137     }
  138     tok->argc = 0;
  139     tok->amax = AINCR;
  140     tok->argv = tok_malloc(sizeof(*tok->argv) * tok->amax);
  141     if (tok->argv == NULL) {
  142         tok_free(tok->ifs);
  143         tok_free(tok);
  144         return NULL;
  145     }
  146     tok->argv[0] = NULL;
  147     tok->wspace = tok_malloc(WINCR * sizeof(*tok->wspace));
  148     if (tok->wspace == NULL) {
  149         tok_free(tok->argv);
  150         tok_free(tok->ifs);
  151         tok_free(tok);
  152         return NULL;
  153     }
  154     tok->wmax = tok->wspace + WINCR;
  155     tok->wstart = tok->wspace;
  156     tok->wptr = tok->wspace;
  157     tok->flags = 0;
  158     tok->quote = Q_none;
  159 
  160     return tok;
  161 }
  162 
  163 
  164 /* FUN(tok,reset)():
  165  *  Reset the tokenizer
  166  */
  167 void
  168 FUN(tok,reset)(TYPE(Tokenizer) *tok)
  169 {
  170 
  171     tok->argc = 0;
  172     tok->wstart = tok->wspace;
  173     tok->wptr = tok->wspace;
  174     tok->flags = 0;
  175     tok->quote = Q_none;
  176 }
  177 
  178 
  179 /* FUN(tok,end)():
  180  *  Clean up
  181  */
  182 void
  183 FUN(tok,end)(TYPE(Tokenizer) *tok)
  184 {
  185 
  186     tok_free(tok->ifs);
  187     tok_free(tok->wspace);
  188     tok_free(tok->argv);
  189     tok_free(tok);
  190 }
  191 
  192 
  193 
  194 /* FUN(tok,line)():
  195  *  Bourne shell (sh(1)) like tokenizing
  196  *  Arguments:
  197  *      tok current tokenizer state (setup with FUN(tok,init)())
  198  *      line    line to parse
  199  *  Returns:
  200  *      -1  Internal error
  201  *       3  Quoted return
  202  *       2  Unmatched double quote
  203  *       1  Unmatched single quote
  204  *       0  Ok
  205  *  Modifies (if return value is 0):
  206  *      argc    number of arguments
  207  *      argv    argument array
  208  *      cursorc if !NULL, argv element containing cursor
  209  *      cursorv if !NULL, offset in argv[cursorc] of cursor
  210  */
  211 int
  212 FUN(tok,line)(TYPE(Tokenizer) *tok, const TYPE(LineInfo) *line,
  213     int *argc, const Char ***argv, int *cursorc, int *cursoro)
  214 {
  215     const Char *ptr;
  216     int cc, co;
  217 
  218     cc = co = -1;
  219     ptr = line->buffer;
  220     for (ptr = line->buffer; ;ptr++) {
  221         if (ptr >= line->lastchar)
  222             ptr = STR("");
  223         if (ptr == line->cursor) {
  224             cc = (int)tok->argc;
  225             co = (int)(tok->wptr - tok->wstart);
  226         }
  227         switch (*ptr) {
  228         case '\'':
  229             tok->flags |= TOK_KEEP;
  230             tok->flags &= ~TOK_EAT;
  231             switch (tok->quote) {
  232             case Q_none:
  233                 tok->quote = Q_single;  /* Enter single quote
  234                              * mode */
  235                 break;
  236 
  237             case Q_single:  /* Exit single quote mode */
  238                 tok->quote = Q_none;
  239                 break;
  240 
  241             case Q_one: /* Quote this ' */
  242                 tok->quote = Q_none;
  243                 *tok->wptr++ = *ptr;
  244                 break;
  245 
  246             case Q_double:  /* Stay in double quote mode */
  247                 *tok->wptr++ = *ptr;
  248                 break;
  249 
  250             case Q_doubleone:   /* Quote this ' */
  251                 tok->quote = Q_double;
  252                 *tok->wptr++ = *ptr;
  253                 break;
  254 
  255             default:
  256                 return -1;
  257             }
  258             break;
  259 
  260         case '"':
  261             tok->flags &= ~TOK_EAT;
  262             tok->flags |= TOK_KEEP;
  263             switch (tok->quote) {
  264             case Q_none:    /* Enter double quote mode */
  265                 tok->quote = Q_double;
  266                 break;
  267 
  268             case Q_double:  /* Exit double quote mode */
  269                 tok->quote = Q_none;
  270                 break;
  271 
  272             case Q_one: /* Quote this " */
  273                 tok->quote = Q_none;
  274                 *tok->wptr++ = *ptr;
  275                 break;
  276 
  277             case Q_single:  /* Stay in single quote mode */
  278                 *tok->wptr++ = *ptr;
  279                 break;
  280 
  281             case Q_doubleone:   /* Quote this " */
  282                 tok->quote = Q_double;
  283                 *tok->wptr++ = *ptr;
  284                 break;
  285 
  286             default:
  287                 return -1;
  288             }
  289             break;
  290 
  291         case '\\':
  292             tok->flags |= TOK_KEEP;
  293             tok->flags &= ~TOK_EAT;
  294             switch (tok->quote) {
  295             case Q_none:    /* Quote next character */
  296                 tok->quote = Q_one;
  297                 break;
  298 
  299             case Q_double:  /* Quote next character */
  300                 tok->quote = Q_doubleone;
  301                 break;
  302 
  303             case Q_one: /* Quote this, restore state */
  304                 *tok->wptr++ = *ptr;
  305                 tok->quote = Q_none;
  306                 break;
  307 
  308             case Q_single:  /* Stay in single quote mode */
  309                 *tok->wptr++ = *ptr;
  310                 break;
  311 
  312             case Q_doubleone:   /* Quote this \ */
  313                 tok->quote = Q_double;
  314                 *tok->wptr++ = *ptr;
  315                 break;
  316 
  317             default:
  318                 return -1;
  319             }
  320             break;
  321 
  322         case '\n':
  323             tok->flags &= ~TOK_EAT;
  324             switch (tok->quote) {
  325             case Q_none:
  326                 goto tok_line_outok;
  327 
  328             case Q_single:
  329             case Q_double:
  330                 *tok->wptr++ = *ptr;    /* Add the return */
  331                 break;
  332 
  333             case Q_doubleone:   /* Back to double, eat the '\n' */
  334                 tok->flags |= TOK_EAT;
  335                 tok->quote = Q_double;
  336                 break;
  337 
  338             case Q_one: /* No quote, more eat the '\n' */
  339                 tok->flags |= TOK_EAT;
  340                 tok->quote = Q_none;
  341                 break;
  342 
  343             default:
  344                 return 0;
  345             }
  346             break;
  347 
  348         case '\0':
  349             switch (tok->quote) {
  350             case Q_none:
  351                 /* Finish word and return */
  352                 if (tok->flags & TOK_EAT) {
  353                     tok->flags &= ~TOK_EAT;
  354                     return 3;
  355                 }
  356                 goto tok_line_outok;
  357 
  358             case Q_single:
  359                 return 1;
  360 
  361             case Q_double:
  362                 return 2;
  363 
  364             case Q_doubleone:
  365                 tok->quote = Q_double;
  366                 *tok->wptr++ = *ptr;
  367                 break;
  368 
  369             case Q_one:
  370                 tok->quote = Q_none;
  371                 *tok->wptr++ = *ptr;
  372                 break;
  373 
  374             default:
  375                 return -1;
  376             }
  377             break;
  378 
  379         default:
  380             tok->flags &= ~TOK_EAT;
  381             switch (tok->quote) {
  382             case Q_none:
  383                 if (Strchr(tok->ifs, *ptr) != NULL)
  384                     FUN(tok,finish)(tok);
  385                 else
  386                     *tok->wptr++ = *ptr;
  387                 break;
  388 
  389             case Q_single:
  390             case Q_double:
  391                 *tok->wptr++ = *ptr;
  392                 break;
  393 
  394 
  395             case Q_doubleone:
  396                 *tok->wptr++ = '\\';
  397                 tok->quote = Q_double;
  398                 *tok->wptr++ = *ptr;
  399                 break;
  400 
  401             case Q_one:
  402                 tok->quote = Q_none;
  403                 *tok->wptr++ = *ptr;
  404                 break;
  405 
  406             default:
  407                 return -1;
  408 
  409             }
  410             break;
  411         }
  412 
  413         if (tok->wptr >= tok->wmax - 4) {
  414             size_t size = (size_t)(tok->wmax - tok->wspace + WINCR);
  415             Char *s = tok_realloc(tok->wspace,
  416                 size * sizeof(*s));
  417             if (s == NULL)
  418                 return -1;
  419 
  420             if (s != tok->wspace) {
  421                 size_t i;
  422                 for (i = 0; i < tok->argc; i++) {
  423                     tok->argv[i] =
  424                     (tok->argv[i] - tok->wspace) + s;
  425                 }
  426                 tok->wptr = (tok->wptr - tok->wspace) + s;
  427                 tok->wstart = (tok->wstart - tok->wspace) + s;
  428                 tok->wspace = s;
  429             }
  430             tok->wmax = s + size;
  431         }
  432         if (tok->argc >= tok->amax - 4) {
  433             Char **p;
  434             tok->amax += AINCR;
  435             p = tok_realloc(tok->argv, tok->amax * sizeof(*p));
  436             if (p == NULL) {
  437                 tok->amax -= AINCR;
  438                 return -1;
  439             }
  440             tok->argv = p;
  441         }
  442     }
  443  tok_line_outok:
  444     if (cc == -1 && co == -1) {
  445         cc = (int)tok->argc;
  446         co = (int)(tok->wptr - tok->wstart);
  447     }
  448     if (cursorc != NULL)
  449         *cursorc = cc;
  450     if (cursoro != NULL)
  451         *cursoro = co;
  452     FUN(tok,finish)(tok);
  453     *argv = (const Char **)tok->argv;
  454     *argc = (int)tok->argc;
  455     return 0;
  456 }
  457 
  458 /* FUN(tok,str)():
  459  *  Simpler version of tok_line, taking a NUL terminated line
  460  *  and splitting into words, ignoring cursor state.
  461  */
  462 int
  463 FUN(tok,str)(TYPE(Tokenizer) *tok, const Char *line, int *argc,
  464     const Char ***argv)
  465 {
  466     TYPE(LineInfo) li;
  467 
  468     memset(&li, 0, sizeof(li));
  469     li.buffer = line;
  470     li.cursor = li.lastchar = Strchr(line, '\0');
  471     return FUN(tok,line)(tok, &li, argc, argv, NULL, NULL);
  472 }