"Fossies" - the Fresh Open Source Software Archive

Member "bind-9.11.23/lib/isc/regex.c" (7 Sep 2020, 7142 Bytes) of package /linux/misc/dns/bind9/9.11.23/bind-9.11.23.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "regex.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
    3  *
    4  * This Source Code Form is subject to the terms of the Mozilla Public
    5  * License, v. 2.0. If a copy of the MPL was not distributed with this
    6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
    7  *
    8  * See the COPYRIGHT file distributed with this work for additional
    9  * information regarding copyright ownership.
   10  */
   11 
   12 #include <config.h>
   13 
   14 #include <stdbool.h>
   15 
   16 #include <isc/file.h>
   17 #include <isc/print.h>
   18 #include <isc/regex.h>
   19 #include <isc/string.h>
   20 
   21 #if VALREGEX_REPORT_REASON
   22 #define FAIL(x) do { reason = (x); goto error; } while(0)
   23 #else
   24 #define FAIL(x) goto error
   25 #endif
   26 
   27 /*
   28  * Validate the regular expression 'C' locale.
   29  */
   30 int
   31 isc_regex_validate(const char *c) {
   32     enum {
   33         none, parse_bracket, parse_bound,
   34         parse_ce, parse_ec, parse_cc
   35     } state = none;
   36     /* Well known character classes. */
   37     const char *cc[] = {
   38         ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
   39         ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
   40         ":print:", ":xdigit:"
   41     };
   42     bool seen_comma = false;
   43     bool seen_high = false;
   44     bool seen_char = false;
   45     bool seen_ec = false;
   46     bool seen_ce = false;
   47     bool have_atom = false;
   48     int group = 0;
   49     int range = 0;
   50     int sub = 0;
   51     bool empty_ok = false;
   52     bool neg = false;
   53     bool was_multiple = false;
   54     unsigned int low = 0;
   55     unsigned int high = 0;
   56     const char *ccname = NULL;
   57     int range_start = 0;
   58 #if VALREGEX_REPORT_REASON
   59     const char *reason = "";
   60 #endif
   61 
   62     if (c == NULL || *c == 0)
   63         FAIL("empty string");
   64 
   65     while (c != NULL && *c != 0) {
   66         switch (state) {
   67         case none:
   68             switch (*c) {
   69             case '\\':  /* make literal */
   70                 ++c;
   71                 switch (*c) {
   72                 case '1': case '2': case '3':
   73                 case '4': case '5': case '6':
   74                 case '7': case '8': case '9':
   75                     if ((*c - '0') > sub)
   76                         FAIL("bad back reference");
   77                     have_atom = true;
   78                     was_multiple = false;
   79                     break;
   80                 case 0:
   81                     FAIL("escaped end-of-string");
   82                 default:
   83                     goto literal;
   84                 }
   85                 ++c;
   86                 break;
   87             case '[':   /* bracket start */
   88                 ++c;
   89                 neg = false;
   90                 was_multiple = false;
   91                 seen_char = false;
   92                 state = parse_bracket;
   93                 break;
   94             case '{':   /* bound start */
   95                 switch (c[1]) {
   96                 case '0': case '1': case '2': case '3':
   97                 case '4': case '5': case '6': case '7':
   98                 case '8': case '9':
   99                     if (!have_atom)
  100                         FAIL("no atom");
  101                     if (was_multiple)
  102                         FAIL("was multiple");
  103                     seen_comma = false;
  104                     seen_high = false;
  105                     low = high = 0;
  106                     state = parse_bound;
  107                     break;
  108                 default:
  109                     goto literal;
  110                 }
  111                 ++c;
  112                 have_atom = true;
  113                 was_multiple = true;
  114                 break;
  115             case '}':
  116                 goto literal;
  117             case '(':   /* group start */
  118                 have_atom = false;
  119                 was_multiple = false;
  120                 empty_ok = true;
  121                 ++group;
  122                 ++sub;
  123                 ++c;
  124                 break;
  125             case ')':   /* group end */
  126                 if (group && !have_atom && !empty_ok)
  127                     FAIL("empty alternative");
  128                 have_atom = true;
  129                 was_multiple = false;
  130                 if (group != 0)
  131                     --group;
  132                 ++c;
  133                 break;
  134             case '|':   /* alternative separator */
  135                 if (!have_atom)
  136                     FAIL("no atom");
  137                 have_atom = false;
  138                 empty_ok = false;
  139                 was_multiple = false;
  140                 ++c;
  141                 break;
  142             case '^':
  143             case '$':
  144                 have_atom = true;
  145                 was_multiple = true;
  146                 ++c;
  147                 break;
  148             case '+':
  149             case '*':
  150             case '?':
  151                 if (was_multiple)
  152                     FAIL("was multiple");
  153                 if (!have_atom)
  154                     FAIL("no atom");
  155                 have_atom = true;
  156                 was_multiple = true;
  157                 ++c;
  158                 break;
  159             case '.':
  160             default:
  161             literal:
  162                 have_atom = true;
  163                 was_multiple = false;
  164                 ++c;
  165                 break;
  166             }
  167             break;
  168         case parse_bound:
  169             switch (*c) {
  170             case '0': case '1': case '2': case '3': case '4':
  171             case '5': case '6': case '7': case '8': case '9':
  172                 if (!seen_comma) {
  173                     low = low * 10 + *c - '0';
  174                     if (low > 255)
  175                         FAIL("lower bound too big");
  176                 } else {
  177                     seen_high = true;
  178                     high = high * 10 + *c - '0';
  179                     if (high > 255)
  180                         FAIL("upper bound too big");
  181                 }
  182                 ++c;
  183                 break;
  184             case ',':
  185                 if (seen_comma)
  186                     FAIL("multiple commas");
  187                 seen_comma = true;
  188                 ++c;
  189                 break;
  190             default:
  191             case '{':
  192                 FAIL("non digit/comma");
  193             case '}':
  194                 if (seen_high && low > high)
  195                     FAIL("bad parse bound");
  196                 seen_comma = false;
  197                 state = none;
  198                 ++c;
  199                 break;
  200             }
  201             break;
  202         case parse_bracket:
  203             switch (*c) {
  204             case '^':
  205                 if (seen_char || neg) goto inside;
  206                 neg = true;
  207                 ++c;
  208                 break;
  209             case '-':
  210                 if (range == 2) goto inside;
  211                 if (!seen_char) goto inside;
  212                 if (range == 1)
  213                     FAIL("bad range");
  214                 range = 2;
  215                 ++c;
  216                 break;
  217             case '[':
  218                 ++c;
  219                 switch (*c) {
  220                 case '.':   /* collating element */
  221                     if (range != 0) --range;
  222                     ++c;
  223                     state = parse_ce;
  224                     seen_ce = false;
  225                     break;
  226                 case '=':   /* equivalence class */
  227                     if (range == 2)
  228                         FAIL("equivalence class in range");
  229                     ++c;
  230                     state = parse_ec;
  231                     seen_ec = false;
  232                     break;
  233                 case ':':   /* character class */
  234                     if (range == 2)
  235                           FAIL("character class in range");
  236                     ccname = c;
  237                     ++c;
  238                     state = parse_cc;
  239                     break;
  240                 }
  241                 seen_char = true;
  242                 break;
  243             case ']':
  244                 if (!c[1] && !seen_char)
  245                     FAIL("unfinished brace");
  246                 if (!seen_char)
  247                     goto inside;
  248                 ++c;
  249                 range = 0;
  250                 have_atom = true;
  251                 state = none;
  252                 break;
  253             default:
  254             inside:
  255                 seen_char = true;
  256                 if (range == 2 && (*c & 0xff) < range_start)
  257                     FAIL("out of order range");
  258                 if (range != 0)
  259                     --range;
  260                 range_start = *c & 0xff;
  261                 ++c;
  262                 break;
  263             };
  264             break;
  265         case parse_ce:
  266             switch (*c) {
  267             case '.':
  268                 ++c;
  269                 switch (*c) {
  270                 case ']':
  271                     if (!seen_ce)
  272                          FAIL("empty ce");
  273                     ++c;
  274                     state = parse_bracket;
  275                     break;
  276                 default:
  277                     if (seen_ce)
  278                         range_start = 256;
  279                     else
  280                         range_start = '.';
  281                     seen_ce = true;
  282                     break;
  283                 }
  284                 break;
  285             default:
  286                 if (seen_ce)
  287                     range_start = 256;
  288                 else
  289                     range_start = *c;
  290                 seen_ce = true;
  291                 ++c;
  292                 break;
  293             }
  294             break;
  295         case parse_ec:
  296             switch (*c) {
  297             case '=':
  298                 ++c;
  299                 switch (*c) {
  300                 case ']':
  301                     if (!seen_ec)
  302                         FAIL("no ec");
  303                     ++c;
  304                     state = parse_bracket;
  305                     break;
  306                 default:
  307                     seen_ec = true;
  308                     break;
  309                 }
  310                 break;
  311             default:
  312                 seen_ec = true;
  313                 ++c;
  314                 break;
  315             }
  316             break;
  317         case parse_cc:
  318             switch (*c) {
  319             case ':':
  320                 ++c;
  321                 switch (*c) {
  322                 case ']': {
  323                     unsigned int i;
  324                     bool found = false;
  325                     for (i = 0;
  326                          i < sizeof(cc)/sizeof(*cc);
  327                          i++)
  328                     {
  329                         unsigned int len;
  330                         len = strlen(cc[i]);
  331                         if (len !=
  332                             (unsigned int)(c - ccname))
  333                             continue;
  334                         if (strncmp(cc[i], ccname, len))
  335                             continue;
  336                         found = true;
  337                     }
  338                     if (!found)
  339                         FAIL("unknown cc");
  340                     ++c;
  341                     state = parse_bracket;
  342                     break;
  343                     }
  344                 default:
  345                     break;
  346                 }
  347                 break;
  348             default:
  349                 ++c;
  350                 break;
  351             }
  352             break;
  353         }
  354     }
  355     if (group != 0)
  356         FAIL("group open");
  357     if (state != none)
  358         FAIL("incomplete");
  359     if (!have_atom)
  360         FAIL("no atom");
  361     return (sub);
  362 
  363  error:
  364 #if VALREGEX_REPORT_REASON
  365     fprintf(stderr, "%s\n", reason);
  366 #endif
  367     return (-1);
  368 }