"Fossies" - the Fresh Open Source Software Archive

Member "bind-9.16.7/lib/isc/regex.c" (4 Sep 2020, 7741 Bytes) of package /linux/misc/dns/bind9/9.16.7/bind-9.16.7.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "regex.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
    3  *
    4  * This Source Code Form is subject to the terms of the Mozilla Public
    5  * License, v. 2.0. If a copy of the MPL was not distributed with this
    6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
    7  *
    8  * See the COPYRIGHT file distributed with this work for additional
    9  * information regarding copyright ownership.
   10  */
   11 
   12 #include <stdbool.h>
   13 
   14 #include <isc/file.h>
   15 #include <isc/print.h>
   16 #include <isc/regex.h>
   17 #include <isc/string.h>
   18 
   19 #if VALREGEX_REPORT_REASON
   20 #define FAIL(x)               \
   21     do {                  \
   22         reason = (x); \
   23         goto error;   \
   24     } while (0)
   25 #else /* if VALREGEX_REPORT_REASON */
   26 #define FAIL(x) goto error
   27 #endif /* if VALREGEX_REPORT_REASON */
   28 
   29 /*
   30  * Validate the regular expression 'C' locale.
   31  */
   32 int
   33 isc_regex_validate(const char *c) {
   34     enum { none,
   35            parse_bracket,
   36            parse_bound,
   37            parse_ce,
   38            parse_ec,
   39            parse_cc } state = none;
   40     /* Well known character classes. */
   41     const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
   42                  ":graph:", ":space:", ":blank:", ":lower:",
   43                  ":upper:", ":cntrl:", ":print:", ":xdigit:" };
   44     bool seen_comma = false;
   45     bool seen_high = false;
   46     bool seen_char = false;
   47     bool seen_ec = false;
   48     bool seen_ce = false;
   49     bool have_atom = false;
   50     int group = 0;
   51     int range = 0;
   52     int sub = 0;
   53     bool empty_ok = false;
   54     bool neg = false;
   55     bool was_multiple = false;
   56     unsigned int low = 0;
   57     unsigned int high = 0;
   58     const char *ccname = NULL;
   59     int range_start = 0;
   60 #if VALREGEX_REPORT_REASON
   61     const char *reason = "";
   62 #endif /* if VALREGEX_REPORT_REASON */
   63 
   64     if (c == NULL || *c == 0) {
   65         FAIL("empty string");
   66     }
   67 
   68     while (c != NULL && *c != 0) {
   69         switch (state) {
   70         case none:
   71             switch (*c) {
   72             case '\\': /* make literal */
   73                 ++c;
   74                 switch (*c) {
   75                 case '1':
   76                 case '2':
   77                 case '3':
   78                 case '4':
   79                 case '5':
   80                 case '6':
   81                 case '7':
   82                 case '8':
   83                 case '9':
   84                     if ((*c - '0') > sub) {
   85                         FAIL("bad back reference");
   86                     }
   87                     have_atom = true;
   88                     was_multiple = false;
   89                     break;
   90                 case 0:
   91                     FAIL("escaped end-of-string");
   92                 default:
   93                     goto literal;
   94                 }
   95                 ++c;
   96                 break;
   97             case '[': /* bracket start */
   98                 ++c;
   99                 neg = false;
  100                 was_multiple = false;
  101                 seen_char = false;
  102                 state = parse_bracket;
  103                 break;
  104             case '{': /* bound start */
  105                 switch (c[1]) {
  106                 case '0':
  107                 case '1':
  108                 case '2':
  109                 case '3':
  110                 case '4':
  111                 case '5':
  112                 case '6':
  113                 case '7':
  114                 case '8':
  115                 case '9':
  116                     if (!have_atom) {
  117                         FAIL("no atom");
  118                     }
  119                     if (was_multiple) {
  120                         FAIL("was multiple");
  121                     }
  122                     seen_comma = false;
  123                     seen_high = false;
  124                     low = high = 0;
  125                     state = parse_bound;
  126                     break;
  127                 default:
  128                     goto literal;
  129                 }
  130                 ++c;
  131                 have_atom = true;
  132                 was_multiple = true;
  133                 break;
  134             case '}':
  135                 goto literal;
  136             case '(': /* group start */
  137                 have_atom = false;
  138                 was_multiple = false;
  139                 empty_ok = true;
  140                 ++group;
  141                 ++sub;
  142                 ++c;
  143                 break;
  144             case ')': /* group end */
  145                 if (group && !have_atom && !empty_ok) {
  146                     FAIL("empty alternative");
  147                 }
  148                 have_atom = true;
  149                 was_multiple = false;
  150                 if (group != 0) {
  151                     --group;
  152                 }
  153                 ++c;
  154                 break;
  155             case '|': /* alternative separator */
  156                 if (!have_atom) {
  157                     FAIL("no atom");
  158                 }
  159                 have_atom = false;
  160                 empty_ok = false;
  161                 was_multiple = false;
  162                 ++c;
  163                 break;
  164             case '^':
  165             case '$':
  166                 have_atom = true;
  167                 was_multiple = true;
  168                 ++c;
  169                 break;
  170             case '+':
  171             case '*':
  172             case '?':
  173                 if (was_multiple) {
  174                     FAIL("was multiple");
  175                 }
  176                 if (!have_atom) {
  177                     FAIL("no atom");
  178                 }
  179                 have_atom = true;
  180                 was_multiple = true;
  181                 ++c;
  182                 break;
  183             case '.':
  184             default:
  185             literal:
  186                 have_atom = true;
  187                 was_multiple = false;
  188                 ++c;
  189                 break;
  190             }
  191             break;
  192         case parse_bound:
  193             switch (*c) {
  194             case '0':
  195             case '1':
  196             case '2':
  197             case '3':
  198             case '4':
  199             case '5':
  200             case '6':
  201             case '7':
  202             case '8':
  203             case '9':
  204                 if (!seen_comma) {
  205                     low = low * 10 + *c - '0';
  206                     if (low > 255) {
  207                         FAIL("lower bound too big");
  208                     }
  209                 } else {
  210                     seen_high = true;
  211                     high = high * 10 + *c - '0';
  212                     if (high > 255) {
  213                         FAIL("upper bound too big");
  214                     }
  215                 }
  216                 ++c;
  217                 break;
  218             case ',':
  219                 if (seen_comma) {
  220                     FAIL("multiple commas");
  221                 }
  222                 seen_comma = true;
  223                 ++c;
  224                 break;
  225             default:
  226             case '{':
  227                 FAIL("non digit/comma");
  228             case '}':
  229                 if (seen_high && low > high) {
  230                     FAIL("bad parse bound");
  231                 }
  232                 seen_comma = false;
  233                 state = none;
  234                 ++c;
  235                 break;
  236             }
  237             break;
  238         case parse_bracket:
  239             switch (*c) {
  240             case '^':
  241                 if (seen_char || neg) {
  242                     goto inside;
  243                 }
  244                 neg = true;
  245                 ++c;
  246                 break;
  247             case '-':
  248                 if (range == 2) {
  249                     goto inside;
  250                 }
  251                 if (!seen_char) {
  252                     goto inside;
  253                 }
  254                 if (range == 1) {
  255                     FAIL("bad range");
  256                 }
  257                 range = 2;
  258                 ++c;
  259                 break;
  260             case '[':
  261                 ++c;
  262                 switch (*c) {
  263                 case '.': /* collating element */
  264                     if (range != 0) {
  265                         --range;
  266                     }
  267                     ++c;
  268                     state = parse_ce;
  269                     seen_ce = false;
  270                     break;
  271                 case '=': /* equivalence class */
  272                     if (range == 2) {
  273                         FAIL("equivalence class in "
  274                              "range");
  275                     }
  276                     ++c;
  277                     state = parse_ec;
  278                     seen_ec = false;
  279                     break;
  280                 case ':': /* character class */
  281                     if (range == 2) {
  282                         FAIL("character class in "
  283                              "range");
  284                     }
  285                     ccname = c;
  286                     ++c;
  287                     state = parse_cc;
  288                     break;
  289                 }
  290                 seen_char = true;
  291                 break;
  292             case ']':
  293                 if (!c[1] && !seen_char) {
  294                     FAIL("unfinished brace");
  295                 }
  296                 if (!seen_char) {
  297                     goto inside;
  298                 }
  299                 ++c;
  300                 range = 0;
  301                 have_atom = true;
  302                 state = none;
  303                 break;
  304             default:
  305             inside:
  306                 seen_char = true;
  307                 if (range == 2 && (*c & 0xff) < range_start) {
  308                     FAIL("out of order range");
  309                 }
  310                 if (range != 0) {
  311                     --range;
  312                 }
  313                 range_start = *c & 0xff;
  314                 ++c;
  315                 break;
  316             }
  317             break;
  318         case parse_ce:
  319             switch (*c) {
  320             case '.':
  321                 ++c;
  322                 switch (*c) {
  323                 case ']':
  324                     if (!seen_ce) {
  325                         FAIL("empty ce");
  326                     }
  327                     ++c;
  328                     state = parse_bracket;
  329                     break;
  330                 default:
  331                     if (seen_ce) {
  332                         range_start = 256;
  333                     } else {
  334                         range_start = '.';
  335                     }
  336                     seen_ce = true;
  337                     break;
  338                 }
  339                 break;
  340             default:
  341                 if (seen_ce) {
  342                     range_start = 256;
  343                 } else {
  344                     range_start = *c;
  345                 }
  346                 seen_ce = true;
  347                 ++c;
  348                 break;
  349             }
  350             break;
  351         case parse_ec:
  352             switch (*c) {
  353             case '=':
  354                 ++c;
  355                 switch (*c) {
  356                 case ']':
  357                     if (!seen_ec) {
  358                         FAIL("no ec");
  359                     }
  360                     ++c;
  361                     state = parse_bracket;
  362                     break;
  363                 default:
  364                     seen_ec = true;
  365                     break;
  366                 }
  367                 break;
  368             default:
  369                 seen_ec = true;
  370                 ++c;
  371                 break;
  372             }
  373             break;
  374         case parse_cc:
  375             switch (*c) {
  376             case ':':
  377                 ++c;
  378                 switch (*c) {
  379                 case ']': {
  380                     unsigned int i;
  381                     bool found = false;
  382                     for (i = 0;
  383                          i < sizeof(cc) / sizeof(*cc); i++)
  384                     {
  385                         unsigned int len;
  386                         len = strlen(cc[i]);
  387                         if (len !=
  388                             (unsigned int)(c - ccname))
  389                         {
  390                             continue;
  391                         }
  392                         if (strncmp(cc[i], ccname, len))
  393                         {
  394                             continue;
  395                         }
  396                         found = true;
  397                     }
  398                     if (!found) {
  399                         FAIL("unknown cc");
  400                     }
  401                     ++c;
  402                     state = parse_bracket;
  403                     break;
  404                 }
  405                 default:
  406                     break;
  407                 }
  408                 break;
  409             default:
  410                 ++c;
  411                 break;
  412             }
  413             break;
  414         }
  415     }
  416     if (group != 0) {
  417         FAIL("group open");
  418     }
  419     if (state != none) {
  420         FAIL("incomplete");
  421     }
  422     if (!have_atom) {
  423         FAIL("no atom");
  424     }
  425     return (sub);
  426 
  427 error:
  428 #if VALREGEX_REPORT_REASON
  429     fprintf(stderr, "%s\n", reason);
  430 #endif /* if VALREGEX_REPORT_REASON */
  431     return (-1);
  432 }