"Fossies" - the Fresh Open Source Software Archive

Member "scalpel-2.0/tre-0.7.5-win32/lib/tre-compile.c" (20 Apr 2011, 61519 Bytes) of archive /linux/misc/scalpel-2.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "tre-compile.c" see the Fossies "Dox" file reference documentation.

    1 /*
    2   tre-compile.c - TRE regex compiler
    3 
    4   Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>
    5 
    6   This library is free software; you can redistribute it and/or
    7   modify it under the terms of the GNU Lesser General Public
    8   License as published by the Free Software Foundation; either
    9   version 2.1 of the License, or (at your option) any later version.
   10 
   11   This library is distributed in the hope that it will be useful,
   12   but WITHOUT ANY WARRANTY; without even the implied warranty of
   13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14   Lesser General Public License for more details.
   15 
   16   You should have received a copy of the GNU Lesser General Public
   17   License along with this library; if not, write to the Free Software
   18   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   19 
   20 */
   21 
   22 /*
   23   TODO:
   24    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
   25      function calls.
   26 */
   27 
   28 
   29 #ifdef HAVE_CONFIG_H
   30 #include <config.h>
   31 #endif /* HAVE_CONFIG_H */
   32 #include <stdio.h>
   33 #include <assert.h>
   34 #include <string.h>
   35 
   36 #include "tre-internal.h"
   37 #include "tre-mem.h"
   38 #include "tre-stack.h"
   39 #include "tre-ast.h"
   40 #include "tre-parse.h"
   41 #include "tre-compile.h"
   42 #include "regex.h"
   43 #include "xmalloc.h"
   44 
   45 /*
   46   Algorithms to setup tags so that submatch addressing can be done.
   47 */
   48 
   49 
   50 /* Inserts a catenation node to the root of the tree given in `node'.
   51    As the left child a new tag with number `tag_id' to `node' is added,
   52    and the right child is the old root. */
   53 static reg_errcode_t
   54 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
   55 {
   56   tre_catenation_t *c;
   57 
   58   DPRINT(("add_tag_left: tag %d\n", tag_id));
   59 
   60   c = tre_mem_alloc(mem, sizeof(*c));
   61   if (c == NULL)
   62     return REG_ESPACE;
   63   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
   64   if (c->left == NULL)
   65     return REG_ESPACE;
   66   c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
   67   if (c->right == NULL)
   68     return REG_ESPACE;
   69 
   70   c->right->obj = node->obj;
   71   c->right->type = node->type;
   72   c->right->nullable = -1;
   73   c->right->submatch_id = -1;
   74   c->right->firstpos = NULL;
   75   c->right->lastpos = NULL;
   76   c->right->num_tags = 0;
   77   node->obj = c;
   78   node->type = CATENATION;
   79   return REG_OK;
   80 }
   81 
   82 /* Inserts a catenation node to the root of the tree given in `node'.
   83    As the right child a new tag with number `tag_id' to `node' is added,
   84    and the left child is the old root. */
   85 static reg_errcode_t
   86 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
   87 {
   88   tre_catenation_t *c;
   89 
   90   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
   91 
   92   c = tre_mem_alloc(mem, sizeof(*c));
   93   if (c == NULL)
   94     return REG_ESPACE;
   95   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
   96   if (c->right == NULL)
   97     return REG_ESPACE;
   98   c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
   99   if (c->left == NULL)
  100     return REG_ESPACE;
  101 
  102   c->left->obj = node->obj;
  103   c->left->type = node->type;
  104   c->left->nullable = -1;
  105   c->left->submatch_id = -1;
  106   c->left->firstpos = NULL;
  107   c->left->lastpos = NULL;
  108   c->left->num_tags = 0;
  109   node->obj = c;
  110   node->type = CATENATION;
  111   return REG_OK;
  112 }
  113 
  114 typedef enum {
  115   ADDTAGS_RECURSE,
  116   ADDTAGS_AFTER_ITERATION,
  117   ADDTAGS_AFTER_UNION_LEFT,
  118   ADDTAGS_AFTER_UNION_RIGHT,
  119   ADDTAGS_AFTER_CAT_LEFT,
  120   ADDTAGS_AFTER_CAT_RIGHT,
  121   ADDTAGS_SET_SUBMATCH_END
  122 } tre_addtags_symbol_t;
  123 
  124 
  125 typedef struct {
  126   int tag;
  127   int next_tag;
  128 } tre_tag_states_t;
  129 
  130 /* Adds tags to appropriate locations in the parse tree in `tree', so that
  131    subexpressions marked for submatch addressing can be traced. */
  132 static reg_errcode_t
  133 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
  134          tre_tnfa_t *tnfa)
  135 {
  136   reg_errcode_t status = REG_OK;
  137   tre_addtags_symbol_t symbol;
  138   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
  139   int bottom = tre_stack_num_objects(stack);
  140   /* True for first pass (counting number of needed tags) */
  141   int first_pass = (mem == NULL || tnfa == NULL);
  142   int *regset, *orig_regset;
  143   int num_tags = 0; /* Total number of tags. */
  144   int num_minimals = 0;  /* Number of special minimal tags. */
  145   int tag = 0;      /* The tag that is to be added next. */
  146   int next_tag = 1; /* Next tag to use after this one. */
  147   int *parents;     /* Stack of submatches the current submatch is
  148                contained in. */
  149   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
  150   tre_tag_states_t *saved_states;
  151 
  152   tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
  153   if (!first_pass)
  154     {
  155       tnfa->end_tag = 0;
  156       tnfa->minimal_tags[0] = -1;
  157     }
  158 
  159   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
  160   if (regset == NULL)
  161     return REG_ESPACE;
  162   regset[0] = -1;
  163   orig_regset = regset;
  164 
  165   parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
  166   if (parents == NULL)
  167     {
  168       xfree(regset);
  169       return REG_ESPACE;
  170     }
  171   parents[0] = -1;
  172 
  173   saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
  174   if (saved_states == NULL)
  175     {
  176       xfree(regset);
  177       xfree(parents);
  178       return REG_ESPACE;
  179     }
  180   else
  181     {
  182       unsigned int i;
  183       for (i = 0; i <= tnfa->num_submatches; i++)
  184     saved_states[i].tag = -1;
  185     }
  186 
  187   STACK_PUSH(stack, voidptr, node);
  188   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
  189 
  190   while (tre_stack_num_objects(stack) > bottom)
  191     {
  192       if (status != REG_OK)
  193     break;
  194 
  195       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
  196       switch (symbol)
  197     {
  198 
  199     case ADDTAGS_SET_SUBMATCH_END:
  200       {
  201         int id = tre_stack_pop_int(stack);
  202         int i;
  203 
  204         /* Add end of this submatch to regset. */
  205         for (i = 0; regset[i] >= 0; i++);
  206         regset[i] = id * 2 + 1;
  207         regset[i + 1] = -1;
  208 
  209         /* Pop this submatch from the parents stack. */
  210         for (i = 0; parents[i] >= 0; i++);
  211         parents[i - 1] = -1;
  212         break;
  213       }
  214 
  215     case ADDTAGS_RECURSE:
  216       node = tre_stack_pop_voidptr(stack);
  217 
  218       if (node->submatch_id >= 0)
  219         {
  220           int id = node->submatch_id;
  221           int i;
  222 
  223 
  224           /* Add start of this submatch to regset. */
  225           for (i = 0; regset[i] >= 0; i++);
  226           regset[i] = id * 2;
  227           regset[i + 1] = -1;
  228 
  229           if (!first_pass)
  230         {
  231           for (i = 0; parents[i] >= 0; i++);
  232           tnfa->submatch_data[id].parents = NULL;
  233           if (i > 0)
  234             {
  235               int *p = xmalloc(sizeof(*p) * (i + 1));
  236               if (p == NULL)
  237             {
  238               status = REG_ESPACE;
  239               break;
  240             }
  241               assert(tnfa->submatch_data[id].parents == NULL);
  242               tnfa->submatch_data[id].parents = p;
  243               for (i = 0; parents[i] >= 0; i++)
  244             p[i] = parents[i];
  245               p[i] = -1;
  246             }
  247         }
  248 
  249           /* Add end of this submatch to regset after processing this
  250          node. */
  251           STACK_PUSHX(stack, int, node->submatch_id);
  252           STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
  253         }
  254 
  255       switch (node->type)
  256         {
  257         case LITERAL:
  258           {
  259         tre_literal_t *lit = node->obj;
  260 
  261         if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
  262           {
  263             int i;
  264             DPRINT(("Literal %d-%d\n",
  265                 (int)lit->code_min, (int)lit->code_max));
  266             if (regset[0] >= 0)
  267               {
  268             /* Regset is not empty, so add a tag before the
  269                literal or backref. */
  270             if (!first_pass)
  271               {
  272                 status = tre_add_tag_left(mem, node, tag);
  273                 tnfa->tag_directions[tag] = direction;
  274                 if (minimal_tag >= 0)
  275                   {
  276                 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
  277                 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
  278                 tnfa->minimal_tags[i] = tag;
  279                 tnfa->minimal_tags[i + 1] = minimal_tag;
  280                 tnfa->minimal_tags[i + 2] = -1;
  281                 minimal_tag = -1;
  282                 num_minimals++;
  283                   }
  284                 /* Go through the regset and set submatch data for
  285                    submatches that are using this tag. */
  286                 for (i = 0; regset[i] >= 0; i++)
  287                   {
  288                 int id = regset[i] / 2;
  289                 int start = !(regset[i] % 2);
  290                 DPRINT(("  Using tag %d for %s offset of "
  291                     "submatch %d\n", tag,
  292                     start ? "start" : "end", id));
  293                 if (start)
  294                   tnfa->submatch_data[id].so_tag = tag;
  295                 else
  296                   tnfa->submatch_data[id].eo_tag = tag;
  297                   }
  298               }
  299             else
  300               {
  301                 DPRINT(("  num_tags = 1\n"));
  302                 node->num_tags = 1;
  303               }
  304 
  305             DPRINT(("  num_tags++\n"));
  306             regset[0] = -1;
  307             tag = next_tag;
  308             num_tags++;
  309             next_tag++;
  310               }
  311           }
  312         else
  313           {
  314             assert(!IS_TAG(lit));
  315           }
  316         break;
  317           }
  318         case CATENATION:
  319           {
  320         tre_catenation_t *cat = node->obj;
  321         tre_ast_node_t *left = cat->left;
  322         tre_ast_node_t *right = cat->right;
  323         int reserved_tag = -1;
  324         DPRINT(("Catenation, next_tag = %d\n", next_tag));
  325 
  326 
  327         /* After processing right child. */
  328         STACK_PUSHX(stack, voidptr, node);
  329         STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
  330 
  331         /* Process right child. */
  332         STACK_PUSHX(stack, voidptr, right);
  333         STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
  334 
  335         /* After processing left child. */
  336         STACK_PUSHX(stack, int, next_tag + left->num_tags);
  337         DPRINT(("  Pushing %d for after left\n",
  338             next_tag + left->num_tags));
  339         if (left->num_tags > 0 && right->num_tags > 0)
  340           {
  341             /* Reserve the next tag to the right child. */
  342             DPRINT(("  Reserving next_tag %d to right child\n",
  343                 next_tag));
  344             reserved_tag = next_tag;
  345             next_tag++;
  346           }
  347         STACK_PUSHX(stack, int, reserved_tag);
  348         STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
  349 
  350         /* Process left child. */
  351         STACK_PUSHX(stack, voidptr, left);
  352         STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
  353 
  354         }
  355           break;
  356         case ITERATION:
  357           {
  358         tre_iteration_t *iter = node->obj;
  359         DPRINT(("Iteration\n"));
  360 
  361         if (first_pass)
  362           {
  363             STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
  364           }
  365         else
  366           {
  367             STACK_PUSHX(stack, int, tag);
  368             STACK_PUSHX(stack, int, iter->minimal);
  369           }
  370         STACK_PUSHX(stack, voidptr, node);
  371         STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
  372 
  373         STACK_PUSHX(stack, voidptr, iter->arg);
  374         STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
  375 
  376         /* Regset is not empty, so add a tag here. */
  377         if (regset[0] >= 0 || iter->minimal)
  378           {
  379             if (!first_pass)
  380               {
  381             int i;
  382             status = tre_add_tag_left(mem, node, tag);
  383             if (iter->minimal)
  384               tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
  385             else
  386               tnfa->tag_directions[tag] = direction;
  387             if (minimal_tag >= 0)
  388               {
  389                 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
  390                 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
  391                 tnfa->minimal_tags[i] = tag;
  392                 tnfa->minimal_tags[i + 1] = minimal_tag;
  393                 tnfa->minimal_tags[i + 2] = -1;
  394                 minimal_tag = -1;
  395                 num_minimals++;
  396               }
  397             /* Go through the regset and set submatch data for
  398                submatches that are using this tag. */
  399             for (i = 0; regset[i] >= 0; i++)
  400               {
  401                 int id = regset[i] / 2;
  402                 int start = !(regset[i] % 2);
  403                 DPRINT(("  Using tag %d for %s offset of "
  404                     "submatch %d\n", tag,
  405                     start ? "start" : "end", id));
  406                 if (start)
  407                   tnfa->submatch_data[id].so_tag = tag;
  408                 else
  409                   tnfa->submatch_data[id].eo_tag = tag;
  410               }
  411               }
  412 
  413             DPRINT(("  num_tags++\n"));
  414             regset[0] = -1;
  415             tag = next_tag;
  416             num_tags++;
  417             next_tag++;
  418           }
  419         direction = TRE_TAG_MINIMIZE;
  420           }
  421           break;
  422         case UNION:
  423           {
  424         tre_union_t *uni = node->obj;
  425         tre_ast_node_t *left = uni->left;
  426         tre_ast_node_t *right = uni->right;
  427         int left_tag;
  428         int right_tag;
  429 
  430         if (regset[0] >= 0)
  431           {
  432             left_tag = next_tag;
  433             right_tag = next_tag + 1;
  434           }
  435         else
  436           {
  437             left_tag = tag;
  438             right_tag = next_tag;
  439           }
  440 
  441         DPRINT(("Union\n"));
  442 
  443         /* After processing right child. */
  444         STACK_PUSHX(stack, int, right_tag);
  445         STACK_PUSHX(stack, int, left_tag);
  446         STACK_PUSHX(stack, voidptr, regset);
  447         STACK_PUSHX(stack, int, regset[0] >= 0);
  448         STACK_PUSHX(stack, voidptr, node);
  449         STACK_PUSHX(stack, voidptr, right);
  450         STACK_PUSHX(stack, voidptr, left);
  451         STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
  452 
  453         /* Process right child. */
  454         STACK_PUSHX(stack, voidptr, right);
  455         STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
  456 
  457         /* After processing left child. */
  458         STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
  459 
  460         /* Process left child. */
  461         STACK_PUSHX(stack, voidptr, left);
  462         STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
  463 
  464         /* Regset is not empty, so add a tag here. */
  465         if (regset[0] >= 0)
  466           {
  467             if (!first_pass)
  468               {
  469             int i;
  470             status = tre_add_tag_left(mem, node, tag);
  471             tnfa->tag_directions[tag] = direction;
  472             if (minimal_tag >= 0)
  473               {
  474                 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
  475                 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
  476                 tnfa->minimal_tags[i] = tag;
  477                 tnfa->minimal_tags[i + 1] = minimal_tag;
  478                 tnfa->minimal_tags[i + 2] = -1;
  479                 minimal_tag = -1;
  480                 num_minimals++;
  481               }
  482             /* Go through the regset and set submatch data for
  483                submatches that are using this tag. */
  484             for (i = 0; regset[i] >= 0; i++)
  485               {
  486                 int id = regset[i] / 2;
  487                 int start = !(regset[i] % 2);
  488                 DPRINT(("  Using tag %d for %s offset of "
  489                     "submatch %d\n", tag,
  490                     start ? "start" : "end", id));
  491                 if (start)
  492                   tnfa->submatch_data[id].so_tag = tag;
  493                 else
  494                   tnfa->submatch_data[id].eo_tag = tag;
  495               }
  496               }
  497 
  498             DPRINT(("  num_tags++\n"));
  499             regset[0] = -1;
  500             tag = next_tag;
  501             num_tags++;
  502             next_tag++;
  503           }
  504 
  505         if (node->num_submatches > 0)
  506           {
  507             /* The next two tags are reserved for markers. */
  508             next_tag++;
  509             tag = next_tag;
  510             next_tag++;
  511           }
  512 
  513         break;
  514           }
  515         }
  516 
  517       if (node->submatch_id >= 0)
  518         {
  519           int i;
  520           /* Push this submatch on the parents stack. */
  521           for (i = 0; parents[i] >= 0; i++);
  522           parents[i] = node->submatch_id;
  523           parents[i + 1] = -1;
  524         }
  525 
  526       break; /* end case: ADDTAGS_RECURSE */
  527 
  528     case ADDTAGS_AFTER_ITERATION:
  529       {
  530         int minimal = 0;
  531         int enter_tag;
  532         node = tre_stack_pop_voidptr(stack);
  533         if (first_pass)
  534           {
  535         node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
  536           + tre_stack_pop_int(stack);
  537         minimal_tag = -1;
  538           }
  539         else
  540           {
  541         minimal = tre_stack_pop_int(stack);
  542         enter_tag = tre_stack_pop_int(stack);
  543         if (minimal)
  544           minimal_tag = enter_tag;
  545           }
  546 
  547         DPRINT(("After iteration\n"));
  548         if (!first_pass)
  549           {
  550         DPRINT(("  Setting direction to %s\n",
  551             minimal ? "minimize" : "maximize"));
  552         if (minimal)
  553           direction = TRE_TAG_MINIMIZE;
  554         else
  555           direction = TRE_TAG_MAXIMIZE;
  556           }
  557         break;
  558       }
  559 
  560     case ADDTAGS_AFTER_CAT_LEFT:
  561       {
  562         int new_tag = tre_stack_pop_int(stack);
  563         next_tag = tre_stack_pop_int(stack);
  564         DPRINT(("After cat left, tag = %d, next_tag = %d\n",
  565             tag, next_tag));
  566         if (new_tag >= 0)
  567           {
  568         DPRINT(("  Setting tag to %d\n", new_tag));
  569         tag = new_tag;
  570           }
  571         break;
  572       }
  573 
  574     case ADDTAGS_AFTER_CAT_RIGHT:
  575       DPRINT(("After cat right\n"));
  576       node = tre_stack_pop_voidptr(stack);
  577       if (first_pass)
  578         node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
  579           + ((tre_catenation_t *)node->obj)->right->num_tags;
  580       break;
  581 
  582     case ADDTAGS_AFTER_UNION_LEFT:
  583       DPRINT(("After union left\n"));
  584       /* Lift the bottom of the `regset' array so that when processing
  585          the right operand the items currently in the array are
  586          invisible.  The original bottom was saved at ADDTAGS_UNION and
  587          will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
  588       while (*regset >= 0)
  589         regset++;
  590       break;
  591 
  592     case ADDTAGS_AFTER_UNION_RIGHT:
  593       {
  594         int added_tags, tag_left, tag_right;
  595         tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
  596         tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
  597         DPRINT(("After union right\n"));
  598         node = tre_stack_pop_voidptr(stack);
  599         added_tags = tre_stack_pop_int(stack);
  600         if (first_pass)
  601           {
  602         node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
  603           + ((tre_union_t *)node->obj)->right->num_tags + added_tags
  604           + ((node->num_submatches > 0) ? 2 : 0);
  605           }
  606         regset = tre_stack_pop_voidptr(stack);
  607         tag_left = tre_stack_pop_int(stack);
  608         tag_right = tre_stack_pop_int(stack);
  609 
  610         /* Add tags after both children, the left child gets a smaller
  611            tag than the right child.  This guarantees that we prefer
  612            the left child over the right child. */
  613         /* XXX - This is not always necessary (if the children have
  614            tags which must be seen for every match of that child). */
  615         /* XXX - Check if this is the only place where tre_add_tag_right
  616            is used.  If so, use tre_add_tag_left (putting the tag before
  617            the child as opposed after the child) and throw away
  618            tre_add_tag_right. */
  619         if (node->num_submatches > 0)
  620           {
  621         if (!first_pass)
  622           {
  623             status = tre_add_tag_right(mem, left, tag_left);
  624             tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
  625             status = tre_add_tag_right(mem, right, tag_right);
  626             tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
  627           }
  628         DPRINT(("  num_tags += 2\n"));
  629         num_tags += 2;
  630           }
  631         direction = TRE_TAG_MAXIMIZE;
  632         break;
  633       }
  634 
  635     default:
  636       assert(0);
  637       break;
  638 
  639     } /* end switch(symbol) */
  640     } /* end while(tre_stack_num_objects(stack) > bottom) */
  641 
  642   if (!first_pass)
  643     {
  644       int i;
  645       /* Go through the regset and set submatch data for
  646      submatches that are using this tag. */
  647       for (i = 0; regset[i] >= 0; i++)
  648     {
  649       int id = regset[i] / 2;
  650       int start = !(regset[i] % 2);
  651       DPRINT(("  Using tag %d for %s offset of "
  652           "submatch %d\n", num_tags,
  653           start ? "start" : "end", id));
  654       if (start)
  655         tnfa->submatch_data[id].so_tag = num_tags;
  656       else
  657         tnfa->submatch_data[id].eo_tag = num_tags;
  658     }
  659     }
  660 
  661   if (!first_pass && minimal_tag >= 0)
  662     {
  663       int i;
  664       DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
  665       for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
  666       tnfa->minimal_tags[i] = tag;
  667       tnfa->minimal_tags[i + 1] = minimal_tag;
  668       tnfa->minimal_tags[i + 2] = -1;
  669       minimal_tag = -1;
  670       num_minimals++;
  671     }
  672 
  673   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
  674       first_pass? "First pass" : "Second pass", num_tags));
  675 
  676   assert(tree->num_tags == num_tags);
  677   tnfa->end_tag = num_tags;
  678   tnfa->num_tags = num_tags;
  679   tnfa->num_minimals = num_minimals;
  680   xfree(orig_regset);
  681   xfree(parents);
  682   xfree(saved_states);
  683   return status;
  684 }
  685 
  686 
  687 
  688 /*
  689   AST to TNFA compilation routines.
  690 */
  691 
  692 typedef enum {
  693   COPY_RECURSE,
  694   COPY_SET_RESULT_PTR
  695 } tre_copyast_symbol_t;
  696 
  697 /* Flags for tre_copy_ast(). */
  698 #define COPY_REMOVE_TAGS     1
  699 #define COPY_MAXIMIZE_FIRST_TAG  2
  700 
  701 static reg_errcode_t
  702 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
  703          int flags, int *pos_add, tre_tag_direction_t *tag_directions,
  704          tre_ast_node_t **copy, int *max_pos)
  705 {
  706   reg_errcode_t status = REG_OK;
  707   int bottom = tre_stack_num_objects(stack);
  708   int num_copied = 0;
  709   int first_tag = 1;
  710   tre_ast_node_t **result = copy;
  711   tre_copyast_symbol_t symbol;
  712 
  713   STACK_PUSH(stack, voidptr, ast);
  714   STACK_PUSH(stack, int, COPY_RECURSE);
  715 
  716   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
  717     {
  718       tre_ast_node_t *node;
  719       if (status != REG_OK)
  720     break;
  721 
  722       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
  723       switch (symbol)
  724     {
  725     case COPY_SET_RESULT_PTR:
  726       result = tre_stack_pop_voidptr(stack);
  727       break;
  728     case COPY_RECURSE:
  729       node = tre_stack_pop_voidptr(stack);
  730       switch (node->type)
  731         {
  732         case LITERAL:
  733           {
  734         tre_literal_t *lit = node->obj;
  735         int pos = lit->position;
  736         int min = lit->code_min;
  737         int max = lit->code_max;
  738         if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
  739           {
  740             /* XXX - e.g. [ab] has only one position but two
  741                nodes, so we are creating holes in the state space
  742                here.  Not fatal, just wastes memory. */
  743             pos += *pos_add;
  744             num_copied++;
  745           }
  746         else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
  747           {
  748             /* Change this tag to empty. */
  749             min = EMPTY;
  750             max = pos = -1;
  751           }
  752         else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
  753              && first_tag)
  754           {
  755             /* Maximize the first tag. */
  756             tag_directions[max] = TRE_TAG_MAXIMIZE;
  757             first_tag = 0;
  758           }
  759         *result = tre_ast_new_literal(mem, min, max, pos);
  760         if (*result == NULL)
  761           status = REG_ESPACE;
  762 
  763         if (pos > *max_pos)
  764           *max_pos = pos;
  765         break;
  766           }
  767         case UNION:
  768           {
  769         tre_union_t *uni = node->obj;
  770         tre_union_t *tmp;
  771         *result = tre_ast_new_union(mem, uni->left, uni->right);
  772         if (*result == NULL)
  773           {
  774             status = REG_ESPACE;
  775             break;
  776           }
  777         tmp = (*result)->obj;
  778         result = &tmp->left;
  779         STACK_PUSHX(stack, voidptr, uni->right);
  780         STACK_PUSHX(stack, int, COPY_RECURSE);
  781         STACK_PUSHX(stack, voidptr, &tmp->right);
  782         STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
  783         STACK_PUSHX(stack, voidptr, uni->left);
  784         STACK_PUSHX(stack, int, COPY_RECURSE);
  785         break;
  786           }
  787         case CATENATION:
  788           {
  789         tre_catenation_t *cat = node->obj;
  790         tre_catenation_t *tmp;
  791         *result = tre_ast_new_catenation(mem, cat->left, cat->right);
  792         if (*result == NULL)
  793           {
  794             status = REG_ESPACE;
  795             break;
  796           }
  797         tmp = (*result)->obj;
  798         tmp->left = NULL;
  799         tmp->right = NULL;
  800         result = &tmp->left;
  801 
  802         STACK_PUSHX(stack, voidptr, cat->right);
  803         STACK_PUSHX(stack, int, COPY_RECURSE);
  804         STACK_PUSHX(stack, voidptr, &tmp->right);
  805         STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
  806         STACK_PUSHX(stack, voidptr, cat->left);
  807         STACK_PUSHX(stack, int, COPY_RECURSE);
  808         break;
  809           }
  810         case ITERATION:
  811           {
  812         tre_iteration_t *iter = node->obj;
  813         STACK_PUSHX(stack, voidptr, iter->arg);
  814         STACK_PUSHX(stack, int, COPY_RECURSE);
  815         *result = tre_ast_new_iter(mem, iter->arg, iter->min,
  816                        iter->max, iter->minimal);
  817         if (*result == NULL)
  818           {
  819             status = REG_ESPACE;
  820             break;
  821           }
  822         iter = (*result)->obj;
  823         result = &iter->arg;
  824         break;
  825           }
  826         default:
  827           assert(0);
  828           break;
  829         }
  830       break;
  831     }
  832     }
  833   *pos_add += num_copied;
  834   return status;
  835 }
  836 
  837 typedef enum {
  838   EXPAND_RECURSE,
  839   EXPAND_AFTER_ITER
  840 } tre_expand_ast_symbol_t;
  841 
  842 /* Expands each iteration node that has a finite nonzero minimum or maximum
  843    iteration count to a catenated sequence of copies of the node. */
  844 static reg_errcode_t
  845 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
  846            int *position, tre_tag_direction_t *tag_directions,
  847            int *max_depth)
  848 {
  849   reg_errcode_t status = REG_OK;
  850   int bottom = tre_stack_num_objects(stack);
  851   int pos_add = 0;
  852   int pos_add_total = 0;
  853   int max_pos = 0;
  854   /* Current approximate matching parameters. */
  855   int params[TRE_PARAM_LAST];
  856   /* Approximate parameter nesting level. */
  857   int params_depth = 0;
  858   int iter_depth = 0;
  859   int i;
  860 
  861   for (i = 0; i < TRE_PARAM_LAST; i++)
  862     params[i] = TRE_PARAM_DEFAULT;
  863 
  864   STACK_PUSHR(stack, voidptr, ast);
  865   STACK_PUSHR(stack, int, EXPAND_RECURSE);
  866   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
  867     {
  868       tre_ast_node_t *node;
  869       tre_expand_ast_symbol_t symbol;
  870 
  871       if (status != REG_OK)
  872     break;
  873 
  874       DPRINT(("pos_add %d\n", pos_add));
  875 
  876       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
  877       node = tre_stack_pop_voidptr(stack);
  878       switch (symbol)
  879     {
  880     case EXPAND_RECURSE:
  881       switch (node->type)
  882         {
  883         case LITERAL:
  884           {
  885         tre_literal_t *lit= node->obj;
  886         if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
  887           {
  888             lit->position += pos_add;
  889             if (lit->position > max_pos)
  890               max_pos = lit->position;
  891           }
  892         break;
  893           }
  894         case UNION:
  895           {
  896         tre_union_t *uni = node->obj;
  897         STACK_PUSHX(stack, voidptr, uni->right);
  898         STACK_PUSHX(stack, int, EXPAND_RECURSE);
  899         STACK_PUSHX(stack, voidptr, uni->left);
  900         STACK_PUSHX(stack, int, EXPAND_RECURSE);
  901         break;
  902           }
  903         case CATENATION:
  904           {
  905         tre_catenation_t *cat = node->obj;
  906         STACK_PUSHX(stack, voidptr, cat->right);
  907         STACK_PUSHX(stack, int, EXPAND_RECURSE);
  908         STACK_PUSHX(stack, voidptr, cat->left);
  909         STACK_PUSHX(stack, int, EXPAND_RECURSE);
  910         break;
  911           }
  912         case ITERATION:
  913           {
  914         tre_iteration_t *iter = node->obj;
  915         STACK_PUSHX(stack, int, pos_add);
  916         STACK_PUSHX(stack, voidptr, node);
  917         STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
  918         STACK_PUSHX(stack, voidptr, iter->arg);
  919         STACK_PUSHX(stack, int, EXPAND_RECURSE);
  920         /* If we are going to expand this node at EXPAND_AFTER_ITER
  921            then don't increase the `pos' fields of the nodes now, it
  922            will get done when expanding. */
  923         if (iter->min > 1 || iter->max > 1)
  924           pos_add = 0;
  925         iter_depth++;
  926         DPRINT(("iter\n"));
  927         break;
  928           }
  929         default:
  930           assert(0);
  931           break;
  932         }
  933       break;
  934     case EXPAND_AFTER_ITER:
  935       {
  936         tre_iteration_t *iter = node->obj;
  937         int pos_add_last;
  938         pos_add = tre_stack_pop_int(stack);
  939         pos_add_last = pos_add;
  940         if (iter->min > 1 || iter->max > 1)
  941           {
  942         tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
  943         int i;
  944         int pos_add_save = pos_add;
  945 
  946         /* Create a catenated sequence of copies of the node. */
  947         for (i = 0; i < iter->min; i++)
  948           {
  949             tre_ast_node_t *copy;
  950             /* Remove tags from all but the last copy. */
  951             int flags = ((i + 1 < iter->min)
  952                  ? COPY_REMOVE_TAGS
  953                  : COPY_MAXIMIZE_FIRST_TAG);
  954             DPRINT(("  pos_add %d\n", pos_add));
  955             pos_add_save = pos_add;
  956             status = tre_copy_ast(mem, stack, iter->arg, flags,
  957                       &pos_add, tag_directions, &copy,
  958                       &max_pos);
  959             if (status != REG_OK)
  960               return status;
  961             if (seq1 != NULL)
  962               seq1 = tre_ast_new_catenation(mem, seq1, copy);
  963             else
  964               seq1 = copy;
  965             if (seq1 == NULL)
  966               return REG_ESPACE;
  967           }
  968 
  969         if (iter->max == -1)
  970           {
  971             /* No upper limit. */
  972             pos_add_save = pos_add;
  973             status = tre_copy_ast(mem, stack, iter->arg, 0,
  974                       &pos_add, NULL, &seq2, &max_pos);
  975             if (status != REG_OK)
  976               return status;
  977             seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
  978             if (seq2 == NULL)
  979               return REG_ESPACE;
  980           }
  981         else
  982           {
  983             for (i = iter->min; i < iter->max; i++)
  984               {
  985             tre_ast_node_t *tmp, *copy;
  986             pos_add_save = pos_add;
  987             status = tre_copy_ast(mem, stack, iter->arg, 0,
  988                           &pos_add, NULL, &copy, &max_pos);
  989             if (status != REG_OK)
  990               return status;
  991             if (seq2 != NULL)
  992               seq2 = tre_ast_new_catenation(mem, copy, seq2);
  993             else
  994               seq2 = copy;
  995             if (seq2 == NULL)
  996               return REG_ESPACE;
  997             tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
  998             if (tmp == NULL)
  999               return REG_ESPACE;
 1000             seq2 = tre_ast_new_union(mem, tmp, seq2);
 1001             if (seq2 == NULL)
 1002               return REG_ESPACE;
 1003               }
 1004           }
 1005 
 1006         pos_add = pos_add_save;
 1007         if (seq1 == NULL)
 1008           seq1 = seq2;
 1009         else if (seq2 != NULL)
 1010           seq1 = tre_ast_new_catenation(mem, seq1, seq2);
 1011         if (seq1 == NULL)
 1012           return REG_ESPACE;
 1013         node->obj = seq1->obj;
 1014         node->type = seq1->type;
 1015           }
 1016 
 1017         iter_depth--;
 1018         pos_add_total += pos_add - pos_add_last;
 1019         if (iter_depth == 0)
 1020           pos_add = pos_add_total;
 1021 
 1022         /* If approximate parameters are specified, surround the result
 1023            with two parameter setting nodes.  The one on the left sets
 1024            the specified parameters, and the one on the right restores
 1025            the old parameters. */
 1026         if (iter->params)
 1027           {
 1028         tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
 1029         int *old_params;
 1030 
 1031         tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
 1032         if (!tmp_l)
 1033           return REG_ESPACE;
 1034         ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
 1035         iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
 1036         tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
 1037         if (!tmp_r)
 1038           return REG_ESPACE;
 1039         old_params = tre_mem_alloc(mem, sizeof(*old_params)
 1040                        * TRE_PARAM_LAST);
 1041         if (!old_params)
 1042           return REG_ESPACE;
 1043         for (i = 0; i < TRE_PARAM_LAST; i++)
 1044           old_params[i] = params[i];
 1045         ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
 1046         old_params[TRE_PARAM_DEPTH] = params_depth;
 1047         /* XXX - this is the only place where ast_new_node is
 1048            needed -- should be moved inside AST module. */
 1049         node_copy = tre_ast_new_node(mem, ITERATION,
 1050                          sizeof(tre_iteration_t));
 1051         if (!node_copy)
 1052           return REG_ESPACE;
 1053         node_copy->obj = node->obj;
 1054         tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
 1055         if (!tmp_node)
 1056           return REG_ESPACE;
 1057         tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
 1058         if (!tmp_node)
 1059           return REG_ESPACE;
 1060         /* Replace the contents of `node' with `tmp_node'. */
 1061         memcpy(node, tmp_node, sizeof(*node));
 1062         node->obj = tmp_node->obj;
 1063         node->type = tmp_node->type;
 1064         params_depth++;
 1065         if (params_depth > *max_depth)
 1066           *max_depth = params_depth;
 1067           }
 1068         break;
 1069       }
 1070     default:
 1071       assert(0);
 1072       break;
 1073     }
 1074     }
 1075 
 1076   *position += pos_add_total;
 1077 
 1078   /* `max_pos' should never be larger than `*position' if the above
 1079      code works, but just an extra safeguard let's make sure
 1080      `*position' is set large enough so enough memory will be
 1081      allocated for the transition table. */
 1082   if (max_pos > *position)
 1083     *position = max_pos;
 1084 
 1085 #ifdef TRE_DEBUG
 1086   DPRINT(("Expanded AST:\n"));
 1087   tre_ast_print(ast);
 1088   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
 1089 #endif
 1090 
 1091   return status;
 1092 }
 1093 
 1094 static tre_pos_and_tags_t *
 1095 tre_set_empty(tre_mem_t mem)
 1096 {
 1097   tre_pos_and_tags_t *new_set;
 1098 
 1099   new_set = tre_mem_calloc(mem, sizeof(*new_set));
 1100   if (new_set == NULL)
 1101     return NULL;
 1102 
 1103   new_set[0].position = -1;
 1104   new_set[0].code_min = -1;
 1105   new_set[0].code_max = -1;
 1106 
 1107   return new_set;
 1108 }
 1109 
 1110 static tre_pos_and_tags_t *
 1111 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
 1112         tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
 1113 {
 1114   tre_pos_and_tags_t *new_set;
 1115 
 1116   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
 1117   if (new_set == NULL)
 1118     return NULL;
 1119 
 1120   new_set[0].position = position;
 1121   new_set[0].code_min = code_min;
 1122   new_set[0].code_max = code_max;
 1123   new_set[0].class = class;
 1124   new_set[0].neg_classes = neg_classes;
 1125   new_set[0].backref = backref;
 1126   new_set[1].position = -1;
 1127   new_set[1].code_min = -1;
 1128   new_set[1].code_max = -1;
 1129 
 1130   return new_set;
 1131 }
 1132 
 1133 static tre_pos_and_tags_t *
 1134 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
 1135           int *tags, int assertions, int *params)
 1136 {
 1137   int s1, s2, i, j;
 1138   tre_pos_and_tags_t *new_set;
 1139   int *new_tags;
 1140   int num_tags;
 1141 
 1142   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
 1143   for (s1 = 0; set1[s1].position >= 0; s1++);
 1144   for (s2 = 0; set2[s2].position >= 0; s2++);
 1145   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
 1146   if (!new_set )
 1147     return NULL;
 1148 
 1149   for (s1 = 0; set1[s1].position >= 0; s1++)
 1150     {
 1151       new_set[s1].position = set1[s1].position;
 1152       new_set[s1].code_min = set1[s1].code_min;
 1153       new_set[s1].code_max = set1[s1].code_max;
 1154       new_set[s1].assertions = set1[s1].assertions | assertions;
 1155       new_set[s1].class = set1[s1].class;
 1156       new_set[s1].neg_classes = set1[s1].neg_classes;
 1157       new_set[s1].backref = set1[s1].backref;
 1158       if (set1[s1].tags == NULL && tags == NULL)
 1159     new_set[s1].tags = NULL;
 1160       else
 1161     {
 1162       for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
 1163       new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
 1164                      * (i + num_tags + 1)));
 1165       if (new_tags == NULL)
 1166         return NULL;
 1167       for (j = 0; j < i; j++)
 1168         new_tags[j] = set1[s1].tags[j];
 1169       for (i = 0; i < num_tags; i++)
 1170         new_tags[j + i] = tags[i];
 1171       new_tags[j + i] = -1;
 1172       new_set[s1].tags = new_tags;
 1173     }
 1174       if (set1[s1].params)
 1175     new_set[s1].params = set1[s1].params;
 1176       if (params)
 1177     {
 1178       if (!new_set[s1].params)
 1179         new_set[s1].params = params;
 1180       else
 1181         {
 1182           new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
 1183                          TRE_PARAM_LAST);
 1184           if (!new_set[s1].params)
 1185         return NULL;
 1186           for (i = 0; i < TRE_PARAM_LAST; i++)
 1187         if (params[i] != TRE_PARAM_UNSET)
 1188           new_set[s1].params[i] = params[i];
 1189         }
 1190     }
 1191     }
 1192 
 1193   for (s2 = 0; set2[s2].position >= 0; s2++)
 1194     {
 1195       new_set[s1 + s2].position = set2[s2].position;
 1196       new_set[s1 + s2].code_min = set2[s2].code_min;
 1197       new_set[s1 + s2].code_max = set2[s2].code_max;
 1198       /* XXX - why not | assertions here as well? */
 1199       new_set[s1 + s2].assertions = set2[s2].assertions;
 1200       new_set[s1 + s2].class = set2[s2].class;
 1201       new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
 1202       new_set[s1 + s2].backref = set2[s2].backref;
 1203       if (set2[s2].tags == NULL)
 1204     new_set[s1 + s2].tags = NULL;
 1205       else
 1206     {
 1207       for (i = 0; set2[s2].tags[i] >= 0; i++);
 1208       new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
 1209       if (new_tags == NULL)
 1210         return NULL;
 1211       for (j = 0; j < i; j++)
 1212         new_tags[j] = set2[s2].tags[j];
 1213       new_tags[j] = -1;
 1214       new_set[s1 + s2].tags = new_tags;
 1215     }
 1216       if (set2[s2].params)
 1217     new_set[s1 + s2].params = set2[s2].params;
 1218       if (params)
 1219     {
 1220       if (!new_set[s1 + s2].params)
 1221         new_set[s1 + s2].params = params;
 1222       else
 1223         {
 1224           new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
 1225                               TRE_PARAM_LAST);
 1226           if (!new_set[s1 + s2].params)
 1227         return NULL;
 1228           for (i = 0; i < TRE_PARAM_LAST; i++)
 1229         if (params[i] != TRE_PARAM_UNSET)
 1230           new_set[s1 + s2].params[i] = params[i];
 1231         }
 1232     }
 1233     }
 1234   new_set[s1 + s2].position = -1;
 1235   return new_set;
 1236 }
 1237 
 1238 /* Finds the empty path through `node' which is the one that should be
 1239    taken according to POSIX.2 rules, and adds the tags on that path to
 1240    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
 1241    set to the number of tags seen on the path. */
 1242 static reg_errcode_t
 1243 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
 1244         int *assertions, int *params, int *num_tags_seen,
 1245         int *params_seen)
 1246 {
 1247   tre_literal_t *lit;
 1248   tre_union_t *uni;
 1249   tre_catenation_t *cat;
 1250   tre_iteration_t *iter;
 1251   int i;
 1252   int bottom = tre_stack_num_objects(stack);
 1253   reg_errcode_t status = REG_OK;
 1254   if (num_tags_seen)
 1255     *num_tags_seen = 0;
 1256   if (params_seen)
 1257     *params_seen = 0;
 1258 
 1259   status = tre_stack_push_voidptr(stack, node);
 1260 
 1261   /* Walk through the tree recursively. */
 1262   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
 1263     {
 1264       node = tre_stack_pop_voidptr(stack);
 1265 
 1266       switch (node->type)
 1267     {
 1268     case LITERAL:
 1269       lit = (tre_literal_t *)node->obj;
 1270       switch (lit->code_min)
 1271         {
 1272         case TAG:
 1273           if (lit->code_max >= 0)
 1274         {
 1275           if (tags != NULL)
 1276             {
 1277               /* Add the tag to `tags'. */
 1278               for (i = 0; tags[i] >= 0; i++)
 1279             if (tags[i] == lit->code_max)
 1280               break;
 1281               if (tags[i] < 0)
 1282             {
 1283               tags[i] = lit->code_max;
 1284               tags[i + 1] = -1;
 1285             }
 1286             }
 1287           if (num_tags_seen)
 1288             (*num_tags_seen)++;
 1289         }
 1290           break;
 1291         case ASSERTION:
 1292           assert(lit->code_max >= 1
 1293              || lit->code_max <= ASSERT_LAST);
 1294           if (assertions != NULL)
 1295         *assertions |= lit->code_max;
 1296           break;
 1297         case PARAMETER:
 1298           if (params != NULL)
 1299         for (i = 0; i < TRE_PARAM_LAST; i++)
 1300           params[i] = lit->u.params[i];
 1301           if (params_seen != NULL)
 1302         *params_seen = 1;
 1303           break;
 1304         case EMPTY:
 1305           break;
 1306         default:
 1307           assert(0);
 1308           break;
 1309         }
 1310       break;
 1311 
 1312     case UNION:
 1313       /* Subexpressions starting earlier take priority over ones
 1314          starting later, so we prefer the left subexpression over the
 1315          right subexpression. */
 1316       uni = (tre_union_t *)node->obj;
 1317       if (uni->left->nullable)
 1318         STACK_PUSHX(stack, voidptr, uni->left)
 1319       else if (uni->right->nullable)
 1320         STACK_PUSHX(stack, voidptr, uni->right)
 1321       else
 1322         assert(0);
 1323       break;
 1324 
 1325     case CATENATION:
 1326       /* The path must go through both children. */
 1327       cat = (tre_catenation_t *)node->obj;
 1328       assert(cat->left->nullable);
 1329       assert(cat->right->nullable);
 1330       STACK_PUSHX(stack, voidptr, cat->left);
 1331       STACK_PUSHX(stack, voidptr, cat->right);
 1332       break;
 1333 
 1334     case ITERATION:
 1335       /* A match with an empty string is preferred over no match at
 1336          all, so we go through the argument if possible. */
 1337       iter = (tre_iteration_t *)node->obj;
 1338       if (iter->arg->nullable)
 1339         STACK_PUSHX(stack, voidptr, iter->arg);
 1340       break;
 1341 
 1342     default:
 1343       assert(0);
 1344       break;
 1345     }
 1346     }
 1347 
 1348   return status;
 1349 }
 1350 
 1351 
 1352 typedef enum {
 1353   NFL_RECURSE,
 1354   NFL_POST_UNION,
 1355   NFL_POST_CATENATION,
 1356   NFL_POST_ITERATION
 1357 } tre_nfl_stack_symbol_t;
 1358 
 1359 
 1360 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
 1361    the nodes of the AST `tree'. */
 1362 static reg_errcode_t
 1363 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
 1364 {
 1365   int bottom = tre_stack_num_objects(stack);
 1366 
 1367   STACK_PUSHR(stack, voidptr, tree);
 1368   STACK_PUSHR(stack, int, NFL_RECURSE);
 1369 
 1370   while (tre_stack_num_objects(stack) > bottom)
 1371     {
 1372       tre_nfl_stack_symbol_t symbol;
 1373       tre_ast_node_t *node;
 1374 
 1375       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
 1376       node = tre_stack_pop_voidptr(stack);
 1377       switch (symbol)
 1378     {
 1379     case NFL_RECURSE:
 1380       switch (node->type)
 1381         {
 1382         case LITERAL:
 1383           {
 1384         tre_literal_t *lit = (tre_literal_t *)node->obj;
 1385         if (IS_BACKREF(lit))
 1386           {
 1387             /* Back references: nullable = false, firstpos = {i},
 1388                lastpos = {i}. */
 1389             node->nullable = 0;
 1390             node->firstpos = tre_set_one(mem, lit->position, 0,
 1391                          TRE_CHAR_MAX, 0, NULL, -1);
 1392             if (!node->firstpos)
 1393               return REG_ESPACE;
 1394             node->lastpos = tre_set_one(mem, lit->position, 0,
 1395                         TRE_CHAR_MAX, 0, NULL,
 1396                         lit->code_max);
 1397             if (!node->lastpos)
 1398               return REG_ESPACE;
 1399           }
 1400         else if (lit->code_min < 0)
 1401           {
 1402             /* Tags, empty strings, params, and zero width assertions:
 1403                nullable = true, firstpos = {}, and lastpos = {}. */
 1404             node->nullable = 1;
 1405             node->firstpos = tre_set_empty(mem);
 1406             if (!node->firstpos)
 1407               return REG_ESPACE;
 1408             node->lastpos = tre_set_empty(mem);
 1409             if (!node->lastpos)
 1410               return REG_ESPACE;
 1411           }
 1412         else
 1413           {
 1414             /* Literal at position i: nullable = false, firstpos = {i},
 1415                lastpos = {i}. */
 1416             node->nullable = 0;
 1417             node->firstpos =
 1418               tre_set_one(mem, lit->position, lit->code_min,
 1419                   lit->code_max, 0, NULL, -1);
 1420             if (!node->firstpos)
 1421               return REG_ESPACE;
 1422             node->lastpos = tre_set_one(mem, lit->position,
 1423                         lit->code_min, lit->code_max,
 1424                         lit->u.class, lit->neg_classes,
 1425                         -1);
 1426             if (!node->lastpos)
 1427               return REG_ESPACE;
 1428           }
 1429         break;
 1430           }
 1431 
 1432         case UNION:
 1433           /* Compute the attributes for the two subtrees, and after that
 1434          for this node. */
 1435           STACK_PUSHR(stack, voidptr, node);
 1436           STACK_PUSHR(stack, int, NFL_POST_UNION);
 1437           STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
 1438           STACK_PUSHR(stack, int, NFL_RECURSE);
 1439           STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
 1440           STACK_PUSHR(stack, int, NFL_RECURSE);
 1441           break;
 1442 
 1443         case CATENATION:
 1444           /* Compute the attributes for the two subtrees, and after that
 1445          for this node. */
 1446           STACK_PUSHR(stack, voidptr, node);
 1447           STACK_PUSHR(stack, int, NFL_POST_CATENATION);
 1448           STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
 1449           STACK_PUSHR(stack, int, NFL_RECURSE);
 1450           STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
 1451           STACK_PUSHR(stack, int, NFL_RECURSE);
 1452           break;
 1453 
 1454         case ITERATION:
 1455           /* Compute the attributes for the subtree, and after that for
 1456          this node. */
 1457           STACK_PUSHR(stack, voidptr, node);
 1458           STACK_PUSHR(stack, int, NFL_POST_ITERATION);
 1459           STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
 1460           STACK_PUSHR(stack, int, NFL_RECURSE);
 1461           break;
 1462         }
 1463       break; /* end case: NFL_RECURSE */
 1464 
 1465     case NFL_POST_UNION:
 1466       {
 1467         tre_union_t *uni = (tre_union_t *)node->obj;
 1468         node->nullable = uni->left->nullable || uni->right->nullable;
 1469         node->firstpos = tre_set_union(mem, uni->left->firstpos,
 1470                        uni->right->firstpos, NULL, 0, NULL);
 1471         if (!node->firstpos)
 1472           return REG_ESPACE;
 1473         node->lastpos = tre_set_union(mem, uni->left->lastpos,
 1474                       uni->right->lastpos, NULL, 0, NULL);
 1475         if (!node->lastpos)
 1476           return REG_ESPACE;
 1477         break;
 1478       }
 1479 
 1480     case NFL_POST_ITERATION:
 1481       {
 1482         tre_iteration_t *iter = (tre_iteration_t *)node->obj;
 1483 
 1484         if (iter->min == 0 || iter->arg->nullable)
 1485           node->nullable = 1;
 1486         else
 1487           node->nullable = 0;
 1488         node->firstpos = iter->arg->firstpos;
 1489         node->lastpos = iter->arg->lastpos;
 1490         break;
 1491       }
 1492 
 1493     case NFL_POST_CATENATION:
 1494       {
 1495         int num_tags, *tags, assertions, params_seen;
 1496         int *params;
 1497         reg_errcode_t status;
 1498         tre_catenation_t *cat = node->obj;
 1499         node->nullable = cat->left->nullable && cat->right->nullable;
 1500 
 1501         /* Compute firstpos. */
 1502         if (cat->left->nullable)
 1503           {
 1504         /* The left side matches the empty string.  Make a first pass
 1505            with tre_match_empty() to get the number of tags and
 1506            parameters. */
 1507         status = tre_match_empty(stack, cat->left,
 1508                      NULL, NULL, NULL, &num_tags,
 1509                      &params_seen);
 1510         if (status != REG_OK)
 1511           return status;
 1512         /* Allocate arrays for the tags and parameters. */
 1513         tags = xmalloc(sizeof(*tags) * (num_tags + 1));
 1514         if (!tags)
 1515           return REG_ESPACE;
 1516         tags[0] = -1;
 1517         assertions = 0;
 1518         params = NULL;
 1519         if (params_seen)
 1520           {
 1521             params = tre_mem_alloc(mem, sizeof(*params)
 1522                        * TRE_PARAM_LAST);
 1523             if (!params)
 1524               {
 1525             xfree(tags);
 1526             return REG_ESPACE;
 1527               }
 1528           }
 1529         /* Second pass with tre_mach_empty() to get the list of
 1530            tags and parameters. */
 1531         status = tre_match_empty(stack, cat->left, tags,
 1532                      &assertions, params, NULL, NULL);
 1533         if (status != REG_OK)
 1534           {
 1535             xfree(tags);
 1536             return status;
 1537           }
 1538         node->firstpos =
 1539           tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
 1540                 tags, assertions, params);
 1541         xfree(tags);
 1542         if (!node->firstpos)
 1543           return REG_ESPACE;
 1544           }
 1545         else
 1546           {
 1547         node->firstpos = cat->left->firstpos;
 1548           }
 1549 
 1550         /* Compute lastpos. */
 1551         if (cat->right->nullable)
 1552           {
 1553         /* The right side matches the empty string.  Make a first pass
 1554            with tre_match_empty() to get the number of tags and
 1555            parameters. */
 1556         status = tre_match_empty(stack, cat->right,
 1557                      NULL, NULL, NULL, &num_tags,
 1558                      &params_seen);
 1559         if (status != REG_OK)
 1560           return status;
 1561         /* Allocate arrays for the tags and parameters. */
 1562         tags = xmalloc(sizeof(int) * (num_tags + 1));
 1563         if (!tags)
 1564           return REG_ESPACE;
 1565         tags[0] = -1;
 1566         assertions = 0;
 1567         params = NULL;
 1568         if (params_seen)
 1569           {
 1570             params = tre_mem_alloc(mem, sizeof(*params)
 1571                        * TRE_PARAM_LAST);
 1572             if (!params)
 1573               {
 1574             xfree(tags);
 1575             return REG_ESPACE;
 1576               }
 1577           }
 1578         /* Second pass with tre_mach_empty() to get the list of
 1579            tags and parameters. */
 1580         status = tre_match_empty(stack, cat->right, tags,
 1581                      &assertions, params, NULL, NULL);
 1582         if (status != REG_OK)
 1583           {
 1584             xfree(tags);
 1585             return status;
 1586           }
 1587         node->lastpos =
 1588           tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
 1589                 tags, assertions, params);
 1590         xfree(tags);
 1591         if (!node->lastpos)
 1592           return REG_ESPACE;
 1593           }
 1594         else
 1595           {
 1596         node->lastpos = cat->right->lastpos;
 1597           }
 1598         break;
 1599       }
 1600 
 1601     default:
 1602       assert(0);
 1603       break;
 1604     }
 1605     }
 1606 
 1607   return REG_OK;
 1608 }
 1609 
 1610 
 1611 /* Adds a transition from each position in `p1' to each position in `p2'. */
 1612 static reg_errcode_t
 1613 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
 1614            tre_tnfa_transition_t *transitions,
 1615            int *counts, int *offs)
 1616 {
 1617   tre_pos_and_tags_t *orig_p2 = p2;
 1618   tre_tnfa_transition_t *trans;
 1619   int i, j, k, l, dup, prev_p2_pos;
 1620 
 1621   if (transitions != NULL)
 1622     while (p1->position >= 0)
 1623       {
 1624     p2 = orig_p2;
 1625     prev_p2_pos = -1;
 1626     while (p2->position >= 0)
 1627       {
 1628         /* Optimization: if this position was already handled, skip it. */
 1629         if (p2->position == prev_p2_pos)
 1630           {
 1631         p2++;
 1632         continue;
 1633           }
 1634         prev_p2_pos = p2->position;
 1635         /* Set `trans' to point to the next unused transition from
 1636            position `p1->position'. */
 1637         trans = transitions + offs[p1->position];
 1638         while (trans->state != NULL)
 1639           {
 1640 #if 0
 1641         /* If we find a previous transition from `p1->position' to
 1642            `p2->position', it is overwritten.  This can happen only
 1643            if there are nested loops in the regexp, like in "((a)*)*".
 1644            In POSIX.2 repetition using the outer loop is always
 1645            preferred over using the inner loop.  Therefore the
 1646            transition for the inner loop is useless and can be thrown
 1647            away. */
 1648         /* XXX - The same position is used for all nodes in a bracket
 1649            expression, so this optimization cannot be used (it will
 1650            break bracket expressions) unless I figure out a way to
 1651            detect it here. */
 1652         if (trans->state_id == p2->position)
 1653           {
 1654             DPRINT(("*"));
 1655             break;
 1656           }
 1657 #endif
 1658         trans++;
 1659           }
 1660 
 1661         if (trans->state == NULL)
 1662           (trans + 1)->state = NULL;
 1663         /* Use the character ranges, assertions, etc. from `p1' for
 1664            the transition from `p1' to `p2'. */
 1665         trans->code_min = p1->code_min;
 1666         trans->code_max = p1->code_max;
 1667         trans->state = transitions + offs[p2->position];
 1668         trans->state_id = p2->position;
 1669         trans->assertions = p1->assertions | p2->assertions
 1670           | (p1->class ? ASSERT_CHAR_CLASS : 0)
 1671           | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
 1672         if (p1->backref >= 0)
 1673           {
 1674         assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
 1675         assert(p2->backref < 0);
 1676         trans->u.backref = p1->backref;
 1677         trans->assertions |= ASSERT_BACKREF;
 1678           }
 1679         else
 1680           trans->u.class = p1->class;
 1681         if (p1->neg_classes != NULL)
 1682           {
 1683         for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
 1684         trans->neg_classes =
 1685           xmalloc(sizeof(*trans->neg_classes) * (i + 1));
 1686         if (trans->neg_classes == NULL)
 1687           return REG_ESPACE;
 1688         for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
 1689           trans->neg_classes[i] = p1->neg_classes[i];
 1690         trans->neg_classes[i] = (tre_ctype_t)0;
 1691           }
 1692         else
 1693           trans->neg_classes = NULL;
 1694 
 1695         /* Find out how many tags this transition has. */
 1696         i = 0;
 1697         if (p1->tags != NULL)
 1698           while(p1->tags[i] >= 0)
 1699         i++;
 1700         j = 0;
 1701         if (p2->tags != NULL)
 1702           while(p2->tags[j] >= 0)
 1703         j++;
 1704 
 1705         /* If we are overwriting a transition, free the old tag array. */
 1706         if (trans->tags != NULL)
 1707           xfree(trans->tags);
 1708         trans->tags = NULL;
 1709 
 1710         /* If there were any tags, allocate an array and fill it. */
 1711         if (i + j > 0)
 1712           {
 1713         trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
 1714         if (!trans->tags)
 1715           return REG_ESPACE;
 1716         i = 0;
 1717         if (p1->tags != NULL)
 1718           while(p1->tags[i] >= 0)
 1719             {
 1720               trans->tags[i] = p1->tags[i];
 1721               i++;
 1722             }
 1723         l = i;
 1724         j = 0;
 1725         if (p2->tags != NULL)
 1726           while (p2->tags[j] >= 0)
 1727             {
 1728               /* Don't add duplicates. */
 1729               dup = 0;
 1730               for (k = 0; k < i; k++)
 1731             if (trans->tags[k] == p2->tags[j])
 1732               {
 1733                 dup = 1;
 1734                 break;
 1735               }
 1736               if (!dup)
 1737             trans->tags[l++] = p2->tags[j];
 1738               j++;
 1739             }
 1740         trans->tags[l] = -1;
 1741           }
 1742 
 1743         /* Set the parameter array.  If both `p2' and `p1' have same
 1744            parameters, the values in `p2' override those in `p1'. */
 1745         if (p1->params || p2->params)
 1746           {
 1747         if (!trans->params)
 1748           trans->params = xmalloc(sizeof(*trans->params)
 1749                       * TRE_PARAM_LAST);
 1750         if (!trans->params)
 1751           return REG_ESPACE;
 1752         for (i = 0; i < TRE_PARAM_LAST; i++)
 1753           {
 1754             trans->params[i] = TRE_PARAM_UNSET;
 1755             if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
 1756               trans->params[i] = p1->params[i];
 1757             if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
 1758               trans->params[i] = p2->params[i];
 1759           }
 1760           }
 1761         else
 1762           {
 1763         if (trans->params)
 1764           xfree(trans->params);
 1765         trans->params = NULL;
 1766           }
 1767 
 1768 
 1769 #ifdef TRE_DEBUG
 1770         {
 1771           int *tags;
 1772 
 1773           DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
 1774               p1->code_min));
 1775           if (p1->code_max != p1->code_min)
 1776         DPRINT(("-%3d", p1->code_max));
 1777           tags = trans->tags;
 1778           if (tags)
 1779         {
 1780           DPRINT((", tags ["));
 1781           while (*tags >= 0)
 1782             {
 1783               DPRINT(("%d", *tags));
 1784               tags++;
 1785               if (*tags >= 0)
 1786             DPRINT((","));
 1787             }
 1788           DPRINT(("]"));
 1789         }
 1790           if (trans->assertions)
 1791         DPRINT((", assert %d", trans->assertions));
 1792           if (trans->assertions & ASSERT_BACKREF)
 1793         DPRINT((", backref %d", trans->u.backref));
 1794           else if (trans->u.class)
 1795         DPRINT((", class %ld", (long)trans->u.class));
 1796           if (trans->neg_classes)
 1797         DPRINT((", neg_classes %p", trans->neg_classes));
 1798           if (trans->params)
 1799         {
 1800           DPRINT((", "));
 1801           tre_print_params(trans->params);
 1802         }
 1803           DPRINT(("\n"));
 1804         }
 1805 #endif /* TRE_DEBUG */
 1806         p2++;
 1807       }
 1808     p1++;
 1809       }
 1810   else
 1811     /* Compute a maximum limit for the number of transitions leaving
 1812        from each state. */
 1813     while (p1->position >= 0)
 1814       {
 1815     p2 = orig_p2;
 1816     while (p2->position >= 0)
 1817       {
 1818         counts[p1->position]++;
 1819         p2++;
 1820       }
 1821     p1++;
 1822       }
 1823   return REG_OK;
 1824 }
 1825 
 1826 /* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
 1827    labelled with one character range (there are no transitions on empty
 1828    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
 1829    the regexp. */
 1830 static reg_errcode_t
 1831 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
 1832         int *counts, int *offs)
 1833 {
 1834   tre_union_t *uni;
 1835   tre_catenation_t *cat;
 1836   tre_iteration_t *iter;
 1837   reg_errcode_t errcode = REG_OK;
 1838 
 1839   /* XXX - recurse using a stack!. */
 1840   switch (node->type)
 1841     {
 1842     case LITERAL:
 1843       break;
 1844     case UNION:
 1845       uni = (tre_union_t *)node->obj;
 1846       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
 1847       if (errcode != REG_OK)
 1848     return errcode;
 1849       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
 1850       break;
 1851 
 1852     case CATENATION:
 1853       cat = (tre_catenation_t *)node->obj;
 1854       /* Add a transition from each position in cat->left->lastpos
 1855      to each position in cat->right->firstpos. */
 1856       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
 1857                    transitions, counts, offs);
 1858       if (errcode != REG_OK)
 1859     return errcode;
 1860       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
 1861       if (errcode != REG_OK)
 1862     return errcode;
 1863       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
 1864       break;
 1865 
 1866     case ITERATION:
 1867       iter = (tre_iteration_t *)node->obj;
 1868       assert(iter->max == -1 || iter->max == 1);
 1869 
 1870       if (iter->max == -1)
 1871     {
 1872       assert(iter->min == 0 || iter->min == 1);
 1873       /* Add a transition from each last position in the iterated
 1874          expression to each first position. */
 1875       errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
 1876                    transitions, counts, offs);
 1877       if (errcode != REG_OK)
 1878         return errcode;
 1879     }
 1880       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
 1881       break;
 1882     }
 1883   return errcode;
 1884 }
 1885 
 1886 
 1887 #define ERROR_EXIT(err)       \
 1888   do                  \
 1889     {                 \
 1890       errcode = err;          \
 1891       if (1) goto error_exit;     \
 1892     }                 \
 1893  while (0)
 1894 
 1895 
 1896 int
 1897 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
 1898 {
 1899   tre_stack_t *stack;
 1900   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
 1901   tre_pos_and_tags_t *p;
 1902   int *counts = NULL, *offs = NULL;
 1903   int i, add = 0;
 1904   tre_tnfa_transition_t *transitions, *initial;
 1905   tre_tnfa_t *tnfa = NULL;
 1906   tre_submatch_data_t *submatch_data;
 1907   tre_tag_direction_t *tag_directions = NULL;
 1908   reg_errcode_t errcode;
 1909   tre_mem_t mem;
 1910 
 1911   /* Parse context. */
 1912   tre_parse_ctx_t parse_ctx;
 1913 
 1914   /* Allocate a stack used throughout the compilation process for various
 1915      purposes. */
 1916   stack = tre_stack_new(512, 10240, 128);
 1917   if (!stack)
 1918     return REG_ESPACE;
 1919   /* Allocate a fast memory allocator. */
 1920   mem = tre_mem_new();
 1921   if (!mem)
 1922     {
 1923       tre_stack_destroy(stack);
 1924       return REG_ESPACE;
 1925     }
 1926 
 1927   /* Parse the regexp. */
 1928   memset(&parse_ctx, 0, sizeof(parse_ctx));
 1929   parse_ctx.mem = mem;
 1930   parse_ctx.stack = stack;
 1931   parse_ctx.re = regex;
 1932   parse_ctx.len = n;
 1933   parse_ctx.cflags = cflags;
 1934   parse_ctx.max_backref = -1;
 1935   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
 1936   errcode = tre_parse(&parse_ctx);
 1937   if (errcode != REG_OK)
 1938     ERROR_EXIT(errcode);
 1939   preg->re_nsub = parse_ctx.submatch_id - 1;
 1940   tree = parse_ctx.result;
 1941 
 1942   /* Back references and approximate matching cannot currently be used
 1943      in the same regexp. */
 1944   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
 1945     ERROR_EXIT(REG_BADPAT);
 1946 
 1947 #ifdef TRE_DEBUG
 1948   tre_ast_print(tree);
 1949 #endif /* TRE_DEBUG */
 1950 
 1951   /* Referring to nonexistent subexpressions is illegal. */
 1952   if (parse_ctx.max_backref > (int)preg->re_nsub)
 1953     ERROR_EXIT(REG_ESUBREG);
 1954 
 1955   /* Allocate the TNFA struct. */
 1956   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
 1957   if (tnfa == NULL)
 1958     ERROR_EXIT(REG_ESPACE);
 1959   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
 1960   tnfa->have_approx = parse_ctx.have_approx;
 1961   tnfa->num_submatches = parse_ctx.submatch_id;
 1962 
 1963   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
 1964      regexp does not have back references, this can be skipped. */
 1965   if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
 1966     {
 1967       DPRINT(("tre_compile: setting up tags\n"));
 1968 
 1969       /* Figure out how many tags we will need. */
 1970       errcode = tre_add_tags(NULL, stack, tree, tnfa);
 1971       if (errcode != REG_OK)
 1972     ERROR_EXIT(errcode);
 1973 #ifdef TRE_DEBUG
 1974       tre_ast_print(tree);
 1975 #endif /* TRE_DEBUG */
 1976 
 1977       if (tnfa->num_tags > 0)
 1978     {
 1979       tag_directions = xmalloc(sizeof(*tag_directions)
 1980                    * (tnfa->num_tags + 1));
 1981       if (tag_directions == NULL)
 1982         ERROR_EXIT(REG_ESPACE);
 1983       tnfa->tag_directions = tag_directions;
 1984       memset(tag_directions, -1,
 1985          sizeof(*tag_directions) * (tnfa->num_tags + 1));
 1986     }
 1987       tnfa->minimal_tags = xcalloc(tnfa->num_tags * 2 + 1,
 1988                    sizeof(tnfa->minimal_tags));
 1989       if (tnfa->minimal_tags == NULL)
 1990     ERROR_EXIT(REG_ESPACE);
 1991 
 1992       submatch_data = xcalloc(parse_ctx.submatch_id, sizeof(*submatch_data));
 1993       if (submatch_data == NULL)
 1994     ERROR_EXIT(REG_ESPACE);
 1995       tnfa->submatch_data = submatch_data;
 1996 
 1997       errcode = tre_add_tags(mem, stack, tree, tnfa);
 1998       if (errcode != REG_OK)
 1999     ERROR_EXIT(errcode);
 2000 
 2001 #ifdef TRE_DEBUG
 2002       for (i = 0; i < parse_ctx.submatch_id; i++)
 2003     DPRINT(("pmatch[%d] = {t%d, t%d}\n",
 2004         i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
 2005       for (i = 0; i < tnfa->num_tags; i++)
 2006     DPRINT(("t%d is %s\n", i,
 2007         tag_directions[i] == TRE_TAG_MINIMIZE ?
 2008         "minimized" : "maximized"));
 2009 #endif /* TRE_DEBUG */
 2010     }
 2011 
 2012   /* Expand iteration nodes. */
 2013   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
 2014                tag_directions, &tnfa->params_depth);
 2015   if (errcode != REG_OK)
 2016     ERROR_EXIT(errcode);
 2017 
 2018   /* Add a dummy node for the final state.
 2019      XXX - For certain patterns this dummy node can be optimized away,
 2020        for example "a*" or "ab*".   Figure out a simple way to detect
 2021        this possibility. */
 2022   tmp_ast_l = tree;
 2023   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
 2024   if (tmp_ast_r == NULL)
 2025     ERROR_EXIT(REG_ESPACE);
 2026 
 2027   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
 2028   if (tree == NULL)
 2029     ERROR_EXIT(REG_ESPACE);
 2030 
 2031 #ifdef TRE_DEBUG
 2032   tre_ast_print(tree);
 2033   DPRINT(("Number of states: %d\n", parse_ctx.position));
 2034 #endif /* TRE_DEBUG */
 2035 
 2036   errcode = tre_compute_nfl(mem, stack, tree);
 2037   if (errcode != REG_OK)
 2038     ERROR_EXIT(errcode);
 2039 
 2040   counts = xmalloc(sizeof(int) * parse_ctx.position);
 2041   if (counts == NULL)
 2042     ERROR_EXIT(REG_ESPACE);
 2043 
 2044   offs = xmalloc(sizeof(int) * parse_ctx.position);
 2045   if (offs == NULL)
 2046     ERROR_EXIT(REG_ESPACE);
 2047 
 2048   for (i = 0; i < parse_ctx.position; i++)
 2049     counts[i] = 0;
 2050   tre_ast_to_tnfa(tree, NULL, counts, NULL);
 2051 
 2052   add = 0;
 2053   for (i = 0; i < parse_ctx.position; i++)
 2054     {
 2055       offs[i] = add;
 2056       add += counts[i] + 1;
 2057       counts[i] = 0;
 2058     }
 2059   transitions = xcalloc(add + 1, sizeof(*transitions));
 2060   if (transitions == NULL)
 2061     ERROR_EXIT(REG_ESPACE);
 2062   tnfa->transitions = transitions;
 2063   tnfa->num_transitions = add;
 2064 
 2065   DPRINT(("Converting to TNFA:\n"));
 2066   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
 2067   if (errcode != REG_OK)
 2068     ERROR_EXIT(errcode);
 2069 
 2070   /* If in eight bit mode, compute a table of characters that can be the
 2071      first character of a match. */
 2072   tnfa->first_char = -1;
 2073   if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
 2074     {
 2075       int count = 0;
 2076       int k;
 2077       DPRINT(("Characters that can start a match:"));
 2078       tnfa->firstpos_chars = xcalloc(256, sizeof(char));
 2079       if (tnfa->firstpos_chars == NULL)
 2080     ERROR_EXIT(REG_ESPACE);
 2081       for (p = tree->firstpos; p->position >= 0; p++)
 2082     {
 2083       tre_tnfa_transition_t *j = transitions + offs[p->position];
 2084       while (j->state != NULL)
 2085         {
 2086           for (k = j->code_min; k <= j->code_max && k < 256; k++)
 2087         {
 2088           DPRINT((" %d", k));
 2089           tnfa->firstpos_chars[k] = 1;
 2090           count++;
 2091         }
 2092           j++;
 2093         }
 2094     }
 2095       DPRINT(("\n"));
 2096 #define TRE_OPTIMIZE_FIRST_CHAR 1
 2097 #if TRE_OPTIMIZE_FIRST_CHAR
 2098       if (count == 1)
 2099     {
 2100       for (k = 0; k < 256; k++)
 2101         if (tnfa->firstpos_chars[k])
 2102           {
 2103         DPRINT(("first char must be %d\n", k));
 2104         tnfa->first_char = k;
 2105         xfree(tnfa->firstpos_chars);
 2106         tnfa->firstpos_chars = NULL;
 2107         break;
 2108           }
 2109     }
 2110 #endif
 2111 
 2112     }
 2113   else
 2114     tnfa->firstpos_chars = NULL;
 2115 
 2116 
 2117   p = tree->firstpos;
 2118   i = 0;
 2119   while (p->position >= 0)
 2120     {
 2121       i++;
 2122 
 2123 #ifdef TRE_DEBUG
 2124       {
 2125     int *tags;
 2126     DPRINT(("initial: %d", p->position));
 2127     tags = p->tags;
 2128     if (tags != NULL)
 2129       {
 2130         if (*tags >= 0)
 2131           DPRINT(("/"));
 2132         while (*tags >= 0)
 2133           {
 2134         DPRINT(("%d", *tags));
 2135         tags++;
 2136         if (*tags >= 0)
 2137           DPRINT((","));
 2138           }
 2139       }
 2140     DPRINT((", assert %d", p->assertions));
 2141     if (p->params)
 2142       {
 2143         DPRINT((", "));
 2144         tre_print_params(p->params);
 2145       }
 2146     DPRINT(("\n"));
 2147       }
 2148 #endif /* TRE_DEBUG */
 2149 
 2150       p++;
 2151     }
 2152 
 2153   initial = xcalloc(i + 1, sizeof(tre_tnfa_transition_t));
 2154   if (initial == NULL)
 2155     ERROR_EXIT(REG_ESPACE);
 2156   tnfa->initial = initial;
 2157 
 2158   i = 0;
 2159   for (p = tree->firstpos; p->position >= 0; p++)
 2160     {
 2161       initial[i].state = transitions + offs[p->position];
 2162       initial[i].state_id = p->position;
 2163       initial[i].tags = NULL;
 2164       /* Copy the arrays p->tags, and p->params, they are allocated
 2165      from a tre_mem object. */
 2166       if (p->tags)
 2167     {
 2168       int j;
 2169       for (j = 0; p->tags[j] >= 0; j++);
 2170       initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
 2171       if (!initial[i].tags)
 2172         ERROR_EXIT(REG_ESPACE);
 2173       memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
 2174     }
 2175       initial[i].params = NULL;
 2176       if (p->params)
 2177     {
 2178       initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
 2179       if (!initial[i].params)
 2180         ERROR_EXIT(REG_ESPACE);
 2181       memcpy(initial[i].params, p->params,
 2182          sizeof(*p->params) * TRE_PARAM_LAST);
 2183     }
 2184       initial[i].assertions = p->assertions;
 2185       i++;
 2186     }
 2187   initial[i].state = NULL;
 2188 
 2189   tnfa->num_transitions = add;
 2190   tnfa->final = transitions + offs[tree->lastpos[0].position];
 2191   tnfa->num_states = parse_ctx.position;
 2192   tnfa->cflags = cflags;
 2193 
 2194   DPRINT(("final state %p\n", (void *)tnfa->final));
 2195 
 2196   tre_mem_destroy(mem);
 2197   tre_stack_destroy(stack);
 2198   xfree(counts);
 2199   xfree(offs);
 2200 
 2201   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
 2202   return REG_OK;
 2203 
 2204  error_exit:
 2205   /* Free everything that was allocated and return the error code. */
 2206   tre_mem_destroy(mem);
 2207   if (stack != NULL)
 2208     tre_stack_destroy(stack);
 2209   if (counts != NULL)
 2210     xfree(counts);
 2211   if (offs != NULL)
 2212     xfree(offs);
 2213   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
 2214   tre_free(preg);
 2215   return errcode;
 2216 }
 2217 
 2218 
 2219 
 2220 
 2221 void
 2222 tre_free(regex_t *preg)
 2223 {
 2224   tre_tnfa_t *tnfa;
 2225   unsigned int i;
 2226   tre_tnfa_transition_t *trans;
 2227 
 2228   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
 2229   if (!tnfa)
 2230     return;
 2231 
 2232   for (i = 0; i < tnfa->num_transitions; i++)
 2233     if (tnfa->transitions[i].state)
 2234       {
 2235     if (tnfa->transitions[i].tags)
 2236       xfree(tnfa->transitions[i].tags);
 2237     if (tnfa->transitions[i].neg_classes)
 2238       xfree(tnfa->transitions[i].neg_classes);
 2239     if (tnfa->transitions[i].params)
 2240       xfree(tnfa->transitions[i].params);
 2241       }
 2242   if (tnfa->transitions)
 2243     xfree(tnfa->transitions);
 2244 
 2245   if (tnfa->initial)
 2246     {
 2247       for (trans = tnfa->initial; trans->state; trans++)
 2248     {
 2249       if (trans->tags)
 2250         xfree(trans->tags);
 2251       if (trans->params)
 2252         xfree(trans->params);
 2253     }
 2254       xfree(tnfa->initial);
 2255     }
 2256 
 2257   if (tnfa->submatch_data)
 2258     {
 2259       for (i = 0; i < tnfa->num_submatches; i++)
 2260     if (tnfa->submatch_data[i].parents)
 2261       xfree(tnfa->submatch_data[i].parents);
 2262       xfree(tnfa->submatch_data);
 2263     }
 2264 
 2265   if (tnfa->tag_directions)
 2266     xfree(tnfa->tag_directions);
 2267   if (tnfa->firstpos_chars)
 2268     xfree(tnfa->firstpos_chars);
 2269   if (tnfa->minimal_tags)
 2270     xfree(tnfa->minimal_tags);
 2271   xfree(tnfa);
 2272 }
 2273 
 2274 char *
 2275 tre_version(void)
 2276 {
 2277   static char str[256];
 2278   char *version;
 2279 
 2280   if (str[0] == 0)
 2281     {
 2282       tre_config(TRE_CONFIG_VERSION, &version);
 2283       sprintf(str, "TRE %s (LGPL)", version);
 2284     }
 2285   return str;
 2286 }
 2287 
 2288 int
 2289 tre_config(int query, void *result)
 2290 {
 2291   int *int_result = result;
 2292   char **string_result = result;
 2293 
 2294   switch (query)
 2295     {
 2296     case TRE_CONFIG_APPROX:
 2297 #ifdef TRE_APPROX
 2298       *int_result = 1;
 2299 #else /* !TRE_APPROX */
 2300       *int_result = 0;
 2301 #endif /* !TRE_APPROX */
 2302       return REG_OK;
 2303 
 2304     case TRE_CONFIG_WCHAR:
 2305 #ifdef TRE_WCHAR
 2306       *int_result = 1;
 2307 #else /* !TRE_WCHAR */
 2308       *int_result = 0;
 2309 #endif /* !TRE_WCHAR */
 2310       return REG_OK;
 2311 
 2312     case TRE_CONFIG_MULTIBYTE:
 2313 #ifdef TRE_MULTIBYTE
 2314       *int_result = 1;
 2315 #else /* !TRE_MULTIBYTE */
 2316       *int_result = 0;
 2317 #endif /* !TRE_MULTIBYTE */
 2318       return REG_OK;
 2319 
 2320     case TRE_CONFIG_SYSTEM_ABI:
 2321 #ifdef TRE_CONFIG_SYSTEM_ABI
 2322       *int_result = 1;
 2323 #else /* !TRE_CONFIG_SYSTEM_ABI */
 2324       *int_result = 0;
 2325 #endif /* !TRE_CONFIG_SYSTEM_ABI */
 2326       return REG_OK;
 2327 
 2328     case TRE_CONFIG_VERSION:
 2329       *string_result = TRE_VERSION;
 2330       return REG_OK;
 2331     }
 2332 
 2333   return REG_NOMATCH;
 2334 }
 2335 
 2336 
 2337 /* EOF */