"Fossies" - the Fresh Open Source Software Archive

Member "gnuastro-0.8/bootstrapped/lib/regexec.c" (28 Dec 2018, 130233 Bytes) of package /linux/privat/gnuastro-0.8.tar.lz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "regexec.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.7_vs_0.8.

    1 /* Extended regular expression matching and search library.
    2    Copyright (C) 2002-2018 Free Software Foundation, Inc.
    3    This file is part of the GNU C Library.
    4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
    5 
    6    The GNU C Library is free software; you can redistribute it and/or
    7    modify it under the terms of the GNU General Public
    8    License as published by the Free Software Foundation; either
    9    version 3 of the License, or (at your option) any later version.
   10 
   11    The GNU C Library is distributed in the hope that it will be useful,
   12    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14    General Public License for more details.
   15 
   16    You should have received a copy of the GNU General Public
   17    License along with the GNU C Library; if not, see
   18    <https://www.gnu.org/licenses/>.  */
   19 
   20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
   21                      Idx n);
   22 static void match_ctx_clean (re_match_context_t *mctx);
   23 static void match_ctx_free (re_match_context_t *cache);
   24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
   25                       Idx str_idx, Idx from, Idx to);
   26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
   27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
   28                        Idx str_idx);
   29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
   30                             Idx node, Idx str_idx);
   31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
   32                re_dfastate_t **limited_sts, Idx last_node,
   33                Idx last_str_idx);
   34 static reg_errcode_t re_search_internal (const regex_t *preg,
   35                      const char *string, Idx length,
   36                      Idx start, Idx last_start, Idx stop,
   37                      size_t nmatch, regmatch_t pmatch[],
   38                      int eflags);
   39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
   40                   const char *string1, Idx length1,
   41                   const char *string2, Idx length2,
   42                   Idx start, regoff_t range,
   43                   struct re_registers *regs,
   44                   Idx stop, bool ret_len);
   45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
   46                 const char *string, Idx length, Idx start,
   47                 regoff_t range, Idx stop,
   48                 struct re_registers *regs,
   49                 bool ret_len);
   50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
   51                               Idx nregs, int regs_allocated);
   52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
   53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
   54                Idx *p_match_first);
   55 static Idx check_halt_state_context (const re_match_context_t *mctx,
   56                      const re_dfastate_t *state, Idx idx);
   57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
   58              regmatch_t *prev_idx_match, Idx cur_node,
   59              Idx cur_idx, Idx nmatch);
   60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
   61                       Idx str_idx, Idx dest_node, Idx nregs,
   62                       regmatch_t *regs,
   63                       re_node_set *eps_via_nodes);
   64 static reg_errcode_t set_regs (const regex_t *preg,
   65                    const re_match_context_t *mctx,
   66                    size_t nmatch, regmatch_t *pmatch,
   67                    bool fl_backtrack);
   68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
   69 
   70 #ifdef RE_ENABLE_I18N
   71 static int sift_states_iter_mb (const re_match_context_t *mctx,
   72                 re_sift_context_t *sctx,
   73                 Idx node_idx, Idx str_idx, Idx max_str_idx);
   74 #endif /* RE_ENABLE_I18N */
   75 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
   76                        re_sift_context_t *sctx);
   77 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
   78                       re_sift_context_t *sctx, Idx str_idx,
   79                       re_node_set *cur_dest);
   80 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
   81                           re_sift_context_t *sctx,
   82                           Idx str_idx,
   83                           re_node_set *dest_nodes);
   84 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
   85                         re_node_set *dest_nodes,
   86                         const re_node_set *candidates);
   87 static bool check_dst_limits (const re_match_context_t *mctx,
   88                   const re_node_set *limits,
   89                   Idx dst_node, Idx dst_idx, Idx src_node,
   90                   Idx src_idx);
   91 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
   92                     int boundaries, Idx subexp_idx,
   93                     Idx from_node, Idx bkref_idx);
   94 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
   95                       Idx limit, Idx subexp_idx,
   96                       Idx node, Idx str_idx,
   97                       Idx bkref_idx);
   98 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
   99                       re_node_set *dest_nodes,
  100                       const re_node_set *candidates,
  101                       re_node_set *limits,
  102                       struct re_backref_cache_entry *bkref_ents,
  103                       Idx str_idx);
  104 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
  105                     re_sift_context_t *sctx,
  106                     Idx str_idx, const re_node_set *candidates);
  107 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
  108                     re_dfastate_t **dst,
  109                     re_dfastate_t **src, Idx num);
  110 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
  111                      re_match_context_t *mctx);
  112 static re_dfastate_t *transit_state (reg_errcode_t *err,
  113                      re_match_context_t *mctx,
  114                      re_dfastate_t *state);
  115 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
  116                         re_match_context_t *mctx,
  117                         re_dfastate_t *next_state);
  118 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
  119                         re_node_set *cur_nodes,
  120                         Idx str_idx);
  121 #if 0
  122 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
  123                     re_match_context_t *mctx,
  124                     re_dfastate_t *pstate);
  125 #endif
  126 #ifdef RE_ENABLE_I18N
  127 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
  128                        re_dfastate_t *pstate);
  129 #endif /* RE_ENABLE_I18N */
  130 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
  131                       const re_node_set *nodes);
  132 static reg_errcode_t get_subexp (re_match_context_t *mctx,
  133                  Idx bkref_node, Idx bkref_str_idx);
  134 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
  135                      const re_sub_match_top_t *sub_top,
  136                      re_sub_match_last_t *sub_last,
  137                      Idx bkref_node, Idx bkref_str);
  138 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
  139                  Idx subexp_idx, int type);
  140 static reg_errcode_t check_arrival (re_match_context_t *mctx,
  141                     state_array_t *path, Idx top_node,
  142                     Idx top_str, Idx last_node, Idx last_str,
  143                     int type);
  144 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
  145                            Idx str_idx,
  146                            re_node_set *cur_nodes,
  147                            re_node_set *next_nodes);
  148 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
  149                            re_node_set *cur_nodes,
  150                            Idx ex_subexp, int type);
  151 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
  152                            re_node_set *dst_nodes,
  153                            Idx target, Idx ex_subexp,
  154                            int type);
  155 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
  156                      re_node_set *cur_nodes, Idx cur_str,
  157                      Idx subexp_num, int type);
  158 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
  159 #ifdef RE_ENABLE_I18N
  160 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
  161                     const re_string_t *input, Idx idx);
  162 # ifdef _LIBC
  163 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
  164                            size_t name_len);
  165 # endif /* _LIBC */
  166 #endif /* RE_ENABLE_I18N */
  167 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
  168                        const re_dfastate_t *state,
  169                        re_node_set *states_node,
  170                        bitset_t *states_ch);
  171 static bool check_node_accept (const re_match_context_t *mctx,
  172                    const re_token_t *node, Idx idx);
  173 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
  174 
  175 /* Entry point for POSIX code.  */
  176 
  177 /* regexec searches for a given pattern, specified by PREG, in the
  178    string STRING.
  179 
  180    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
  181    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
  182    least NMATCH elements, and we set them to the offsets of the
  183    corresponding matched substrings.
  184 
  185    EFLAGS specifies "execution flags" which affect matching: if
  186    REG_NOTBOL is set, then ^ does not match at the beginning of the
  187    string; if REG_NOTEOL is set, then $ does not match at the end.
  188 
  189    We return 0 if we find a match and REG_NOMATCH if not.  */
  190 
  191 int
  192 regexec (const regex_t *__restrict preg, const char *__restrict string,
  193      size_t nmatch, regmatch_t pmatch[], int eflags)
  194 {
  195   reg_errcode_t err;
  196   Idx start, length;
  197   re_dfa_t *dfa = preg->buffer;
  198 
  199   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
  200     return REG_BADPAT;
  201 
  202   if (eflags & REG_STARTEND)
  203     {
  204       start = pmatch[0].rm_so;
  205       length = pmatch[0].rm_eo;
  206     }
  207   else
  208     {
  209       start = 0;
  210       length = strlen (string);
  211     }
  212 
  213   lock_lock (dfa->lock);
  214   if (preg->no_sub)
  215     err = re_search_internal (preg, string, length, start, length,
  216                   length, 0, NULL, eflags);
  217   else
  218     err = re_search_internal (preg, string, length, start, length,
  219                   length, nmatch, pmatch, eflags);
  220   lock_unlock (dfa->lock);
  221   return err != REG_NOERROR;
  222 }
  223 
  224 #ifdef _LIBC
  225 libc_hidden_def (__regexec)
  226 
  227 # include <shlib-compat.h>
  228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
  229 
  230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
  231 __typeof__ (__regexec) __compat_regexec;
  232 
  233 int
  234 attribute_compat_text_section
  235 __compat_regexec (const regex_t *__restrict preg,
  236           const char *__restrict string, size_t nmatch,
  237           regmatch_t pmatch[], int eflags)
  238 {
  239   return regexec (preg, string, nmatch, pmatch,
  240           eflags & (REG_NOTBOL | REG_NOTEOL));
  241 }
  242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
  243 # endif
  244 #endif
  245 
  246 /* Entry points for GNU code.  */
  247 
  248 /* re_match, re_search, re_match_2, re_search_2
  249 
  250    The former two functions operate on STRING with length LENGTH,
  251    while the later two operate on concatenation of STRING1 and STRING2
  252    with lengths LENGTH1 and LENGTH2, respectively.
  253 
  254    re_match() matches the compiled pattern in BUFP against the string,
  255    starting at index START.
  256 
  257    re_search() first tries matching at index START, then it tries to match
  258    starting from index START + 1, and so on.  The last start position tried
  259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
  260    way as re_match().)
  261 
  262    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
  263    the first STOP characters of the concatenation of the strings should be
  264    concerned.
  265 
  266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
  267    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
  268    computed relative to the concatenation, not relative to the individual
  269    strings.)
  270 
  271    On success, re_match* functions return the length of the match, re_search*
  272    return the position of the start of the match.  Return value -1 means no
  273    match was found and -2 indicates an internal error.  */
  274 
  275 regoff_t
  276 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
  277       Idx start, struct re_registers *regs)
  278 {
  279   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
  280 }
  281 #ifdef _LIBC
  282 weak_alias (__re_match, re_match)
  283 #endif
  284 
  285 regoff_t
  286 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
  287        Idx start, regoff_t range, struct re_registers *regs)
  288 {
  289   return re_search_stub (bufp, string, length, start, range, length, regs,
  290              false);
  291 }
  292 #ifdef _LIBC
  293 weak_alias (__re_search, re_search)
  294 #endif
  295 
  296 regoff_t
  297 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
  298         const char *string2, Idx length2, Idx start,
  299         struct re_registers *regs, Idx stop)
  300 {
  301   return re_search_2_stub (bufp, string1, length1, string2, length2,
  302                start, 0, regs, stop, true);
  303 }
  304 #ifdef _LIBC
  305 weak_alias (__re_match_2, re_match_2)
  306 #endif
  307 
  308 regoff_t
  309 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
  310          const char *string2, Idx length2, Idx start, regoff_t range,
  311          struct re_registers *regs, Idx stop)
  312 {
  313   return re_search_2_stub (bufp, string1, length1, string2, length2,
  314                start, range, regs, stop, false);
  315 }
  316 #ifdef _LIBC
  317 weak_alias (__re_search_2, re_search_2)
  318 #endif
  319 
  320 static regoff_t
  321 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
  322           Idx length1, const char *string2, Idx length2, Idx start,
  323           regoff_t range, struct re_registers *regs,
  324           Idx stop, bool ret_len)
  325 {
  326   const char *str;
  327   regoff_t rval;
  328   Idx len;
  329   char *s = NULL;
  330 
  331   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
  332              || INT_ADD_WRAPV (length1, length2, &len))))
  333     return -2;
  334 
  335   /* Concatenate the strings.  */
  336   if (length2 > 0)
  337     if (length1 > 0)
  338       {
  339     s = re_malloc (char, len);
  340 
  341     if (__glibc_unlikely (s == NULL))
  342       return -2;
  343 #ifdef _LIBC
  344     memcpy (__mempcpy (s, string1, length1), string2, length2);
  345 #else
  346     memcpy (s, string1, length1);
  347     memcpy (s + length1, string2, length2);
  348 #endif
  349     str = s;
  350       }
  351     else
  352       str = string2;
  353   else
  354     str = string1;
  355 
  356   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
  357              ret_len);
  358   re_free (s);
  359   return rval;
  360 }
  361 
  362 /* The parameters have the same meaning as those of re_search.
  363    Additional parameters:
  364    If RET_LEN is true the length of the match is returned (re_match style);
  365    otherwise the position of the match is returned.  */
  366 
  367 static regoff_t
  368 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
  369         Idx start, regoff_t range, Idx stop, struct re_registers *regs,
  370         bool ret_len)
  371 {
  372   reg_errcode_t result;
  373   regmatch_t *pmatch;
  374   Idx nregs;
  375   regoff_t rval;
  376   int eflags = 0;
  377   re_dfa_t *dfa = bufp->buffer;
  378   Idx last_start = start + range;
  379 
  380   /* Check for out-of-range.  */
  381   if (__glibc_unlikely (start < 0 || start > length))
  382     return -1;
  383   if (__glibc_unlikely (length < last_start
  384             || (0 <= range && last_start < start)))
  385     last_start = length;
  386   else if (__glibc_unlikely (last_start < 0
  387                  || (range < 0 && start <= last_start)))
  388     last_start = 0;
  389 
  390   lock_lock (dfa->lock);
  391 
  392   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
  393   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
  394 
  395   /* Compile fastmap if we haven't yet.  */
  396   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
  397     re_compile_fastmap (bufp);
  398 
  399   if (__glibc_unlikely (bufp->no_sub))
  400     regs = NULL;
  401 
  402   /* We need at least 1 register.  */
  403   if (regs == NULL)
  404     nregs = 1;
  405   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
  406                  && regs->num_regs <= bufp->re_nsub))
  407     {
  408       nregs = regs->num_regs;
  409       if (__glibc_unlikely (nregs < 1))
  410     {
  411       /* Nothing can be copied to regs.  */
  412       regs = NULL;
  413       nregs = 1;
  414     }
  415     }
  416   else
  417     nregs = bufp->re_nsub + 1;
  418   pmatch = re_malloc (regmatch_t, nregs);
  419   if (__glibc_unlikely (pmatch == NULL))
  420     {
  421       rval = -2;
  422       goto out;
  423     }
  424 
  425   result = re_search_internal (bufp, string, length, start, last_start, stop,
  426                    nregs, pmatch, eflags);
  427 
  428   rval = 0;
  429 
  430   /* I hope we needn't fill their regs with -1's when no match was found.  */
  431   if (result != REG_NOERROR)
  432     rval = result == REG_NOMATCH ? -1 : -2;
  433   else if (regs != NULL)
  434     {
  435       /* If caller wants register contents data back, copy them.  */
  436       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
  437                        bufp->regs_allocated);
  438       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
  439     rval = -2;
  440     }
  441 
  442   if (__glibc_likely (rval == 0))
  443     {
  444       if (ret_len)
  445     {
  446       assert (pmatch[0].rm_so == start);
  447       rval = pmatch[0].rm_eo - start;
  448     }
  449       else
  450     rval = pmatch[0].rm_so;
  451     }
  452   re_free (pmatch);
  453  out:
  454   lock_unlock (dfa->lock);
  455   return rval;
  456 }
  457 
  458 static unsigned
  459 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
  460           int regs_allocated)
  461 {
  462   int rval = REGS_REALLOCATE;
  463   Idx i;
  464   Idx need_regs = nregs + 1;
  465   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
  466      uses.  */
  467 
  468   /* Have the register data arrays been allocated?  */
  469   if (regs_allocated == REGS_UNALLOCATED)
  470     { /* No.  So allocate them with malloc.  */
  471       regs->start = re_malloc (regoff_t, need_regs);
  472       if (__glibc_unlikely (regs->start == NULL))
  473     return REGS_UNALLOCATED;
  474       regs->end = re_malloc (regoff_t, need_regs);
  475       if (__glibc_unlikely (regs->end == NULL))
  476     {
  477       re_free (regs->start);
  478       return REGS_UNALLOCATED;
  479     }
  480       regs->num_regs = need_regs;
  481     }
  482   else if (regs_allocated == REGS_REALLOCATE)
  483     { /* Yes.  If we need more elements than were already
  484      allocated, reallocate them.  If we need fewer, just
  485      leave it alone.  */
  486       if (__glibc_unlikely (need_regs > regs->num_regs))
  487     {
  488       regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
  489       regoff_t *new_end;
  490       if (__glibc_unlikely (new_start == NULL))
  491         return REGS_UNALLOCATED;
  492       new_end = re_realloc (regs->end, regoff_t, need_regs);
  493       if (__glibc_unlikely (new_end == NULL))
  494         {
  495           re_free (new_start);
  496           return REGS_UNALLOCATED;
  497         }
  498       regs->start = new_start;
  499       regs->end = new_end;
  500       regs->num_regs = need_regs;
  501     }
  502     }
  503   else
  504     {
  505       assert (regs_allocated == REGS_FIXED);
  506       /* This function may not be called with REGS_FIXED and nregs too big.  */
  507       assert (regs->num_regs >= nregs);
  508       rval = REGS_FIXED;
  509     }
  510 
  511   /* Copy the regs.  */
  512   for (i = 0; i < nregs; ++i)
  513     {
  514       regs->start[i] = pmatch[i].rm_so;
  515       regs->end[i] = pmatch[i].rm_eo;
  516     }
  517   for ( ; i < regs->num_regs; ++i)
  518     regs->start[i] = regs->end[i] = -1;
  519 
  520   return rval;
  521 }
  522 
  523 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
  524    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
  525    this memory for recording register information.  STARTS and ENDS
  526    must be allocated using the malloc library routine, and must each
  527    be at least NUM_REGS * sizeof (regoff_t) bytes long.
  528 
  529    If NUM_REGS == 0, then subsequent matches should allocate their own
  530    register data.
  531 
  532    Unless this function is called, the first search or match using
  533    PATTERN_BUFFER will allocate its own register data, without
  534    freeing the old data.  */
  535 
  536 void
  537 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
  538           __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
  539 {
  540   if (num_regs)
  541     {
  542       bufp->regs_allocated = REGS_REALLOCATE;
  543       regs->num_regs = num_regs;
  544       regs->start = starts;
  545       regs->end = ends;
  546     }
  547   else
  548     {
  549       bufp->regs_allocated = REGS_UNALLOCATED;
  550       regs->num_regs = 0;
  551       regs->start = regs->end = NULL;
  552     }
  553 }
  554 #ifdef _LIBC
  555 weak_alias (__re_set_registers, re_set_registers)
  556 #endif
  557 
  558 /* Entry points compatible with 4.2 BSD regex library.  We don't define
  559    them unless specifically requested.  */
  560 
  561 #if defined _REGEX_RE_COMP || defined _LIBC
  562 int
  563 # ifdef _LIBC
  564 weak_function
  565 # endif
  566 re_exec (const char *s)
  567 {
  568   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
  569 }
  570 #endif /* _REGEX_RE_COMP */
  571 
  572 /* Internal entry point.  */
  573 
  574 /* Searches for a compiled pattern PREG in the string STRING, whose
  575    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
  576    meaning as with regexec.  LAST_START is START + RANGE, where
  577    START and RANGE have the same meaning as with re_search.
  578    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
  579    otherwise return the error code.
  580    Note: We assume front end functions already check ranges.
  581    (0 <= LAST_START && LAST_START <= LENGTH)  */
  582 
  583 static reg_errcode_t
  584 __attribute_warn_unused_result__
  585 re_search_internal (const regex_t *preg, const char *string, Idx length,
  586             Idx start, Idx last_start, Idx stop, size_t nmatch,
  587             regmatch_t pmatch[], int eflags)
  588 {
  589   reg_errcode_t err;
  590   const re_dfa_t *dfa = preg->buffer;
  591   Idx left_lim, right_lim;
  592   int incr;
  593   bool fl_longest_match;
  594   int match_kind;
  595   Idx match_first;
  596   Idx match_last = -1;
  597   Idx extra_nmatch;
  598   bool sb;
  599   int ch;
  600 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
  601   re_match_context_t mctx = { .dfa = dfa };
  602 #else
  603   re_match_context_t mctx;
  604 #endif
  605   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
  606             && start != last_start && !preg->can_be_null)
  607            ? preg->fastmap : NULL);
  608   RE_TRANSLATE_TYPE t = preg->translate;
  609 
  610 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
  611   memset (&mctx, '\0', sizeof (re_match_context_t));
  612   mctx.dfa = dfa;
  613 #endif
  614 
  615   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
  616   nmatch -= extra_nmatch;
  617 
  618   /* Check if the DFA haven't been compiled.  */
  619   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
  620             || dfa->init_state_word == NULL
  621             || dfa->init_state_nl == NULL
  622             || dfa->init_state_begbuf == NULL))
  623     return REG_NOMATCH;
  624 
  625 #ifdef DEBUG
  626   /* We assume front-end functions already check them.  */
  627   assert (0 <= last_start && last_start <= length);
  628 #endif
  629 
  630   /* If initial states with non-begbuf contexts have no elements,
  631      the regex must be anchored.  If preg->newline_anchor is set,
  632      we'll never use init_state_nl, so do not check it.  */
  633   if (dfa->init_state->nodes.nelem == 0
  634       && dfa->init_state_word->nodes.nelem == 0
  635       && (dfa->init_state_nl->nodes.nelem == 0
  636       || !preg->newline_anchor))
  637     {
  638       if (start != 0 && last_start != 0)
  639         return REG_NOMATCH;
  640       start = last_start = 0;
  641     }
  642 
  643   /* We must check the longest matching, if nmatch > 0.  */
  644   fl_longest_match = (nmatch != 0 || dfa->nbackref);
  645 
  646   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
  647                 preg->translate, (preg->syntax & RE_ICASE) != 0,
  648                 dfa);
  649   if (__glibc_unlikely (err != REG_NOERROR))
  650     goto free_return;
  651   mctx.input.stop = stop;
  652   mctx.input.raw_stop = stop;
  653   mctx.input.newline_anchor = preg->newline_anchor;
  654 
  655   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
  656   if (__glibc_unlikely (err != REG_NOERROR))
  657     goto free_return;
  658 
  659   /* We will log all the DFA states through which the dfa pass,
  660      if nmatch > 1, or this dfa has "multibyte node", which is a
  661      back-reference or a node which can accept multibyte character or
  662      multi character collating element.  */
  663   if (nmatch > 1 || dfa->has_mb_node)
  664     {
  665       /* Avoid overflow.  */
  666       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
  667                  <= mctx.input.bufs_len)))
  668     {
  669       err = REG_ESPACE;
  670       goto free_return;
  671     }
  672 
  673       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
  674       if (__glibc_unlikely (mctx.state_log == NULL))
  675     {
  676       err = REG_ESPACE;
  677       goto free_return;
  678     }
  679     }
  680   else
  681     mctx.state_log = NULL;
  682 
  683   match_first = start;
  684   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
  685                : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
  686 
  687   /* Check incrementally whether the input string matches.  */
  688   incr = (last_start < start) ? -1 : 1;
  689   left_lim = (last_start < start) ? last_start : start;
  690   right_lim = (last_start < start) ? start : last_start;
  691   sb = dfa->mb_cur_max == 1;
  692   match_kind =
  693     (fastmap
  694      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
  695     | (start <= last_start ? 2 : 0)
  696     | (t != NULL ? 1 : 0))
  697      : 8);
  698 
  699   for (;; match_first += incr)
  700     {
  701       err = REG_NOMATCH;
  702       if (match_first < left_lim || right_lim < match_first)
  703     goto free_return;
  704 
  705       /* Advance as rapidly as possible through the string, until we
  706      find a plausible place to start matching.  This may be done
  707      with varying efficiency, so there are various possibilities:
  708      only the most common of them are specialized, in order to
  709      save on code size.  We use a switch statement for speed.  */
  710       switch (match_kind)
  711     {
  712     case 8:
  713       /* No fastmap.  */
  714       break;
  715 
  716     case 7:
  717       /* Fastmap with single-byte translation, match forward.  */
  718       while (__glibc_likely (match_first < right_lim)
  719          && !fastmap[t[(unsigned char) string[match_first]]])
  720         ++match_first;
  721       goto forward_match_found_start_or_reached_end;
  722 
  723     case 6:
  724       /* Fastmap without translation, match forward.  */
  725       while (__glibc_likely (match_first < right_lim)
  726          && !fastmap[(unsigned char) string[match_first]])
  727         ++match_first;
  728 
  729     forward_match_found_start_or_reached_end:
  730       if (__glibc_unlikely (match_first == right_lim))
  731         {
  732           ch = match_first >= length
  733                ? 0 : (unsigned char) string[match_first];
  734           if (!fastmap[t ? t[ch] : ch])
  735         goto free_return;
  736         }
  737       break;
  738 
  739     case 4:
  740     case 5:
  741       /* Fastmap without multi-byte translation, match backwards.  */
  742       while (match_first >= left_lim)
  743         {
  744           ch = match_first >= length
  745                ? 0 : (unsigned char) string[match_first];
  746           if (fastmap[t ? t[ch] : ch])
  747         break;
  748           --match_first;
  749         }
  750       if (match_first < left_lim)
  751         goto free_return;
  752       break;
  753 
  754     default:
  755       /* In this case, we can't determine easily the current byte,
  756          since it might be a component byte of a multibyte
  757          character.  Then we use the constructed buffer instead.  */
  758       for (;;)
  759         {
  760           /* If MATCH_FIRST is out of the valid range, reconstruct the
  761          buffers.  */
  762           __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
  763           if (__glibc_unlikely (offset
  764                     >= (__re_size_t) mctx.input.valid_raw_len))
  765         {
  766           err = re_string_reconstruct (&mctx.input, match_first,
  767                            eflags);
  768           if (__glibc_unlikely (err != REG_NOERROR))
  769             goto free_return;
  770 
  771           offset = match_first - mctx.input.raw_mbs_idx;
  772         }
  773           /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
  774          Note that MATCH_FIRST must not be smaller than 0.  */
  775           ch = (match_first >= length
  776             ? 0 : re_string_byte_at (&mctx.input, offset));
  777           if (fastmap[ch])
  778         break;
  779           match_first += incr;
  780           if (match_first < left_lim || match_first > right_lim)
  781         {
  782           err = REG_NOMATCH;
  783           goto free_return;
  784         }
  785         }
  786       break;
  787     }
  788 
  789       /* Reconstruct the buffers so that the matcher can assume that
  790      the matching starts from the beginning of the buffer.  */
  791       err = re_string_reconstruct (&mctx.input, match_first, eflags);
  792       if (__glibc_unlikely (err != REG_NOERROR))
  793     goto free_return;
  794 
  795 #ifdef RE_ENABLE_I18N
  796      /* Don't consider this char as a possible match start if it part,
  797     yet isn't the head, of a multibyte character.  */
  798       if (!sb && !re_string_first_byte (&mctx.input, 0))
  799     continue;
  800 #endif
  801 
  802       /* It seems to be appropriate one, then use the matcher.  */
  803       /* We assume that the matching starts from 0.  */
  804       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
  805       match_last = check_matching (&mctx, fl_longest_match,
  806                    start <= last_start ? &match_first : NULL);
  807       if (match_last != -1)
  808     {
  809       if (__glibc_unlikely (match_last == -2))
  810         {
  811           err = REG_ESPACE;
  812           goto free_return;
  813         }
  814       else
  815         {
  816           mctx.match_last = match_last;
  817           if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
  818         {
  819           re_dfastate_t *pstate = mctx.state_log[match_last];
  820           mctx.last_node = check_halt_state_context (&mctx, pstate,
  821                                  match_last);
  822         }
  823           if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
  824           || dfa->nbackref)
  825         {
  826           err = prune_impossible_nodes (&mctx);
  827           if (err == REG_NOERROR)
  828             break;
  829           if (__glibc_unlikely (err != REG_NOMATCH))
  830             goto free_return;
  831           match_last = -1;
  832         }
  833           else
  834         break; /* We found a match.  */
  835         }
  836     }
  837 
  838       match_ctx_clean (&mctx);
  839     }
  840 
  841 #ifdef DEBUG
  842   assert (match_last != -1);
  843   assert (err == REG_NOERROR);
  844 #endif
  845 
  846   /* Set pmatch[] if we need.  */
  847   if (nmatch > 0)
  848     {
  849       Idx reg_idx;
  850 
  851       /* Initialize registers.  */
  852       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
  853     pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
  854 
  855       /* Set the points where matching start/end.  */
  856       pmatch[0].rm_so = 0;
  857       pmatch[0].rm_eo = mctx.match_last;
  858       /* FIXME: This function should fail if mctx.match_last exceeds
  859      the maximum possible regoff_t value.  We need a new error
  860      code REG_OVERFLOW.  */
  861 
  862       if (!preg->no_sub && nmatch > 1)
  863     {
  864       err = set_regs (preg, &mctx, nmatch, pmatch,
  865               dfa->has_plural_match && dfa->nbackref > 0);
  866       if (__glibc_unlikely (err != REG_NOERROR))
  867         goto free_return;
  868     }
  869 
  870       /* At last, add the offset to each register, since we slid
  871      the buffers so that we could assume that the matching starts
  872      from 0.  */
  873       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
  874     if (pmatch[reg_idx].rm_so != -1)
  875       {
  876 #ifdef RE_ENABLE_I18N
  877         if (__glibc_unlikely (mctx.input.offsets_needed != 0))
  878           {
  879         pmatch[reg_idx].rm_so =
  880           (pmatch[reg_idx].rm_so == mctx.input.valid_len
  881            ? mctx.input.valid_raw_len
  882            : mctx.input.offsets[pmatch[reg_idx].rm_so]);
  883         pmatch[reg_idx].rm_eo =
  884           (pmatch[reg_idx].rm_eo == mctx.input.valid_len
  885            ? mctx.input.valid_raw_len
  886            : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
  887           }
  888 #else
  889         assert (mctx.input.offsets_needed == 0);
  890 #endif
  891         pmatch[reg_idx].rm_so += match_first;
  892         pmatch[reg_idx].rm_eo += match_first;
  893       }
  894       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
  895     {
  896       pmatch[nmatch + reg_idx].rm_so = -1;
  897       pmatch[nmatch + reg_idx].rm_eo = -1;
  898     }
  899 
  900       if (dfa->subexp_map)
  901     for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
  902       if (dfa->subexp_map[reg_idx] != reg_idx)
  903         {
  904           pmatch[reg_idx + 1].rm_so
  905         = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
  906           pmatch[reg_idx + 1].rm_eo
  907         = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
  908         }
  909     }
  910 
  911  free_return:
  912   re_free (mctx.state_log);
  913   if (dfa->nbackref)
  914     match_ctx_free (&mctx);
  915   re_string_destruct (&mctx.input);
  916   return err;
  917 }
  918 
  919 static reg_errcode_t
  920 __attribute_warn_unused_result__
  921 prune_impossible_nodes (re_match_context_t *mctx)
  922 {
  923   const re_dfa_t *const dfa = mctx->dfa;
  924   Idx halt_node, match_last;
  925   reg_errcode_t ret;
  926   re_dfastate_t **sifted_states;
  927   re_dfastate_t **lim_states = NULL;
  928   re_sift_context_t sctx;
  929 #ifdef DEBUG
  930   assert (mctx->state_log != NULL);
  931 #endif
  932   match_last = mctx->match_last;
  933   halt_node = mctx->last_node;
  934 
  935   /* Avoid overflow.  */
  936   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
  937             <= match_last))
  938     return REG_ESPACE;
  939 
  940   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
  941   if (__glibc_unlikely (sifted_states == NULL))
  942     {
  943       ret = REG_ESPACE;
  944       goto free_return;
  945     }
  946   if (dfa->nbackref)
  947     {
  948       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
  949       if (__glibc_unlikely (lim_states == NULL))
  950     {
  951       ret = REG_ESPACE;
  952       goto free_return;
  953     }
  954       while (1)
  955     {
  956       memset (lim_states, '\0',
  957           sizeof (re_dfastate_t *) * (match_last + 1));
  958       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
  959              match_last);
  960       ret = sift_states_backward (mctx, &sctx);
  961       re_node_set_free (&sctx.limits);
  962       if (__glibc_unlikely (ret != REG_NOERROR))
  963           goto free_return;
  964       if (sifted_states[0] != NULL || lim_states[0] != NULL)
  965         break;
  966       do
  967         {
  968           --match_last;
  969           if (match_last < 0)
  970         {
  971           ret = REG_NOMATCH;
  972           goto free_return;
  973         }
  974         } while (mctx->state_log[match_last] == NULL
  975              || !mctx->state_log[match_last]->halt);
  976       halt_node = check_halt_state_context (mctx,
  977                         mctx->state_log[match_last],
  978                         match_last);
  979     }
  980       ret = merge_state_array (dfa, sifted_states, lim_states,
  981                    match_last + 1);
  982       re_free (lim_states);
  983       lim_states = NULL;
  984       if (__glibc_unlikely (ret != REG_NOERROR))
  985     goto free_return;
  986     }
  987   else
  988     {
  989       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
  990       ret = sift_states_backward (mctx, &sctx);
  991       re_node_set_free (&sctx.limits);
  992       if (__glibc_unlikely (ret != REG_NOERROR))
  993     goto free_return;
  994       if (sifted_states[0] == NULL)
  995     {
  996       ret = REG_NOMATCH;
  997       goto free_return;
  998     }
  999     }
 1000   re_free (mctx->state_log);
 1001   mctx->state_log = sifted_states;
 1002   sifted_states = NULL;
 1003   mctx->last_node = halt_node;
 1004   mctx->match_last = match_last;
 1005   ret = REG_NOERROR;
 1006  free_return:
 1007   re_free (sifted_states);
 1008   re_free (lim_states);
 1009   return ret;
 1010 }
 1011 
 1012 /* Acquire an initial state and return it.
 1013    We must select appropriate initial state depending on the context,
 1014    since initial states may have constraints like "\<", "^", etc..  */
 1015 
 1016 static inline re_dfastate_t *
 1017 __attribute__ ((always_inline))
 1018 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
 1019                 Idx idx)
 1020 {
 1021   const re_dfa_t *const dfa = mctx->dfa;
 1022   if (dfa->init_state->has_constraint)
 1023     {
 1024       unsigned int context;
 1025       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
 1026       if (IS_WORD_CONTEXT (context))
 1027     return dfa->init_state_word;
 1028       else if (IS_ORDINARY_CONTEXT (context))
 1029     return dfa->init_state;
 1030       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 1031     return dfa->init_state_begbuf;
 1032       else if (IS_NEWLINE_CONTEXT (context))
 1033     return dfa->init_state_nl;
 1034       else if (IS_BEGBUF_CONTEXT (context))
 1035     {
 1036       /* It is relatively rare case, then calculate on demand.  */
 1037       return re_acquire_state_context (err, dfa,
 1038                        dfa->init_state->entrance_nodes,
 1039                        context);
 1040     }
 1041       else
 1042     /* Must not happen?  */
 1043     return dfa->init_state;
 1044     }
 1045   else
 1046     return dfa->init_state;
 1047 }
 1048 
 1049 /* Check whether the regular expression match input string INPUT or not,
 1050    and return the index where the matching end.  Return -1 if
 1051    there is no match, and return -2 in case of an error.
 1052    FL_LONGEST_MATCH means we want the POSIX longest matching.
 1053    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
 1054    next place where we may want to try matching.
 1055    Note that the matcher assumes that the matching starts from the current
 1056    index of the buffer.  */
 1057 
 1058 static Idx
 1059 __attribute_warn_unused_result__
 1060 check_matching (re_match_context_t *mctx, bool fl_longest_match,
 1061         Idx *p_match_first)
 1062 {
 1063   const re_dfa_t *const dfa = mctx->dfa;
 1064   reg_errcode_t err;
 1065   Idx match = 0;
 1066   Idx match_last = -1;
 1067   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
 1068   re_dfastate_t *cur_state;
 1069   bool at_init_state = p_match_first != NULL;
 1070   Idx next_start_idx = cur_str_idx;
 1071 
 1072   err = REG_NOERROR;
 1073   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
 1074   /* An initial state must not be NULL (invalid).  */
 1075   if (__glibc_unlikely (cur_state == NULL))
 1076     {
 1077       assert (err == REG_ESPACE);
 1078       return -2;
 1079     }
 1080 
 1081   if (mctx->state_log != NULL)
 1082     {
 1083       mctx->state_log[cur_str_idx] = cur_state;
 1084 
 1085       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
 1086      later.  E.g. Processing back references.  */
 1087       if (__glibc_unlikely (dfa->nbackref))
 1088     {
 1089       at_init_state = false;
 1090       err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
 1091       if (__glibc_unlikely (err != REG_NOERROR))
 1092         return err;
 1093 
 1094       if (cur_state->has_backref)
 1095         {
 1096           err = transit_state_bkref (mctx, &cur_state->nodes);
 1097           if (__glibc_unlikely (err != REG_NOERROR))
 1098         return err;
 1099         }
 1100     }
 1101     }
 1102 
 1103   /* If the RE accepts NULL string.  */
 1104   if (__glibc_unlikely (cur_state->halt))
 1105     {
 1106       if (!cur_state->has_constraint
 1107       || check_halt_state_context (mctx, cur_state, cur_str_idx))
 1108     {
 1109       if (!fl_longest_match)
 1110         return cur_str_idx;
 1111       else
 1112         {
 1113           match_last = cur_str_idx;
 1114           match = 1;
 1115         }
 1116     }
 1117     }
 1118 
 1119   while (!re_string_eoi (&mctx->input))
 1120     {
 1121       re_dfastate_t *old_state = cur_state;
 1122       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
 1123 
 1124       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
 1125        && mctx->input.bufs_len < mctx->input.len)
 1126       || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
 1127           && mctx->input.valid_len < mctx->input.len))
 1128     {
 1129       err = extend_buffers (mctx, next_char_idx + 1);
 1130       if (__glibc_unlikely (err != REG_NOERROR))
 1131         {
 1132           assert (err == REG_ESPACE);
 1133           return -2;
 1134         }
 1135     }
 1136 
 1137       cur_state = transit_state (&err, mctx, cur_state);
 1138       if (mctx->state_log != NULL)
 1139     cur_state = merge_state_with_log (&err, mctx, cur_state);
 1140 
 1141       if (cur_state == NULL)
 1142     {
 1143       /* Reached the invalid state or an error.  Try to recover a valid
 1144          state using the state log, if available and if we have not
 1145          already found a valid (even if not the longest) match.  */
 1146       if (__glibc_unlikely (err != REG_NOERROR))
 1147         return -2;
 1148 
 1149       if (mctx->state_log == NULL
 1150           || (match && !fl_longest_match)
 1151           || (cur_state = find_recover_state (&err, mctx)) == NULL)
 1152         break;
 1153     }
 1154 
 1155       if (__glibc_unlikely (at_init_state))
 1156     {
 1157       if (old_state == cur_state)
 1158         next_start_idx = next_char_idx;
 1159       else
 1160         at_init_state = false;
 1161     }
 1162 
 1163       if (cur_state->halt)
 1164     {
 1165       /* Reached a halt state.
 1166          Check the halt state can satisfy the current context.  */
 1167       if (!cur_state->has_constraint
 1168           || check_halt_state_context (mctx, cur_state,
 1169                        re_string_cur_idx (&mctx->input)))
 1170         {
 1171           /* We found an appropriate halt state.  */
 1172           match_last = re_string_cur_idx (&mctx->input);
 1173           match = 1;
 1174 
 1175           /* We found a match, do not modify match_first below.  */
 1176           p_match_first = NULL;
 1177           if (!fl_longest_match)
 1178         break;
 1179         }
 1180     }
 1181     }
 1182 
 1183   if (p_match_first)
 1184     *p_match_first += next_start_idx;
 1185 
 1186   return match_last;
 1187 }
 1188 
 1189 /* Check NODE match the current context.  */
 1190 
 1191 static bool
 1192 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
 1193 {
 1194   re_token_type_t type = dfa->nodes[node].type;
 1195   unsigned int constraint = dfa->nodes[node].constraint;
 1196   if (type != END_OF_RE)
 1197     return false;
 1198   if (!constraint)
 1199     return true;
 1200   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
 1201     return false;
 1202   return true;
 1203 }
 1204 
 1205 /* Check the halt state STATE match the current context.
 1206    Return 0 if not match, if the node, STATE has, is a halt node and
 1207    match the context, return the node.  */
 1208 
 1209 static Idx
 1210 check_halt_state_context (const re_match_context_t *mctx,
 1211               const re_dfastate_t *state, Idx idx)
 1212 {
 1213   Idx i;
 1214   unsigned int context;
 1215 #ifdef DEBUG
 1216   assert (state->halt);
 1217 #endif
 1218   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
 1219   for (i = 0; i < state->nodes.nelem; ++i)
 1220     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
 1221       return state->nodes.elems[i];
 1222   return 0;
 1223 }
 1224 
 1225 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
 1226    corresponding to the DFA).
 1227    Return the destination node, and update EPS_VIA_NODES;
 1228    return -1 in case of errors.  */
 1229 
 1230 static Idx
 1231 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
 1232            Idx *pidx, Idx node, re_node_set *eps_via_nodes,
 1233            struct re_fail_stack_t *fs)
 1234 {
 1235   const re_dfa_t *const dfa = mctx->dfa;
 1236   Idx i;
 1237   bool ok;
 1238   if (IS_EPSILON_NODE (dfa->nodes[node].type))
 1239     {
 1240       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
 1241       re_node_set *edests = &dfa->edests[node];
 1242       Idx dest_node;
 1243       ok = re_node_set_insert (eps_via_nodes, node);
 1244       if (__glibc_unlikely (! ok))
 1245     return -2;
 1246       /* Pick up a valid destination, or return -1 if none
 1247      is found.  */
 1248       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
 1249     {
 1250       Idx candidate = edests->elems[i];
 1251       if (!re_node_set_contains (cur_nodes, candidate))
 1252         continue;
 1253           if (dest_node == -1)
 1254         dest_node = candidate;
 1255 
 1256       else
 1257         {
 1258           /* In order to avoid infinite loop like "(a*)*", return the second
 1259          epsilon-transition if the first was already considered.  */
 1260           if (re_node_set_contains (eps_via_nodes, dest_node))
 1261         return candidate;
 1262 
 1263           /* Otherwise, push the second epsilon-transition on the fail stack.  */
 1264           else if (fs != NULL
 1265                && push_fail_stack (fs, *pidx, candidate, nregs, regs,
 1266                        eps_via_nodes))
 1267         return -2;
 1268 
 1269           /* We know we are going to exit.  */
 1270           break;
 1271         }
 1272     }
 1273       return dest_node;
 1274     }
 1275   else
 1276     {
 1277       Idx naccepted = 0;
 1278       re_token_type_t type = dfa->nodes[node].type;
 1279 
 1280 #ifdef RE_ENABLE_I18N
 1281       if (dfa->nodes[node].accept_mb)
 1282     naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
 1283       else
 1284 #endif /* RE_ENABLE_I18N */
 1285       if (type == OP_BACK_REF)
 1286     {
 1287       Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
 1288       naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
 1289       if (fs != NULL)
 1290         {
 1291           if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
 1292         return -1;
 1293           else if (naccepted)
 1294         {
 1295           char *buf = (char *) re_string_get_buffer (&mctx->input);
 1296           if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
 1297                   naccepted) != 0)
 1298             return -1;
 1299         }
 1300         }
 1301 
 1302       if (naccepted == 0)
 1303         {
 1304           Idx dest_node;
 1305           ok = re_node_set_insert (eps_via_nodes, node);
 1306           if (__glibc_unlikely (! ok))
 1307         return -2;
 1308           dest_node = dfa->edests[node].elems[0];
 1309           if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
 1310                     dest_node))
 1311         return dest_node;
 1312         }
 1313     }
 1314 
 1315       if (naccepted != 0
 1316       || check_node_accept (mctx, dfa->nodes + node, *pidx))
 1317     {
 1318       Idx dest_node = dfa->nexts[node];
 1319       *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
 1320       if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
 1321              || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
 1322                            dest_node)))
 1323         return -1;
 1324       re_node_set_empty (eps_via_nodes);
 1325       return dest_node;
 1326     }
 1327     }
 1328   return -1;
 1329 }
 1330 
 1331 static reg_errcode_t
 1332 __attribute_warn_unused_result__
 1333 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
 1334          Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
 1335 {
 1336   reg_errcode_t err;
 1337   Idx num = fs->num++;
 1338   if (fs->num == fs->alloc)
 1339     {
 1340       struct re_fail_stack_ent_t *new_array;
 1341       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
 1342                               fs->alloc * 2);
 1343       if (new_array == NULL)
 1344     return REG_ESPACE;
 1345       fs->alloc *= 2;
 1346       fs->stack = new_array;
 1347     }
 1348   fs->stack[num].idx = str_idx;
 1349   fs->stack[num].node = dest_node;
 1350   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
 1351   if (fs->stack[num].regs == NULL)
 1352     return REG_ESPACE;
 1353   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
 1354   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
 1355   return err;
 1356 }
 1357 
 1358 static Idx
 1359 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
 1360         regmatch_t *regs, re_node_set *eps_via_nodes)
 1361 {
 1362   Idx num = --fs->num;
 1363   assert (num >= 0);
 1364   *pidx = fs->stack[num].idx;
 1365   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
 1366   re_node_set_free (eps_via_nodes);
 1367   re_free (fs->stack[num].regs);
 1368   *eps_via_nodes = fs->stack[num].eps_via_nodes;
 1369   return fs->stack[num].node;
 1370 }
 1371 
 1372 /* Set the positions where the subexpressions are starts/ends to registers
 1373    PMATCH.
 1374    Note: We assume that pmatch[0] is already set, and
 1375    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
 1376 
 1377 static reg_errcode_t
 1378 __attribute_warn_unused_result__
 1379 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
 1380       regmatch_t *pmatch, bool fl_backtrack)
 1381 {
 1382   const re_dfa_t *dfa = preg->buffer;
 1383   Idx idx, cur_node;
 1384   re_node_set eps_via_nodes;
 1385   struct re_fail_stack_t *fs;
 1386   struct re_fail_stack_t fs_body = { 0, 2, NULL };
 1387   regmatch_t *prev_idx_match;
 1388   bool prev_idx_match_malloced = false;
 1389 
 1390 #ifdef DEBUG
 1391   assert (nmatch > 1);
 1392   assert (mctx->state_log != NULL);
 1393 #endif
 1394   if (fl_backtrack)
 1395     {
 1396       fs = &fs_body;
 1397       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
 1398       if (fs->stack == NULL)
 1399     return REG_ESPACE;
 1400     }
 1401   else
 1402     fs = NULL;
 1403 
 1404   cur_node = dfa->init_node;
 1405   re_node_set_init_empty (&eps_via_nodes);
 1406 
 1407   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
 1408     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
 1409   else
 1410     {
 1411       prev_idx_match = re_malloc (regmatch_t, nmatch);
 1412       if (prev_idx_match == NULL)
 1413     {
 1414       free_fail_stack_return (fs);
 1415       return REG_ESPACE;
 1416     }
 1417       prev_idx_match_malloced = true;
 1418     }
 1419   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
 1420 
 1421   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
 1422     {
 1423       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
 1424 
 1425       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
 1426     {
 1427       Idx reg_idx;
 1428       if (fs)
 1429         {
 1430           for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 1431         if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
 1432           break;
 1433           if (reg_idx == nmatch)
 1434         {
 1435           re_node_set_free (&eps_via_nodes);
 1436           if (prev_idx_match_malloced)
 1437             re_free (prev_idx_match);
 1438           return free_fail_stack_return (fs);
 1439         }
 1440           cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
 1441                      &eps_via_nodes);
 1442         }
 1443       else
 1444         {
 1445           re_node_set_free (&eps_via_nodes);
 1446           if (prev_idx_match_malloced)
 1447         re_free (prev_idx_match);
 1448           return REG_NOERROR;
 1449         }
 1450     }
 1451 
 1452       /* Proceed to next node.  */
 1453       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
 1454                     &eps_via_nodes, fs);
 1455 
 1456       if (__glibc_unlikely (cur_node < 0))
 1457     {
 1458       if (__glibc_unlikely (cur_node == -2))
 1459         {
 1460           re_node_set_free (&eps_via_nodes);
 1461           if (prev_idx_match_malloced)
 1462         re_free (prev_idx_match);
 1463           free_fail_stack_return (fs);
 1464           return REG_ESPACE;
 1465         }
 1466       if (fs)
 1467         cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
 1468                        &eps_via_nodes);
 1469       else
 1470         {
 1471           re_node_set_free (&eps_via_nodes);
 1472           if (prev_idx_match_malloced)
 1473         re_free (prev_idx_match);
 1474           return REG_NOMATCH;
 1475         }
 1476     }
 1477     }
 1478   re_node_set_free (&eps_via_nodes);
 1479   if (prev_idx_match_malloced)
 1480     re_free (prev_idx_match);
 1481   return free_fail_stack_return (fs);
 1482 }
 1483 
 1484 static reg_errcode_t
 1485 free_fail_stack_return (struct re_fail_stack_t *fs)
 1486 {
 1487   if (fs)
 1488     {
 1489       Idx fs_idx;
 1490       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
 1491     {
 1492       re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
 1493       re_free (fs->stack[fs_idx].regs);
 1494     }
 1495       re_free (fs->stack);
 1496     }
 1497   return REG_NOERROR;
 1498 }
 1499 
 1500 static void
 1501 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
 1502          regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
 1503 {
 1504   int type = dfa->nodes[cur_node].type;
 1505   if (type == OP_OPEN_SUBEXP)
 1506     {
 1507       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
 1508 
 1509       /* We are at the first node of this sub expression.  */
 1510       if (reg_num < nmatch)
 1511     {
 1512       pmatch[reg_num].rm_so = cur_idx;
 1513       pmatch[reg_num].rm_eo = -1;
 1514     }
 1515     }
 1516   else if (type == OP_CLOSE_SUBEXP)
 1517     {
 1518       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
 1519       if (reg_num < nmatch)
 1520     {
 1521       /* We are at the last node of this sub expression.  */
 1522       if (pmatch[reg_num].rm_so < cur_idx)
 1523         {
 1524           pmatch[reg_num].rm_eo = cur_idx;
 1525           /* This is a non-empty match or we are not inside an optional
 1526          subexpression.  Accept this right away.  */
 1527           memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
 1528         }
 1529       else
 1530         {
 1531           if (dfa->nodes[cur_node].opt_subexp
 1532           && prev_idx_match[reg_num].rm_so != -1)
 1533         /* We transited through an empty match for an optional
 1534            subexpression, like (a?)*, and this is not the subexp's
 1535            first match.  Copy back the old content of the registers
 1536            so that matches of an inner subexpression are undone as
 1537            well, like in ((a?))*.  */
 1538         memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
 1539           else
 1540         /* We completed a subexpression, but it may be part of
 1541            an optional one, so do not update PREV_IDX_MATCH.  */
 1542         pmatch[reg_num].rm_eo = cur_idx;
 1543         }
 1544     }
 1545     }
 1546 }
 1547 
 1548 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
 1549    and sift the nodes in each states according to the following rules.
 1550    Updated state_log will be wrote to STATE_LOG.
 1551 
 1552    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
 1553      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
 1554     If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
 1555     the LAST_NODE, we throw away the node 'a'.
 1556      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
 1557     string 's' and transit to 'b':
 1558     i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
 1559        away the node 'a'.
 1560     ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
 1561         thrown away, we throw away the node 'a'.
 1562      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
 1563     i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
 1564        node 'a'.
 1565     ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
 1566         we throw away the node 'a'.  */
 1567 
 1568 #define STATE_NODE_CONTAINS(state,node) \
 1569   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
 1570 
 1571 static reg_errcode_t
 1572 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
 1573 {
 1574   reg_errcode_t err;
 1575   int null_cnt = 0;
 1576   Idx str_idx = sctx->last_str_idx;
 1577   re_node_set cur_dest;
 1578 
 1579 #ifdef DEBUG
 1580   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
 1581 #endif
 1582 
 1583   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
 1584      transit to the last_node and the last_node itself.  */
 1585   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
 1586   if (__glibc_unlikely (err != REG_NOERROR))
 1587     return err;
 1588   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
 1589   if (__glibc_unlikely (err != REG_NOERROR))
 1590     goto free_return;
 1591 
 1592   /* Then check each states in the state_log.  */
 1593   while (str_idx > 0)
 1594     {
 1595       /* Update counters.  */
 1596       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
 1597       if (null_cnt > mctx->max_mb_elem_len)
 1598     {
 1599       memset (sctx->sifted_states, '\0',
 1600           sizeof (re_dfastate_t *) * str_idx);
 1601       re_node_set_free (&cur_dest);
 1602       return REG_NOERROR;
 1603     }
 1604       re_node_set_empty (&cur_dest);
 1605       --str_idx;
 1606 
 1607       if (mctx->state_log[str_idx])
 1608     {
 1609       err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
 1610       if (__glibc_unlikely (err != REG_NOERROR))
 1611         goto free_return;
 1612     }
 1613 
 1614       /* Add all the nodes which satisfy the following conditions:
 1615      - It can epsilon transit to a node in CUR_DEST.
 1616      - It is in CUR_SRC.
 1617      And update state_log.  */
 1618       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
 1619       if (__glibc_unlikely (err != REG_NOERROR))
 1620     goto free_return;
 1621     }
 1622   err = REG_NOERROR;
 1623  free_return:
 1624   re_node_set_free (&cur_dest);
 1625   return err;
 1626 }
 1627 
 1628 static reg_errcode_t
 1629 __attribute_warn_unused_result__
 1630 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
 1631              Idx str_idx, re_node_set *cur_dest)
 1632 {
 1633   const re_dfa_t *const dfa = mctx->dfa;
 1634   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
 1635   Idx i;
 1636 
 1637   /* Then build the next sifted state.
 1638      We build the next sifted state on 'cur_dest', and update
 1639      'sifted_states[str_idx]' with 'cur_dest'.
 1640      Note:
 1641      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
 1642      'cur_src' points the node_set of the old 'state_log[str_idx]'
 1643      (with the epsilon nodes pre-filtered out).  */
 1644   for (i = 0; i < cur_src->nelem; i++)
 1645     {
 1646       Idx prev_node = cur_src->elems[i];
 1647       int naccepted = 0;
 1648       bool ok;
 1649 
 1650 #ifdef DEBUG
 1651       re_token_type_t type = dfa->nodes[prev_node].type;
 1652       assert (!IS_EPSILON_NODE (type));
 1653 #endif
 1654 #ifdef RE_ENABLE_I18N
 1655       /* If the node may accept "multi byte".  */
 1656       if (dfa->nodes[prev_node].accept_mb)
 1657     naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
 1658                      str_idx, sctx->last_str_idx);
 1659 #endif /* RE_ENABLE_I18N */
 1660 
 1661       /* We don't check backreferences here.
 1662      See update_cur_sifted_state().  */
 1663       if (!naccepted
 1664       && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
 1665       && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
 1666                   dfa->nexts[prev_node]))
 1667     naccepted = 1;
 1668 
 1669       if (naccepted == 0)
 1670     continue;
 1671 
 1672       if (sctx->limits.nelem)
 1673     {
 1674       Idx to_idx = str_idx + naccepted;
 1675       if (check_dst_limits (mctx, &sctx->limits,
 1676                 dfa->nexts[prev_node], to_idx,
 1677                 prev_node, str_idx))
 1678         continue;
 1679     }
 1680       ok = re_node_set_insert (cur_dest, prev_node);
 1681       if (__glibc_unlikely (! ok))
 1682     return REG_ESPACE;
 1683     }
 1684 
 1685   return REG_NOERROR;
 1686 }
 1687 
 1688 /* Helper functions.  */
 1689 
 1690 static reg_errcode_t
 1691 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
 1692 {
 1693   Idx top = mctx->state_log_top;
 1694 
 1695   if ((next_state_log_idx >= mctx->input.bufs_len
 1696        && mctx->input.bufs_len < mctx->input.len)
 1697       || (next_state_log_idx >= mctx->input.valid_len
 1698       && mctx->input.valid_len < mctx->input.len))
 1699     {
 1700       reg_errcode_t err;
 1701       err = extend_buffers (mctx, next_state_log_idx + 1);
 1702       if (__glibc_unlikely (err != REG_NOERROR))
 1703     return err;
 1704     }
 1705 
 1706   if (top < next_state_log_idx)
 1707     {
 1708       memset (mctx->state_log + top + 1, '\0',
 1709           sizeof (re_dfastate_t *) * (next_state_log_idx - top));
 1710       mctx->state_log_top = next_state_log_idx;
 1711     }
 1712   return REG_NOERROR;
 1713 }
 1714 
 1715 static reg_errcode_t
 1716 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
 1717            re_dfastate_t **src, Idx num)
 1718 {
 1719   Idx st_idx;
 1720   reg_errcode_t err;
 1721   for (st_idx = 0; st_idx < num; ++st_idx)
 1722     {
 1723       if (dst[st_idx] == NULL)
 1724     dst[st_idx] = src[st_idx];
 1725       else if (src[st_idx] != NULL)
 1726     {
 1727       re_node_set merged_set;
 1728       err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
 1729                     &src[st_idx]->nodes);
 1730       if (__glibc_unlikely (err != REG_NOERROR))
 1731         return err;
 1732       dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
 1733       re_node_set_free (&merged_set);
 1734       if (__glibc_unlikely (err != REG_NOERROR))
 1735         return err;
 1736     }
 1737     }
 1738   return REG_NOERROR;
 1739 }
 1740 
 1741 static reg_errcode_t
 1742 update_cur_sifted_state (const re_match_context_t *mctx,
 1743              re_sift_context_t *sctx, Idx str_idx,
 1744              re_node_set *dest_nodes)
 1745 {
 1746   const re_dfa_t *const dfa = mctx->dfa;
 1747   reg_errcode_t err = REG_NOERROR;
 1748   const re_node_set *candidates;
 1749   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
 1750         : &mctx->state_log[str_idx]->nodes);
 1751 
 1752   if (dest_nodes->nelem == 0)
 1753     sctx->sifted_states[str_idx] = NULL;
 1754   else
 1755     {
 1756       if (candidates)
 1757     {
 1758       /* At first, add the nodes which can epsilon transit to a node in
 1759          DEST_NODE.  */
 1760       err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
 1761       if (__glibc_unlikely (err != REG_NOERROR))
 1762         return err;
 1763 
 1764       /* Then, check the limitations in the current sift_context.  */
 1765       if (sctx->limits.nelem)
 1766         {
 1767           err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
 1768                      mctx->bkref_ents, str_idx);
 1769           if (__glibc_unlikely (err != REG_NOERROR))
 1770         return err;
 1771         }
 1772     }
 1773 
 1774       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
 1775       if (__glibc_unlikely (err != REG_NOERROR))
 1776     return err;
 1777     }
 1778 
 1779   if (candidates && mctx->state_log[str_idx]->has_backref)
 1780     {
 1781       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
 1782       if (__glibc_unlikely (err != REG_NOERROR))
 1783     return err;
 1784     }
 1785   return REG_NOERROR;
 1786 }
 1787 
 1788 static reg_errcode_t
 1789 __attribute_warn_unused_result__
 1790 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
 1791                const re_node_set *candidates)
 1792 {
 1793   reg_errcode_t err = REG_NOERROR;
 1794   Idx i;
 1795 
 1796   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
 1797   if (__glibc_unlikely (err != REG_NOERROR))
 1798     return err;
 1799 
 1800   if (!state->inveclosure.alloc)
 1801     {
 1802       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
 1803       if (__glibc_unlikely (err != REG_NOERROR))
 1804     return REG_ESPACE;
 1805       for (i = 0; i < dest_nodes->nelem; i++)
 1806     {
 1807       err = re_node_set_merge (&state->inveclosure,
 1808                    dfa->inveclosures + dest_nodes->elems[i]);
 1809       if (__glibc_unlikely (err != REG_NOERROR))
 1810         return REG_ESPACE;
 1811     }
 1812     }
 1813   return re_node_set_add_intersect (dest_nodes, candidates,
 1814                     &state->inveclosure);
 1815 }
 1816 
 1817 static reg_errcode_t
 1818 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
 1819                const re_node_set *candidates)
 1820 {
 1821     Idx ecl_idx;
 1822     reg_errcode_t err;
 1823     re_node_set *inv_eclosure = dfa->inveclosures + node;
 1824     re_node_set except_nodes;
 1825     re_node_set_init_empty (&except_nodes);
 1826     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
 1827       {
 1828     Idx cur_node = inv_eclosure->elems[ecl_idx];
 1829     if (cur_node == node)
 1830       continue;
 1831     if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
 1832       {
 1833         Idx edst1 = dfa->edests[cur_node].elems[0];
 1834         Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
 1835              ? dfa->edests[cur_node].elems[1] : -1);
 1836         if ((!re_node_set_contains (inv_eclosure, edst1)
 1837          && re_node_set_contains (dest_nodes, edst1))
 1838         || (edst2 > 0
 1839             && !re_node_set_contains (inv_eclosure, edst2)
 1840             && re_node_set_contains (dest_nodes, edst2)))
 1841           {
 1842         err = re_node_set_add_intersect (&except_nodes, candidates,
 1843                          dfa->inveclosures + cur_node);
 1844         if (__glibc_unlikely (err != REG_NOERROR))
 1845           {
 1846             re_node_set_free (&except_nodes);
 1847             return err;
 1848           }
 1849           }
 1850       }
 1851       }
 1852     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
 1853       {
 1854     Idx cur_node = inv_eclosure->elems[ecl_idx];
 1855     if (!re_node_set_contains (&except_nodes, cur_node))
 1856       {
 1857         Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
 1858         re_node_set_remove_at (dest_nodes, idx);
 1859       }
 1860       }
 1861     re_node_set_free (&except_nodes);
 1862     return REG_NOERROR;
 1863 }
 1864 
 1865 static bool
 1866 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
 1867           Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
 1868 {
 1869   const re_dfa_t *const dfa = mctx->dfa;
 1870   Idx lim_idx, src_pos, dst_pos;
 1871 
 1872   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
 1873   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
 1874   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
 1875     {
 1876       Idx subexp_idx;
 1877       struct re_backref_cache_entry *ent;
 1878       ent = mctx->bkref_ents + limits->elems[lim_idx];
 1879       subexp_idx = dfa->nodes[ent->node].opr.idx;
 1880 
 1881       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
 1882                        subexp_idx, dst_node, dst_idx,
 1883                        dst_bkref_idx);
 1884       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
 1885                        subexp_idx, src_node, src_idx,
 1886                        src_bkref_idx);
 1887 
 1888       /* In case of:
 1889      <src> <dst> ( <subexp> )
 1890      ( <subexp> ) <src> <dst>
 1891      ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
 1892       if (src_pos == dst_pos)
 1893     continue; /* This is unrelated limitation.  */
 1894       else
 1895     return true;
 1896     }
 1897   return false;
 1898 }
 1899 
 1900 static int
 1901 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
 1902                  Idx subexp_idx, Idx from_node, Idx bkref_idx)
 1903 {
 1904   const re_dfa_t *const dfa = mctx->dfa;
 1905   const re_node_set *eclosures = dfa->eclosures + from_node;
 1906   Idx node_idx;
 1907 
 1908   /* Else, we are on the boundary: examine the nodes on the epsilon
 1909      closure.  */
 1910   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
 1911     {
 1912       Idx node = eclosures->elems[node_idx];
 1913       switch (dfa->nodes[node].type)
 1914     {
 1915     case OP_BACK_REF:
 1916       if (bkref_idx != -1)
 1917         {
 1918           struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
 1919           do
 1920         {
 1921           Idx dst;
 1922           int cpos;
 1923 
 1924           if (ent->node != node)
 1925             continue;
 1926 
 1927           if (subexp_idx < BITSET_WORD_BITS
 1928               && !(ent->eps_reachable_subexps_map
 1929                & ((bitset_word_t) 1 << subexp_idx)))
 1930             continue;
 1931 
 1932           /* Recurse trying to reach the OP_OPEN_SUBEXP and
 1933              OP_CLOSE_SUBEXP cases below.  But, if the
 1934              destination node is the same node as the source
 1935              node, don't recurse because it would cause an
 1936              infinite loop: a regex that exhibits this behavior
 1937              is ()\1*\1*  */
 1938           dst = dfa->edests[node].elems[0];
 1939           if (dst == from_node)
 1940             {
 1941               if (boundaries & 1)
 1942             return -1;
 1943               else /* if (boundaries & 2) */
 1944             return 0;
 1945             }
 1946 
 1947           cpos =
 1948             check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
 1949                          dst, bkref_idx);
 1950           if (cpos == -1 /* && (boundaries & 1) */)
 1951             return -1;
 1952           if (cpos == 0 && (boundaries & 2))
 1953             return 0;
 1954 
 1955           if (subexp_idx < BITSET_WORD_BITS)
 1956             ent->eps_reachable_subexps_map
 1957               &= ~((bitset_word_t) 1 << subexp_idx);
 1958         }
 1959           while (ent++->more);
 1960         }
 1961       break;
 1962 
 1963     case OP_OPEN_SUBEXP:
 1964       if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
 1965         return -1;
 1966       break;
 1967 
 1968     case OP_CLOSE_SUBEXP:
 1969       if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
 1970         return 0;
 1971       break;
 1972 
 1973     default:
 1974         break;
 1975     }
 1976     }
 1977 
 1978   return (boundaries & 2) ? 1 : 0;
 1979 }
 1980 
 1981 static int
 1982 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
 1983                Idx subexp_idx, Idx from_node, Idx str_idx,
 1984                Idx bkref_idx)
 1985 {
 1986   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
 1987   int boundaries;
 1988 
 1989   /* If we are outside the range of the subexpression, return -1 or 1.  */
 1990   if (str_idx < lim->subexp_from)
 1991     return -1;
 1992 
 1993   if (lim->subexp_to < str_idx)
 1994     return 1;
 1995 
 1996   /* If we are within the subexpression, return 0.  */
 1997   boundaries = (str_idx == lim->subexp_from);
 1998   boundaries |= (str_idx == lim->subexp_to) << 1;
 1999   if (boundaries == 0)
 2000     return 0;
 2001 
 2002   /* Else, examine epsilon closure.  */
 2003   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
 2004                       from_node, bkref_idx);
 2005 }
 2006 
 2007 /* Check the limitations of sub expressions LIMITS, and remove the nodes
 2008    which are against limitations from DEST_NODES. */
 2009 
 2010 static reg_errcode_t
 2011 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
 2012              const re_node_set *candidates, re_node_set *limits,
 2013              struct re_backref_cache_entry *bkref_ents, Idx str_idx)
 2014 {
 2015   reg_errcode_t err;
 2016   Idx node_idx, lim_idx;
 2017 
 2018   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
 2019     {
 2020       Idx subexp_idx;
 2021       struct re_backref_cache_entry *ent;
 2022       ent = bkref_ents + limits->elems[lim_idx];
 2023 
 2024       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
 2025     continue; /* This is unrelated limitation.  */
 2026 
 2027       subexp_idx = dfa->nodes[ent->node].opr.idx;
 2028       if (ent->subexp_to == str_idx)
 2029     {
 2030       Idx ops_node = -1;
 2031       Idx cls_node = -1;
 2032       for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 2033         {
 2034           Idx node = dest_nodes->elems[node_idx];
 2035           re_token_type_t type = dfa->nodes[node].type;
 2036           if (type == OP_OPEN_SUBEXP
 2037           && subexp_idx == dfa->nodes[node].opr.idx)
 2038         ops_node = node;
 2039           else if (type == OP_CLOSE_SUBEXP
 2040                && subexp_idx == dfa->nodes[node].opr.idx)
 2041         cls_node = node;
 2042         }
 2043 
 2044       /* Check the limitation of the open subexpression.  */
 2045       /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
 2046       if (ops_node >= 0)
 2047         {
 2048           err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
 2049                        candidates);
 2050           if (__glibc_unlikely (err != REG_NOERROR))
 2051         return err;
 2052         }
 2053 
 2054       /* Check the limitation of the close subexpression.  */
 2055       if (cls_node >= 0)
 2056         for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 2057           {
 2058         Idx node = dest_nodes->elems[node_idx];
 2059         if (!re_node_set_contains (dfa->inveclosures + node,
 2060                        cls_node)
 2061             && !re_node_set_contains (dfa->eclosures + node,
 2062                           cls_node))
 2063           {
 2064             /* It is against this limitation.
 2065                Remove it form the current sifted state.  */
 2066             err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
 2067                          candidates);
 2068             if (__glibc_unlikely (err != REG_NOERROR))
 2069               return err;
 2070             --node_idx;
 2071           }
 2072           }
 2073     }
 2074       else /* (ent->subexp_to != str_idx)  */
 2075     {
 2076       for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 2077         {
 2078           Idx node = dest_nodes->elems[node_idx];
 2079           re_token_type_t type = dfa->nodes[node].type;
 2080           if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
 2081         {
 2082           if (subexp_idx != dfa->nodes[node].opr.idx)
 2083             continue;
 2084           /* It is against this limitation.
 2085              Remove it form the current sifted state.  */
 2086           err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
 2087                            candidates);
 2088           if (__glibc_unlikely (err != REG_NOERROR))
 2089             return err;
 2090         }
 2091         }
 2092     }
 2093     }
 2094   return REG_NOERROR;
 2095 }
 2096 
 2097 static reg_errcode_t
 2098 __attribute_warn_unused_result__
 2099 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
 2100            Idx str_idx, const re_node_set *candidates)
 2101 {
 2102   const re_dfa_t *const dfa = mctx->dfa;
 2103   reg_errcode_t err;
 2104   Idx node_idx, node;
 2105   re_sift_context_t local_sctx;
 2106   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
 2107 
 2108   if (first_idx == -1)
 2109     return REG_NOERROR;
 2110 
 2111   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
 2112 
 2113   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
 2114     {
 2115       Idx enabled_idx;
 2116       re_token_type_t type;
 2117       struct re_backref_cache_entry *entry;
 2118       node = candidates->elems[node_idx];
 2119       type = dfa->nodes[node].type;
 2120       /* Avoid infinite loop for the REs like "()\1+".  */
 2121       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
 2122     continue;
 2123       if (type != OP_BACK_REF)
 2124     continue;
 2125 
 2126       entry = mctx->bkref_ents + first_idx;
 2127       enabled_idx = first_idx;
 2128       do
 2129     {
 2130       Idx subexp_len;
 2131       Idx to_idx;
 2132       Idx dst_node;
 2133       bool ok;
 2134       re_dfastate_t *cur_state;
 2135 
 2136       if (entry->node != node)
 2137         continue;
 2138       subexp_len = entry->subexp_to - entry->subexp_from;
 2139       to_idx = str_idx + subexp_len;
 2140       dst_node = (subexp_len ? dfa->nexts[node]
 2141               : dfa->edests[node].elems[0]);
 2142 
 2143       if (to_idx > sctx->last_str_idx
 2144           || sctx->sifted_states[to_idx] == NULL
 2145           || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
 2146           || check_dst_limits (mctx, &sctx->limits, node,
 2147                    str_idx, dst_node, to_idx))
 2148         continue;
 2149 
 2150       if (local_sctx.sifted_states == NULL)
 2151         {
 2152           local_sctx = *sctx;
 2153           err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
 2154           if (__glibc_unlikely (err != REG_NOERROR))
 2155         goto free_return;
 2156         }
 2157       local_sctx.last_node = node;
 2158       local_sctx.last_str_idx = str_idx;
 2159       ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
 2160       if (__glibc_unlikely (! ok))
 2161         {
 2162           err = REG_ESPACE;
 2163           goto free_return;
 2164         }
 2165       cur_state = local_sctx.sifted_states[str_idx];
 2166       err = sift_states_backward (mctx, &local_sctx);
 2167       if (__glibc_unlikely (err != REG_NOERROR))
 2168         goto free_return;
 2169       if (sctx->limited_states != NULL)
 2170         {
 2171           err = merge_state_array (dfa, sctx->limited_states,
 2172                        local_sctx.sifted_states,
 2173                        str_idx + 1);
 2174           if (__glibc_unlikely (err != REG_NOERROR))
 2175         goto free_return;
 2176         }
 2177       local_sctx.sifted_states[str_idx] = cur_state;
 2178       re_node_set_remove (&local_sctx.limits, enabled_idx);
 2179 
 2180       /* mctx->bkref_ents may have changed, reload the pointer.  */
 2181       entry = mctx->bkref_ents + enabled_idx;
 2182     }
 2183       while (enabled_idx++, entry++->more);
 2184     }
 2185   err = REG_NOERROR;
 2186  free_return:
 2187   if (local_sctx.sifted_states != NULL)
 2188     {
 2189       re_node_set_free (&local_sctx.limits);
 2190     }
 2191 
 2192   return err;
 2193 }
 2194 
 2195 
 2196 #ifdef RE_ENABLE_I18N
 2197 static int
 2198 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
 2199              Idx node_idx, Idx str_idx, Idx max_str_idx)
 2200 {
 2201   const re_dfa_t *const dfa = mctx->dfa;
 2202   int naccepted;
 2203   /* Check the node can accept "multi byte".  */
 2204   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
 2205   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
 2206       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
 2207                 dfa->nexts[node_idx]))
 2208     /* The node can't accept the "multi byte", or the
 2209        destination was already thrown away, then the node
 2210        could't accept the current input "multi byte".   */
 2211     naccepted = 0;
 2212   /* Otherwise, it is sure that the node could accept
 2213      'naccepted' bytes input.  */
 2214   return naccepted;
 2215 }
 2216 #endif /* RE_ENABLE_I18N */
 2217 
 2218 
 2219 /* Functions for state transition.  */
 2220 
 2221 /* Return the next state to which the current state STATE will transit by
 2222    accepting the current input byte, and update STATE_LOG if necessary.
 2223    If STATE can accept a multibyte char/collating element/back reference
 2224    update the destination of STATE_LOG.  */
 2225 
 2226 static re_dfastate_t *
 2227 __attribute_warn_unused_result__
 2228 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
 2229            re_dfastate_t *state)
 2230 {
 2231   re_dfastate_t **trtable;
 2232   unsigned char ch;
 2233 
 2234 #ifdef RE_ENABLE_I18N
 2235   /* If the current state can accept multibyte.  */
 2236   if (__glibc_unlikely (state->accept_mb))
 2237     {
 2238       *err = transit_state_mb (mctx, state);
 2239       if (__glibc_unlikely (*err != REG_NOERROR))
 2240     return NULL;
 2241     }
 2242 #endif /* RE_ENABLE_I18N */
 2243 
 2244   /* Then decide the next state with the single byte.  */
 2245 #if 0
 2246   if (0)
 2247     /* don't use transition table  */
 2248     return transit_state_sb (err, mctx, state);
 2249 #endif
 2250 
 2251   /* Use transition table  */
 2252   ch = re_string_fetch_byte (&mctx->input);
 2253   for (;;)
 2254     {
 2255       trtable = state->trtable;
 2256       if (__glibc_likely (trtable != NULL))
 2257     return trtable[ch];
 2258 
 2259       trtable = state->word_trtable;
 2260       if (__glibc_likely (trtable != NULL))
 2261     {
 2262       unsigned int context;
 2263       context
 2264         = re_string_context_at (&mctx->input,
 2265                     re_string_cur_idx (&mctx->input) - 1,
 2266                     mctx->eflags);
 2267       if (IS_WORD_CONTEXT (context))
 2268         return trtable[ch + SBC_MAX];
 2269       else
 2270         return trtable[ch];
 2271     }
 2272 
 2273       if (!build_trtable (mctx->dfa, state))
 2274     {
 2275       *err = REG_ESPACE;
 2276       return NULL;
 2277     }
 2278 
 2279       /* Retry, we now have a transition table.  */
 2280     }
 2281 }
 2282 
 2283 /* Update the state_log if we need */
 2284 static re_dfastate_t *
 2285 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
 2286               re_dfastate_t *next_state)
 2287 {
 2288   const re_dfa_t *const dfa = mctx->dfa;
 2289   Idx cur_idx = re_string_cur_idx (&mctx->input);
 2290 
 2291   if (cur_idx > mctx->state_log_top)
 2292     {
 2293       mctx->state_log[cur_idx] = next_state;
 2294       mctx->state_log_top = cur_idx;
 2295     }
 2296   else if (mctx->state_log[cur_idx] == 0)
 2297     {
 2298       mctx->state_log[cur_idx] = next_state;
 2299     }
 2300   else
 2301     {
 2302       re_dfastate_t *pstate;
 2303       unsigned int context;
 2304       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
 2305       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
 2306      the destination of a multibyte char/collating element/
 2307      back reference.  Then the next state is the union set of
 2308      these destinations and the results of the transition table.  */
 2309       pstate = mctx->state_log[cur_idx];
 2310       log_nodes = pstate->entrance_nodes;
 2311       if (next_state != NULL)
 2312     {
 2313       table_nodes = next_state->entrance_nodes;
 2314       *err = re_node_set_init_union (&next_nodes, table_nodes,
 2315                          log_nodes);
 2316       if (__glibc_unlikely (*err != REG_NOERROR))
 2317         return NULL;
 2318     }
 2319       else
 2320     next_nodes = *log_nodes;
 2321       /* Note: We already add the nodes of the initial state,
 2322      then we don't need to add them here.  */
 2323 
 2324       context = re_string_context_at (&mctx->input,
 2325                       re_string_cur_idx (&mctx->input) - 1,
 2326                       mctx->eflags);
 2327       next_state = mctx->state_log[cur_idx]
 2328     = re_acquire_state_context (err, dfa, &next_nodes, context);
 2329       /* We don't need to check errors here, since the return value of
 2330      this function is next_state and ERR is already set.  */
 2331 
 2332       if (table_nodes != NULL)
 2333     re_node_set_free (&next_nodes);
 2334     }
 2335 
 2336   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
 2337     {
 2338       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
 2339      later.  We must check them here, since the back references in the
 2340      next state might use them.  */
 2341       *err = check_subexp_matching_top (mctx, &next_state->nodes,
 2342                     cur_idx);
 2343       if (__glibc_unlikely (*err != REG_NOERROR))
 2344     return NULL;
 2345 
 2346       /* If the next state has back references.  */
 2347       if (next_state->has_backref)
 2348     {
 2349       *err = transit_state_bkref (mctx, &next_state->nodes);
 2350       if (__glibc_unlikely (*err != REG_NOERROR))
 2351         return NULL;
 2352       next_state = mctx->state_log[cur_idx];
 2353     }
 2354     }
 2355 
 2356   return next_state;
 2357 }
 2358 
 2359 /* Skip bytes in the input that correspond to part of a
 2360    multi-byte match, then look in the log for a state
 2361    from which to restart matching.  */
 2362 static re_dfastate_t *
 2363 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
 2364 {
 2365   re_dfastate_t *cur_state;
 2366   do
 2367     {
 2368       Idx max = mctx->state_log_top;
 2369       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
 2370 
 2371       do
 2372     {
 2373       if (++cur_str_idx > max)
 2374         return NULL;
 2375       re_string_skip_bytes (&mctx->input, 1);
 2376     }
 2377       while (mctx->state_log[cur_str_idx] == NULL);
 2378 
 2379       cur_state = merge_state_with_log (err, mctx, NULL);
 2380     }
 2381   while (*err == REG_NOERROR && cur_state == NULL);
 2382   return cur_state;
 2383 }
 2384 
 2385 /* Helper functions for transit_state.  */
 2386 
 2387 /* From the node set CUR_NODES, pick up the nodes whose types are
 2388    OP_OPEN_SUBEXP and which have corresponding back references in the regular
 2389    expression. And register them to use them later for evaluating the
 2390    corresponding back references.  */
 2391 
 2392 static reg_errcode_t
 2393 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
 2394                Idx str_idx)
 2395 {
 2396   const re_dfa_t *const dfa = mctx->dfa;
 2397   Idx node_idx;
 2398   reg_errcode_t err;
 2399 
 2400   /* TODO: This isn't efficient.
 2401        Because there might be more than one nodes whose types are
 2402        OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
 2403        nodes.
 2404        E.g. RE: (a){2}  */
 2405   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
 2406     {
 2407       Idx node = cur_nodes->elems[node_idx];
 2408       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
 2409       && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
 2410       && (dfa->used_bkref_map
 2411           & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
 2412     {
 2413       err = match_ctx_add_subtop (mctx, node, str_idx);
 2414       if (__glibc_unlikely (err != REG_NOERROR))
 2415         return err;
 2416     }
 2417     }
 2418   return REG_NOERROR;
 2419 }
 2420 
 2421 #if 0
 2422 /* Return the next state to which the current state STATE will transit by
 2423    accepting the current input byte.  */
 2424 
 2425 static re_dfastate_t *
 2426 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
 2427           re_dfastate_t *state)
 2428 {
 2429   const re_dfa_t *const dfa = mctx->dfa;
 2430   re_node_set next_nodes;
 2431   re_dfastate_t *next_state;
 2432   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
 2433   unsigned int context;
 2434 
 2435   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
 2436   if (__glibc_unlikely (*err != REG_NOERROR))
 2437     return NULL;
 2438   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
 2439     {
 2440       Idx cur_node = state->nodes.elems[node_cnt];
 2441       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
 2442     {
 2443       *err = re_node_set_merge (&next_nodes,
 2444                     dfa->eclosures + dfa->nexts[cur_node]);
 2445       if (__glibc_unlikely (*err != REG_NOERROR))
 2446         {
 2447           re_node_set_free (&next_nodes);
 2448           return NULL;
 2449         }
 2450     }
 2451     }
 2452   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
 2453   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
 2454   /* We don't need to check errors here, since the return value of
 2455      this function is next_state and ERR is already set.  */
 2456 
 2457   re_node_set_free (&next_nodes);
 2458   re_string_skip_bytes (&mctx->input, 1);
 2459   return next_state;
 2460 }
 2461 #endif
 2462 
 2463 #ifdef RE_ENABLE_I18N
 2464 static reg_errcode_t
 2465 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
 2466 {
 2467   const re_dfa_t *const dfa = mctx->dfa;
 2468   reg_errcode_t err;
 2469   Idx i;
 2470 
 2471   for (i = 0; i < pstate->nodes.nelem; ++i)
 2472     {
 2473       re_node_set dest_nodes, *new_nodes;
 2474       Idx cur_node_idx = pstate->nodes.elems[i];
 2475       int naccepted;
 2476       Idx dest_idx;
 2477       unsigned int context;
 2478       re_dfastate_t *dest_state;
 2479 
 2480       if (!dfa->nodes[cur_node_idx].accept_mb)
 2481     continue;
 2482 
 2483       if (dfa->nodes[cur_node_idx].constraint)
 2484     {
 2485       context = re_string_context_at (&mctx->input,
 2486                       re_string_cur_idx (&mctx->input),
 2487                       mctx->eflags);
 2488       if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
 2489                        context))
 2490         continue;
 2491     }
 2492 
 2493       /* How many bytes the node can accept?  */
 2494       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
 2495                        re_string_cur_idx (&mctx->input));
 2496       if (naccepted == 0)
 2497     continue;
 2498 
 2499       /* The node can accepts 'naccepted' bytes.  */
 2500       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
 2501       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
 2502                    : mctx->max_mb_elem_len);
 2503       err = clean_state_log_if_needed (mctx, dest_idx);
 2504       if (__glibc_unlikely (err != REG_NOERROR))
 2505     return err;
 2506 #ifdef DEBUG
 2507       assert (dfa->nexts[cur_node_idx] != -1);
 2508 #endif
 2509       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
 2510 
 2511       dest_state = mctx->state_log[dest_idx];
 2512       if (dest_state == NULL)
 2513     dest_nodes = *new_nodes;
 2514       else
 2515     {
 2516       err = re_node_set_init_union (&dest_nodes,
 2517                     dest_state->entrance_nodes, new_nodes);
 2518       if (__glibc_unlikely (err != REG_NOERROR))
 2519         return err;
 2520     }
 2521       context = re_string_context_at (&mctx->input, dest_idx - 1,
 2522                       mctx->eflags);
 2523       mctx->state_log[dest_idx]
 2524     = re_acquire_state_context (&err, dfa, &dest_nodes, context);
 2525       if (dest_state != NULL)
 2526     re_node_set_free (&dest_nodes);
 2527       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
 2528                 && err != REG_NOERROR))
 2529     return err;
 2530     }
 2531   return REG_NOERROR;
 2532 }
 2533 #endif /* RE_ENABLE_I18N */
 2534 
 2535 static reg_errcode_t
 2536 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
 2537 {
 2538   const re_dfa_t *const dfa = mctx->dfa;
 2539   reg_errcode_t err;
 2540   Idx i;
 2541   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
 2542 
 2543   for (i = 0; i < nodes->nelem; ++i)
 2544     {
 2545       Idx dest_str_idx, prev_nelem, bkc_idx;
 2546       Idx node_idx = nodes->elems[i];
 2547       unsigned int context;
 2548       const re_token_t *node = dfa->nodes + node_idx;
 2549       re_node_set *new_dest_nodes;
 2550 
 2551       /* Check whether 'node' is a backreference or not.  */
 2552       if (node->type != OP_BACK_REF)
 2553     continue;
 2554 
 2555       if (node->constraint)
 2556     {
 2557       context = re_string_context_at (&mctx->input, cur_str_idx,
 2558                       mctx->eflags);
 2559       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
 2560         continue;
 2561     }
 2562 
 2563       /* 'node' is a backreference.
 2564      Check the substring which the substring matched.  */
 2565       bkc_idx = mctx->nbkref_ents;
 2566       err = get_subexp (mctx, node_idx, cur_str_idx);
 2567       if (__glibc_unlikely (err != REG_NOERROR))
 2568     goto free_return;
 2569 
 2570       /* And add the epsilon closures (which is 'new_dest_nodes') of
 2571      the backreference to appropriate state_log.  */
 2572 #ifdef DEBUG
 2573       assert (dfa->nexts[node_idx] != -1);
 2574 #endif
 2575       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
 2576     {
 2577       Idx subexp_len;
 2578       re_dfastate_t *dest_state;
 2579       struct re_backref_cache_entry *bkref_ent;
 2580       bkref_ent = mctx->bkref_ents + bkc_idx;
 2581       if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
 2582         continue;
 2583       subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
 2584       new_dest_nodes = (subexp_len == 0
 2585                 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
 2586                 : dfa->eclosures + dfa->nexts[node_idx]);
 2587       dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
 2588               - bkref_ent->subexp_from);
 2589       context = re_string_context_at (&mctx->input, dest_str_idx - 1,
 2590                       mctx->eflags);
 2591       dest_state = mctx->state_log[dest_str_idx];
 2592       prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
 2593             : mctx->state_log[cur_str_idx]->nodes.nelem);
 2594       /* Add 'new_dest_node' to state_log.  */
 2595       if (dest_state == NULL)
 2596         {
 2597           mctx->state_log[dest_str_idx]
 2598         = re_acquire_state_context (&err, dfa, new_dest_nodes,
 2599                         context);
 2600           if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
 2601                     && err != REG_NOERROR))
 2602         goto free_return;
 2603         }
 2604       else
 2605         {
 2606           re_node_set dest_nodes;
 2607           err = re_node_set_init_union (&dest_nodes,
 2608                         dest_state->entrance_nodes,
 2609                         new_dest_nodes);
 2610           if (__glibc_unlikely (err != REG_NOERROR))
 2611         {
 2612           re_node_set_free (&dest_nodes);
 2613           goto free_return;
 2614         }
 2615           mctx->state_log[dest_str_idx]
 2616         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
 2617           re_node_set_free (&dest_nodes);
 2618           if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
 2619                     && err != REG_NOERROR))
 2620         goto free_return;
 2621         }
 2622       /* We need to check recursively if the backreference can epsilon
 2623          transit.  */
 2624       if (subexp_len == 0
 2625           && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
 2626         {
 2627           err = check_subexp_matching_top (mctx, new_dest_nodes,
 2628                            cur_str_idx);
 2629           if (__glibc_unlikely (err != REG_NOERROR))
 2630         goto free_return;
 2631           err = transit_state_bkref (mctx, new_dest_nodes);
 2632           if (__glibc_unlikely (err != REG_NOERROR))
 2633         goto free_return;
 2634         }
 2635     }
 2636     }
 2637   err = REG_NOERROR;
 2638  free_return:
 2639   return err;
 2640 }
 2641 
 2642 /* Enumerate all the candidates which the backreference BKREF_NODE can match
 2643    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
 2644    Note that we might collect inappropriate candidates here.
 2645    However, the cost of checking them strictly here is too high, then we
 2646    delay these checking for prune_impossible_nodes().  */
 2647 
 2648 static reg_errcode_t
 2649 __attribute_warn_unused_result__
 2650 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
 2651 {
 2652   const re_dfa_t *const dfa = mctx->dfa;
 2653   Idx subexp_num, sub_top_idx;
 2654   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
 2655   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
 2656   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
 2657   if (cache_idx != -1)
 2658     {
 2659       const struct re_backref_cache_entry *entry
 2660     = mctx->bkref_ents + cache_idx;
 2661       do
 2662     if (entry->node == bkref_node)
 2663       return REG_NOERROR; /* We already checked it.  */
 2664       while (entry++->more);
 2665     }
 2666 
 2667   subexp_num = dfa->nodes[bkref_node].opr.idx;
 2668 
 2669   /* For each sub expression  */
 2670   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
 2671     {
 2672       reg_errcode_t err;
 2673       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
 2674       re_sub_match_last_t *sub_last;
 2675       Idx sub_last_idx, sl_str, bkref_str_off;
 2676 
 2677       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
 2678     continue; /* It isn't related.  */
 2679 
 2680       sl_str = sub_top->str_idx;
 2681       bkref_str_off = bkref_str_idx;
 2682       /* At first, check the last node of sub expressions we already
 2683      evaluated.  */
 2684       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
 2685     {
 2686       regoff_t sl_str_diff;
 2687       sub_last = sub_top->lasts[sub_last_idx];
 2688       sl_str_diff = sub_last->str_idx - sl_str;
 2689       /* The matched string by the sub expression match with the substring
 2690          at the back reference?  */
 2691       if (sl_str_diff > 0)
 2692         {
 2693           if (__glibc_unlikely (bkref_str_off + sl_str_diff
 2694                     > mctx->input.valid_len))
 2695         {
 2696           /* Not enough chars for a successful match.  */
 2697           if (bkref_str_off + sl_str_diff > mctx->input.len)
 2698             break;
 2699 
 2700           err = clean_state_log_if_needed (mctx,
 2701                            bkref_str_off
 2702                            + sl_str_diff);
 2703           if (__glibc_unlikely (err != REG_NOERROR))
 2704             return err;
 2705           buf = (const char *) re_string_get_buffer (&mctx->input);
 2706         }
 2707           if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
 2708         /* We don't need to search this sub expression any more.  */
 2709         break;
 2710         }
 2711       bkref_str_off += sl_str_diff;
 2712       sl_str += sl_str_diff;
 2713       err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
 2714                 bkref_str_idx);
 2715 
 2716       /* Reload buf, since the preceding call might have reallocated
 2717          the buffer.  */
 2718       buf = (const char *) re_string_get_buffer (&mctx->input);
 2719 
 2720       if (err == REG_NOMATCH)
 2721         continue;
 2722       if (__glibc_unlikely (err != REG_NOERROR))
 2723         return err;
 2724     }
 2725 
 2726       if (sub_last_idx < sub_top->nlasts)
 2727     continue;
 2728       if (sub_last_idx > 0)
 2729     ++sl_str;
 2730       /* Then, search for the other last nodes of the sub expression.  */
 2731       for (; sl_str <= bkref_str_idx; ++sl_str)
 2732     {
 2733       Idx cls_node;
 2734       regoff_t sl_str_off;
 2735       const re_node_set *nodes;
 2736       sl_str_off = sl_str - sub_top->str_idx;
 2737       /* The matched string by the sub expression match with the substring
 2738          at the back reference?  */
 2739       if (sl_str_off > 0)
 2740         {
 2741           if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
 2742         {
 2743           /* If we are at the end of the input, we cannot match.  */
 2744           if (bkref_str_off >= mctx->input.len)
 2745             break;
 2746 
 2747           err = extend_buffers (mctx, bkref_str_off + 1);
 2748           if (__glibc_unlikely (err != REG_NOERROR))
 2749             return err;
 2750 
 2751           buf = (const char *) re_string_get_buffer (&mctx->input);
 2752         }
 2753           if (buf [bkref_str_off++] != buf[sl_str - 1])
 2754         break; /* We don't need to search this sub expression
 2755               any more.  */
 2756         }
 2757       if (mctx->state_log[sl_str] == NULL)
 2758         continue;
 2759       /* Does this state have a ')' of the sub expression?  */
 2760       nodes = &mctx->state_log[sl_str]->nodes;
 2761       cls_node = find_subexp_node (dfa, nodes, subexp_num,
 2762                        OP_CLOSE_SUBEXP);
 2763       if (cls_node == -1)
 2764         continue; /* No.  */
 2765       if (sub_top->path == NULL)
 2766         {
 2767           sub_top->path = calloc (sizeof (state_array_t),
 2768                       sl_str - sub_top->str_idx + 1);
 2769           if (sub_top->path == NULL)
 2770         return REG_ESPACE;
 2771         }
 2772       /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
 2773          in the current context?  */
 2774       err = check_arrival (mctx, sub_top->path, sub_top->node,
 2775                    sub_top->str_idx, cls_node, sl_str,
 2776                    OP_CLOSE_SUBEXP);
 2777       if (err == REG_NOMATCH)
 2778           continue;
 2779       if (__glibc_unlikely (err != REG_NOERROR))
 2780           return err;
 2781       sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
 2782       if (__glibc_unlikely (sub_last == NULL))
 2783         return REG_ESPACE;
 2784       err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
 2785                 bkref_str_idx);
 2786       buf = (const char *) re_string_get_buffer (&mctx->input);
 2787       if (err == REG_NOMATCH)
 2788         continue;
 2789       if (__glibc_unlikely (err != REG_NOERROR))
 2790         return err;
 2791     }
 2792     }
 2793   return REG_NOERROR;
 2794 }
 2795 
 2796 /* Helper functions for get_subexp().  */
 2797 
 2798 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
 2799    If it can arrive, register the sub expression expressed with SUB_TOP
 2800    and SUB_LAST.  */
 2801 
 2802 static reg_errcode_t
 2803 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
 2804         re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
 2805 {
 2806   reg_errcode_t err;
 2807   Idx to_idx;
 2808   /* Can the subexpression arrive the back reference?  */
 2809   err = check_arrival (mctx, &sub_last->path, sub_last->node,
 2810                sub_last->str_idx, bkref_node, bkref_str,
 2811                OP_OPEN_SUBEXP);
 2812   if (err != REG_NOERROR)
 2813     return err;
 2814   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
 2815                  sub_last->str_idx);
 2816   if (__glibc_unlikely (err != REG_NOERROR))
 2817     return err;
 2818   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
 2819   return clean_state_log_if_needed (mctx, to_idx);
 2820 }
 2821 
 2822 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
 2823    Search '(' if FL_OPEN, or search ')' otherwise.
 2824    TODO: This function isn't efficient...
 2825      Because there might be more than one nodes whose types are
 2826      OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
 2827      nodes.
 2828      E.g. RE: (a){2}  */
 2829 
 2830 static Idx
 2831 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 2832           Idx subexp_idx, int type)
 2833 {
 2834   Idx cls_idx;
 2835   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
 2836     {
 2837       Idx cls_node = nodes->elems[cls_idx];
 2838       const re_token_t *node = dfa->nodes + cls_node;
 2839       if (node->type == type
 2840       && node->opr.idx == subexp_idx)
 2841     return cls_node;
 2842     }
 2843   return -1;
 2844 }
 2845 
 2846 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
 2847    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
 2848    heavily reused.
 2849    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
 2850 
 2851 static reg_errcode_t
 2852 __attribute_warn_unused_result__
 2853 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
 2854            Idx top_str, Idx last_node, Idx last_str, int type)
 2855 {
 2856   const re_dfa_t *const dfa = mctx->dfa;
 2857   reg_errcode_t err = REG_NOERROR;
 2858   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
 2859   re_dfastate_t *cur_state = NULL;
 2860   re_node_set *cur_nodes, next_nodes;
 2861   re_dfastate_t **backup_state_log;
 2862   unsigned int context;
 2863 
 2864   subexp_num = dfa->nodes[top_node].opr.idx;
 2865   /* Extend the buffer if we need.  */
 2866   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
 2867     {
 2868       re_dfastate_t **new_array;
 2869       Idx old_alloc = path->alloc;
 2870       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
 2871       Idx new_alloc;
 2872       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
 2873     return REG_ESPACE;
 2874       new_alloc = old_alloc + incr_alloc;
 2875       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
 2876     return REG_ESPACE;
 2877       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
 2878       if (__glibc_unlikely (new_array == NULL))
 2879     return REG_ESPACE;
 2880       path->array = new_array;
 2881       path->alloc = new_alloc;
 2882       memset (new_array + old_alloc, '\0',
 2883           sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
 2884     }
 2885 
 2886   str_idx = path->next_idx ? path->next_idx : top_str;
 2887 
 2888   /* Temporary modify MCTX.  */
 2889   backup_state_log = mctx->state_log;
 2890   backup_cur_idx = mctx->input.cur_idx;
 2891   mctx->state_log = path->array;
 2892   mctx->input.cur_idx = str_idx;
 2893 
 2894   /* Setup initial node set.  */
 2895   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
 2896   if (str_idx == top_str)
 2897     {
 2898       err = re_node_set_init_1 (&next_nodes, top_node);
 2899       if (__glibc_unlikely (err != REG_NOERROR))
 2900     return err;
 2901       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
 2902       if (__glibc_unlikely (err != REG_NOERROR))
 2903     {
 2904       re_node_set_free (&next_nodes);
 2905       return err;
 2906     }
 2907     }
 2908   else
 2909     {
 2910       cur_state = mctx->state_log[str_idx];
 2911       if (cur_state && cur_state->has_backref)
 2912     {
 2913       err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
 2914       if (__glibc_unlikely (err != REG_NOERROR))
 2915         return err;
 2916     }
 2917       else
 2918     re_node_set_init_empty (&next_nodes);
 2919     }
 2920   if (str_idx == top_str || (cur_state && cur_state->has_backref))
 2921     {
 2922       if (next_nodes.nelem)
 2923     {
 2924       err = expand_bkref_cache (mctx, &next_nodes, str_idx,
 2925                     subexp_num, type);
 2926       if (__glibc_unlikely (err != REG_NOERROR))
 2927         {
 2928           re_node_set_free (&next_nodes);
 2929           return err;
 2930         }
 2931     }
 2932       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
 2933       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
 2934     {
 2935       re_node_set_free (&next_nodes);
 2936       return err;
 2937     }
 2938       mctx->state_log[str_idx] = cur_state;
 2939     }
 2940 
 2941   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
 2942     {
 2943       re_node_set_empty (&next_nodes);
 2944       if (mctx->state_log[str_idx + 1])
 2945     {
 2946       err = re_node_set_merge (&next_nodes,
 2947                    &mctx->state_log[str_idx + 1]->nodes);
 2948       if (__glibc_unlikely (err != REG_NOERROR))
 2949         {
 2950           re_node_set_free (&next_nodes);
 2951           return err;
 2952         }
 2953     }
 2954       if (cur_state)
 2955     {
 2956       err = check_arrival_add_next_nodes (mctx, str_idx,
 2957                           &cur_state->non_eps_nodes,
 2958                           &next_nodes);
 2959       if (__glibc_unlikely (err != REG_NOERROR))
 2960         {
 2961           re_node_set_free (&next_nodes);
 2962           return err;
 2963         }
 2964     }
 2965       ++str_idx;
 2966       if (next_nodes.nelem)
 2967     {
 2968       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
 2969       if (__glibc_unlikely (err != REG_NOERROR))
 2970         {
 2971           re_node_set_free (&next_nodes);
 2972           return err;
 2973         }
 2974       err = expand_bkref_cache (mctx, &next_nodes, str_idx,
 2975                     subexp_num, type);
 2976       if (__glibc_unlikely (err != REG_NOERROR))
 2977         {
 2978           re_node_set_free (&next_nodes);
 2979           return err;
 2980         }
 2981     }
 2982       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
 2983       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
 2984       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
 2985     {
 2986       re_node_set_free (&next_nodes);
 2987       return err;
 2988     }
 2989       mctx->state_log[str_idx] = cur_state;
 2990       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
 2991     }
 2992   re_node_set_free (&next_nodes);
 2993   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
 2994            : &mctx->state_log[last_str]->nodes);
 2995   path->next_idx = str_idx;
 2996 
 2997   /* Fix MCTX.  */
 2998   mctx->state_log = backup_state_log;
 2999   mctx->input.cur_idx = backup_cur_idx;
 3000 
 3001   /* Then check the current node set has the node LAST_NODE.  */
 3002   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
 3003     return REG_NOERROR;
 3004 
 3005   return REG_NOMATCH;
 3006 }
 3007 
 3008 /* Helper functions for check_arrival.  */
 3009 
 3010 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
 3011    to NEXT_NODES.
 3012    TODO: This function is similar to the functions transit_state*(),
 3013      however this function has many additional works.
 3014      Can't we unify them?  */
 3015 
 3016 static reg_errcode_t
 3017 __attribute_warn_unused_result__
 3018 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
 3019                   re_node_set *cur_nodes, re_node_set *next_nodes)
 3020 {
 3021   const re_dfa_t *const dfa = mctx->dfa;
 3022   bool ok;
 3023   Idx cur_idx;
 3024 #ifdef RE_ENABLE_I18N
 3025   reg_errcode_t err = REG_NOERROR;
 3026 #endif
 3027   re_node_set union_set;
 3028   re_node_set_init_empty (&union_set);
 3029   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
 3030     {
 3031       int naccepted = 0;
 3032       Idx cur_node = cur_nodes->elems[cur_idx];
 3033 #ifdef DEBUG
 3034       re_token_type_t type = dfa->nodes[cur_node].type;
 3035       assert (!IS_EPSILON_NODE (type));
 3036 #endif
 3037 #ifdef RE_ENABLE_I18N
 3038       /* If the node may accept "multi byte".  */
 3039       if (dfa->nodes[cur_node].accept_mb)
 3040     {
 3041       naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
 3042                            str_idx);
 3043       if (naccepted > 1)
 3044         {
 3045           re_dfastate_t *dest_state;
 3046           Idx next_node = dfa->nexts[cur_node];
 3047           Idx next_idx = str_idx + naccepted;
 3048           dest_state = mctx->state_log[next_idx];
 3049           re_node_set_empty (&union_set);
 3050           if (dest_state)
 3051         {
 3052           err = re_node_set_merge (&union_set, &dest_state->nodes);
 3053           if (__glibc_unlikely (err != REG_NOERROR))
 3054             {
 3055               re_node_set_free (&union_set);
 3056               return err;
 3057             }
 3058         }
 3059           ok = re_node_set_insert (&union_set, next_node);
 3060           if (__glibc_unlikely (! ok))
 3061         {
 3062           re_node_set_free (&union_set);
 3063           return REG_ESPACE;
 3064         }
 3065           mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
 3066                                 &union_set);
 3067           if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
 3068                     && err != REG_NOERROR))
 3069         {
 3070           re_node_set_free (&union_set);
 3071           return err;
 3072         }
 3073         }
 3074     }
 3075 #endif /* RE_ENABLE_I18N */
 3076       if (naccepted
 3077       || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
 3078     {
 3079       ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
 3080       if (__glibc_unlikely (! ok))
 3081         {
 3082           re_node_set_free (&union_set);
 3083           return REG_ESPACE;
 3084         }
 3085     }
 3086     }
 3087   re_node_set_free (&union_set);
 3088   return REG_NOERROR;
 3089 }
 3090 
 3091 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
 3092    CUR_NODES, however exclude the nodes which are:
 3093     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
 3094     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
 3095 */
 3096 
 3097 static reg_errcode_t
 3098 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
 3099               Idx ex_subexp, int type)
 3100 {
 3101   reg_errcode_t err;
 3102   Idx idx, outside_node;
 3103   re_node_set new_nodes;
 3104 #ifdef DEBUG
 3105   assert (cur_nodes->nelem);
 3106 #endif
 3107   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
 3108   if (__glibc_unlikely (err != REG_NOERROR))
 3109     return err;
 3110   /* Create a new node set NEW_NODES with the nodes which are epsilon
 3111      closures of the node in CUR_NODES.  */
 3112 
 3113   for (idx = 0; idx < cur_nodes->nelem; ++idx)
 3114     {
 3115       Idx cur_node = cur_nodes->elems[idx];
 3116       const re_node_set *eclosure = dfa->eclosures + cur_node;
 3117       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
 3118       if (outside_node == -1)
 3119     {
 3120       /* There are no problematic nodes, just merge them.  */
 3121       err = re_node_set_merge (&new_nodes, eclosure);
 3122       if (__glibc_unlikely (err != REG_NOERROR))
 3123         {
 3124           re_node_set_free (&new_nodes);
 3125           return err;
 3126         }
 3127     }
 3128       else
 3129     {
 3130       /* There are problematic nodes, re-calculate incrementally.  */
 3131       err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
 3132                           ex_subexp, type);
 3133       if (__glibc_unlikely (err != REG_NOERROR))
 3134         {
 3135           re_node_set_free (&new_nodes);
 3136           return err;
 3137         }
 3138     }
 3139     }
 3140   re_node_set_free (cur_nodes);
 3141   *cur_nodes = new_nodes;
 3142   return REG_NOERROR;
 3143 }
 3144 
 3145 /* Helper function for check_arrival_expand_ecl.
 3146    Check incrementally the epsilon closure of TARGET, and if it isn't
 3147    problematic append it to DST_NODES.  */
 3148 
 3149 static reg_errcode_t
 3150 __attribute_warn_unused_result__
 3151 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
 3152                   Idx target, Idx ex_subexp, int type)
 3153 {
 3154   Idx cur_node;
 3155   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
 3156     {
 3157       bool ok;
 3158 
 3159       if (dfa->nodes[cur_node].type == type
 3160       && dfa->nodes[cur_node].opr.idx == ex_subexp)
 3161     {
 3162       if (type == OP_CLOSE_SUBEXP)
 3163         {
 3164           ok = re_node_set_insert (dst_nodes, cur_node);
 3165           if (__glibc_unlikely (! ok))
 3166         return REG_ESPACE;
 3167         }
 3168       break;
 3169     }
 3170       ok = re_node_set_insert (dst_nodes, cur_node);
 3171       if (__glibc_unlikely (! ok))
 3172     return REG_ESPACE;
 3173       if (dfa->edests[cur_node].nelem == 0)
 3174     break;
 3175       if (dfa->edests[cur_node].nelem == 2)
 3176     {
 3177       reg_errcode_t err;
 3178       err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
 3179                           dfa->edests[cur_node].elems[1],
 3180                           ex_subexp, type);
 3181       if (__glibc_unlikely (err != REG_NOERROR))
 3182         return err;
 3183     }
 3184       cur_node = dfa->edests[cur_node].elems[0];
 3185     }
 3186   return REG_NOERROR;
 3187 }
 3188 
 3189 
 3190 /* For all the back references in the current state, calculate the
 3191    destination of the back references by the appropriate entry
 3192    in MCTX->BKREF_ENTS.  */
 3193 
 3194 static reg_errcode_t
 3195 __attribute_warn_unused_result__
 3196 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
 3197             Idx cur_str, Idx subexp_num, int type)
 3198 {
 3199   const re_dfa_t *const dfa = mctx->dfa;
 3200   reg_errcode_t err;
 3201   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
 3202   struct re_backref_cache_entry *ent;
 3203 
 3204   if (cache_idx_start == -1)
 3205     return REG_NOERROR;
 3206 
 3207  restart:
 3208   ent = mctx->bkref_ents + cache_idx_start;
 3209   do
 3210     {
 3211       Idx to_idx, next_node;
 3212 
 3213       /* Is this entry ENT is appropriate?  */
 3214       if (!re_node_set_contains (cur_nodes, ent->node))
 3215     continue; /* No.  */
 3216 
 3217       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
 3218       /* Calculate the destination of the back reference, and append it
 3219      to MCTX->STATE_LOG.  */
 3220       if (to_idx == cur_str)
 3221     {
 3222       /* The backreference did epsilon transit, we must re-check all the
 3223          node in the current state.  */
 3224       re_node_set new_dests;
 3225       reg_errcode_t err2, err3;
 3226       next_node = dfa->edests[ent->node].elems[0];
 3227       if (re_node_set_contains (cur_nodes, next_node))
 3228         continue;
 3229       err = re_node_set_init_1 (&new_dests, next_node);
 3230       err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
 3231       err3 = re_node_set_merge (cur_nodes, &new_dests);
 3232       re_node_set_free (&new_dests);
 3233       if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
 3234                 || err3 != REG_NOERROR))
 3235         {
 3236           err = (err != REG_NOERROR ? err
 3237              : (err2 != REG_NOERROR ? err2 : err3));
 3238           return err;
 3239         }
 3240       /* TODO: It is still inefficient...  */
 3241       goto restart;
 3242     }
 3243       else
 3244     {
 3245       re_node_set union_set;
 3246       next_node = dfa->nexts[ent->node];
 3247       if (mctx->state_log[to_idx])
 3248         {
 3249           bool ok;
 3250           if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
 3251                     next_node))
 3252         continue;
 3253           err = re_node_set_init_copy (&union_set,
 3254                        &mctx->state_log[to_idx]->nodes);
 3255           ok = re_node_set_insert (&union_set, next_node);
 3256           if (__glibc_unlikely (err != REG_NOERROR || ! ok))
 3257         {
 3258           re_node_set_free (&union_set);
 3259           err = err != REG_NOERROR ? err : REG_ESPACE;
 3260           return err;
 3261         }
 3262         }
 3263       else
 3264         {
 3265           err = re_node_set_init_1 (&union_set, next_node);
 3266           if (__glibc_unlikely (err != REG_NOERROR))
 3267         return err;
 3268         }
 3269       mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
 3270       re_node_set_free (&union_set);
 3271       if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
 3272                 && err != REG_NOERROR))
 3273         return err;
 3274     }
 3275     }
 3276   while (ent++->more);
 3277   return REG_NOERROR;
 3278 }
 3279 
 3280 /* Build transition table for the state.
 3281    Return true if successful.  */
 3282 
 3283 static bool
 3284 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
 3285 {
 3286   reg_errcode_t err;
 3287   Idx i, j;
 3288   int ch;
 3289   bool need_word_trtable = false;
 3290   bitset_word_t elem, mask;
 3291   bool dests_node_malloced = false;
 3292   bool dest_states_malloced = false;
 3293   Idx ndests; /* Number of the destination states from 'state'.  */
 3294   re_dfastate_t **trtable;
 3295   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
 3296   re_node_set follows, *dests_node;
 3297   bitset_t *dests_ch;
 3298   bitset_t acceptable;
 3299 
 3300   struct dests_alloc
 3301   {
 3302     re_node_set dests_node[SBC_MAX];
 3303     bitset_t dests_ch[SBC_MAX];
 3304   } *dests_alloc;
 3305 
 3306   /* We build DFA states which corresponds to the destination nodes
 3307      from 'state'.  'dests_node[i]' represents the nodes which i-th
 3308      destination state contains, and 'dests_ch[i]' represents the
 3309      characters which i-th destination state accepts.  */
 3310   if (__libc_use_alloca (sizeof (struct dests_alloc)))
 3311     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
 3312   else
 3313     {
 3314       dests_alloc = re_malloc (struct dests_alloc, 1);
 3315       if (__glibc_unlikely (dests_alloc == NULL))
 3316     return false;
 3317       dests_node_malloced = true;
 3318     }
 3319   dests_node = dests_alloc->dests_node;
 3320   dests_ch = dests_alloc->dests_ch;
 3321 
 3322   /* Initialize transition table.  */
 3323   state->word_trtable = state->trtable = NULL;
 3324 
 3325   /* At first, group all nodes belonging to 'state' into several
 3326      destinations.  */
 3327   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
 3328   if (__glibc_unlikely (ndests <= 0))
 3329     {
 3330       if (dests_node_malloced)
 3331     re_free (dests_alloc);
 3332       /* Return false in case of an error, true otherwise.  */
 3333       if (ndests == 0)
 3334     {
 3335       state->trtable = (re_dfastate_t **)
 3336         calloc (sizeof (re_dfastate_t *), SBC_MAX);
 3337           if (__glibc_unlikely (state->trtable == NULL))
 3338             return false;
 3339       return true;
 3340     }
 3341       return false;
 3342     }
 3343 
 3344   err = re_node_set_alloc (&follows, ndests + 1);
 3345   if (__glibc_unlikely (err != REG_NOERROR))
 3346     goto out_free;
 3347 
 3348   /* Avoid arithmetic overflow in size calculation.  */
 3349   size_t ndests_max
 3350     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
 3351        / (3 * sizeof (re_dfastate_t *)));
 3352   if (__glibc_unlikely (ndests_max < ndests))
 3353     goto out_free;
 3354 
 3355   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
 3356              + ndests * 3 * sizeof (re_dfastate_t *)))
 3357     dest_states = (re_dfastate_t **)
 3358       alloca (ndests * 3 * sizeof (re_dfastate_t *));
 3359   else
 3360     {
 3361       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
 3362       if (__glibc_unlikely (dest_states == NULL))
 3363     {
 3364 out_free:
 3365       if (dest_states_malloced)
 3366         re_free (dest_states);
 3367       re_node_set_free (&follows);
 3368       for (i = 0; i < ndests; ++i)
 3369         re_node_set_free (dests_node + i);
 3370       if (dests_node_malloced)
 3371         re_free (dests_alloc);
 3372       return false;
 3373     }
 3374       dest_states_malloced = true;
 3375     }
 3376   dest_states_word = dest_states + ndests;
 3377   dest_states_nl = dest_states_word + ndests;
 3378   bitset_empty (acceptable);
 3379 
 3380   /* Then build the states for all destinations.  */
 3381   for (i = 0; i < ndests; ++i)
 3382     {
 3383       Idx next_node;
 3384       re_node_set_empty (&follows);
 3385       /* Merge the follows of this destination states.  */
 3386       for (j = 0; j < dests_node[i].nelem; ++j)
 3387     {
 3388       next_node = dfa->nexts[dests_node[i].elems[j]];
 3389       if (next_node != -1)
 3390         {
 3391           err = re_node_set_merge (&follows, dfa->eclosures + next_node);
 3392           if (__glibc_unlikely (err != REG_NOERROR))
 3393         goto out_free;
 3394         }
 3395     }
 3396       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
 3397       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
 3398     goto out_free;
 3399       /* If the new state has context constraint,
 3400      build appropriate states for these contexts.  */
 3401       if (dest_states[i]->has_constraint)
 3402     {
 3403       dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
 3404                               CONTEXT_WORD);
 3405       if (__glibc_unlikely (dest_states_word[i] == NULL
 3406                 && err != REG_NOERROR))
 3407         goto out_free;
 3408 
 3409       if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
 3410         need_word_trtable = true;
 3411 
 3412       dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 3413                             CONTEXT_NEWLINE);
 3414       if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
 3415         goto out_free;
 3416     }
 3417       else
 3418     {
 3419       dest_states_word[i] = dest_states[i];
 3420       dest_states_nl[i] = dest_states[i];
 3421     }
 3422       bitset_merge (acceptable, dests_ch[i]);
 3423     }
 3424 
 3425   if (!__glibc_unlikely (need_word_trtable))
 3426     {
 3427       /* We don't care about whether the following character is a word
 3428      character, or we are in a single-byte character set so we can
 3429      discern by looking at the character code: allocate a
 3430      256-entry transition table.  */
 3431       trtable = state->trtable =
 3432     (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
 3433       if (__glibc_unlikely (trtable == NULL))
 3434     goto out_free;
 3435 
 3436       /* For all characters ch...:  */
 3437       for (i = 0; i < BITSET_WORDS; ++i)
 3438     for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
 3439          elem;
 3440          mask <<= 1, elem >>= 1, ++ch)
 3441       if (__glibc_unlikely (elem & 1))
 3442         {
 3443           /* There must be exactly one destination which accepts
 3444          character ch.  See group_nodes_into_DFAstates.  */
 3445           for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
 3446         ;
 3447 
 3448           /* j-th destination accepts the word character ch.  */
 3449           if (dfa->word_char[i] & mask)
 3450         trtable[ch] = dest_states_word[j];
 3451           else
 3452         trtable[ch] = dest_states[j];
 3453         }
 3454     }
 3455   else
 3456     {
 3457       /* We care about whether the following character is a word
 3458      character, and we are in a multi-byte character set: discern
 3459      by looking at the character code: build two 256-entry
 3460      transition tables, one starting at trtable[0] and one
 3461      starting at trtable[SBC_MAX].  */
 3462       trtable = state->word_trtable =
 3463     (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
 3464       if (__glibc_unlikely (trtable == NULL))
 3465     goto out_free;
 3466 
 3467       /* For all characters ch...:  */
 3468       for (i = 0; i < BITSET_WORDS; ++i)
 3469     for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
 3470          elem;
 3471          mask <<= 1, elem >>= 1, ++ch)
 3472       if (__glibc_unlikely (elem & 1))
 3473         {
 3474           /* There must be exactly one destination which accepts
 3475          character ch.  See group_nodes_into_DFAstates.  */
 3476           for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
 3477         ;
 3478 
 3479           /* j-th destination accepts the word character ch.  */
 3480           trtable[ch] = dest_states[j];
 3481           trtable[ch + SBC_MAX] = dest_states_word[j];
 3482         }
 3483     }
 3484 
 3485   /* new line */
 3486   if (bitset_contain (acceptable, NEWLINE_CHAR))
 3487     {
 3488       /* The current state accepts newline character.  */
 3489       for (j = 0; j < ndests; ++j)
 3490     if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
 3491       {
 3492         /* k-th destination accepts newline character.  */
 3493         trtable[NEWLINE_CHAR] = dest_states_nl[j];
 3494         if (need_word_trtable)
 3495           trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
 3496         /* There must be only one destination which accepts
 3497            newline.  See group_nodes_into_DFAstates.  */
 3498         break;
 3499       }
 3500     }
 3501 
 3502   if (dest_states_malloced)
 3503     re_free (dest_states);
 3504 
 3505   re_node_set_free (&follows);
 3506   for (i = 0; i < ndests; ++i)
 3507     re_node_set_free (dests_node + i);
 3508 
 3509   if (dests_node_malloced)
 3510     re_free (dests_alloc);
 3511 
 3512   return true;
 3513 }
 3514 
 3515 /* Group all nodes belonging to STATE into several destinations.
 3516    Then for all destinations, set the nodes belonging to the destination
 3517    to DESTS_NODE[i] and set the characters accepted by the destination
 3518    to DEST_CH[i].  This function return the number of destinations.  */
 3519 
 3520 static Idx
 3521 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 3522                 re_node_set *dests_node, bitset_t *dests_ch)
 3523 {
 3524   reg_errcode_t err;
 3525   bool ok;
 3526   Idx i, j, k;
 3527   Idx ndests; /* Number of the destinations from 'state'.  */
 3528   bitset_t accepts; /* Characters a node can accept.  */
 3529   const re_node_set *cur_nodes = &state->nodes;
 3530   bitset_empty (accepts);
 3531   ndests = 0;
 3532 
 3533   /* For all the nodes belonging to 'state',  */
 3534   for (i = 0; i < cur_nodes->nelem; ++i)
 3535     {
 3536       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
 3537       re_token_type_t type = node->type;
 3538       unsigned int constraint = node->constraint;
 3539 
 3540       /* Enumerate all single byte character this node can accept.  */
 3541       if (type == CHARACTER)
 3542     bitset_set (accepts, node->opr.c);
 3543       else if (type == SIMPLE_BRACKET)
 3544     {
 3545       bitset_merge (accepts, node->opr.sbcset);
 3546     }
 3547       else if (type == OP_PERIOD)
 3548     {
 3549 #ifdef RE_ENABLE_I18N
 3550       if (dfa->mb_cur_max > 1)
 3551         bitset_merge (accepts, dfa->sb_char);
 3552       else
 3553 #endif
 3554         bitset_set_all (accepts);
 3555       if (!(dfa->syntax & RE_DOT_NEWLINE))
 3556         bitset_clear (accepts, '\n');
 3557       if (dfa->syntax & RE_DOT_NOT_NULL)
 3558         bitset_clear (accepts, '\0');
 3559     }
 3560 #ifdef RE_ENABLE_I18N
 3561       else if (type == OP_UTF8_PERIOD)
 3562     {
 3563       if (ASCII_CHARS % BITSET_WORD_BITS == 0)
 3564         memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
 3565       else
 3566         bitset_merge (accepts, utf8_sb_map);
 3567       if (!(dfa->syntax & RE_DOT_NEWLINE))
 3568         bitset_clear (accepts, '\n');
 3569       if (dfa->syntax & RE_DOT_NOT_NULL)
 3570         bitset_clear (accepts, '\0');
 3571     }
 3572 #endif
 3573       else
 3574     continue;
 3575 
 3576       /* Check the 'accepts' and sift the characters which are not
 3577      match it the context.  */
 3578       if (constraint)
 3579     {
 3580       if (constraint & NEXT_NEWLINE_CONSTRAINT)
 3581         {
 3582           bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
 3583           bitset_empty (accepts);
 3584           if (accepts_newline)
 3585         bitset_set (accepts, NEWLINE_CHAR);
 3586           else
 3587         continue;
 3588         }
 3589       if (constraint & NEXT_ENDBUF_CONSTRAINT)
 3590         {
 3591           bitset_empty (accepts);
 3592           continue;
 3593         }
 3594 
 3595       if (constraint & NEXT_WORD_CONSTRAINT)
 3596         {
 3597           bitset_word_t any_set = 0;
 3598           if (type == CHARACTER && !node->word_char)
 3599         {
 3600           bitset_empty (accepts);
 3601           continue;
 3602         }
 3603 #ifdef RE_ENABLE_I18N
 3604           if (dfa->mb_cur_max > 1)
 3605         for (j = 0; j < BITSET_WORDS; ++j)
 3606           any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
 3607           else
 3608 #endif
 3609         for (j = 0; j < BITSET_WORDS; ++j)
 3610           any_set |= (accepts[j] &= dfa->word_char[j]);
 3611           if (!any_set)
 3612         continue;
 3613         }
 3614       if (constraint & NEXT_NOTWORD_CONSTRAINT)
 3615         {
 3616           bitset_word_t any_set = 0;
 3617           if (type == CHARACTER && node->word_char)
 3618         {
 3619           bitset_empty (accepts);
 3620           continue;
 3621         }
 3622 #ifdef RE_ENABLE_I18N
 3623           if (dfa->mb_cur_max > 1)
 3624         for (j = 0; j < BITSET_WORDS; ++j)
 3625           any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
 3626           else
 3627 #endif
 3628         for (j = 0; j < BITSET_WORDS; ++j)
 3629           any_set |= (accepts[j] &= ~dfa->word_char[j]);
 3630           if (!any_set)
 3631         continue;
 3632         }
 3633     }
 3634 
 3635       /* Then divide 'accepts' into DFA states, or create a new
 3636      state.  Above, we make sure that accepts is not empty.  */
 3637       for (j = 0; j < ndests; ++j)
 3638     {
 3639       bitset_t intersec; /* Intersection sets, see below.  */
 3640       bitset_t remains;
 3641       /* Flags, see below.  */
 3642       bitset_word_t has_intersec, not_subset, not_consumed;
 3643 
 3644       /* Optimization, skip if this state doesn't accept the character.  */
 3645       if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
 3646         continue;
 3647 
 3648       /* Enumerate the intersection set of this state and 'accepts'.  */
 3649       has_intersec = 0;
 3650       for (k = 0; k < BITSET_WORDS; ++k)
 3651         has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
 3652       /* And skip if the intersection set is empty.  */
 3653       if (!has_intersec)
 3654         continue;
 3655 
 3656       /* Then check if this state is a subset of 'accepts'.  */
 3657       not_subset = not_consumed = 0;
 3658       for (k = 0; k < BITSET_WORDS; ++k)
 3659         {
 3660           not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
 3661           not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
 3662         }
 3663 
 3664       /* If this state isn't a subset of 'accepts', create a
 3665          new group state, which has the 'remains'. */
 3666       if (not_subset)
 3667         {
 3668           bitset_copy (dests_ch[ndests], remains);
 3669           bitset_copy (dests_ch[j], intersec);
 3670           err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
 3671           if (__glibc_unlikely (err != REG_NOERROR))
 3672         goto error_return;
 3673           ++ndests;
 3674         }
 3675 
 3676       /* Put the position in the current group. */
 3677       ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
 3678       if (__glibc_unlikely (! ok))
 3679         goto error_return;
 3680 
 3681       /* If all characters are consumed, go to next node. */
 3682       if (!not_consumed)
 3683         break;
 3684     }
 3685       /* Some characters remain, create a new group. */
 3686       if (j == ndests)
 3687     {
 3688       bitset_copy (dests_ch[ndests], accepts);
 3689       err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
 3690       if (__glibc_unlikely (err != REG_NOERROR))
 3691         goto error_return;
 3692       ++ndests;
 3693       bitset_empty (accepts);
 3694     }
 3695     }
 3696   return ndests;
 3697  error_return:
 3698   for (j = 0; j < ndests; ++j)
 3699     re_node_set_free (dests_node + j);
 3700   return -1;
 3701 }
 3702 
 3703 #ifdef RE_ENABLE_I18N
 3704 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
 3705    Return the number of the bytes the node accepts.
 3706    STR_IDX is the current index of the input string.
 3707 
 3708    This function handles the nodes which can accept one character, or
 3709    one collating element like '.', '[a-z]', opposite to the other nodes
 3710    can only accept one byte.  */
 3711 
 3712 # ifdef _LIBC
 3713 #  include <locale/weight.h>
 3714 # endif
 3715 
 3716 static int
 3717 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 3718              const re_string_t *input, Idx str_idx)
 3719 {
 3720   const re_token_t *node = dfa->nodes + node_idx;
 3721   int char_len, elem_len;
 3722   Idx i;
 3723 
 3724   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
 3725     {
 3726       unsigned char c = re_string_byte_at (input, str_idx), d;
 3727       if (__glibc_likely (c < 0xc2))
 3728     return 0;
 3729 
 3730       if (str_idx + 2 > input->len)
 3731     return 0;
 3732 
 3733       d = re_string_byte_at (input, str_idx + 1);
 3734       if (c < 0xe0)
 3735     return (d < 0x80 || d > 0xbf) ? 0 : 2;
 3736       else if (c < 0xf0)
 3737     {
 3738       char_len = 3;
 3739       if (c == 0xe0 && d < 0xa0)
 3740         return 0;
 3741     }
 3742       else if (c < 0xf8)
 3743     {
 3744       char_len = 4;
 3745       if (c == 0xf0 && d < 0x90)
 3746         return 0;
 3747     }
 3748       else if (c < 0xfc)
 3749     {
 3750       char_len = 5;
 3751       if (c == 0xf8 && d < 0x88)
 3752         return 0;
 3753     }
 3754       else if (c < 0xfe)
 3755     {
 3756       char_len = 6;
 3757       if (c == 0xfc && d < 0x84)
 3758         return 0;
 3759     }
 3760       else
 3761     return 0;
 3762 
 3763       if (str_idx + char_len > input->len)
 3764     return 0;
 3765 
 3766       for (i = 1; i < char_len; ++i)
 3767     {
 3768       d = re_string_byte_at (input, str_idx + i);
 3769       if (d < 0x80 || d > 0xbf)
 3770         return 0;
 3771     }
 3772       return char_len;
 3773     }
 3774 
 3775   char_len = re_string_char_size_at (input, str_idx);
 3776   if (node->type == OP_PERIOD)
 3777     {
 3778       if (char_len <= 1)
 3779     return 0;
 3780       /* FIXME: I don't think this if is needed, as both '\n'
 3781      and '\0' are char_len == 1.  */
 3782       /* '.' accepts any one character except the following two cases.  */
 3783       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
 3784        re_string_byte_at (input, str_idx) == '\n') ||
 3785       ((dfa->syntax & RE_DOT_NOT_NULL) &&
 3786        re_string_byte_at (input, str_idx) == '\0'))
 3787     return 0;
 3788       return char_len;
 3789     }
 3790 
 3791   elem_len = re_string_elem_size_at (input, str_idx);
 3792   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
 3793     return 0;
 3794 
 3795   if (node->type == COMPLEX_BRACKET)
 3796     {
 3797       const re_charset_t *cset = node->opr.mbcset;
 3798 # ifdef _LIBC
 3799       const unsigned char *pin
 3800     = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
 3801       Idx j;
 3802       uint32_t nrules;
 3803 # endif /* _LIBC */
 3804       int match_len = 0;
 3805       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
 3806             ? re_string_wchar_at (input, str_idx) : 0);
 3807 
 3808       /* match with multibyte character?  */
 3809       for (i = 0; i < cset->nmbchars; ++i)
 3810     if (wc == cset->mbchars[i])
 3811       {
 3812         match_len = char_len;
 3813         goto check_node_accept_bytes_match;
 3814       }
 3815       /* match with character_class?  */
 3816       for (i = 0; i < cset->nchar_classes; ++i)
 3817     {
 3818       wctype_t wt = cset->char_classes[i];
 3819       if (__iswctype (wc, wt))
 3820         {
 3821           match_len = char_len;
 3822           goto check_node_accept_bytes_match;
 3823         }
 3824     }
 3825 
 3826 # ifdef _LIBC
 3827       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 3828       if (nrules != 0)
 3829     {
 3830       unsigned int in_collseq = 0;
 3831       const int32_t *table, *indirect;
 3832       const unsigned char *weights, *extra;
 3833       const char *collseqwc;
 3834 
 3835       /* match with collating_symbol?  */
 3836       if (cset->ncoll_syms)
 3837         extra = (const unsigned char *)
 3838           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 3839       for (i = 0; i < cset->ncoll_syms; ++i)
 3840         {
 3841           const unsigned char *coll_sym = extra + cset->coll_syms[i];
 3842           /* Compare the length of input collating element and
 3843          the length of current collating element.  */
 3844           if (*coll_sym != elem_len)
 3845         continue;
 3846           /* Compare each bytes.  */
 3847           for (j = 0; j < *coll_sym; j++)
 3848         if (pin[j] != coll_sym[1 + j])
 3849           break;
 3850           if (j == *coll_sym)
 3851         {
 3852           /* Match if every bytes is equal.  */
 3853           match_len = j;
 3854           goto check_node_accept_bytes_match;
 3855         }
 3856         }
 3857 
 3858       if (cset->nranges)
 3859         {
 3860           if (elem_len <= char_len)
 3861         {
 3862           collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
 3863           in_collseq = __collseq_table_lookup (collseqwc, wc);
 3864         }
 3865           else
 3866         in_collseq = find_collation_sequence_value (pin, elem_len);
 3867         }
 3868       /* match with range expression?  */
 3869       /* FIXME: Implement rational ranges here, too.  */
 3870       for (i = 0; i < cset->nranges; ++i)
 3871         if (cset->range_starts[i] <= in_collseq
 3872         && in_collseq <= cset->range_ends[i])
 3873           {
 3874         match_len = elem_len;
 3875         goto check_node_accept_bytes_match;
 3876           }
 3877 
 3878       /* match with equivalence_class?  */
 3879       if (cset->nequiv_classes)
 3880         {
 3881           const unsigned char *cp = pin;
 3882           table = (const int32_t *)
 3883         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 3884           weights = (const unsigned char *)
 3885         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 3886           extra = (const unsigned char *)
 3887         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 3888           indirect = (const int32_t *)
 3889         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 3890           int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
 3891           int32_t rule = idx >> 24;
 3892           idx &= 0xffffff;
 3893           if (idx > 0)
 3894         {
 3895           size_t weight_len = weights[idx];
 3896           for (i = 0; i < cset->nequiv_classes; ++i)
 3897             {
 3898               int32_t equiv_class_idx = cset->equiv_classes[i];
 3899               int32_t equiv_class_rule = equiv_class_idx >> 24;
 3900               equiv_class_idx &= 0xffffff;
 3901               if (weights[equiv_class_idx] == weight_len
 3902               && equiv_class_rule == rule
 3903               && memcmp (weights + idx + 1,
 3904                      weights + equiv_class_idx + 1,
 3905                      weight_len) == 0)
 3906             {
 3907               match_len = elem_len;
 3908               goto check_node_accept_bytes_match;
 3909             }
 3910             }
 3911         }
 3912         }
 3913     }
 3914       else
 3915 # endif /* _LIBC */
 3916     {
 3917       /* match with range expression?  */
 3918       for (i = 0; i < cset->nranges; ++i)
 3919         {
 3920           if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
 3921         {
 3922           match_len = char_len;
 3923           goto check_node_accept_bytes_match;
 3924         }
 3925         }
 3926     }
 3927     check_node_accept_bytes_match:
 3928       if (!cset->non_match)
 3929     return match_len;
 3930       else
 3931     {
 3932       if (match_len > 0)
 3933         return 0;
 3934       else
 3935         return (elem_len > char_len) ? elem_len : char_len;
 3936     }
 3937     }
 3938   return 0;
 3939 }
 3940 
 3941 # ifdef _LIBC
 3942 static unsigned int
 3943 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
 3944 {
 3945   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 3946   if (nrules == 0)
 3947     {
 3948       if (mbs_len == 1)
 3949     {
 3950       /* No valid character.  Match it as a single byte character.  */
 3951       const unsigned char *collseq = (const unsigned char *)
 3952         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
 3953       return collseq[mbs[0]];
 3954     }
 3955       return UINT_MAX;
 3956     }
 3957   else
 3958     {
 3959       int32_t idx;
 3960       const unsigned char *extra = (const unsigned char *)
 3961     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 3962       int32_t extrasize = (const unsigned char *)
 3963     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
 3964 
 3965       for (idx = 0; idx < extrasize;)
 3966     {
 3967       int mbs_cnt;
 3968       bool found = false;
 3969       int32_t elem_mbs_len;
 3970       /* Skip the name of collating element name.  */
 3971       idx = idx + extra[idx] + 1;
 3972       elem_mbs_len = extra[idx++];
 3973       if (mbs_len == elem_mbs_len)
 3974         {
 3975           for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
 3976         if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
 3977           break;
 3978           if (mbs_cnt == elem_mbs_len)
 3979         /* Found the entry.  */
 3980         found = true;
 3981         }
 3982       /* Skip the byte sequence of the collating element.  */
 3983       idx += elem_mbs_len;
 3984       /* Adjust for the alignment.  */
 3985       idx = (idx + 3) & ~3;
 3986       /* Skip the collation sequence value.  */
 3987       idx += sizeof (uint32_t);
 3988       /* Skip the wide char sequence of the collating element.  */
 3989       idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
 3990       /* If we found the entry, return the sequence value.  */
 3991       if (found)
 3992         return *(uint32_t *) (extra + idx);
 3993       /* Skip the collation sequence value.  */
 3994       idx += sizeof (uint32_t);
 3995     }
 3996       return UINT_MAX;
 3997     }
 3998 }
 3999 # endif /* _LIBC */
 4000 #endif /* RE_ENABLE_I18N */
 4001 
 4002 /* Check whether the node accepts the byte which is IDX-th
 4003    byte of the INPUT.  */
 4004 
 4005 static bool
 4006 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
 4007            Idx idx)
 4008 {
 4009   unsigned char ch;
 4010   ch = re_string_byte_at (&mctx->input, idx);
 4011   switch (node->type)
 4012     {
 4013     case CHARACTER:
 4014       if (node->opr.c != ch)
 4015         return false;
 4016       break;
 4017 
 4018     case SIMPLE_BRACKET:
 4019       if (!bitset_contain (node->opr.sbcset, ch))
 4020         return false;
 4021       break;
 4022 
 4023 #ifdef RE_ENABLE_I18N
 4024     case OP_UTF8_PERIOD:
 4025       if (ch >= ASCII_CHARS)
 4026         return false;
 4027       FALLTHROUGH;
 4028 #endif
 4029     case OP_PERIOD:
 4030       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
 4031       || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
 4032     return false;
 4033       break;
 4034 
 4035     default:
 4036       return false;
 4037     }
 4038 
 4039   if (node->constraint)
 4040     {
 4041       /* The node has constraints.  Check whether the current context
 4042      satisfies the constraints.  */
 4043       unsigned int context = re_string_context_at (&mctx->input, idx,
 4044                            mctx->eflags);
 4045       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
 4046     return false;
 4047     }
 4048 
 4049   return true;
 4050 }
 4051 
 4052 /* Extend the buffers, if the buffers have run out.  */
 4053 
 4054 static reg_errcode_t
 4055 __attribute_warn_unused_result__
 4056 extend_buffers (re_match_context_t *mctx, int min_len)
 4057 {
 4058   reg_errcode_t ret;
 4059   re_string_t *pstr = &mctx->input;
 4060 
 4061   /* Avoid overflow.  */
 4062   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
 4063             <= pstr->bufs_len))
 4064     return REG_ESPACE;
 4065 
 4066   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
 4067   ret = re_string_realloc_buffers (pstr,
 4068                    MAX (min_len,
 4069                     MIN (pstr->len, pstr->bufs_len * 2)));
 4070   if (__glibc_unlikely (ret != REG_NOERROR))
 4071     return ret;
 4072 
 4073   if (mctx->state_log != NULL)
 4074     {
 4075       /* And double the length of state_log.  */
 4076       /* XXX We have no indication of the size of this buffer.  If this
 4077      allocation fail we have no indication that the state_log array
 4078      does not have the right size.  */
 4079       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
 4080                           pstr->bufs_len + 1);
 4081       if (__glibc_unlikely (new_array == NULL))
 4082     return REG_ESPACE;
 4083       mctx->state_log = new_array;
 4084     }
 4085 
 4086   /* Then reconstruct the buffers.  */
 4087   if (pstr->icase)
 4088     {
 4089 #ifdef RE_ENABLE_I18N
 4090       if (pstr->mb_cur_max > 1)
 4091     {
 4092       ret = build_wcs_upper_buffer (pstr);
 4093       if (__glibc_unlikely (ret != REG_NOERROR))
 4094         return ret;
 4095     }
 4096       else
 4097 #endif /* RE_ENABLE_I18N  */
 4098     build_upper_buffer (pstr);
 4099     }
 4100   else
 4101     {
 4102 #ifdef RE_ENABLE_I18N
 4103       if (pstr->mb_cur_max > 1)
 4104     build_wcs_buffer (pstr);
 4105       else
 4106 #endif /* RE_ENABLE_I18N  */
 4107     {
 4108       if (pstr->trans != NULL)
 4109         re_string_translate_buffer (pstr);
 4110     }
 4111     }
 4112   return REG_NOERROR;
 4113 }
 4114 
 4115 
 4116 /* Functions for matching context.  */
 4117 
 4118 /* Initialize MCTX.  */
 4119 
 4120 static reg_errcode_t
 4121 __attribute_warn_unused_result__
 4122 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
 4123 {
 4124   mctx->eflags = eflags;
 4125   mctx->match_last = -1;
 4126   if (n > 0)
 4127     {
 4128       /* Avoid overflow.  */
 4129       size_t max_object_size =
 4130     MAX (sizeof (struct re_backref_cache_entry),
 4131          sizeof (re_sub_match_top_t *));
 4132       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
 4133     return REG_ESPACE;
 4134 
 4135       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
 4136       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
 4137       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
 4138     return REG_ESPACE;
 4139     }
 4140   /* Already zero-ed by the caller.
 4141      else
 4142        mctx->bkref_ents = NULL;
 4143      mctx->nbkref_ents = 0;
 4144      mctx->nsub_tops = 0;  */
 4145   mctx->abkref_ents = n;
 4146   mctx->max_mb_elem_len = 1;
 4147   mctx->asub_tops = n;
 4148   return REG_NOERROR;
 4149 }
 4150 
 4151 /* Clean the entries which depend on the current input in MCTX.
 4152    This function must be invoked when the matcher changes the start index
 4153    of the input, or changes the input string.  */
 4154 
 4155 static void
 4156 match_ctx_clean (re_match_context_t *mctx)
 4157 {
 4158   Idx st_idx;
 4159   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
 4160     {
 4161       Idx sl_idx;
 4162       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
 4163       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
 4164     {
 4165       re_sub_match_last_t *last = top->lasts[sl_idx];
 4166       re_free (last->path.array);
 4167       re_free (last);
 4168     }
 4169       re_free (top->lasts);
 4170       if (top->path)
 4171     {
 4172       re_free (top->path->array);
 4173       re_free (top->path);
 4174     }
 4175       re_free (top);
 4176     }
 4177 
 4178   mctx->nsub_tops = 0;
 4179   mctx->nbkref_ents = 0;
 4180 }
 4181 
 4182 /* Free all the memory associated with MCTX.  */
 4183 
 4184 static void
 4185 match_ctx_free (re_match_context_t *mctx)
 4186 {
 4187   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
 4188   match_ctx_clean (mctx);
 4189   re_free (mctx->sub_tops);
 4190   re_free (mctx->bkref_ents);
 4191 }
 4192 
 4193 /* Add a new backreference entry to MCTX.
 4194    Note that we assume that caller never call this function with duplicate
 4195    entry, and call with STR_IDX which isn't smaller than any existing entry.
 4196 */
 4197 
 4198 static reg_errcode_t
 4199 __attribute_warn_unused_result__
 4200 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
 4201              Idx to)
 4202 {
 4203   if (mctx->nbkref_ents >= mctx->abkref_ents)
 4204     {
 4205       struct re_backref_cache_entry* new_entry;
 4206       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
 4207                   mctx->abkref_ents * 2);
 4208       if (__glibc_unlikely (new_entry == NULL))
 4209     {
 4210       re_free (mctx->bkref_ents);
 4211       return REG_ESPACE;
 4212     }
 4213       mctx->bkref_ents = new_entry;
 4214       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
 4215           sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
 4216       mctx->abkref_ents *= 2;
 4217     }
 4218   if (mctx->nbkref_ents > 0
 4219       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
 4220     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
 4221 
 4222   mctx->bkref_ents[mctx->nbkref_ents].node = node;
 4223   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
 4224   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
 4225   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
 4226 
 4227   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
 4228      If bit N is clear, means that this entry won't epsilon-transition to
 4229      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
 4230      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
 4231      such node.
 4232 
 4233      A backreference does not epsilon-transition unless it is empty, so set
 4234      to all zeros if FROM != TO.  */
 4235   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
 4236     = (from == to ? -1 : 0);
 4237 
 4238   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
 4239   if (mctx->max_mb_elem_len < to - from)
 4240     mctx->max_mb_elem_len = to - from;
 4241   return REG_NOERROR;
 4242 }
 4243 
 4244 /* Return the first entry with the same str_idx, or -1 if none is
 4245    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
 4246 
 4247 static Idx
 4248 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
 4249 {
 4250   Idx left, right, mid, last;
 4251   last = right = mctx->nbkref_ents;
 4252   for (left = 0; left < right;)
 4253     {
 4254       mid = (left + right) / 2;
 4255       if (mctx->bkref_ents[mid].str_idx < str_idx)
 4256     left = mid + 1;
 4257       else
 4258     right = mid;
 4259     }
 4260   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
 4261     return left;
 4262   else
 4263     return -1;
 4264 }
 4265 
 4266 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
 4267    at STR_IDX.  */
 4268 
 4269 static reg_errcode_t
 4270 __attribute_warn_unused_result__
 4271 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
 4272 {
 4273 #ifdef DEBUG
 4274   assert (mctx->sub_tops != NULL);
 4275   assert (mctx->asub_tops > 0);
 4276 #endif
 4277   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
 4278     {
 4279       Idx new_asub_tops = mctx->asub_tops * 2;
 4280       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
 4281                            re_sub_match_top_t *,
 4282                            new_asub_tops);
 4283       if (__glibc_unlikely (new_array == NULL))
 4284     return REG_ESPACE;
 4285       mctx->sub_tops = new_array;
 4286       mctx->asub_tops = new_asub_tops;
 4287     }
 4288   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
 4289   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
 4290     return REG_ESPACE;
 4291   mctx->sub_tops[mctx->nsub_tops]->node = node;
 4292   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
 4293   return REG_NOERROR;
 4294 }
 4295 
 4296 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
 4297    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
 4298 
 4299 static re_sub_match_last_t *
 4300 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
 4301 {
 4302   re_sub_match_last_t *new_entry;
 4303   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
 4304     {
 4305       Idx new_alasts = 2 * subtop->alasts + 1;
 4306       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
 4307                             re_sub_match_last_t *,
 4308                             new_alasts);
 4309       if (__glibc_unlikely (new_array == NULL))
 4310     return NULL;
 4311       subtop->lasts = new_array;
 4312       subtop->alasts = new_alasts;
 4313     }
 4314   new_entry = calloc (1, sizeof (re_sub_match_last_t));
 4315   if (__glibc_likely (new_entry != NULL))
 4316     {
 4317       subtop->lasts[subtop->nlasts] = new_entry;
 4318       new_entry->node = node;
 4319       new_entry->str_idx = str_idx;
 4320       ++subtop->nlasts;
 4321     }
 4322   return new_entry;
 4323 }
 4324 
 4325 static void
 4326 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
 4327            re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
 4328 {
 4329   sctx->sifted_states = sifted_sts;
 4330   sctx->limited_states = limited_sts;
 4331   sctx->last_node = last_node;
 4332   sctx->last_str_idx = last_str_idx;
 4333   re_node_set_init_empty (&sctx->limits);
 4334 }