"Fossies" - the Fresh Open Source Software Archive

Member "gawk-5.1.0/support/regexec.c" (6 Feb 2020, 129919 Bytes) of package /linux/misc/gawk-5.1.0.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "regexec.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.0.1_vs_5.1.0.

    1 /* Extended regular expression matching and search library.
    2    Copyright (C) 2002-2019 Free Software Foundation, Inc.
    3    This file is part of the GNU C Library.
    4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
    5 
    6    The GNU C Library is free software; you can redistribute it and/or
    7    modify it under the terms of the GNU Lesser General Public
    8    License as published by the Free Software Foundation; either
    9    version 2.1 of the License, or (at your option) any later version.
   10 
   11    The GNU C Library is distributed in the hope that it will be useful,
   12    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14    Lesser General Public License for more details.
   15 
   16    You should have received a copy of the GNU Lesser General Public
   17    License along with the GNU C Library; if not, see
   18    <https://www.gnu.org/licenses/>.  */
   19 
   20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
   21                      Idx n);
   22 static void match_ctx_clean (re_match_context_t *mctx);
   23 static void match_ctx_free (re_match_context_t *cache);
   24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
   25                       Idx str_idx, Idx from, Idx to);
   26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
   27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
   28                        Idx str_idx);
   29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
   30                             Idx node, Idx str_idx);
   31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
   32                re_dfastate_t **limited_sts, Idx last_node,
   33                Idx last_str_idx);
   34 static reg_errcode_t re_search_internal (const regex_t *preg,
   35                      const char *string, Idx length,
   36                      Idx start, Idx last_start, Idx stop,
   37                      size_t nmatch, regmatch_t pmatch[],
   38                      int eflags);
   39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
   40                   const char *string1, Idx length1,
   41                   const char *string2, Idx length2,
   42                   Idx start, regoff_t range,
   43                   struct re_registers *regs,
   44                   Idx stop, bool ret_len);
   45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
   46                 const char *string, Idx length, Idx start,
   47                 regoff_t range, Idx stop,
   48                 struct re_registers *regs,
   49                 bool ret_len);
   50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
   51                               Idx nregs, int regs_allocated);
   52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
   53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
   54                Idx *p_match_first);
   55 static Idx check_halt_state_context (const re_match_context_t *mctx,
   56                      const re_dfastate_t *state, Idx idx);
   57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
   58              regmatch_t *prev_idx_match, Idx cur_node,
   59              Idx cur_idx, Idx nmatch);
   60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
   61                       Idx str_idx, Idx dest_node, Idx nregs,
   62                       regmatch_t *regs,
   63                       re_node_set *eps_via_nodes);
   64 static reg_errcode_t set_regs (const regex_t *preg,
   65                    const re_match_context_t *mctx,
   66                    size_t nmatch, regmatch_t *pmatch,
   67                    bool fl_backtrack);
   68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
   69 
   70 #ifdef RE_ENABLE_I18N
   71 static int sift_states_iter_mb (const re_match_context_t *mctx,
   72                 re_sift_context_t *sctx,
   73                 Idx node_idx, Idx str_idx, Idx max_str_idx);
   74 #endif /* RE_ENABLE_I18N */
   75 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
   76                        re_sift_context_t *sctx);
   77 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
   78                       re_sift_context_t *sctx, Idx str_idx,
   79                       re_node_set *cur_dest);
   80 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
   81                           re_sift_context_t *sctx,
   82                           Idx str_idx,
   83                           re_node_set *dest_nodes);
   84 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
   85                         re_node_set *dest_nodes,
   86                         const re_node_set *candidates);
   87 static bool check_dst_limits (const re_match_context_t *mctx,
   88                   const re_node_set *limits,
   89                   Idx dst_node, Idx dst_idx, Idx src_node,
   90                   Idx src_idx);
   91 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
   92                     int boundaries, Idx subexp_idx,
   93                     Idx from_node, Idx bkref_idx);
   94 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
   95                       Idx limit, Idx subexp_idx,
   96                       Idx node, Idx str_idx,
   97                       Idx bkref_idx);
   98 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
   99                       re_node_set *dest_nodes,
  100                       const re_node_set *candidates,
  101                       re_node_set *limits,
  102                       struct re_backref_cache_entry *bkref_ents,
  103                       Idx str_idx);
  104 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
  105                     re_sift_context_t *sctx,
  106                     Idx str_idx, const re_node_set *candidates);
  107 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
  108                     re_dfastate_t **dst,
  109                     re_dfastate_t **src, Idx num);
  110 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
  111                      re_match_context_t *mctx);
  112 static re_dfastate_t *transit_state (reg_errcode_t *err,
  113                      re_match_context_t *mctx,
  114                      re_dfastate_t *state);
  115 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
  116                         re_match_context_t *mctx,
  117                         re_dfastate_t *next_state);
  118 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
  119                         re_node_set *cur_nodes,
  120                         Idx str_idx);
  121 #if 0
  122 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
  123                     re_match_context_t *mctx,
  124                     re_dfastate_t *pstate);
  125 #endif
  126 #ifdef RE_ENABLE_I18N
  127 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
  128                        re_dfastate_t *pstate);
  129 #endif /* RE_ENABLE_I18N */
  130 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
  131                       const re_node_set *nodes);
  132 static reg_errcode_t get_subexp (re_match_context_t *mctx,
  133                  Idx bkref_node, Idx bkref_str_idx);
  134 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
  135                      const re_sub_match_top_t *sub_top,
  136                      re_sub_match_last_t *sub_last,
  137                      Idx bkref_node, Idx bkref_str);
  138 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
  139                  Idx subexp_idx, int type);
  140 static reg_errcode_t check_arrival (re_match_context_t *mctx,
  141                     state_array_t *path, Idx top_node,
  142                     Idx top_str, Idx last_node, Idx last_str,
  143                     int type);
  144 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
  145                            Idx str_idx,
  146                            re_node_set *cur_nodes,
  147                            re_node_set *next_nodes);
  148 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
  149                            re_node_set *cur_nodes,
  150                            Idx ex_subexp, int type);
  151 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
  152                            re_node_set *dst_nodes,
  153                            Idx target, Idx ex_subexp,
  154                            int type);
  155 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
  156                      re_node_set *cur_nodes, Idx cur_str,
  157                      Idx subexp_num, int type);
  158 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
  159 #ifdef RE_ENABLE_I18N
  160 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
  161                     const re_string_t *input, Idx idx);
  162 # ifdef _LIBC
  163 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
  164                            size_t name_len);
  165 # endif /* _LIBC */
  166 #endif /* RE_ENABLE_I18N */
  167 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
  168                        const re_dfastate_t *state,
  169                        re_node_set *states_node,
  170                        bitset_t *states_ch);
  171 static bool check_node_accept (const re_match_context_t *mctx,
  172                    const re_token_t *node, Idx idx);
  173 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
  174 
  175 /* Entry point for POSIX code.  */
  176 
  177 /* regexec searches for a given pattern, specified by PREG, in the
  178    string STRING.
  179 
  180    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
  181    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
  182    least NMATCH elements, and we set them to the offsets of the
  183    corresponding matched substrings.
  184 
  185    EFLAGS specifies "execution flags" which affect matching: if
  186    REG_NOTBOL is set, then ^ does not match at the beginning of the
  187    string; if REG_NOTEOL is set, then $ does not match at the end.
  188 
  189    We return 0 if we find a match and REG_NOMATCH if not.  */
  190 
  191 int
  192 regexec (const regex_t *__restrict preg, const char *__restrict string,
  193      size_t nmatch, regmatch_t pmatch[], int eflags)
  194 {
  195   reg_errcode_t err;
  196   Idx start, length;
  197   re_dfa_t *dfa = preg->buffer;
  198 
  199   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
  200     return REG_BADPAT;
  201 
  202   if (eflags & REG_STARTEND)
  203     {
  204       start = pmatch[0].rm_so;
  205       length = pmatch[0].rm_eo;
  206     }
  207   else
  208     {
  209       start = 0;
  210       length = strlen (string);
  211     }
  212 
  213   lock_lock (dfa->lock);
  214   if (preg->no_sub)
  215     err = re_search_internal (preg, string, length, start, length,
  216                   length, 0, NULL, eflags);
  217   else
  218     err = re_search_internal (preg, string, length, start, length,
  219                   length, nmatch, pmatch, eflags);
  220   lock_unlock (dfa->lock);
  221   return err != REG_NOERROR;
  222 }
  223 
  224 #ifdef _LIBC
  225 libc_hidden_def (__regexec)
  226 
  227 # include <shlib-compat.h>
  228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
  229 
  230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
  231 __typeof__ (__regexec) __compat_regexec;
  232 
  233 int
  234 attribute_compat_text_section
  235 __compat_regexec (const regex_t *__restrict preg,
  236           const char *__restrict string, size_t nmatch,
  237           regmatch_t pmatch[], int eflags)
  238 {
  239   return regexec (preg, string, nmatch, pmatch,
  240           eflags & (REG_NOTBOL | REG_NOTEOL));
  241 }
  242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
  243 # endif
  244 #endif
  245 
  246 /* Entry points for GNU code.  */
  247 
  248 /* re_match, re_search, re_match_2, re_search_2
  249 
  250    The former two functions operate on STRING with length LENGTH,
  251    while the later two operate on concatenation of STRING1 and STRING2
  252    with lengths LENGTH1 and LENGTH2, respectively.
  253 
  254    re_match() matches the compiled pattern in BUFP against the string,
  255    starting at index START.
  256 
  257    re_search() first tries matching at index START, then it tries to match
  258    starting from index START + 1, and so on.  The last start position tried
  259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
  260    way as re_match().)
  261 
  262    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
  263    the first STOP characters of the concatenation of the strings should be
  264    concerned.
  265 
  266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
  267    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
  268    computed relative to the concatenation, not relative to the individual
  269    strings.)
  270 
  271    On success, re_match* functions return the length of the match, re_search*
  272    return the position of the start of the match.  Return value -1 means no
  273    match was found and -2 indicates an internal error.  */
  274 
  275 regoff_t
  276 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
  277       Idx start, struct re_registers *regs)
  278 {
  279   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
  280 }
  281 #ifdef _LIBC
  282 weak_alias (__re_match, re_match)
  283 #endif
  284 
  285 regoff_t
  286 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
  287        Idx start, regoff_t range, struct re_registers *regs)
  288 {
  289   return re_search_stub (bufp, string, length, start, range, length, regs,
  290              false);
  291 }
  292 #ifdef _LIBC
  293 weak_alias (__re_search, re_search)
  294 #endif
  295 
  296 regoff_t
  297 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
  298         const char *string2, Idx length2, Idx start,
  299         struct re_registers *regs, Idx stop)
  300 {
  301   return re_search_2_stub (bufp, string1, length1, string2, length2,
  302                start, 0, regs, stop, true);
  303 }
  304 #ifdef _LIBC
  305 weak_alias (__re_match_2, re_match_2)
  306 #endif
  307 
  308 regoff_t
  309 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
  310          const char *string2, Idx length2, Idx start, regoff_t range,
  311          struct re_registers *regs, Idx stop)
  312 {
  313   return re_search_2_stub (bufp, string1, length1, string2, length2,
  314                start, range, regs, stop, false);
  315 }
  316 #ifdef _LIBC
  317 weak_alias (__re_search_2, re_search_2)
  318 #endif
  319 
  320 static regoff_t
  321 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
  322           Idx length1, const char *string2, Idx length2, Idx start,
  323           regoff_t range, struct re_registers *regs,
  324           Idx stop, bool ret_len)
  325 {
  326   const char *str;
  327   regoff_t rval;
  328   Idx len;
  329   char *s = NULL;
  330 
  331   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
  332              || INT_ADD_WRAPV (length1, length2, &len))))
  333     return -2;
  334 
  335   /* Concatenate the strings.  */
  336   if (length2 > 0)
  337     if (length1 > 0)
  338       {
  339     s = re_malloc (char, len);
  340 
  341     if (__glibc_unlikely (s == NULL))
  342       return -2;
  343 #ifdef _LIBC
  344     memcpy (__mempcpy (s, string1, length1), string2, length2);
  345 #else
  346     memcpy (s, string1, length1);
  347     memcpy (s + length1, string2, length2);
  348 #endif
  349     str = s;
  350       }
  351     else
  352       str = string2;
  353   else
  354     str = string1;
  355 
  356   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
  357              ret_len);
  358   re_free (s);
  359   return rval;
  360 }
  361 
  362 /* The parameters have the same meaning as those of re_search.
  363    Additional parameters:
  364    If RET_LEN is true the length of the match is returned (re_match style);
  365    otherwise the position of the match is returned.  */
  366 
  367 static regoff_t
  368 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
  369         Idx start, regoff_t range, Idx stop, struct re_registers *regs,
  370         bool ret_len)
  371 {
  372   reg_errcode_t result;
  373   regmatch_t *pmatch;
  374   Idx nregs;
  375   regoff_t rval;
  376   int eflags = 0;
  377   re_dfa_t *dfa = bufp->buffer;
  378   Idx last_start = start + range;
  379 
  380   /* Check for out-of-range.  */
  381   if (__glibc_unlikely (start < 0 || start > length))
  382     return -1;
  383   if (__glibc_unlikely (length < last_start
  384             || (0 <= range && last_start < start)))
  385     last_start = length;
  386   else if (__glibc_unlikely (last_start < 0
  387                  || (range < 0 && start <= last_start)))
  388     last_start = 0;
  389 
  390   lock_lock (dfa->lock);
  391 
  392   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
  393   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
  394 
  395   /* Compile fastmap if we haven't yet.  */
  396   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
  397     re_compile_fastmap (bufp);
  398 
  399   if (__glibc_unlikely (bufp->no_sub))
  400     regs = NULL;
  401 
  402   /* We need at least 1 register.  */
  403   if (regs == NULL)
  404     nregs = 1;
  405   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
  406                  && regs->num_regs <= bufp->re_nsub))
  407     {
  408       nregs = regs->num_regs;
  409       if (__glibc_unlikely (nregs < 1))
  410     {
  411       /* Nothing can be copied to regs.  */
  412       regs = NULL;
  413       nregs = 1;
  414     }
  415     }
  416   else
  417     nregs = bufp->re_nsub + 1;
  418   pmatch = re_malloc (regmatch_t, nregs);
  419   if (__glibc_unlikely (pmatch == NULL))
  420     {
  421       rval = -2;
  422       goto out;
  423     }
  424 
  425   result = re_search_internal (bufp, string, length, start, last_start, stop,
  426                    nregs, pmatch, eflags);
  427 
  428   rval = 0;
  429 
  430   /* I hope we needn't fill their regs with -1's when no match was found.  */
  431   if (result != REG_NOERROR)
  432     rval = result == REG_NOMATCH ? -1 : -2;
  433   else if (regs != NULL)
  434     {
  435       /* If caller wants register contents data back, copy them.  */
  436       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
  437                        bufp->regs_allocated);
  438       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
  439     rval = -2;
  440     }
  441 
  442   if (__glibc_likely (rval == 0))
  443     {
  444       if (ret_len)
  445     {
  446       DEBUG_ASSERT (pmatch[0].rm_so == start);
  447       rval = pmatch[0].rm_eo - start;
  448     }
  449       else
  450     rval = pmatch[0].rm_so;
  451     }
  452   re_free (pmatch);
  453  out:
  454   lock_unlock (dfa->lock);
  455   return rval;
  456 }
  457 
  458 static unsigned
  459 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
  460           int regs_allocated)
  461 {
  462   int rval = REGS_REALLOCATE;
  463   Idx i;
  464   Idx need_regs = nregs + 1;
  465   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
  466      uses.  */
  467 
  468   /* Have the register data arrays been allocated?  */
  469   if (regs_allocated == REGS_UNALLOCATED)
  470     { /* No.  So allocate them with malloc.  */
  471       regs->start = re_malloc (regoff_t, need_regs);
  472       if (__glibc_unlikely (regs->start == NULL))
  473     return REGS_UNALLOCATED;
  474       regs->end = re_malloc (regoff_t, need_regs);
  475       if (__glibc_unlikely (regs->end == NULL))
  476     {
  477       re_free (regs->start);
  478       return REGS_UNALLOCATED;
  479     }
  480       regs->num_regs = need_regs;
  481     }
  482   else if (regs_allocated == REGS_REALLOCATE)
  483     { /* Yes.  If we need more elements than were already
  484      allocated, reallocate them.  If we need fewer, just
  485      leave it alone.  */
  486       if (__glibc_unlikely (need_regs > regs->num_regs))
  487     {
  488       regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
  489       regoff_t *new_end;
  490       if (__glibc_unlikely (new_start == NULL))
  491         return REGS_UNALLOCATED;
  492       new_end = re_realloc (regs->end, regoff_t, need_regs);
  493       if (__glibc_unlikely (new_end == NULL))
  494         {
  495           re_free (new_start);
  496           return REGS_UNALLOCATED;
  497         }
  498       regs->start = new_start;
  499       regs->end = new_end;
  500       regs->num_regs = need_regs;
  501     }
  502     }
  503   else
  504     {
  505       DEBUG_ASSERT (regs_allocated == REGS_FIXED);
  506       /* This function may not be called with REGS_FIXED and nregs too big.  */
  507       DEBUG_ASSERT (nregs <= regs->num_regs);
  508       rval = REGS_FIXED;
  509     }
  510 
  511   /* Copy the regs.  */
  512   for (i = 0; i < nregs; ++i)
  513     {
  514       regs->start[i] = pmatch[i].rm_so;
  515       regs->end[i] = pmatch[i].rm_eo;
  516     }
  517   for ( ; i < regs->num_regs; ++i)
  518     regs->start[i] = regs->end[i] = -1;
  519 
  520   return rval;
  521 }
  522 
  523 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
  524    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
  525    this memory for recording register information.  STARTS and ENDS
  526    must be allocated using the malloc library routine, and must each
  527    be at least NUM_REGS * sizeof (regoff_t) bytes long.
  528 
  529    If NUM_REGS == 0, then subsequent matches should allocate their own
  530    register data.
  531 
  532    Unless this function is called, the first search or match using
  533    PATTERN_BUFFER will allocate its own register data, without
  534    freeing the old data.  */
  535 
  536 void
  537 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
  538           __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
  539 {
  540   if (num_regs)
  541     {
  542       bufp->regs_allocated = REGS_REALLOCATE;
  543       regs->num_regs = num_regs;
  544       regs->start = starts;
  545       regs->end = ends;
  546     }
  547   else
  548     {
  549       bufp->regs_allocated = REGS_UNALLOCATED;
  550       regs->num_regs = 0;
  551       regs->start = regs->end = NULL;
  552     }
  553 }
  554 #ifdef _LIBC
  555 weak_alias (__re_set_registers, re_set_registers)
  556 #endif
  557 
  558 /* Entry points compatible with 4.2 BSD regex library.  We don't define
  559    them unless specifically requested.  */
  560 
  561 #if defined _REGEX_RE_COMP || defined _LIBC
  562 int
  563 # ifdef _LIBC
  564 weak_function
  565 # endif
  566 re_exec (const char *s)
  567 {
  568   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
  569 }
  570 #endif /* _REGEX_RE_COMP */
  571 
  572 /* Internal entry point.  */
  573 
  574 /* Searches for a compiled pattern PREG in the string STRING, whose
  575    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
  576    meaning as with regexec.  LAST_START is START + RANGE, where
  577    START and RANGE have the same meaning as with re_search.
  578    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
  579    otherwise return the error code.
  580    Note: We assume front end functions already check ranges.
  581    (0 <= LAST_START && LAST_START <= LENGTH)  */
  582 
  583 static reg_errcode_t
  584 __attribute_warn_unused_result__
  585 re_search_internal (const regex_t *preg, const char *string, Idx length,
  586             Idx start, Idx last_start, Idx stop, size_t nmatch,
  587             regmatch_t pmatch[], int eflags)
  588 {
  589   reg_errcode_t err;
  590   const re_dfa_t *dfa = preg->buffer;
  591   Idx left_lim, right_lim;
  592   int incr;
  593   bool fl_longest_match;
  594   int match_kind;
  595   Idx match_first;
  596   Idx match_last = -1;
  597   Idx extra_nmatch;
  598   bool sb;
  599   int ch;
  600   re_match_context_t mctx = { .dfa = dfa };
  601   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
  602             && start != last_start && !preg->can_be_null)
  603            ? preg->fastmap : NULL);
  604   RE_TRANSLATE_TYPE t = preg->translate;
  605 
  606   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
  607   nmatch -= extra_nmatch;
  608 
  609   /* Check if the DFA haven't been compiled.  */
  610   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
  611             || dfa->init_state_word == NULL
  612             || dfa->init_state_nl == NULL
  613             || dfa->init_state_begbuf == NULL))
  614     return REG_NOMATCH;
  615 
  616   /* We assume front-end functions already check them.  */
  617   DEBUG_ASSERT (0 <= last_start && last_start <= length);
  618 
  619   /* If initial states with non-begbuf contexts have no elements,
  620      the regex must be anchored.  If preg->newline_anchor is set,
  621      we'll never use init_state_nl, so do not check it.  */
  622   if (dfa->init_state->nodes.nelem == 0
  623       && dfa->init_state_word->nodes.nelem == 0
  624       && (dfa->init_state_nl->nodes.nelem == 0
  625       || !preg->newline_anchor))
  626     {
  627       if (start != 0 && last_start != 0)
  628         return REG_NOMATCH;
  629       start = last_start = 0;
  630     }
  631 
  632   /* We must check the longest matching, if nmatch > 0.  */
  633   fl_longest_match = (nmatch != 0 || dfa->nbackref);
  634 
  635   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
  636                 preg->translate, (preg->syntax & RE_ICASE) != 0,
  637                 dfa);
  638   if (__glibc_unlikely (err != REG_NOERROR))
  639     goto free_return;
  640   mctx.input.stop = stop;
  641   mctx.input.raw_stop = stop;
  642   mctx.input.newline_anchor = preg->newline_anchor;
  643 
  644   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
  645   if (__glibc_unlikely (err != REG_NOERROR))
  646     goto free_return;
  647 
  648   /* We will log all the DFA states through which the dfa pass,
  649      if nmatch > 1, or this dfa has "multibyte node", which is a
  650      back-reference or a node which can accept multibyte character or
  651      multi character collating element.  */
  652   if (nmatch > 1 || dfa->has_mb_node)
  653     {
  654       /* Avoid overflow.  */
  655       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
  656                  <= mctx.input.bufs_len)))
  657     {
  658       err = REG_ESPACE;
  659       goto free_return;
  660     }
  661 
  662       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
  663       if (__glibc_unlikely (mctx.state_log == NULL))
  664     {
  665       err = REG_ESPACE;
  666       goto free_return;
  667     }
  668     }
  669 
  670   match_first = start;
  671   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
  672                : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
  673 
  674   /* Check incrementally whether the input string matches.  */
  675   incr = (last_start < start) ? -1 : 1;
  676   left_lim = (last_start < start) ? last_start : start;
  677   right_lim = (last_start < start) ? start : last_start;
  678   sb = dfa->mb_cur_max == 1;
  679   match_kind =
  680     (fastmap
  681      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
  682     | (start <= last_start ? 2 : 0)
  683     | (t != NULL ? 1 : 0))
  684      : 8);
  685 
  686   for (;; match_first += incr)
  687     {
  688       err = REG_NOMATCH;
  689       if (match_first < left_lim || right_lim < match_first)
  690     goto free_return;
  691 
  692       /* Advance as rapidly as possible through the string, until we
  693      find a plausible place to start matching.  This may be done
  694      with varying efficiency, so there are various possibilities:
  695      only the most common of them are specialized, in order to
  696      save on code size.  We use a switch statement for speed.  */
  697       switch (match_kind)
  698     {
  699     case 8:
  700       /* No fastmap.  */
  701       break;
  702 
  703     case 7:
  704       /* Fastmap with single-byte translation, match forward.  */
  705       while (__glibc_likely (match_first < right_lim)
  706          && !fastmap[t[(unsigned char) string[match_first]]])
  707         ++match_first;
  708       goto forward_match_found_start_or_reached_end;
  709 
  710     case 6:
  711       /* Fastmap without translation, match forward.  */
  712       while (__glibc_likely (match_first < right_lim)
  713          && !fastmap[(unsigned char) string[match_first]])
  714         ++match_first;
  715 
  716     forward_match_found_start_or_reached_end:
  717       if (__glibc_unlikely (match_first == right_lim))
  718         {
  719           ch = match_first >= length
  720                ? 0 : (unsigned char) string[match_first];
  721           if (!fastmap[t ? t[ch] : ch])
  722         goto free_return;
  723         }
  724       break;
  725 
  726     case 4:
  727     case 5:
  728       /* Fastmap without multi-byte translation, match backwards.  */
  729       while (match_first >= left_lim)
  730         {
  731           ch = match_first >= length
  732                ? 0 : (unsigned char) string[match_first];
  733           if (fastmap[t ? t[ch] : ch])
  734         break;
  735           --match_first;
  736         }
  737       if (match_first < left_lim)
  738         goto free_return;
  739       break;
  740 
  741     default:
  742       /* In this case, we can't determine easily the current byte,
  743          since it might be a component byte of a multibyte
  744          character.  Then we use the constructed buffer instead.  */
  745       for (;;)
  746         {
  747           /* If MATCH_FIRST is out of the valid range, reconstruct the
  748          buffers.  */
  749           __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
  750           if (__glibc_unlikely (offset
  751                     >= (__re_size_t) mctx.input.valid_raw_len))
  752         {
  753           err = re_string_reconstruct (&mctx.input, match_first,
  754                            eflags);
  755           if (__glibc_unlikely (err != REG_NOERROR))
  756             goto free_return;
  757 
  758           offset = match_first - mctx.input.raw_mbs_idx;
  759         }
  760           /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
  761          Note that MATCH_FIRST must not be smaller than 0.  */
  762           ch = (match_first >= length
  763             ? 0 : re_string_byte_at (&mctx.input, offset));
  764           if (fastmap[ch])
  765         break;
  766           match_first += incr;
  767           if (match_first < left_lim || match_first > right_lim)
  768         {
  769           err = REG_NOMATCH;
  770           goto free_return;
  771         }
  772         }
  773       break;
  774     }
  775 
  776       /* Reconstruct the buffers so that the matcher can assume that
  777      the matching starts from the beginning of the buffer.  */
  778       err = re_string_reconstruct (&mctx.input, match_first, eflags);
  779       if (__glibc_unlikely (err != REG_NOERROR))
  780     goto free_return;
  781 
  782 #ifdef RE_ENABLE_I18N
  783      /* Don't consider this char as a possible match start if it part,
  784     yet isn't the head, of a multibyte character.  */
  785       if (!sb && !re_string_first_byte (&mctx.input, 0))
  786     continue;
  787 #endif
  788 
  789       /* It seems to be appropriate one, then use the matcher.  */
  790       /* We assume that the matching starts from 0.  */
  791       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
  792       match_last = check_matching (&mctx, fl_longest_match,
  793                    start <= last_start ? &match_first : NULL);
  794       if (match_last != -1)
  795     {
  796       if (__glibc_unlikely (match_last == -2))
  797         {
  798           err = REG_ESPACE;
  799           goto free_return;
  800         }
  801       else
  802         {
  803           mctx.match_last = match_last;
  804           if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
  805         {
  806           re_dfastate_t *pstate = mctx.state_log[match_last];
  807           mctx.last_node = check_halt_state_context (&mctx, pstate,
  808                                  match_last);
  809         }
  810           if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
  811           || dfa->nbackref)
  812         {
  813           err = prune_impossible_nodes (&mctx);
  814           if (err == REG_NOERROR)
  815             break;
  816           if (__glibc_unlikely (err != REG_NOMATCH))
  817             goto free_return;
  818           match_last = -1;
  819         }
  820           else
  821         break; /* We found a match.  */
  822         }
  823     }
  824 
  825       match_ctx_clean (&mctx);
  826     }
  827 
  828   DEBUG_ASSERT (match_last != -1);
  829   DEBUG_ASSERT (err == REG_NOERROR);
  830 
  831   /* Set pmatch[] if we need.  */
  832   if (nmatch > 0)
  833     {
  834       Idx reg_idx;
  835 
  836       /* Initialize registers.  */
  837       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
  838     pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
  839 
  840       /* Set the points where matching start/end.  */
  841       pmatch[0].rm_so = 0;
  842       pmatch[0].rm_eo = mctx.match_last;
  843       /* FIXME: This function should fail if mctx.match_last exceeds
  844      the maximum possible regoff_t value.  We need a new error
  845      code REG_OVERFLOW.  */
  846 
  847       if (!preg->no_sub && nmatch > 1)
  848     {
  849       err = set_regs (preg, &mctx, nmatch, pmatch,
  850               dfa->has_plural_match && dfa->nbackref > 0);
  851       if (__glibc_unlikely (err != REG_NOERROR))
  852         goto free_return;
  853     }
  854 
  855       /* At last, add the offset to each register, since we slid
  856      the buffers so that we could assume that the matching starts
  857      from 0.  */
  858       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
  859     if (pmatch[reg_idx].rm_so != -1)
  860       {
  861 #ifdef RE_ENABLE_I18N
  862         if (__glibc_unlikely (mctx.input.offsets_needed != 0))
  863           {
  864         pmatch[reg_idx].rm_so =
  865           (pmatch[reg_idx].rm_so == mctx.input.valid_len
  866            ? mctx.input.valid_raw_len
  867            : mctx.input.offsets[pmatch[reg_idx].rm_so]);
  868         pmatch[reg_idx].rm_eo =
  869           (pmatch[reg_idx].rm_eo == mctx.input.valid_len
  870            ? mctx.input.valid_raw_len
  871            : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
  872           }
  873 #else
  874         DEBUG_ASSERT (mctx.input.offsets_needed == 0);
  875 #endif
  876         pmatch[reg_idx].rm_so += match_first;
  877         pmatch[reg_idx].rm_eo += match_first;
  878       }
  879       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
  880     {
  881       pmatch[nmatch + reg_idx].rm_so = -1;
  882       pmatch[nmatch + reg_idx].rm_eo = -1;
  883     }
  884 
  885       if (dfa->subexp_map)
  886     for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
  887       if (dfa->subexp_map[reg_idx] != reg_idx)
  888         {
  889           pmatch[reg_idx + 1].rm_so
  890         = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
  891           pmatch[reg_idx + 1].rm_eo
  892         = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
  893         }
  894     }
  895 
  896  free_return:
  897   re_free (mctx.state_log);
  898   if (dfa->nbackref)
  899     match_ctx_free (&mctx);
  900   re_string_destruct (&mctx.input);
  901   return err;
  902 }
  903 
  904 static reg_errcode_t
  905 __attribute_warn_unused_result__
  906 prune_impossible_nodes (re_match_context_t *mctx)
  907 {
  908   const re_dfa_t *const dfa = mctx->dfa;
  909   Idx halt_node, match_last;
  910   reg_errcode_t ret;
  911   re_dfastate_t **sifted_states;
  912   re_dfastate_t **lim_states = NULL;
  913   re_sift_context_t sctx;
  914   DEBUG_ASSERT (mctx->state_log != NULL);
  915   match_last = mctx->match_last;
  916   halt_node = mctx->last_node;
  917 
  918   /* Avoid overflow.  */
  919   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
  920             <= match_last))
  921     return REG_ESPACE;
  922 
  923   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
  924   if (__glibc_unlikely (sifted_states == NULL))
  925     {
  926       ret = REG_ESPACE;
  927       goto free_return;
  928     }
  929   if (dfa->nbackref)
  930     {
  931       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
  932       if (__glibc_unlikely (lim_states == NULL))
  933     {
  934       ret = REG_ESPACE;
  935       goto free_return;
  936     }
  937       while (1)
  938     {
  939       memset (lim_states, '\0',
  940           sizeof (re_dfastate_t *) * (match_last + 1));
  941       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
  942              match_last);
  943       ret = sift_states_backward (mctx, &sctx);
  944       re_node_set_free (&sctx.limits);
  945       if (__glibc_unlikely (ret != REG_NOERROR))
  946           goto free_return;
  947       if (sifted_states[0] != NULL || lim_states[0] != NULL)
  948         break;
  949       do
  950         {
  951           --match_last;
  952           if (match_last < 0)
  953         {
  954           ret = REG_NOMATCH;
  955           goto free_return;
  956         }
  957         } while (mctx->state_log[match_last] == NULL
  958              || !mctx->state_log[match_last]->halt);
  959       halt_node = check_halt_state_context (mctx,
  960                         mctx->state_log[match_last],
  961                         match_last);
  962     }
  963       ret = merge_state_array (dfa, sifted_states, lim_states,
  964                    match_last + 1);
  965       re_free (lim_states);
  966       lim_states = NULL;
  967       if (__glibc_unlikely (ret != REG_NOERROR))
  968     goto free_return;
  969     }
  970   else
  971     {
  972       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
  973       ret = sift_states_backward (mctx, &sctx);
  974       re_node_set_free (&sctx.limits);
  975       if (__glibc_unlikely (ret != REG_NOERROR))
  976     goto free_return;
  977       if (sifted_states[0] == NULL)
  978     {
  979       ret = REG_NOMATCH;
  980       goto free_return;
  981     }
  982     }
  983   re_free (mctx->state_log);
  984   mctx->state_log = sifted_states;
  985   sifted_states = NULL;
  986   mctx->last_node = halt_node;
  987   mctx->match_last = match_last;
  988   ret = REG_NOERROR;
  989  free_return:
  990   re_free (sifted_states);
  991   re_free (lim_states);
  992   return ret;
  993 }
  994 
  995 /* Acquire an initial state and return it.
  996    We must select appropriate initial state depending on the context,
  997    since initial states may have constraints like "\<", "^", etc..  */
  998 
  999 static inline re_dfastate_t *
 1000 __attribute__ ((always_inline))
 1001 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
 1002                 Idx idx)
 1003 {
 1004   const re_dfa_t *const dfa = mctx->dfa;
 1005   if (dfa->init_state->has_constraint)
 1006     {
 1007       unsigned int context;
 1008       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
 1009       if (IS_WORD_CONTEXT (context))
 1010     return dfa->init_state_word;
 1011       else if (IS_ORDINARY_CONTEXT (context))
 1012     return dfa->init_state;
 1013       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 1014     return dfa->init_state_begbuf;
 1015       else if (IS_NEWLINE_CONTEXT (context))
 1016     return dfa->init_state_nl;
 1017       else if (IS_BEGBUF_CONTEXT (context))
 1018     {
 1019       /* It is relatively rare case, then calculate on demand.  */
 1020       return re_acquire_state_context (err, dfa,
 1021                        dfa->init_state->entrance_nodes,
 1022                        context);
 1023     }
 1024       else
 1025     /* Must not happen?  */
 1026     return dfa->init_state;
 1027     }
 1028   else
 1029     return dfa->init_state;
 1030 }
 1031 
 1032 /* Check whether the regular expression match input string INPUT or not,
 1033    and return the index where the matching end.  Return -1 if
 1034    there is no match, and return -2 in case of an error.
 1035    FL_LONGEST_MATCH means we want the POSIX longest matching.
 1036    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
 1037    next place where we may want to try matching.
 1038    Note that the matcher assumes that the matching starts from the current
 1039    index of the buffer.  */
 1040 
 1041 static Idx
 1042 __attribute_warn_unused_result__
 1043 check_matching (re_match_context_t *mctx, bool fl_longest_match,
 1044         Idx *p_match_first)
 1045 {
 1046   const re_dfa_t *const dfa = mctx->dfa;
 1047   reg_errcode_t err;
 1048   Idx match = 0;
 1049   Idx match_last = -1;
 1050   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
 1051   re_dfastate_t *cur_state;
 1052   bool at_init_state = p_match_first != NULL;
 1053   Idx next_start_idx = cur_str_idx;
 1054 
 1055   err = REG_NOERROR;
 1056   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
 1057   /* An initial state must not be NULL (invalid).  */
 1058   if (__glibc_unlikely (cur_state == NULL))
 1059     {
 1060       DEBUG_ASSERT (err == REG_ESPACE);
 1061       return -2;
 1062     }
 1063 
 1064   if (mctx->state_log != NULL)
 1065     {
 1066       mctx->state_log[cur_str_idx] = cur_state;
 1067 
 1068       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
 1069      later.  E.g. Processing back references.  */
 1070       if (__glibc_unlikely (dfa->nbackref))
 1071     {
 1072       at_init_state = false;
 1073       err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
 1074       if (__glibc_unlikely (err != REG_NOERROR))
 1075         return err;
 1076 
 1077       if (cur_state->has_backref)
 1078         {
 1079           err = transit_state_bkref (mctx, &cur_state->nodes);
 1080           if (__glibc_unlikely (err != REG_NOERROR))
 1081         return err;
 1082         }
 1083     }
 1084     }
 1085 
 1086   /* If the RE accepts NULL string.  */
 1087   if (__glibc_unlikely (cur_state->halt))
 1088     {
 1089       if (!cur_state->has_constraint
 1090       || check_halt_state_context (mctx, cur_state, cur_str_idx))
 1091     {
 1092       if (!fl_longest_match)
 1093         return cur_str_idx;
 1094       else
 1095         {
 1096           match_last = cur_str_idx;
 1097           match = 1;
 1098         }
 1099     }
 1100     }
 1101 
 1102   while (!re_string_eoi (&mctx->input))
 1103     {
 1104       re_dfastate_t *old_state = cur_state;
 1105       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
 1106 
 1107       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
 1108        && mctx->input.bufs_len < mctx->input.len)
 1109       || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
 1110           && mctx->input.valid_len < mctx->input.len))
 1111     {
 1112       err = extend_buffers (mctx, next_char_idx + 1);
 1113       if (__glibc_unlikely (err != REG_NOERROR))
 1114         {
 1115           DEBUG_ASSERT (err == REG_ESPACE);
 1116           return -2;
 1117         }
 1118     }
 1119 
 1120       cur_state = transit_state (&err, mctx, cur_state);
 1121       if (mctx->state_log != NULL)
 1122     cur_state = merge_state_with_log (&err, mctx, cur_state);
 1123 
 1124       if (cur_state == NULL)
 1125     {
 1126       /* Reached the invalid state or an error.  Try to recover a valid
 1127          state using the state log, if available and if we have not
 1128          already found a valid (even if not the longest) match.  */
 1129       if (__glibc_unlikely (err != REG_NOERROR))
 1130         return -2;
 1131 
 1132       if (mctx->state_log == NULL
 1133           || (match && !fl_longest_match)
 1134           || (cur_state = find_recover_state (&err, mctx)) == NULL)
 1135         break;
 1136     }
 1137 
 1138       if (__glibc_unlikely (at_init_state))
 1139     {
 1140       if (old_state == cur_state)
 1141         next_start_idx = next_char_idx;
 1142       else
 1143         at_init_state = false;
 1144     }
 1145 
 1146       if (cur_state->halt)
 1147     {
 1148       /* Reached a halt state.
 1149          Check the halt state can satisfy the current context.  */
 1150       if (!cur_state->has_constraint
 1151           || check_halt_state_context (mctx, cur_state,
 1152                        re_string_cur_idx (&mctx->input)))
 1153         {
 1154           /* We found an appropriate halt state.  */
 1155           match_last = re_string_cur_idx (&mctx->input);
 1156           match = 1;
 1157 
 1158           /* We found a match, do not modify match_first below.  */
 1159           p_match_first = NULL;
 1160           if (!fl_longest_match)
 1161         break;
 1162         }
 1163     }
 1164     }
 1165 
 1166   if (p_match_first)
 1167     *p_match_first += next_start_idx;
 1168 
 1169   return match_last;
 1170 }
 1171 
 1172 /* Check NODE match the current context.  */
 1173 
 1174 static bool
 1175 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
 1176 {
 1177   re_token_type_t type = dfa->nodes[node].type;
 1178   unsigned int constraint = dfa->nodes[node].constraint;
 1179   if (type != END_OF_RE)
 1180     return false;
 1181   if (!constraint)
 1182     return true;
 1183   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
 1184     return false;
 1185   return true;
 1186 }
 1187 
 1188 /* Check the halt state STATE match the current context.
 1189    Return 0 if not match, if the node, STATE has, is a halt node and
 1190    match the context, return the node.  */
 1191 
 1192 static Idx
 1193 check_halt_state_context (const re_match_context_t *mctx,
 1194               const re_dfastate_t *state, Idx idx)
 1195 {
 1196   Idx i;
 1197   unsigned int context;
 1198   DEBUG_ASSERT (state->halt);
 1199   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
 1200   for (i = 0; i < state->nodes.nelem; ++i)
 1201     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
 1202       return state->nodes.elems[i];
 1203   return 0;
 1204 }
 1205 
 1206 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
 1207    corresponding to the DFA).
 1208    Return the destination node, and update EPS_VIA_NODES;
 1209    return -1 in case of errors.  */
 1210 
 1211 static Idx
 1212 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
 1213            Idx *pidx, Idx node, re_node_set *eps_via_nodes,
 1214            struct re_fail_stack_t *fs)
 1215 {
 1216   const re_dfa_t *const dfa = mctx->dfa;
 1217   Idx i;
 1218   bool ok;
 1219   if (IS_EPSILON_NODE (dfa->nodes[node].type))
 1220     {
 1221       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
 1222       re_node_set *edests = &dfa->edests[node];
 1223       Idx dest_node;
 1224       ok = re_node_set_insert (eps_via_nodes, node);
 1225       if (__glibc_unlikely (! ok))
 1226     return -2;
 1227       /* Pick up a valid destination, or return -1 if none
 1228      is found.  */
 1229       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
 1230     {
 1231       Idx candidate = edests->elems[i];
 1232       if (!re_node_set_contains (cur_nodes, candidate))
 1233         continue;
 1234           if (dest_node == -1)
 1235         dest_node = candidate;
 1236 
 1237       else
 1238         {
 1239           /* In order to avoid infinite loop like "(a*)*", return the second
 1240          epsilon-transition if the first was already considered.  */
 1241           if (re_node_set_contains (eps_via_nodes, dest_node))
 1242         return candidate;
 1243 
 1244           /* Otherwise, push the second epsilon-transition on the fail stack.  */
 1245           else if (fs != NULL
 1246                && push_fail_stack (fs, *pidx, candidate, nregs, regs,
 1247                        eps_via_nodes))
 1248         return -2;
 1249 
 1250           /* We know we are going to exit.  */
 1251           break;
 1252         }
 1253     }
 1254       return dest_node;
 1255     }
 1256   else
 1257     {
 1258       Idx naccepted = 0;
 1259       re_token_type_t type = dfa->nodes[node].type;
 1260 
 1261 #ifdef RE_ENABLE_I18N
 1262       if (dfa->nodes[node].accept_mb)
 1263     naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
 1264       else
 1265 #endif /* RE_ENABLE_I18N */
 1266       if (type == OP_BACK_REF)
 1267     {
 1268       Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
 1269       if (subexp_idx < nregs)
 1270         naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
 1271       if (fs != NULL)
 1272         {
 1273           if (subexp_idx >= nregs
 1274           || regs[subexp_idx].rm_so == -1
 1275           || regs[subexp_idx].rm_eo == -1)
 1276         return -1;
 1277           else if (naccepted)
 1278         {
 1279           char *buf = (char *) re_string_get_buffer (&mctx->input);
 1280           if (mctx->input.valid_len - *pidx < naccepted
 1281               || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
 1282                   naccepted)
 1283               != 0))
 1284             return -1;
 1285         }
 1286         }
 1287 
 1288       if (naccepted == 0)
 1289         {
 1290           Idx dest_node;
 1291           ok = re_node_set_insert (eps_via_nodes, node);
 1292           if (__glibc_unlikely (! ok))
 1293         return -2;
 1294           dest_node = dfa->edests[node].elems[0];
 1295           if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
 1296                     dest_node))
 1297         return dest_node;
 1298         }
 1299     }
 1300 
 1301       if (naccepted != 0
 1302       || check_node_accept (mctx, dfa->nodes + node, *pidx))
 1303     {
 1304       Idx dest_node = dfa->nexts[node];
 1305       *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
 1306       if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
 1307              || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
 1308                            dest_node)))
 1309         return -1;
 1310       re_node_set_empty (eps_via_nodes);
 1311       return dest_node;
 1312     }
 1313     }
 1314   return -1;
 1315 }
 1316 
 1317 static reg_errcode_t
 1318 __attribute_warn_unused_result__
 1319 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
 1320          Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
 1321 {
 1322   reg_errcode_t err;
 1323   Idx num = fs->num++;
 1324   if (fs->num == fs->alloc)
 1325     {
 1326       struct re_fail_stack_ent_t *new_array;
 1327       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
 1328                               fs->alloc * 2);
 1329       if (new_array == NULL)
 1330     return REG_ESPACE;
 1331       fs->alloc *= 2;
 1332       fs->stack = new_array;
 1333     }
 1334   fs->stack[num].idx = str_idx;
 1335   fs->stack[num].node = dest_node;
 1336   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
 1337   if (fs->stack[num].regs == NULL)
 1338     return REG_ESPACE;
 1339   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
 1340   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
 1341   return err;
 1342 }
 1343 
 1344 static Idx
 1345 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
 1346         regmatch_t *regs, re_node_set *eps_via_nodes)
 1347 {
 1348   Idx num = --fs->num;
 1349   DEBUG_ASSERT (num >= 0);
 1350   *pidx = fs->stack[num].idx;
 1351   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
 1352   re_node_set_free (eps_via_nodes);
 1353   re_free (fs->stack[num].regs);
 1354   *eps_via_nodes = fs->stack[num].eps_via_nodes;
 1355   return fs->stack[num].node;
 1356 }
 1357 
 1358 /* Set the positions where the subexpressions are starts/ends to registers
 1359    PMATCH.
 1360    Note: We assume that pmatch[0] is already set, and
 1361    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
 1362 
 1363 static reg_errcode_t
 1364 __attribute_warn_unused_result__
 1365 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
 1366       regmatch_t *pmatch, bool fl_backtrack)
 1367 {
 1368   const re_dfa_t *dfa = preg->buffer;
 1369   Idx idx, cur_node;
 1370   re_node_set eps_via_nodes;
 1371   struct re_fail_stack_t *fs;
 1372   struct re_fail_stack_t fs_body = { 0, 2, NULL };
 1373   regmatch_t *prev_idx_match;
 1374   bool prev_idx_match_malloced = false;
 1375 
 1376   DEBUG_ASSERT (nmatch > 1);
 1377   DEBUG_ASSERT (mctx->state_log != NULL);
 1378   if (fl_backtrack)
 1379     {
 1380       fs = &fs_body;
 1381       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
 1382       if (fs->stack == NULL)
 1383     return REG_ESPACE;
 1384     }
 1385   else
 1386     fs = NULL;
 1387 
 1388   cur_node = dfa->init_node;
 1389   re_node_set_init_empty (&eps_via_nodes);
 1390 
 1391   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
 1392     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
 1393   else
 1394     {
 1395       prev_idx_match = re_malloc (regmatch_t, nmatch);
 1396       if (prev_idx_match == NULL)
 1397     {
 1398       free_fail_stack_return (fs);
 1399       return REG_ESPACE;
 1400     }
 1401       prev_idx_match_malloced = true;
 1402     }
 1403   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
 1404 
 1405   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
 1406     {
 1407       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
 1408 
 1409       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
 1410     {
 1411       Idx reg_idx;
 1412       if (fs)
 1413         {
 1414           for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 1415         if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
 1416           break;
 1417           if (reg_idx == nmatch)
 1418         {
 1419           re_node_set_free (&eps_via_nodes);
 1420           if (prev_idx_match_malloced)
 1421             re_free (prev_idx_match);
 1422           return free_fail_stack_return (fs);
 1423         }
 1424           cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
 1425                      &eps_via_nodes);
 1426         }
 1427       else
 1428         {
 1429           re_node_set_free (&eps_via_nodes);
 1430           if (prev_idx_match_malloced)
 1431         re_free (prev_idx_match);
 1432           return REG_NOERROR;
 1433         }
 1434     }
 1435 
 1436       /* Proceed to next node.  */
 1437       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
 1438                     &eps_via_nodes, fs);
 1439 
 1440       if (__glibc_unlikely (cur_node < 0))
 1441     {
 1442       if (__glibc_unlikely (cur_node == -2))
 1443         {
 1444           re_node_set_free (&eps_via_nodes);
 1445           if (prev_idx_match_malloced)
 1446         re_free (prev_idx_match);
 1447           free_fail_stack_return (fs);
 1448           return REG_ESPACE;
 1449         }
 1450       if (fs)
 1451         cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
 1452                        &eps_via_nodes);
 1453       else
 1454         {
 1455           re_node_set_free (&eps_via_nodes);
 1456           if (prev_idx_match_malloced)
 1457         re_free (prev_idx_match);
 1458           return REG_NOMATCH;
 1459         }
 1460     }
 1461     }
 1462   re_node_set_free (&eps_via_nodes);
 1463   if (prev_idx_match_malloced)
 1464     re_free (prev_idx_match);
 1465   return free_fail_stack_return (fs);
 1466 }
 1467 
 1468 static reg_errcode_t
 1469 free_fail_stack_return (struct re_fail_stack_t *fs)
 1470 {
 1471   if (fs)
 1472     {
 1473       Idx fs_idx;
 1474       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
 1475     {
 1476       re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
 1477       re_free (fs->stack[fs_idx].regs);
 1478     }
 1479       re_free (fs->stack);
 1480     }
 1481   return REG_NOERROR;
 1482 }
 1483 
 1484 static void
 1485 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
 1486          regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
 1487 {
 1488   int type = dfa->nodes[cur_node].type;
 1489   if (type == OP_OPEN_SUBEXP)
 1490     {
 1491       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
 1492 
 1493       /* We are at the first node of this sub expression.  */
 1494       if (reg_num < nmatch)
 1495     {
 1496       pmatch[reg_num].rm_so = cur_idx;
 1497       pmatch[reg_num].rm_eo = -1;
 1498     }
 1499     }
 1500   else if (type == OP_CLOSE_SUBEXP)
 1501     {
 1502       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
 1503       if (reg_num < nmatch)
 1504     {
 1505       /* We are at the last node of this sub expression.  */
 1506       if (pmatch[reg_num].rm_so < cur_idx)
 1507         {
 1508           pmatch[reg_num].rm_eo = cur_idx;
 1509           /* This is a non-empty match or we are not inside an optional
 1510          subexpression.  Accept this right away.  */
 1511           memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
 1512         }
 1513       else
 1514         {
 1515           if (dfa->nodes[cur_node].opt_subexp
 1516           && prev_idx_match[reg_num].rm_so != -1)
 1517         /* We transited through an empty match for an optional
 1518            subexpression, like (a?)*, and this is not the subexp's
 1519            first match.  Copy back the old content of the registers
 1520            so that matches of an inner subexpression are undone as
 1521            well, like in ((a?))*.  */
 1522         memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
 1523           else
 1524         /* We completed a subexpression, but it may be part of
 1525            an optional one, so do not update PREV_IDX_MATCH.  */
 1526         pmatch[reg_num].rm_eo = cur_idx;
 1527         }
 1528     }
 1529     }
 1530 }
 1531 
 1532 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
 1533    and sift the nodes in each states according to the following rules.
 1534    Updated state_log will be wrote to STATE_LOG.
 1535 
 1536    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
 1537      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
 1538     If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
 1539     the LAST_NODE, we throw away the node 'a'.
 1540      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
 1541     string 's' and transit to 'b':
 1542     i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
 1543        away the node 'a'.
 1544     ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
 1545         thrown away, we throw away the node 'a'.
 1546      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
 1547     i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
 1548        node 'a'.
 1549     ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
 1550         we throw away the node 'a'.  */
 1551 
 1552 #define STATE_NODE_CONTAINS(state,node) \
 1553   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
 1554 
 1555 static reg_errcode_t
 1556 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
 1557 {
 1558   reg_errcode_t err;
 1559   int null_cnt = 0;
 1560   Idx str_idx = sctx->last_str_idx;
 1561   re_node_set cur_dest;
 1562 
 1563   DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
 1564 
 1565   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
 1566      transit to the last_node and the last_node itself.  */
 1567   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
 1568   if (__glibc_unlikely (err != REG_NOERROR))
 1569     return err;
 1570   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
 1571   if (__glibc_unlikely (err != REG_NOERROR))
 1572     goto free_return;
 1573 
 1574   /* Then check each states in the state_log.  */
 1575   while (str_idx > 0)
 1576     {
 1577       /* Update counters.  */
 1578       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
 1579       if (null_cnt > mctx->max_mb_elem_len)
 1580     {
 1581       memset (sctx->sifted_states, '\0',
 1582           sizeof (re_dfastate_t *) * str_idx);
 1583       re_node_set_free (&cur_dest);
 1584       return REG_NOERROR;
 1585     }
 1586       re_node_set_empty (&cur_dest);
 1587       --str_idx;
 1588 
 1589       if (mctx->state_log[str_idx])
 1590     {
 1591       err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
 1592       if (__glibc_unlikely (err != REG_NOERROR))
 1593         goto free_return;
 1594     }
 1595 
 1596       /* Add all the nodes which satisfy the following conditions:
 1597      - It can epsilon transit to a node in CUR_DEST.
 1598      - It is in CUR_SRC.
 1599      And update state_log.  */
 1600       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
 1601       if (__glibc_unlikely (err != REG_NOERROR))
 1602     goto free_return;
 1603     }
 1604   err = REG_NOERROR;
 1605  free_return:
 1606   re_node_set_free (&cur_dest);
 1607   return err;
 1608 }
 1609 
 1610 static reg_errcode_t
 1611 __attribute_warn_unused_result__
 1612 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
 1613              Idx str_idx, re_node_set *cur_dest)
 1614 {
 1615   const re_dfa_t *const dfa = mctx->dfa;
 1616   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
 1617   Idx i;
 1618 
 1619   /* Then build the next sifted state.
 1620      We build the next sifted state on 'cur_dest', and update
 1621      'sifted_states[str_idx]' with 'cur_dest'.
 1622      Note:
 1623      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
 1624      'cur_src' points the node_set of the old 'state_log[str_idx]'
 1625      (with the epsilon nodes pre-filtered out).  */
 1626   for (i = 0; i < cur_src->nelem; i++)
 1627     {
 1628       Idx prev_node = cur_src->elems[i];
 1629       int naccepted = 0;
 1630       bool ok;
 1631       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
 1632 
 1633 #ifdef RE_ENABLE_I18N
 1634       /* If the node may accept "multi byte".  */
 1635       if (dfa->nodes[prev_node].accept_mb)
 1636     naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
 1637                      str_idx, sctx->last_str_idx);
 1638 #endif /* RE_ENABLE_I18N */
 1639 
 1640       /* We don't check backreferences here.
 1641      See update_cur_sifted_state().  */
 1642       if (!naccepted
 1643       && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
 1644       && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
 1645                   dfa->nexts[prev_node]))
 1646     naccepted = 1;
 1647 
 1648       if (naccepted == 0)
 1649     continue;
 1650 
 1651       if (sctx->limits.nelem)
 1652     {
 1653       Idx to_idx = str_idx + naccepted;
 1654       if (check_dst_limits (mctx, &sctx->limits,
 1655                 dfa->nexts[prev_node], to_idx,
 1656                 prev_node, str_idx))
 1657         continue;
 1658     }
 1659       ok = re_node_set_insert (cur_dest, prev_node);
 1660       if (__glibc_unlikely (! ok))
 1661     return REG_ESPACE;
 1662     }
 1663 
 1664   return REG_NOERROR;
 1665 }
 1666 
 1667 /* Helper functions.  */
 1668 
 1669 static reg_errcode_t
 1670 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
 1671 {
 1672   Idx top = mctx->state_log_top;
 1673 
 1674   if ((next_state_log_idx >= mctx->input.bufs_len
 1675        && mctx->input.bufs_len < mctx->input.len)
 1676       || (next_state_log_idx >= mctx->input.valid_len
 1677       && mctx->input.valid_len < mctx->input.len))
 1678     {
 1679       reg_errcode_t err;
 1680       err = extend_buffers (mctx, next_state_log_idx + 1);
 1681       if (__glibc_unlikely (err != REG_NOERROR))
 1682     return err;
 1683     }
 1684 
 1685   if (top < next_state_log_idx)
 1686     {
 1687       memset (mctx->state_log + top + 1, '\0',
 1688           sizeof (re_dfastate_t *) * (next_state_log_idx - top));
 1689       mctx->state_log_top = next_state_log_idx;
 1690     }
 1691   return REG_NOERROR;
 1692 }
 1693 
 1694 static reg_errcode_t
 1695 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
 1696            re_dfastate_t **src, Idx num)
 1697 {
 1698   Idx st_idx;
 1699   reg_errcode_t err;
 1700   for (st_idx = 0; st_idx < num; ++st_idx)
 1701     {
 1702       if (dst[st_idx] == NULL)
 1703     dst[st_idx] = src[st_idx];
 1704       else if (src[st_idx] != NULL)
 1705     {
 1706       re_node_set merged_set;
 1707       err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
 1708                     &src[st_idx]->nodes);
 1709       if (__glibc_unlikely (err != REG_NOERROR))
 1710         return err;
 1711       dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
 1712       re_node_set_free (&merged_set);
 1713       if (__glibc_unlikely (err != REG_NOERROR))
 1714         return err;
 1715     }
 1716     }
 1717   return REG_NOERROR;
 1718 }
 1719 
 1720 static reg_errcode_t
 1721 update_cur_sifted_state (const re_match_context_t *mctx,
 1722              re_sift_context_t *sctx, Idx str_idx,
 1723              re_node_set *dest_nodes)
 1724 {
 1725   const re_dfa_t *const dfa = mctx->dfa;
 1726   reg_errcode_t err = REG_NOERROR;
 1727   const re_node_set *candidates;
 1728   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
 1729         : &mctx->state_log[str_idx]->nodes);
 1730 
 1731   if (dest_nodes->nelem == 0)
 1732     sctx->sifted_states[str_idx] = NULL;
 1733   else
 1734     {
 1735       if (candidates)
 1736     {
 1737       /* At first, add the nodes which can epsilon transit to a node in
 1738          DEST_NODE.  */
 1739       err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
 1740       if (__glibc_unlikely (err != REG_NOERROR))
 1741         return err;
 1742 
 1743       /* Then, check the limitations in the current sift_context.  */
 1744       if (sctx->limits.nelem)
 1745         {
 1746           err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
 1747                      mctx->bkref_ents, str_idx);
 1748           if (__glibc_unlikely (err != REG_NOERROR))
 1749         return err;
 1750         }
 1751     }
 1752 
 1753       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
 1754       if (__glibc_unlikely (err != REG_NOERROR))
 1755     return err;
 1756     }
 1757 
 1758   if (candidates && mctx->state_log[str_idx]->has_backref)
 1759     {
 1760       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
 1761       if (__glibc_unlikely (err != REG_NOERROR))
 1762     return err;
 1763     }
 1764   return REG_NOERROR;
 1765 }
 1766 
 1767 static reg_errcode_t
 1768 __attribute_warn_unused_result__
 1769 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
 1770                const re_node_set *candidates)
 1771 {
 1772   reg_errcode_t err = REG_NOERROR;
 1773   Idx i;
 1774 
 1775   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
 1776   if (__glibc_unlikely (err != REG_NOERROR))
 1777     return err;
 1778 
 1779   if (!state->inveclosure.alloc)
 1780     {
 1781       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
 1782       if (__glibc_unlikely (err != REG_NOERROR))
 1783     return REG_ESPACE;
 1784       for (i = 0; i < dest_nodes->nelem; i++)
 1785     {
 1786       err = re_node_set_merge (&state->inveclosure,
 1787                    dfa->inveclosures + dest_nodes->elems[i]);
 1788       if (__glibc_unlikely (err != REG_NOERROR))
 1789         return REG_ESPACE;
 1790     }
 1791     }
 1792   return re_node_set_add_intersect (dest_nodes, candidates,
 1793                     &state->inveclosure);
 1794 }
 1795 
 1796 static reg_errcode_t
 1797 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
 1798                const re_node_set *candidates)
 1799 {
 1800     Idx ecl_idx;
 1801     reg_errcode_t err;
 1802     re_node_set *inv_eclosure = dfa->inveclosures + node;
 1803     re_node_set except_nodes;
 1804     re_node_set_init_empty (&except_nodes);
 1805     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
 1806       {
 1807     Idx cur_node = inv_eclosure->elems[ecl_idx];
 1808     if (cur_node == node)
 1809       continue;
 1810     if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
 1811       {
 1812         Idx edst1 = dfa->edests[cur_node].elems[0];
 1813         Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
 1814              ? dfa->edests[cur_node].elems[1] : -1);
 1815         if ((!re_node_set_contains (inv_eclosure, edst1)
 1816          && re_node_set_contains (dest_nodes, edst1))
 1817         || (edst2 > 0
 1818             && !re_node_set_contains (inv_eclosure, edst2)
 1819             && re_node_set_contains (dest_nodes, edst2)))
 1820           {
 1821         err = re_node_set_add_intersect (&except_nodes, candidates,
 1822                          dfa->inveclosures + cur_node);
 1823         if (__glibc_unlikely (err != REG_NOERROR))
 1824           {
 1825             re_node_set_free (&except_nodes);
 1826             return err;
 1827           }
 1828           }
 1829       }
 1830       }
 1831     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
 1832       {
 1833     Idx cur_node = inv_eclosure->elems[ecl_idx];
 1834     if (!re_node_set_contains (&except_nodes, cur_node))
 1835       {
 1836         Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
 1837         re_node_set_remove_at (dest_nodes, idx);
 1838       }
 1839       }
 1840     re_node_set_free (&except_nodes);
 1841     return REG_NOERROR;
 1842 }
 1843 
 1844 static bool
 1845 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
 1846           Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
 1847 {
 1848   const re_dfa_t *const dfa = mctx->dfa;
 1849   Idx lim_idx, src_pos, dst_pos;
 1850 
 1851   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
 1852   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
 1853   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
 1854     {
 1855       Idx subexp_idx;
 1856       struct re_backref_cache_entry *ent;
 1857       ent = mctx->bkref_ents + limits->elems[lim_idx];
 1858       subexp_idx = dfa->nodes[ent->node].opr.idx;
 1859 
 1860       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
 1861                        subexp_idx, dst_node, dst_idx,
 1862                        dst_bkref_idx);
 1863       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
 1864                        subexp_idx, src_node, src_idx,
 1865                        src_bkref_idx);
 1866 
 1867       /* In case of:
 1868      <src> <dst> ( <subexp> )
 1869      ( <subexp> ) <src> <dst>
 1870      ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
 1871       if (src_pos == dst_pos)
 1872     continue; /* This is unrelated limitation.  */
 1873       else
 1874     return true;
 1875     }
 1876   return false;
 1877 }
 1878 
 1879 static int
 1880 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
 1881                  Idx subexp_idx, Idx from_node, Idx bkref_idx)
 1882 {
 1883   const re_dfa_t *const dfa = mctx->dfa;
 1884   const re_node_set *eclosures = dfa->eclosures + from_node;
 1885   Idx node_idx;
 1886 
 1887   /* Else, we are on the boundary: examine the nodes on the epsilon
 1888      closure.  */
 1889   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
 1890     {
 1891       Idx node = eclosures->elems[node_idx];
 1892       switch (dfa->nodes[node].type)
 1893     {
 1894     case OP_BACK_REF:
 1895       if (bkref_idx != -1)
 1896         {
 1897           struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
 1898           do
 1899         {
 1900           Idx dst;
 1901           int cpos;
 1902 
 1903           if (ent->node != node)
 1904             continue;
 1905 
 1906           if (subexp_idx < BITSET_WORD_BITS
 1907               && !(ent->eps_reachable_subexps_map
 1908                & ((bitset_word_t) 1 << subexp_idx)))
 1909             continue;
 1910 
 1911           /* Recurse trying to reach the OP_OPEN_SUBEXP and
 1912              OP_CLOSE_SUBEXP cases below.  But, if the
 1913              destination node is the same node as the source
 1914              node, don't recurse because it would cause an
 1915              infinite loop: a regex that exhibits this behavior
 1916              is ()\1*\1*  */
 1917           dst = dfa->edests[node].elems[0];
 1918           if (dst == from_node)
 1919             {
 1920               if (boundaries & 1)
 1921             return -1;
 1922               else /* if (boundaries & 2) */
 1923             return 0;
 1924             }
 1925 
 1926           cpos =
 1927             check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
 1928                          dst, bkref_idx);
 1929           if (cpos == -1 /* && (boundaries & 1) */)
 1930             return -1;
 1931           if (cpos == 0 && (boundaries & 2))
 1932             return 0;
 1933 
 1934           if (subexp_idx < BITSET_WORD_BITS)
 1935             ent->eps_reachable_subexps_map
 1936               &= ~((bitset_word_t) 1 << subexp_idx);
 1937         }
 1938           while (ent++->more);
 1939         }
 1940       break;
 1941 
 1942     case OP_OPEN_SUBEXP:
 1943       if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
 1944         return -1;
 1945       break;
 1946 
 1947     case OP_CLOSE_SUBEXP:
 1948       if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
 1949         return 0;
 1950       break;
 1951 
 1952     default:
 1953         break;
 1954     }
 1955     }
 1956 
 1957   return (boundaries & 2) ? 1 : 0;
 1958 }
 1959 
 1960 static int
 1961 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
 1962                Idx subexp_idx, Idx from_node, Idx str_idx,
 1963                Idx bkref_idx)
 1964 {
 1965   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
 1966   int boundaries;
 1967 
 1968   /* If we are outside the range of the subexpression, return -1 or 1.  */
 1969   if (str_idx < lim->subexp_from)
 1970     return -1;
 1971 
 1972   if (lim->subexp_to < str_idx)
 1973     return 1;
 1974 
 1975   /* If we are within the subexpression, return 0.  */
 1976   boundaries = (str_idx == lim->subexp_from);
 1977   boundaries |= (str_idx == lim->subexp_to) << 1;
 1978   if (boundaries == 0)
 1979     return 0;
 1980 
 1981   /* Else, examine epsilon closure.  */
 1982   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
 1983                       from_node, bkref_idx);
 1984 }
 1985 
 1986 /* Check the limitations of sub expressions LIMITS, and remove the nodes
 1987    which are against limitations from DEST_NODES. */
 1988 
 1989 static reg_errcode_t
 1990 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
 1991              const re_node_set *candidates, re_node_set *limits,
 1992              struct re_backref_cache_entry *bkref_ents, Idx str_idx)
 1993 {
 1994   reg_errcode_t err;
 1995   Idx node_idx, lim_idx;
 1996 
 1997   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
 1998     {
 1999       Idx subexp_idx;
 2000       struct re_backref_cache_entry *ent;
 2001       ent = bkref_ents + limits->elems[lim_idx];
 2002 
 2003       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
 2004     continue; /* This is unrelated limitation.  */
 2005 
 2006       subexp_idx = dfa->nodes[ent->node].opr.idx;
 2007       if (ent->subexp_to == str_idx)
 2008     {
 2009       Idx ops_node = -1;
 2010       Idx cls_node = -1;
 2011       for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 2012         {
 2013           Idx node = dest_nodes->elems[node_idx];
 2014           re_token_type_t type = dfa->nodes[node].type;
 2015           if (type == OP_OPEN_SUBEXP
 2016           && subexp_idx == dfa->nodes[node].opr.idx)
 2017         ops_node = node;
 2018           else if (type == OP_CLOSE_SUBEXP
 2019                && subexp_idx == dfa->nodes[node].opr.idx)
 2020         cls_node = node;
 2021         }
 2022 
 2023       /* Check the limitation of the open subexpression.  */
 2024       /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
 2025       if (ops_node >= 0)
 2026         {
 2027           err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
 2028                        candidates);
 2029           if (__glibc_unlikely (err != REG_NOERROR))
 2030         return err;
 2031         }
 2032 
 2033       /* Check the limitation of the close subexpression.  */
 2034       if (cls_node >= 0)
 2035         for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 2036           {
 2037         Idx node = dest_nodes->elems[node_idx];
 2038         if (!re_node_set_contains (dfa->inveclosures + node,
 2039                        cls_node)
 2040             && !re_node_set_contains (dfa->eclosures + node,
 2041                           cls_node))
 2042           {
 2043             /* It is against this limitation.
 2044                Remove it form the current sifted state.  */
 2045             err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
 2046                          candidates);
 2047             if (__glibc_unlikely (err != REG_NOERROR))
 2048               return err;
 2049             --node_idx;
 2050           }
 2051           }
 2052     }
 2053       else /* (ent->subexp_to != str_idx)  */
 2054     {
 2055       for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 2056         {
 2057           Idx node = dest_nodes->elems[node_idx];
 2058           re_token_type_t type = dfa->nodes[node].type;
 2059           if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
 2060         {
 2061           if (subexp_idx != dfa->nodes[node].opr.idx)
 2062             continue;
 2063           /* It is against this limitation.
 2064              Remove it form the current sifted state.  */
 2065           err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
 2066                            candidates);
 2067           if (__glibc_unlikely (err != REG_NOERROR))
 2068             return err;
 2069         }
 2070         }
 2071     }
 2072     }
 2073   return REG_NOERROR;
 2074 }
 2075 
 2076 static reg_errcode_t
 2077 __attribute_warn_unused_result__
 2078 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
 2079            Idx str_idx, const re_node_set *candidates)
 2080 {
 2081   const re_dfa_t *const dfa = mctx->dfa;
 2082   reg_errcode_t err;
 2083   Idx node_idx, node;
 2084   re_sift_context_t local_sctx;
 2085   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
 2086 
 2087   if (first_idx == -1)
 2088     return REG_NOERROR;
 2089 
 2090   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
 2091 
 2092   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
 2093     {
 2094       Idx enabled_idx;
 2095       re_token_type_t type;
 2096       struct re_backref_cache_entry *entry;
 2097       node = candidates->elems[node_idx];
 2098       type = dfa->nodes[node].type;
 2099       /* Avoid infinite loop for the REs like "()\1+".  */
 2100       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
 2101     continue;
 2102       if (type != OP_BACK_REF)
 2103     continue;
 2104 
 2105       entry = mctx->bkref_ents + first_idx;
 2106       enabled_idx = first_idx;
 2107       do
 2108     {
 2109       Idx subexp_len;
 2110       Idx to_idx;
 2111       Idx dst_node;
 2112       bool ok;
 2113       re_dfastate_t *cur_state;
 2114 
 2115       if (entry->node != node)
 2116         continue;
 2117       subexp_len = entry->subexp_to - entry->subexp_from;
 2118       to_idx = str_idx + subexp_len;
 2119       dst_node = (subexp_len ? dfa->nexts[node]
 2120               : dfa->edests[node].elems[0]);
 2121 
 2122       if (to_idx > sctx->last_str_idx
 2123           || sctx->sifted_states[to_idx] == NULL
 2124           || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
 2125           || check_dst_limits (mctx, &sctx->limits, node,
 2126                    str_idx, dst_node, to_idx))
 2127         continue;
 2128 
 2129       if (local_sctx.sifted_states == NULL)
 2130         {
 2131           local_sctx = *sctx;
 2132           err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
 2133           if (__glibc_unlikely (err != REG_NOERROR))
 2134         goto free_return;
 2135         }
 2136       local_sctx.last_node = node;
 2137       local_sctx.last_str_idx = str_idx;
 2138       ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
 2139       if (__glibc_unlikely (! ok))
 2140         {
 2141           err = REG_ESPACE;
 2142           goto free_return;
 2143         }
 2144       cur_state = local_sctx.sifted_states[str_idx];
 2145       err = sift_states_backward (mctx, &local_sctx);
 2146       if (__glibc_unlikely (err != REG_NOERROR))
 2147         goto free_return;
 2148       if (sctx->limited_states != NULL)
 2149         {
 2150           err = merge_state_array (dfa, sctx->limited_states,
 2151                        local_sctx.sifted_states,
 2152                        str_idx + 1);
 2153           if (__glibc_unlikely (err != REG_NOERROR))
 2154         goto free_return;
 2155         }
 2156       local_sctx.sifted_states[str_idx] = cur_state;
 2157       re_node_set_remove (&local_sctx.limits, enabled_idx);
 2158 
 2159       /* mctx->bkref_ents may have changed, reload the pointer.  */
 2160       entry = mctx->bkref_ents + enabled_idx;
 2161     }
 2162       while (enabled_idx++, entry++->more);
 2163     }
 2164   err = REG_NOERROR;
 2165  free_return:
 2166   if (local_sctx.sifted_states != NULL)
 2167     {
 2168       re_node_set_free (&local_sctx.limits);
 2169     }
 2170 
 2171   return err;
 2172 }
 2173 
 2174 
 2175 #ifdef RE_ENABLE_I18N
 2176 static int
 2177 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
 2178              Idx node_idx, Idx str_idx, Idx max_str_idx)
 2179 {
 2180   const re_dfa_t *const dfa = mctx->dfa;
 2181   int naccepted;
 2182   /* Check the node can accept "multi byte".  */
 2183   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
 2184   if (naccepted > 0 && str_idx + naccepted <= max_str_idx
 2185       && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
 2186                    dfa->nexts[node_idx]))
 2187     /* The node can't accept the "multi byte", or the
 2188        destination was already thrown away, then the node
 2189        couldn't accept the current input "multi byte".   */
 2190     naccepted = 0;
 2191   /* Otherwise, it is sure that the node could accept
 2192      'naccepted' bytes input.  */
 2193   return naccepted;
 2194 }
 2195 #endif /* RE_ENABLE_I18N */
 2196 
 2197 
 2198 /* Functions for state transition.  */
 2199 
 2200 /* Return the next state to which the current state STATE will transit by
 2201    accepting the current input byte, and update STATE_LOG if necessary.
 2202    If STATE can accept a multibyte char/collating element/back reference
 2203    update the destination of STATE_LOG.  */
 2204 
 2205 static re_dfastate_t *
 2206 __attribute_warn_unused_result__
 2207 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
 2208            re_dfastate_t *state)
 2209 {
 2210   re_dfastate_t **trtable;
 2211   unsigned char ch;
 2212 
 2213 #ifdef RE_ENABLE_I18N
 2214   /* If the current state can accept multibyte.  */
 2215   if (__glibc_unlikely (state->accept_mb))
 2216     {
 2217       *err = transit_state_mb (mctx, state);
 2218       if (__glibc_unlikely (*err != REG_NOERROR))
 2219     return NULL;
 2220     }
 2221 #endif /* RE_ENABLE_I18N */
 2222 
 2223   /* Then decide the next state with the single byte.  */
 2224 #if 0
 2225   if (0)
 2226     /* don't use transition table  */
 2227     return transit_state_sb (err, mctx, state);
 2228 #endif
 2229 
 2230   /* Use transition table  */
 2231   ch = re_string_fetch_byte (&mctx->input);
 2232   for (;;)
 2233     {
 2234       trtable = state->trtable;
 2235       if (__glibc_likely (trtable != NULL))
 2236     return trtable[ch];
 2237 
 2238       trtable = state->word_trtable;
 2239       if (__glibc_likely (trtable != NULL))
 2240     {
 2241       unsigned int context;
 2242       context
 2243         = re_string_context_at (&mctx->input,
 2244                     re_string_cur_idx (&mctx->input) - 1,
 2245                     mctx->eflags);
 2246       if (IS_WORD_CONTEXT (context))
 2247         return trtable[ch + SBC_MAX];
 2248       else
 2249         return trtable[ch];
 2250     }
 2251 
 2252       if (!build_trtable (mctx->dfa, state))
 2253     {
 2254       *err = REG_ESPACE;
 2255       return NULL;
 2256     }
 2257 
 2258       /* Retry, we now have a transition table.  */
 2259     }
 2260 }
 2261 
 2262 /* Update the state_log if we need */
 2263 static re_dfastate_t *
 2264 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
 2265               re_dfastate_t *next_state)
 2266 {
 2267   const re_dfa_t *const dfa = mctx->dfa;
 2268   Idx cur_idx = re_string_cur_idx (&mctx->input);
 2269 
 2270   if (cur_idx > mctx->state_log_top)
 2271     {
 2272       mctx->state_log[cur_idx] = next_state;
 2273       mctx->state_log_top = cur_idx;
 2274     }
 2275   else if (mctx->state_log[cur_idx] == 0)
 2276     {
 2277       mctx->state_log[cur_idx] = next_state;
 2278     }
 2279   else
 2280     {
 2281       re_dfastate_t *pstate;
 2282       unsigned int context;
 2283       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
 2284       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
 2285      the destination of a multibyte char/collating element/
 2286      back reference.  Then the next state is the union set of
 2287      these destinations and the results of the transition table.  */
 2288       pstate = mctx->state_log[cur_idx];
 2289       log_nodes = pstate->entrance_nodes;
 2290       if (next_state != NULL)
 2291     {
 2292       table_nodes = next_state->entrance_nodes;
 2293       *err = re_node_set_init_union (&next_nodes, table_nodes,
 2294                          log_nodes);
 2295       if (__glibc_unlikely (*err != REG_NOERROR))
 2296         return NULL;
 2297     }
 2298       else
 2299     next_nodes = *log_nodes;
 2300       /* Note: We already add the nodes of the initial state,
 2301      then we don't need to add them here.  */
 2302 
 2303       context = re_string_context_at (&mctx->input,
 2304                       re_string_cur_idx (&mctx->input) - 1,
 2305                       mctx->eflags);
 2306       next_state = mctx->state_log[cur_idx]
 2307     = re_acquire_state_context (err, dfa, &next_nodes, context);
 2308       /* We don't need to check errors here, since the return value of
 2309      this function is next_state and ERR is already set.  */
 2310 
 2311       if (table_nodes != NULL)
 2312     re_node_set_free (&next_nodes);
 2313     }
 2314 
 2315   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
 2316     {
 2317       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
 2318      later.  We must check them here, since the back references in the
 2319      next state might use them.  */
 2320       *err = check_subexp_matching_top (mctx, &next_state->nodes,
 2321                     cur_idx);
 2322       if (__glibc_unlikely (*err != REG_NOERROR))
 2323     return NULL;
 2324 
 2325       /* If the next state has back references.  */
 2326       if (next_state->has_backref)
 2327     {
 2328       *err = transit_state_bkref (mctx, &next_state->nodes);
 2329       if (__glibc_unlikely (*err != REG_NOERROR))
 2330         return NULL;
 2331       next_state = mctx->state_log[cur_idx];
 2332     }
 2333     }
 2334 
 2335   return next_state;
 2336 }
 2337 
 2338 /* Skip bytes in the input that correspond to part of a
 2339    multi-byte match, then look in the log for a state
 2340    from which to restart matching.  */
 2341 static re_dfastate_t *
 2342 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
 2343 {
 2344   re_dfastate_t *cur_state;
 2345   do
 2346     {
 2347       Idx max = mctx->state_log_top;
 2348       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
 2349 
 2350       do
 2351     {
 2352       if (++cur_str_idx > max)
 2353         return NULL;
 2354       re_string_skip_bytes (&mctx->input, 1);
 2355     }
 2356       while (mctx->state_log[cur_str_idx] == NULL);
 2357 
 2358       cur_state = merge_state_with_log (err, mctx, NULL);
 2359     }
 2360   while (*err == REG_NOERROR && cur_state == NULL);
 2361   return cur_state;
 2362 }
 2363 
 2364 /* Helper functions for transit_state.  */
 2365 
 2366 /* From the node set CUR_NODES, pick up the nodes whose types are
 2367    OP_OPEN_SUBEXP and which have corresponding back references in the regular
 2368    expression. And register them to use them later for evaluating the
 2369    corresponding back references.  */
 2370 
 2371 static reg_errcode_t
 2372 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
 2373                Idx str_idx)
 2374 {
 2375   const re_dfa_t *const dfa = mctx->dfa;
 2376   Idx node_idx;
 2377   reg_errcode_t err;
 2378 
 2379   /* TODO: This isn't efficient.
 2380        Because there might be more than one nodes whose types are
 2381        OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
 2382        nodes.
 2383        E.g. RE: (a){2}  */
 2384   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
 2385     {
 2386       Idx node = cur_nodes->elems[node_idx];
 2387       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
 2388       && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
 2389       && (dfa->used_bkref_map
 2390           & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
 2391     {
 2392       err = match_ctx_add_subtop (mctx, node, str_idx);
 2393       if (__glibc_unlikely (err != REG_NOERROR))
 2394         return err;
 2395     }
 2396     }
 2397   return REG_NOERROR;
 2398 }
 2399 
 2400 #if 0
 2401 /* Return the next state to which the current state STATE will transit by
 2402    accepting the current input byte.  */
 2403 
 2404 static re_dfastate_t *
 2405 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
 2406           re_dfastate_t *state)
 2407 {
 2408   const re_dfa_t *const dfa = mctx->dfa;
 2409   re_node_set next_nodes;
 2410   re_dfastate_t *next_state;
 2411   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
 2412   unsigned int context;
 2413 
 2414   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
 2415   if (__glibc_unlikely (*err != REG_NOERROR))
 2416     return NULL;
 2417   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
 2418     {
 2419       Idx cur_node = state->nodes.elems[node_cnt];
 2420       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
 2421     {
 2422       *err = re_node_set_merge (&next_nodes,
 2423                     dfa->eclosures + dfa->nexts[cur_node]);
 2424       if (__glibc_unlikely (*err != REG_NOERROR))
 2425         {
 2426           re_node_set_free (&next_nodes);
 2427           return NULL;
 2428         }
 2429     }
 2430     }
 2431   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
 2432   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
 2433   /* We don't need to check errors here, since the return value of
 2434      this function is next_state and ERR is already set.  */
 2435 
 2436   re_node_set_free (&next_nodes);
 2437   re_string_skip_bytes (&mctx->input, 1);
 2438   return next_state;
 2439 }
 2440 #endif
 2441 
 2442 #ifdef RE_ENABLE_I18N
 2443 static reg_errcode_t
 2444 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
 2445 {
 2446   const re_dfa_t *const dfa = mctx->dfa;
 2447   reg_errcode_t err;
 2448   Idx i;
 2449 
 2450   for (i = 0; i < pstate->nodes.nelem; ++i)
 2451     {
 2452       re_node_set dest_nodes, *new_nodes;
 2453       Idx cur_node_idx = pstate->nodes.elems[i];
 2454       int naccepted;
 2455       Idx dest_idx;
 2456       unsigned int context;
 2457       re_dfastate_t *dest_state;
 2458 
 2459       if (!dfa->nodes[cur_node_idx].accept_mb)
 2460     continue;
 2461 
 2462       if (dfa->nodes[cur_node_idx].constraint)
 2463     {
 2464       context = re_string_context_at (&mctx->input,
 2465                       re_string_cur_idx (&mctx->input),
 2466                       mctx->eflags);
 2467       if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
 2468                        context))
 2469         continue;
 2470     }
 2471 
 2472       /* How many bytes the node can accept?  */
 2473       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
 2474                        re_string_cur_idx (&mctx->input));
 2475       if (naccepted == 0)
 2476     continue;
 2477 
 2478       /* The node can accepts 'naccepted' bytes.  */
 2479       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
 2480       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
 2481                    : mctx->max_mb_elem_len);
 2482       err = clean_state_log_if_needed (mctx, dest_idx);
 2483       if (__glibc_unlikely (err != REG_NOERROR))
 2484     return err;
 2485       DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
 2486       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
 2487 
 2488       dest_state = mctx->state_log[dest_idx];
 2489       if (dest_state == NULL)
 2490     dest_nodes = *new_nodes;
 2491       else
 2492     {
 2493       err = re_node_set_init_union (&dest_nodes,
 2494                     dest_state->entrance_nodes, new_nodes);
 2495       if (__glibc_unlikely (err != REG_NOERROR))
 2496         return err;
 2497     }
 2498       context = re_string_context_at (&mctx->input, dest_idx - 1,
 2499                       mctx->eflags);
 2500       mctx->state_log[dest_idx]
 2501     = re_acquire_state_context (&err, dfa, &dest_nodes, context);
 2502       if (dest_state != NULL)
 2503     re_node_set_free (&dest_nodes);
 2504       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
 2505                 && err != REG_NOERROR))
 2506     return err;
 2507     }
 2508   return REG_NOERROR;
 2509 }
 2510 #endif /* RE_ENABLE_I18N */
 2511 
 2512 static reg_errcode_t
 2513 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
 2514 {
 2515   const re_dfa_t *const dfa = mctx->dfa;
 2516   reg_errcode_t err;
 2517   Idx i;
 2518   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
 2519 
 2520   for (i = 0; i < nodes->nelem; ++i)
 2521     {
 2522       Idx dest_str_idx, prev_nelem, bkc_idx;
 2523       Idx node_idx = nodes->elems[i];
 2524       unsigned int context;
 2525       const re_token_t *node = dfa->nodes + node_idx;
 2526       re_node_set *new_dest_nodes;
 2527 
 2528       /* Check whether 'node' is a backreference or not.  */
 2529       if (node->type != OP_BACK_REF)
 2530     continue;
 2531 
 2532       if (node->constraint)
 2533     {
 2534       context = re_string_context_at (&mctx->input, cur_str_idx,
 2535                       mctx->eflags);
 2536       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
 2537         continue;
 2538     }
 2539 
 2540       /* 'node' is a backreference.
 2541      Check the substring which the substring matched.  */
 2542       bkc_idx = mctx->nbkref_ents;
 2543       err = get_subexp (mctx, node_idx, cur_str_idx);
 2544       if (__glibc_unlikely (err != REG_NOERROR))
 2545     goto free_return;
 2546 
 2547       /* And add the epsilon closures (which is 'new_dest_nodes') of
 2548      the backreference to appropriate state_log.  */
 2549       DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
 2550       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
 2551     {
 2552       Idx subexp_len;
 2553       re_dfastate_t *dest_state;
 2554       struct re_backref_cache_entry *bkref_ent;
 2555       bkref_ent = mctx->bkref_ents + bkc_idx;
 2556       if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
 2557         continue;
 2558       subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
 2559       new_dest_nodes = (subexp_len == 0
 2560                 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
 2561                 : dfa->eclosures + dfa->nexts[node_idx]);
 2562       dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
 2563               - bkref_ent->subexp_from);
 2564       context = re_string_context_at (&mctx->input, dest_str_idx - 1,
 2565                       mctx->eflags);
 2566       dest_state = mctx->state_log[dest_str_idx];
 2567       prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
 2568             : mctx->state_log[cur_str_idx]->nodes.nelem);
 2569       /* Add 'new_dest_node' to state_log.  */
 2570       if (dest_state == NULL)
 2571         {
 2572           mctx->state_log[dest_str_idx]
 2573         = re_acquire_state_context (&err, dfa, new_dest_nodes,
 2574                         context);
 2575           if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
 2576                     && err != REG_NOERROR))
 2577         goto free_return;
 2578         }
 2579       else
 2580         {
 2581           re_node_set dest_nodes;
 2582           err = re_node_set_init_union (&dest_nodes,
 2583                         dest_state->entrance_nodes,
 2584                         new_dest_nodes);
 2585           if (__glibc_unlikely (err != REG_NOERROR))
 2586         {
 2587           re_node_set_free (&dest_nodes);
 2588           goto free_return;
 2589         }
 2590           mctx->state_log[dest_str_idx]
 2591         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
 2592           re_node_set_free (&dest_nodes);
 2593           if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
 2594                     && err != REG_NOERROR))
 2595         goto free_return;
 2596         }
 2597       /* We need to check recursively if the backreference can epsilon
 2598          transit.  */
 2599       if (subexp_len == 0
 2600           && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
 2601         {
 2602           err = check_subexp_matching_top (mctx, new_dest_nodes,
 2603                            cur_str_idx);
 2604           if (__glibc_unlikely (err != REG_NOERROR))
 2605         goto free_return;
 2606           err = transit_state_bkref (mctx, new_dest_nodes);
 2607           if (__glibc_unlikely (err != REG_NOERROR))
 2608         goto free_return;
 2609         }
 2610     }
 2611     }
 2612   err = REG_NOERROR;
 2613  free_return:
 2614   return err;
 2615 }
 2616 
 2617 /* Enumerate all the candidates which the backreference BKREF_NODE can match
 2618    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
 2619    Note that we might collect inappropriate candidates here.
 2620    However, the cost of checking them strictly here is too high, then we
 2621    delay these checking for prune_impossible_nodes().  */
 2622 
 2623 static reg_errcode_t
 2624 __attribute_warn_unused_result__
 2625 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
 2626 {
 2627   const re_dfa_t *const dfa = mctx->dfa;
 2628   Idx subexp_num, sub_top_idx;
 2629   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
 2630   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
 2631   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
 2632   if (cache_idx != -1)
 2633     {
 2634       const struct re_backref_cache_entry *entry
 2635     = mctx->bkref_ents + cache_idx;
 2636       do
 2637     if (entry->node == bkref_node)
 2638       return REG_NOERROR; /* We already checked it.  */
 2639       while (entry++->more);
 2640     }
 2641 
 2642   subexp_num = dfa->nodes[bkref_node].opr.idx;
 2643 
 2644   /* For each sub expression  */
 2645   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
 2646     {
 2647       reg_errcode_t err;
 2648       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
 2649       re_sub_match_last_t *sub_last;
 2650       Idx sub_last_idx, sl_str, bkref_str_off;
 2651 
 2652       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
 2653     continue; /* It isn't related.  */
 2654 
 2655       sl_str = sub_top->str_idx;
 2656       bkref_str_off = bkref_str_idx;
 2657       /* At first, check the last node of sub expressions we already
 2658      evaluated.  */
 2659       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
 2660     {
 2661       regoff_t sl_str_diff;
 2662       sub_last = sub_top->lasts[sub_last_idx];
 2663       sl_str_diff = sub_last->str_idx - sl_str;
 2664       /* The matched string by the sub expression match with the substring
 2665          at the back reference?  */
 2666       if (sl_str_diff > 0)
 2667         {
 2668           if (__glibc_unlikely (bkref_str_off + sl_str_diff
 2669                     > mctx->input.valid_len))
 2670         {
 2671           /* Not enough chars for a successful match.  */
 2672           if (bkref_str_off + sl_str_diff > mctx->input.len)
 2673             break;
 2674 
 2675           err = clean_state_log_if_needed (mctx,
 2676                            bkref_str_off
 2677                            + sl_str_diff);
 2678           if (__glibc_unlikely (err != REG_NOERROR))
 2679             return err;
 2680           buf = (const char *) re_string_get_buffer (&mctx->input);
 2681         }
 2682           if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
 2683         /* We don't need to search this sub expression any more.  */
 2684         break;
 2685         }
 2686       bkref_str_off += sl_str_diff;
 2687       sl_str += sl_str_diff;
 2688       err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
 2689                 bkref_str_idx);
 2690 
 2691       /* Reload buf, since the preceding call might have reallocated
 2692          the buffer.  */
 2693       buf = (const char *) re_string_get_buffer (&mctx->input);
 2694 
 2695       if (err == REG_NOMATCH)
 2696         continue;
 2697       if (__glibc_unlikely (err != REG_NOERROR))
 2698         return err;
 2699     }
 2700 
 2701       if (sub_last_idx < sub_top->nlasts)
 2702     continue;
 2703       if (sub_last_idx > 0)
 2704     ++sl_str;
 2705       /* Then, search for the other last nodes of the sub expression.  */
 2706       for (; sl_str <= bkref_str_idx; ++sl_str)
 2707     {
 2708       Idx cls_node;
 2709       regoff_t sl_str_off;
 2710       const re_node_set *nodes;
 2711       sl_str_off = sl_str - sub_top->str_idx;
 2712       /* The matched string by the sub expression match with the substring
 2713          at the back reference?  */
 2714       if (sl_str_off > 0)
 2715         {
 2716           if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
 2717         {
 2718           /* If we are at the end of the input, we cannot match.  */
 2719           if (bkref_str_off >= mctx->input.len)
 2720             break;
 2721 
 2722           err = extend_buffers (mctx, bkref_str_off + 1);
 2723           if (__glibc_unlikely (err != REG_NOERROR))
 2724             return err;
 2725 
 2726           buf = (const char *) re_string_get_buffer (&mctx->input);
 2727         }
 2728           if (buf [bkref_str_off++] != buf[sl_str - 1])
 2729         break; /* We don't need to search this sub expression
 2730               any more.  */
 2731         }
 2732       if (mctx->state_log[sl_str] == NULL)
 2733         continue;
 2734       /* Does this state have a ')' of the sub expression?  */
 2735       nodes = &mctx->state_log[sl_str]->nodes;
 2736       cls_node = find_subexp_node (dfa, nodes, subexp_num,
 2737                        OP_CLOSE_SUBEXP);
 2738       if (cls_node == -1)
 2739         continue; /* No.  */
 2740       if (sub_top->path == NULL)
 2741         {
 2742           sub_top->path = calloc (sizeof (state_array_t),
 2743                       sl_str - sub_top->str_idx + 1);
 2744           if (sub_top->path == NULL)
 2745         return REG_ESPACE;
 2746         }
 2747       /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
 2748          in the current context?  */
 2749       err = check_arrival (mctx, sub_top->path, sub_top->node,
 2750                    sub_top->str_idx, cls_node, sl_str,
 2751                    OP_CLOSE_SUBEXP);
 2752       if (err == REG_NOMATCH)
 2753           continue;
 2754       if (__glibc_unlikely (err != REG_NOERROR))
 2755           return err;
 2756       sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
 2757       if (__glibc_unlikely (sub_last == NULL))
 2758         return REG_ESPACE;
 2759       err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
 2760                 bkref_str_idx);
 2761       buf = (const char *) re_string_get_buffer (&mctx->input);
 2762       if (err == REG_NOMATCH)
 2763         continue;
 2764       if (__glibc_unlikely (err != REG_NOERROR))
 2765         return err;
 2766     }
 2767     }
 2768   return REG_NOERROR;
 2769 }
 2770 
 2771 /* Helper functions for get_subexp().  */
 2772 
 2773 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
 2774    If it can arrive, register the sub expression expressed with SUB_TOP
 2775    and SUB_LAST.  */
 2776 
 2777 static reg_errcode_t
 2778 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
 2779         re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
 2780 {
 2781   reg_errcode_t err;
 2782   Idx to_idx;
 2783   /* Can the subexpression arrive the back reference?  */
 2784   err = check_arrival (mctx, &sub_last->path, sub_last->node,
 2785                sub_last->str_idx, bkref_node, bkref_str,
 2786                OP_OPEN_SUBEXP);
 2787   if (err != REG_NOERROR)
 2788     return err;
 2789   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
 2790                  sub_last->str_idx);
 2791   if (__glibc_unlikely (err != REG_NOERROR))
 2792     return err;
 2793   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
 2794   return clean_state_log_if_needed (mctx, to_idx);
 2795 }
 2796 
 2797 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
 2798    Search '(' if FL_OPEN, or search ')' otherwise.
 2799    TODO: This function isn't efficient...
 2800      Because there might be more than one nodes whose types are
 2801      OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
 2802      nodes.
 2803      E.g. RE: (a){2}  */
 2804 
 2805 static Idx
 2806 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 2807           Idx subexp_idx, int type)
 2808 {
 2809   Idx cls_idx;
 2810   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
 2811     {
 2812       Idx cls_node = nodes->elems[cls_idx];
 2813       const re_token_t *node = dfa->nodes + cls_node;
 2814       if (node->type == type
 2815       && node->opr.idx == subexp_idx)
 2816     return cls_node;
 2817     }
 2818   return -1;
 2819 }
 2820 
 2821 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
 2822    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
 2823    heavily reused.
 2824    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
 2825 
 2826 static reg_errcode_t
 2827 __attribute_warn_unused_result__
 2828 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
 2829            Idx top_str, Idx last_node, Idx last_str, int type)
 2830 {
 2831   const re_dfa_t *const dfa = mctx->dfa;
 2832   reg_errcode_t err = REG_NOERROR;
 2833   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
 2834   re_dfastate_t *cur_state = NULL;
 2835   re_node_set *cur_nodes, next_nodes;
 2836   re_dfastate_t **backup_state_log;
 2837   unsigned int context;
 2838 
 2839   subexp_num = dfa->nodes[top_node].opr.idx;
 2840   /* Extend the buffer if we need.  */
 2841   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
 2842     {
 2843       re_dfastate_t **new_array;
 2844       Idx old_alloc = path->alloc;
 2845       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
 2846       Idx new_alloc;
 2847       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
 2848     return REG_ESPACE;
 2849       new_alloc = old_alloc + incr_alloc;
 2850       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
 2851     return REG_ESPACE;
 2852       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
 2853       if (__glibc_unlikely (new_array == NULL))
 2854     return REG_ESPACE;
 2855       path->array = new_array;
 2856       path->alloc = new_alloc;
 2857       memset (new_array + old_alloc, '\0',
 2858           sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
 2859     }
 2860 
 2861   str_idx = path->next_idx ? path->next_idx : top_str;
 2862 
 2863   /* Temporary modify MCTX.  */
 2864   backup_state_log = mctx->state_log;
 2865   backup_cur_idx = mctx->input.cur_idx;
 2866   mctx->state_log = path->array;
 2867   mctx->input.cur_idx = str_idx;
 2868 
 2869   /* Setup initial node set.  */
 2870   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
 2871   if (str_idx == top_str)
 2872     {
 2873       err = re_node_set_init_1 (&next_nodes, top_node);
 2874       if (__glibc_unlikely (err != REG_NOERROR))
 2875     return err;
 2876       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
 2877       if (__glibc_unlikely (err != REG_NOERROR))
 2878     {
 2879       re_node_set_free (&next_nodes);
 2880       return err;
 2881     }
 2882     }
 2883   else
 2884     {
 2885       cur_state = mctx->state_log[str_idx];
 2886       if (cur_state && cur_state->has_backref)
 2887     {
 2888       err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
 2889       if (__glibc_unlikely (err != REG_NOERROR))
 2890         return err;
 2891     }
 2892       else
 2893     re_node_set_init_empty (&next_nodes);
 2894     }
 2895   if (str_idx == top_str || (cur_state && cur_state->has_backref))
 2896     {
 2897       if (next_nodes.nelem)
 2898     {
 2899       err = expand_bkref_cache (mctx, &next_nodes, str_idx,
 2900                     subexp_num, type);
 2901       if (__glibc_unlikely (err != REG_NOERROR))
 2902         {
 2903           re_node_set_free (&next_nodes);
 2904           return err;
 2905         }
 2906     }
 2907       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
 2908       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
 2909     {
 2910       re_node_set_free (&next_nodes);
 2911       return err;
 2912     }
 2913       mctx->state_log[str_idx] = cur_state;
 2914     }
 2915 
 2916   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
 2917     {
 2918       re_node_set_empty (&next_nodes);
 2919       if (mctx->state_log[str_idx + 1])
 2920     {
 2921       err = re_node_set_merge (&next_nodes,
 2922                    &mctx->state_log[str_idx + 1]->nodes);
 2923       if (__glibc_unlikely (err != REG_NOERROR))
 2924         {
 2925           re_node_set_free (&next_nodes);
 2926           return err;
 2927         }
 2928     }
 2929       if (cur_state)
 2930     {
 2931       err = check_arrival_add_next_nodes (mctx, str_idx,
 2932                           &cur_state->non_eps_nodes,
 2933                           &next_nodes);
 2934       if (__glibc_unlikely (err != REG_NOERROR))
 2935         {
 2936           re_node_set_free (&next_nodes);
 2937           return err;
 2938         }
 2939     }
 2940       ++str_idx;
 2941       if (next_nodes.nelem)
 2942     {
 2943       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
 2944       if (__glibc_unlikely (err != REG_NOERROR))
 2945         {
 2946           re_node_set_free (&next_nodes);
 2947           return err;
 2948         }
 2949       err = expand_bkref_cache (mctx, &next_nodes, str_idx,
 2950                     subexp_num, type);
 2951       if (__glibc_unlikely (err != REG_NOERROR))
 2952         {
 2953           re_node_set_free (&next_nodes);
 2954           return err;
 2955         }
 2956     }
 2957       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
 2958       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
 2959       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
 2960     {
 2961       re_node_set_free (&next_nodes);
 2962       return err;
 2963     }
 2964       mctx->state_log[str_idx] = cur_state;
 2965       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
 2966     }
 2967   re_node_set_free (&next_nodes);
 2968   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
 2969            : &mctx->state_log[last_str]->nodes);
 2970   path->next_idx = str_idx;
 2971 
 2972   /* Fix MCTX.  */
 2973   mctx->state_log = backup_state_log;
 2974   mctx->input.cur_idx = backup_cur_idx;
 2975 
 2976   /* Then check the current node set has the node LAST_NODE.  */
 2977   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
 2978     return REG_NOERROR;
 2979 
 2980   return REG_NOMATCH;
 2981 }
 2982 
 2983 /* Helper functions for check_arrival.  */
 2984 
 2985 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
 2986    to NEXT_NODES.
 2987    TODO: This function is similar to the functions transit_state*(),
 2988      however this function has many additional works.
 2989      Can't we unify them?  */
 2990 
 2991 static reg_errcode_t
 2992 __attribute_warn_unused_result__
 2993 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
 2994                   re_node_set *cur_nodes, re_node_set *next_nodes)
 2995 {
 2996   const re_dfa_t *const dfa = mctx->dfa;
 2997   bool ok;
 2998   Idx cur_idx;
 2999 #ifdef RE_ENABLE_I18N
 3000   reg_errcode_t err = REG_NOERROR;
 3001 #endif
 3002   re_node_set union_set;
 3003   re_node_set_init_empty (&union_set);
 3004   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
 3005     {
 3006       int naccepted = 0;
 3007       Idx cur_node = cur_nodes->elems[cur_idx];
 3008       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
 3009 
 3010 #ifdef RE_ENABLE_I18N
 3011       /* If the node may accept "multi byte".  */
 3012       if (dfa->nodes[cur_node].accept_mb)
 3013     {
 3014       naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
 3015                            str_idx);
 3016       if (naccepted > 1)
 3017         {
 3018           re_dfastate_t *dest_state;
 3019           Idx next_node = dfa->nexts[cur_node];
 3020           Idx next_idx = str_idx + naccepted;
 3021           dest_state = mctx->state_log[next_idx];
 3022           re_node_set_empty (&union_set);
 3023           if (dest_state)
 3024         {
 3025           err = re_node_set_merge (&union_set, &dest_state->nodes);
 3026           if (__glibc_unlikely (err != REG_NOERROR))
 3027             {
 3028               re_node_set_free (&union_set);
 3029               return err;
 3030             }
 3031         }
 3032           ok = re_node_set_insert (&union_set, next_node);
 3033           if (__glibc_unlikely (! ok))
 3034         {
 3035           re_node_set_free (&union_set);
 3036           return REG_ESPACE;
 3037         }
 3038           mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
 3039                                 &union_set);
 3040           if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
 3041                     && err != REG_NOERROR))
 3042         {
 3043           re_node_set_free (&union_set);
 3044           return err;
 3045         }
 3046         }
 3047     }
 3048 #endif /* RE_ENABLE_I18N */
 3049       if (naccepted
 3050       || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
 3051     {
 3052       ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
 3053       if (__glibc_unlikely (! ok))
 3054         {
 3055           re_node_set_free (&union_set);
 3056           return REG_ESPACE;
 3057         }
 3058     }
 3059     }
 3060   re_node_set_free (&union_set);
 3061   return REG_NOERROR;
 3062 }
 3063 
 3064 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
 3065    CUR_NODES, however exclude the nodes which are:
 3066     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
 3067     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
 3068 */
 3069 
 3070 static reg_errcode_t
 3071 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
 3072               Idx ex_subexp, int type)
 3073 {
 3074   reg_errcode_t err;
 3075   Idx idx, outside_node;
 3076   re_node_set new_nodes;
 3077   DEBUG_ASSERT (cur_nodes->nelem);
 3078   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
 3079   if (__glibc_unlikely (err != REG_NOERROR))
 3080     return err;
 3081   /* Create a new node set NEW_NODES with the nodes which are epsilon
 3082      closures of the node in CUR_NODES.  */
 3083 
 3084   for (idx = 0; idx < cur_nodes->nelem; ++idx)
 3085     {
 3086       Idx cur_node = cur_nodes->elems[idx];
 3087       const re_node_set *eclosure = dfa->eclosures + cur_node;
 3088       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
 3089       if (outside_node == -1)
 3090     {
 3091       /* There are no problematic nodes, just merge them.  */
 3092       err = re_node_set_merge (&new_nodes, eclosure);
 3093       if (__glibc_unlikely (err != REG_NOERROR))
 3094         {
 3095           re_node_set_free (&new_nodes);
 3096           return err;
 3097         }
 3098     }
 3099       else
 3100     {
 3101       /* There are problematic nodes, re-calculate incrementally.  */
 3102       err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
 3103                           ex_subexp, type);
 3104       if (__glibc_unlikely (err != REG_NOERROR))
 3105         {
 3106           re_node_set_free (&new_nodes);
 3107           return err;
 3108         }
 3109     }
 3110     }
 3111   re_node_set_free (cur_nodes);
 3112   *cur_nodes = new_nodes;
 3113   return REG_NOERROR;
 3114 }
 3115 
 3116 /* Helper function for check_arrival_expand_ecl.
 3117    Check incrementally the epsilon closure of TARGET, and if it isn't
 3118    problematic append it to DST_NODES.  */
 3119 
 3120 static reg_errcode_t
 3121 __attribute_warn_unused_result__
 3122 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
 3123                   Idx target, Idx ex_subexp, int type)
 3124 {
 3125   Idx cur_node;
 3126   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
 3127     {
 3128       bool ok;
 3129 
 3130       if (dfa->nodes[cur_node].type == type
 3131       && dfa->nodes[cur_node].opr.idx == ex_subexp)
 3132     {
 3133       if (type == OP_CLOSE_SUBEXP)
 3134         {
 3135           ok = re_node_set_insert (dst_nodes, cur_node);
 3136           if (__glibc_unlikely (! ok))
 3137         return REG_ESPACE;
 3138         }
 3139       break;
 3140     }
 3141       ok = re_node_set_insert (dst_nodes, cur_node);
 3142       if (__glibc_unlikely (! ok))
 3143     return REG_ESPACE;
 3144       if (dfa->edests[cur_node].nelem == 0)
 3145     break;
 3146       if (dfa->edests[cur_node].nelem == 2)
 3147     {
 3148       reg_errcode_t err;
 3149       err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
 3150                           dfa->edests[cur_node].elems[1],
 3151                           ex_subexp, type);
 3152       if (__glibc_unlikely (err != REG_NOERROR))
 3153         return err;
 3154     }
 3155       cur_node = dfa->edests[cur_node].elems[0];
 3156     }
 3157   return REG_NOERROR;
 3158 }
 3159 
 3160 
 3161 /* For all the back references in the current state, calculate the
 3162    destination of the back references by the appropriate entry
 3163    in MCTX->BKREF_ENTS.  */
 3164 
 3165 static reg_errcode_t
 3166 __attribute_warn_unused_result__
 3167 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
 3168             Idx cur_str, Idx subexp_num, int type)
 3169 {
 3170   const re_dfa_t *const dfa = mctx->dfa;
 3171   reg_errcode_t err;
 3172   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
 3173   struct re_backref_cache_entry *ent;
 3174 
 3175   if (cache_idx_start == -1)
 3176     return REG_NOERROR;
 3177 
 3178  restart:
 3179   ent = mctx->bkref_ents + cache_idx_start;
 3180   do
 3181     {
 3182       Idx to_idx, next_node;
 3183 
 3184       /* Is this entry ENT is appropriate?  */
 3185       if (!re_node_set_contains (cur_nodes, ent->node))
 3186     continue; /* No.  */
 3187 
 3188       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
 3189       /* Calculate the destination of the back reference, and append it
 3190      to MCTX->STATE_LOG.  */
 3191       if (to_idx == cur_str)
 3192     {
 3193       /* The backreference did epsilon transit, we must re-check all the
 3194          node in the current state.  */
 3195       re_node_set new_dests;
 3196       reg_errcode_t err2, err3;
 3197       next_node = dfa->edests[ent->node].elems[0];
 3198       if (re_node_set_contains (cur_nodes, next_node))
 3199         continue;
 3200       err = re_node_set_init_1 (&new_dests, next_node);
 3201       err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
 3202       err3 = re_node_set_merge (cur_nodes, &new_dests);
 3203       re_node_set_free (&new_dests);
 3204       if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
 3205                 || err3 != REG_NOERROR))
 3206         {
 3207           err = (err != REG_NOERROR ? err
 3208              : (err2 != REG_NOERROR ? err2 : err3));
 3209           return err;
 3210         }
 3211       /* TODO: It is still inefficient...  */
 3212       goto restart;
 3213     }
 3214       else
 3215     {
 3216       re_node_set union_set;
 3217       next_node = dfa->nexts[ent->node];
 3218       if (mctx->state_log[to_idx])
 3219         {
 3220           bool ok;
 3221           if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
 3222                     next_node))
 3223         continue;
 3224           err = re_node_set_init_copy (&union_set,
 3225                        &mctx->state_log[to_idx]->nodes);
 3226           ok = re_node_set_insert (&union_set, next_node);
 3227           if (__glibc_unlikely (err != REG_NOERROR || ! ok))
 3228         {
 3229           re_node_set_free (&union_set);
 3230           err = err != REG_NOERROR ? err : REG_ESPACE;
 3231           return err;
 3232         }
 3233         }
 3234       else
 3235         {
 3236           err = re_node_set_init_1 (&union_set, next_node);
 3237           if (__glibc_unlikely (err != REG_NOERROR))
 3238         return err;
 3239         }
 3240       mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
 3241       re_node_set_free (&union_set);
 3242       if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
 3243                 && err != REG_NOERROR))
 3244         return err;
 3245     }
 3246     }
 3247   while (ent++->more);
 3248   return REG_NOERROR;
 3249 }
 3250 
 3251 /* Build transition table for the state.
 3252    Return true if successful.  */
 3253 
 3254 static bool
 3255 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
 3256 {
 3257   reg_errcode_t err;
 3258   Idx i, j;
 3259   int ch;
 3260   bool need_word_trtable = false;
 3261   bitset_word_t elem, mask;
 3262   bool dests_node_malloced = false;
 3263   bool dest_states_malloced = false;
 3264   Idx ndests; /* Number of the destination states from 'state'.  */
 3265   re_dfastate_t **trtable;
 3266   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
 3267   re_node_set follows, *dests_node;
 3268   bitset_t *dests_ch;
 3269   bitset_t acceptable;
 3270 
 3271   struct dests_alloc
 3272   {
 3273     re_node_set dests_node[SBC_MAX];
 3274     bitset_t dests_ch[SBC_MAX];
 3275   } *dests_alloc;
 3276 
 3277   /* We build DFA states which corresponds to the destination nodes
 3278      from 'state'.  'dests_node[i]' represents the nodes which i-th
 3279      destination state contains, and 'dests_ch[i]' represents the
 3280      characters which i-th destination state accepts.  */
 3281   if (__libc_use_alloca (sizeof (struct dests_alloc)))
 3282     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
 3283   else
 3284     {
 3285       dests_alloc = re_malloc (struct dests_alloc, 1);
 3286       if (__glibc_unlikely (dests_alloc == NULL))
 3287     return false;
 3288       dests_node_malloced = true;
 3289     }
 3290   dests_node = dests_alloc->dests_node;
 3291   dests_ch = dests_alloc->dests_ch;
 3292 
 3293   /* Initialize transition table.  */
 3294   state->word_trtable = state->trtable = NULL;
 3295 
 3296   /* At first, group all nodes belonging to 'state' into several
 3297      destinations.  */
 3298   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
 3299   if (__glibc_unlikely (ndests <= 0))
 3300     {
 3301       if (dests_node_malloced)
 3302     re_free (dests_alloc);
 3303       /* Return false in case of an error, true otherwise.  */
 3304       if (ndests == 0)
 3305     {
 3306       state->trtable = (re_dfastate_t **)
 3307         calloc (sizeof (re_dfastate_t *), SBC_MAX);
 3308           if (__glibc_unlikely (state->trtable == NULL))
 3309             return false;
 3310       return true;
 3311     }
 3312       return false;
 3313     }
 3314 
 3315   err = re_node_set_alloc (&follows, ndests + 1);
 3316   if (__glibc_unlikely (err != REG_NOERROR))
 3317     goto out_free;
 3318 
 3319   /* Avoid arithmetic overflow in size calculation.  */
 3320   size_t ndests_max
 3321     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
 3322        / (3 * sizeof (re_dfastate_t *)));
 3323   if (__glibc_unlikely (ndests_max < ndests))
 3324     goto out_free;
 3325 
 3326   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
 3327              + ndests * 3 * sizeof (re_dfastate_t *)))
 3328     dest_states = (re_dfastate_t **)
 3329       alloca (ndests * 3 * sizeof (re_dfastate_t *));
 3330   else
 3331     {
 3332       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
 3333       if (__glibc_unlikely (dest_states == NULL))
 3334     {
 3335 out_free:
 3336       if (dest_states_malloced)
 3337         re_free (dest_states);
 3338       re_node_set_free (&follows);
 3339       for (i = 0; i < ndests; ++i)
 3340         re_node_set_free (dests_node + i);
 3341       if (dests_node_malloced)
 3342         re_free (dests_alloc);
 3343       return false;
 3344     }
 3345       dest_states_malloced = true;
 3346     }
 3347   dest_states_word = dest_states + ndests;
 3348   dest_states_nl = dest_states_word + ndests;
 3349   bitset_empty (acceptable);
 3350 
 3351   /* Then build the states for all destinations.  */
 3352   for (i = 0; i < ndests; ++i)
 3353     {
 3354       Idx next_node;
 3355       re_node_set_empty (&follows);
 3356       /* Merge the follows of this destination states.  */
 3357       for (j = 0; j < dests_node[i].nelem; ++j)
 3358     {
 3359       next_node = dfa->nexts[dests_node[i].elems[j]];
 3360       if (next_node != -1)
 3361         {
 3362           err = re_node_set_merge (&follows, dfa->eclosures + next_node);
 3363           if (__glibc_unlikely (err != REG_NOERROR))
 3364         goto out_free;
 3365         }
 3366     }
 3367       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
 3368       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
 3369     goto out_free;
 3370       /* If the new state has context constraint,
 3371      build appropriate states for these contexts.  */
 3372       if (dest_states[i]->has_constraint)
 3373     {
 3374       dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
 3375                               CONTEXT_WORD);
 3376       if (__glibc_unlikely (dest_states_word[i] == NULL
 3377                 && err != REG_NOERROR))
 3378         goto out_free;
 3379 
 3380       if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
 3381         need_word_trtable = true;
 3382 
 3383       dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 3384                             CONTEXT_NEWLINE);
 3385       if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
 3386         goto out_free;
 3387     }
 3388       else
 3389     {
 3390       dest_states_word[i] = dest_states[i];
 3391       dest_states_nl[i] = dest_states[i];
 3392     }
 3393       bitset_merge (acceptable, dests_ch[i]);
 3394     }
 3395 
 3396   if (!__glibc_unlikely (need_word_trtable))
 3397     {
 3398       /* We don't care about whether the following character is a word
 3399      character, or we are in a single-byte character set so we can
 3400      discern by looking at the character code: allocate a
 3401      256-entry transition table.  */
 3402       trtable = state->trtable =
 3403     (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
 3404       if (__glibc_unlikely (trtable == NULL))
 3405     goto out_free;
 3406 
 3407       /* For all characters ch...:  */
 3408       for (i = 0; i < BITSET_WORDS; ++i)
 3409     for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
 3410          elem;
 3411          mask <<= 1, elem >>= 1, ++ch)
 3412       if (__glibc_unlikely (elem & 1))
 3413         {
 3414           /* There must be exactly one destination which accepts
 3415          character ch.  See group_nodes_into_DFAstates.  */
 3416           for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
 3417         ;
 3418 
 3419           /* j-th destination accepts the word character ch.  */
 3420           if (dfa->word_char[i] & mask)
 3421         trtable[ch] = dest_states_word[j];
 3422           else
 3423         trtable[ch] = dest_states[j];
 3424         }
 3425     }
 3426   else
 3427     {
 3428       /* We care about whether the following character is a word
 3429      character, and we are in a multi-byte character set: discern
 3430      by looking at the character code: build two 256-entry
 3431      transition tables, one starting at trtable[0] and one
 3432      starting at trtable[SBC_MAX].  */
 3433       trtable = state->word_trtable =
 3434     (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
 3435       if (__glibc_unlikely (trtable == NULL))
 3436     goto out_free;
 3437 
 3438       /* For all characters ch...:  */
 3439       for (i = 0; i < BITSET_WORDS; ++i)
 3440     for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
 3441          elem;
 3442          mask <<= 1, elem >>= 1, ++ch)
 3443       if (__glibc_unlikely (elem & 1))
 3444         {
 3445           /* There must be exactly one destination which accepts
 3446          character ch.  See group_nodes_into_DFAstates.  */
 3447           for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
 3448         ;
 3449 
 3450           /* j-th destination accepts the word character ch.  */
 3451           trtable[ch] = dest_states[j];
 3452           trtable[ch + SBC_MAX] = dest_states_word[j];
 3453         }
 3454     }
 3455 
 3456   /* new line */
 3457   if (bitset_contain (acceptable, NEWLINE_CHAR))
 3458     {
 3459       /* The current state accepts newline character.  */
 3460       for (j = 0; j < ndests; ++j)
 3461     if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
 3462       {
 3463         /* k-th destination accepts newline character.  */
 3464         trtable[NEWLINE_CHAR] = dest_states_nl[j];
 3465         if (need_word_trtable)
 3466           trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
 3467         /* There must be only one destination which accepts
 3468            newline.  See group_nodes_into_DFAstates.  */
 3469         break;
 3470       }
 3471     }
 3472 
 3473   if (dest_states_malloced)
 3474     re_free (dest_states);
 3475 
 3476   re_node_set_free (&follows);
 3477   for (i = 0; i < ndests; ++i)
 3478     re_node_set_free (dests_node + i);
 3479 
 3480   if (dests_node_malloced)
 3481     re_free (dests_alloc);
 3482 
 3483   return true;
 3484 }
 3485 
 3486 /* Group all nodes belonging to STATE into several destinations.
 3487    Then for all destinations, set the nodes belonging to the destination
 3488    to DESTS_NODE[i] and set the characters accepted by the destination
 3489    to DEST_CH[i].  This function return the number of destinations.  */
 3490 
 3491 static Idx
 3492 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 3493                 re_node_set *dests_node, bitset_t *dests_ch)
 3494 {
 3495   reg_errcode_t err;
 3496   bool ok;
 3497   Idx i, j, k;
 3498   Idx ndests; /* Number of the destinations from 'state'.  */
 3499   bitset_t accepts; /* Characters a node can accept.  */
 3500   const re_node_set *cur_nodes = &state->nodes;
 3501   bitset_empty (accepts);
 3502   ndests = 0;
 3503 
 3504   /* For all the nodes belonging to 'state',  */
 3505   for (i = 0; i < cur_nodes->nelem; ++i)
 3506     {
 3507       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
 3508       re_token_type_t type = node->type;
 3509       unsigned int constraint = node->constraint;
 3510 
 3511       /* Enumerate all single byte character this node can accept.  */
 3512       if (type == CHARACTER)
 3513     bitset_set (accepts, node->opr.c);
 3514       else if (type == SIMPLE_BRACKET)
 3515     {
 3516       bitset_merge (accepts, node->opr.sbcset);
 3517     }
 3518       else if (type == OP_PERIOD)
 3519     {
 3520 #ifdef RE_ENABLE_I18N
 3521       if (dfa->mb_cur_max > 1)
 3522         bitset_merge (accepts, dfa->sb_char);
 3523       else
 3524 #endif
 3525         bitset_set_all (accepts);
 3526       if (!(dfa->syntax & RE_DOT_NEWLINE))
 3527         bitset_clear (accepts, '\n');
 3528       if (dfa->syntax & RE_DOT_NOT_NULL)
 3529         bitset_clear (accepts, '\0');
 3530     }
 3531 #ifdef RE_ENABLE_I18N
 3532       else if (type == OP_UTF8_PERIOD)
 3533     {
 3534       if (ASCII_CHARS % BITSET_WORD_BITS == 0)
 3535         memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
 3536       else
 3537         bitset_merge (accepts, utf8_sb_map);
 3538       if (!(dfa->syntax & RE_DOT_NEWLINE))
 3539         bitset_clear (accepts, '\n');
 3540       if (dfa->syntax & RE_DOT_NOT_NULL)
 3541         bitset_clear (accepts, '\0');
 3542     }
 3543 #endif
 3544       else
 3545     continue;
 3546 
 3547       /* Check the 'accepts' and sift the characters which are not
 3548      match it the context.  */
 3549       if (constraint)
 3550     {
 3551       if (constraint & NEXT_NEWLINE_CONSTRAINT)
 3552         {
 3553           bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
 3554           bitset_empty (accepts);
 3555           if (accepts_newline)
 3556         bitset_set (accepts, NEWLINE_CHAR);
 3557           else
 3558         continue;
 3559         }
 3560       if (constraint & NEXT_ENDBUF_CONSTRAINT)
 3561         {
 3562           bitset_empty (accepts);
 3563           continue;
 3564         }
 3565 
 3566       if (constraint & NEXT_WORD_CONSTRAINT)
 3567         {
 3568           bitset_word_t any_set = 0;
 3569           if (type == CHARACTER && !node->word_char)
 3570         {
 3571           bitset_empty (accepts);
 3572           continue;
 3573         }
 3574 #ifdef RE_ENABLE_I18N
 3575           if (dfa->mb_cur_max > 1)
 3576         for (j = 0; j < BITSET_WORDS; ++j)
 3577           any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
 3578           else
 3579 #endif
 3580         for (j = 0; j < BITSET_WORDS; ++j)
 3581           any_set |= (accepts[j] &= dfa->word_char[j]);
 3582           if (!any_set)
 3583         continue;
 3584         }
 3585       if (constraint & NEXT_NOTWORD_CONSTRAINT)
 3586         {
 3587           bitset_word_t any_set = 0;
 3588           if (type == CHARACTER && node->word_char)
 3589         {
 3590           bitset_empty (accepts);
 3591           continue;
 3592         }
 3593 #ifdef RE_ENABLE_I18N
 3594           if (dfa->mb_cur_max > 1)
 3595         for (j = 0; j < BITSET_WORDS; ++j)
 3596           any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
 3597           else
 3598 #endif
 3599         for (j = 0; j < BITSET_WORDS; ++j)
 3600           any_set |= (accepts[j] &= ~dfa->word_char[j]);
 3601           if (!any_set)
 3602         continue;
 3603         }
 3604     }
 3605 
 3606       /* Then divide 'accepts' into DFA states, or create a new
 3607      state.  Above, we make sure that accepts is not empty.  */
 3608       for (j = 0; j < ndests; ++j)
 3609     {
 3610       bitset_t intersec; /* Intersection sets, see below.  */
 3611       bitset_t remains;
 3612       /* Flags, see below.  */
 3613       bitset_word_t has_intersec, not_subset, not_consumed;
 3614 
 3615       /* Optimization, skip if this state doesn't accept the character.  */
 3616       if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
 3617         continue;
 3618 
 3619       /* Enumerate the intersection set of this state and 'accepts'.  */
 3620       has_intersec = 0;
 3621       for (k = 0; k < BITSET_WORDS; ++k)
 3622         has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
 3623       /* And skip if the intersection set is empty.  */
 3624       if (!has_intersec)
 3625         continue;
 3626 
 3627       /* Then check if this state is a subset of 'accepts'.  */
 3628       not_subset = not_consumed = 0;
 3629       for (k = 0; k < BITSET_WORDS; ++k)
 3630         {
 3631           not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
 3632           not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
 3633         }
 3634 
 3635       /* If this state isn't a subset of 'accepts', create a
 3636          new group state, which has the 'remains'. */
 3637       if (not_subset)
 3638         {
 3639           bitset_copy (dests_ch[ndests], remains);
 3640           bitset_copy (dests_ch[j], intersec);
 3641           err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
 3642           if (__glibc_unlikely (err != REG_NOERROR))
 3643         goto error_return;
 3644           ++ndests;
 3645         }
 3646 
 3647       /* Put the position in the current group. */
 3648       ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
 3649       if (__glibc_unlikely (! ok))
 3650         goto error_return;
 3651 
 3652       /* If all characters are consumed, go to next node. */
 3653       if (!not_consumed)
 3654         break;
 3655     }
 3656       /* Some characters remain, create a new group. */
 3657       if (j == ndests)
 3658     {
 3659       bitset_copy (dests_ch[ndests], accepts);
 3660       err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
 3661       if (__glibc_unlikely (err != REG_NOERROR))
 3662         goto error_return;
 3663       ++ndests;
 3664       bitset_empty (accepts);
 3665     }
 3666     }
 3667   assume (ndests <= SBC_MAX);
 3668   return ndests;
 3669  error_return:
 3670   for (j = 0; j < ndests; ++j)
 3671     re_node_set_free (dests_node + j);
 3672   return -1;
 3673 }
 3674 
 3675 #ifdef RE_ENABLE_I18N
 3676 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
 3677    Return the number of the bytes the node accepts.
 3678    STR_IDX is the current index of the input string.
 3679 
 3680    This function handles the nodes which can accept one character, or
 3681    one collating element like '.', '[a-z]', opposite to the other nodes
 3682    can only accept one byte.  */
 3683 
 3684 # ifdef _LIBC
 3685 #  include <locale/weight.h>
 3686 # endif
 3687 
 3688 static int
 3689 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 3690              const re_string_t *input, Idx str_idx)
 3691 {
 3692   const re_token_t *node = dfa->nodes + node_idx;
 3693   int char_len, elem_len;
 3694   Idx i;
 3695 
 3696   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
 3697     {
 3698       unsigned char c = re_string_byte_at (input, str_idx), d;
 3699       if (__glibc_likely (c < 0xc2))
 3700     return 0;
 3701 
 3702       if (str_idx + 2 > input->len)
 3703     return 0;
 3704 
 3705       d = re_string_byte_at (input, str_idx + 1);
 3706       if (c < 0xe0)
 3707     return (d < 0x80 || d > 0xbf) ? 0 : 2;
 3708       else if (c < 0xf0)
 3709     {
 3710       char_len = 3;
 3711       if (c == 0xe0 && d < 0xa0)
 3712         return 0;
 3713     }
 3714       else if (c < 0xf8)
 3715     {
 3716       char_len = 4;
 3717       if (c == 0xf0 && d < 0x90)
 3718         return 0;
 3719     }
 3720       else if (c < 0xfc)
 3721     {
 3722       char_len = 5;
 3723       if (c == 0xf8 && d < 0x88)
 3724         return 0;
 3725     }
 3726       else if (c < 0xfe)
 3727     {
 3728       char_len = 6;
 3729       if (c == 0xfc && d < 0x84)
 3730         return 0;
 3731     }
 3732       else
 3733     return 0;
 3734 
 3735       if (str_idx + char_len > input->len)
 3736     return 0;
 3737 
 3738       for (i = 1; i < char_len; ++i)
 3739     {
 3740       d = re_string_byte_at (input, str_idx + i);
 3741       if (d < 0x80 || d > 0xbf)
 3742         return 0;
 3743     }
 3744       return char_len;
 3745     }
 3746 
 3747   char_len = re_string_char_size_at (input, str_idx);
 3748   if (node->type == OP_PERIOD)
 3749     {
 3750       if (char_len <= 1)
 3751     return 0;
 3752       /* FIXME: I don't think this if is needed, as both '\n'
 3753      and '\0' are char_len == 1.  */
 3754       /* '.' accepts any one character except the following two cases.  */
 3755       if ((!(dfa->syntax & RE_DOT_NEWLINE)
 3756        && re_string_byte_at (input, str_idx) == '\n')
 3757       || ((dfa->syntax & RE_DOT_NOT_NULL)
 3758           && re_string_byte_at (input, str_idx) == '\0'))
 3759     return 0;
 3760       return char_len;
 3761     }
 3762 
 3763   elem_len = re_string_elem_size_at (input, str_idx);
 3764   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
 3765     return 0;
 3766 
 3767   if (node->type == COMPLEX_BRACKET)
 3768     {
 3769       const re_charset_t *cset = node->opr.mbcset;
 3770 # ifdef _LIBC
 3771       const unsigned char *pin
 3772     = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
 3773       Idx j;
 3774       uint32_t nrules;
 3775 # endif /* _LIBC */
 3776       int match_len = 0;
 3777       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
 3778             ? re_string_wchar_at (input, str_idx) : 0);
 3779 
 3780       /* match with multibyte character?  */
 3781       for (i = 0; i < cset->nmbchars; ++i)
 3782     if (wc == cset->mbchars[i])
 3783       {
 3784         match_len = char_len;
 3785         goto check_node_accept_bytes_match;
 3786       }
 3787       /* match with character_class?  */
 3788       for (i = 0; i < cset->nchar_classes; ++i)
 3789     {
 3790       wctype_t wt = cset->char_classes[i];
 3791       if (__iswctype (wc, wt))
 3792         {
 3793           match_len = char_len;
 3794           goto check_node_accept_bytes_match;
 3795         }
 3796     }
 3797 
 3798 # ifdef _LIBC
 3799       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 3800       if (nrules != 0)
 3801     {
 3802       unsigned int in_collseq = 0;
 3803       const int32_t *table, *indirect;
 3804       const unsigned char *weights, *extra;
 3805       const char *collseqwc;
 3806 
 3807       /* match with collating_symbol?  */
 3808       if (cset->ncoll_syms)
 3809         extra = (const unsigned char *)
 3810           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 3811       for (i = 0; i < cset->ncoll_syms; ++i)
 3812         {
 3813           const unsigned char *coll_sym = extra + cset->coll_syms[i];
 3814           /* Compare the length of input collating element and
 3815          the length of current collating element.  */
 3816           if (*coll_sym != elem_len)
 3817         continue;
 3818           /* Compare each bytes.  */
 3819           for (j = 0; j < *coll_sym; j++)
 3820         if (pin[j] != coll_sym[1 + j])
 3821           break;
 3822           if (j == *coll_sym)
 3823         {
 3824           /* Match if every bytes is equal.  */
 3825           match_len = j;
 3826           goto check_node_accept_bytes_match;
 3827         }
 3828         }
 3829 
 3830       if (cset->nranges)
 3831         {
 3832           if (elem_len <= char_len)
 3833         {
 3834           collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
 3835           in_collseq = __collseq_table_lookup (collseqwc, wc);
 3836         }
 3837           else
 3838         in_collseq = find_collation_sequence_value (pin, elem_len);
 3839         }
 3840       /* match with range expression?  */
 3841       /* FIXME: Implement rational ranges here, too.  */
 3842       for (i = 0; i < cset->nranges; ++i)
 3843         if (cset->range_starts[i] <= in_collseq
 3844         && in_collseq <= cset->range_ends[i])
 3845           {
 3846         match_len = elem_len;
 3847         goto check_node_accept_bytes_match;
 3848           }
 3849 
 3850       /* match with equivalence_class?  */
 3851       if (cset->nequiv_classes)
 3852         {
 3853           const unsigned char *cp = pin;
 3854           table = (const int32_t *)
 3855         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 3856           weights = (const unsigned char *)
 3857         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 3858           extra = (const unsigned char *)
 3859         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 3860           indirect = (const int32_t *)
 3861         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 3862           int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
 3863           int32_t rule = idx >> 24;
 3864           idx &= 0xffffff;
 3865           if (idx > 0)
 3866         {
 3867           size_t weight_len = weights[idx];
 3868           for (i = 0; i < cset->nequiv_classes; ++i)
 3869             {
 3870               int32_t equiv_class_idx = cset->equiv_classes[i];
 3871               int32_t equiv_class_rule = equiv_class_idx >> 24;
 3872               equiv_class_idx &= 0xffffff;
 3873               if (weights[equiv_class_idx] == weight_len
 3874               && equiv_class_rule == rule
 3875               && memcmp (weights + idx + 1,
 3876                      weights + equiv_class_idx + 1,
 3877                      weight_len) == 0)
 3878             {
 3879               match_len = elem_len;
 3880               goto check_node_accept_bytes_match;
 3881             }
 3882             }
 3883         }
 3884         }
 3885     }
 3886       else
 3887 # endif /* _LIBC */
 3888     {
 3889       /* match with range expression?  */
 3890       for (i = 0; i < cset->nranges; ++i)
 3891         {
 3892           if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
 3893         {
 3894           match_len = char_len;
 3895           goto check_node_accept_bytes_match;
 3896         }
 3897         }
 3898     }
 3899     check_node_accept_bytes_match:
 3900       if (!cset->non_match)
 3901     return match_len;
 3902       else
 3903     {
 3904       if (match_len > 0)
 3905         return 0;
 3906       else
 3907         return (elem_len > char_len) ? elem_len : char_len;
 3908     }
 3909     }
 3910   return 0;
 3911 }
 3912 
 3913 # ifdef _LIBC
 3914 static unsigned int
 3915 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
 3916 {
 3917   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 3918   if (nrules == 0)
 3919     {
 3920       if (mbs_len == 1)
 3921     {
 3922       /* No valid character.  Match it as a single byte character.  */
 3923       const unsigned char *collseq = (const unsigned char *)
 3924         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
 3925       return collseq[mbs[0]];
 3926     }
 3927       return UINT_MAX;
 3928     }
 3929   else
 3930     {
 3931       int32_t idx;
 3932       const unsigned char *extra = (const unsigned char *)
 3933     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 3934       int32_t extrasize = (const unsigned char *)
 3935     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
 3936 
 3937       for (idx = 0; idx < extrasize;)
 3938     {
 3939       int mbs_cnt;
 3940       bool found = false;
 3941       int32_t elem_mbs_len;
 3942       /* Skip the name of collating element name.  */
 3943       idx = idx + extra[idx] + 1;
 3944       elem_mbs_len = extra[idx++];
 3945       if (mbs_len == elem_mbs_len)
 3946         {
 3947           for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
 3948         if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
 3949           break;
 3950           if (mbs_cnt == elem_mbs_len)
 3951         /* Found the entry.  */
 3952         found = true;
 3953         }
 3954       /* Skip the byte sequence of the collating element.  */
 3955       idx += elem_mbs_len;
 3956       /* Adjust for the alignment.  */
 3957       idx = (idx + 3) & ~3;
 3958       /* Skip the collation sequence value.  */
 3959       idx += sizeof (uint32_t);
 3960       /* Skip the wide char sequence of the collating element.  */
 3961       idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
 3962       /* If we found the entry, return the sequence value.  */
 3963       if (found)
 3964         return *(uint32_t *) (extra + idx);
 3965       /* Skip the collation sequence value.  */
 3966       idx += sizeof (uint32_t);
 3967     }
 3968       return UINT_MAX;
 3969     }
 3970 }
 3971 # endif /* _LIBC */
 3972 #endif /* RE_ENABLE_I18N */
 3973 
 3974 /* Check whether the node accepts the byte which is IDX-th
 3975    byte of the INPUT.  */
 3976 
 3977 static bool
 3978 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
 3979            Idx idx)
 3980 {
 3981   unsigned char ch;
 3982   ch = re_string_byte_at (&mctx->input, idx);
 3983   switch (node->type)
 3984     {
 3985     case CHARACTER:
 3986       if (node->opr.c != ch)
 3987         return false;
 3988       break;
 3989 
 3990     case SIMPLE_BRACKET:
 3991       if (!bitset_contain (node->opr.sbcset, ch))
 3992         return false;
 3993       break;
 3994 
 3995 #ifdef RE_ENABLE_I18N
 3996     case OP_UTF8_PERIOD:
 3997       if (ch >= ASCII_CHARS)
 3998         return false;
 3999       FALLTHROUGH;
 4000 #endif
 4001     case OP_PERIOD:
 4002       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
 4003       || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
 4004     return false;
 4005       break;
 4006 
 4007     default:
 4008       return false;
 4009     }
 4010 
 4011   if (node->constraint)
 4012     {
 4013       /* The node has constraints.  Check whether the current context
 4014      satisfies the constraints.  */
 4015       unsigned int context = re_string_context_at (&mctx->input, idx,
 4016                            mctx->eflags);
 4017       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
 4018     return false;
 4019     }
 4020 
 4021   return true;
 4022 }
 4023 
 4024 /* Extend the buffers, if the buffers have run out.  */
 4025 
 4026 static reg_errcode_t
 4027 __attribute_warn_unused_result__
 4028 extend_buffers (re_match_context_t *mctx, int min_len)
 4029 {
 4030   reg_errcode_t ret;
 4031   re_string_t *pstr = &mctx->input;
 4032 
 4033   /* Avoid overflow.  */
 4034   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
 4035             <= pstr->bufs_len))
 4036     return REG_ESPACE;
 4037 
 4038   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
 4039   ret = re_string_realloc_buffers (pstr,
 4040                    MAX (min_len,
 4041                     MIN (pstr->len, pstr->bufs_len * 2)));
 4042   if (__glibc_unlikely (ret != REG_NOERROR))
 4043     return ret;
 4044 
 4045   if (mctx->state_log != NULL)
 4046     {
 4047       /* And double the length of state_log.  */
 4048       /* XXX We have no indication of the size of this buffer.  If this
 4049      allocation fail we have no indication that the state_log array
 4050      does not have the right size.  */
 4051       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
 4052                           pstr->bufs_len + 1);
 4053       if (__glibc_unlikely (new_array == NULL))
 4054     return REG_ESPACE;
 4055       mctx->state_log = new_array;
 4056     }
 4057 
 4058   /* Then reconstruct the buffers.  */
 4059   if (pstr->icase)
 4060     {
 4061 #ifdef RE_ENABLE_I18N
 4062       if (pstr->mb_cur_max > 1)
 4063     {
 4064       ret = build_wcs_upper_buffer (pstr);
 4065       if (__glibc_unlikely (ret != REG_NOERROR))
 4066         return ret;
 4067     }
 4068       else
 4069 #endif /* RE_ENABLE_I18N  */
 4070     build_upper_buffer (pstr);
 4071     }
 4072   else
 4073     {
 4074 #ifdef RE_ENABLE_I18N
 4075       if (pstr->mb_cur_max > 1)
 4076     build_wcs_buffer (pstr);
 4077       else
 4078 #endif /* RE_ENABLE_I18N  */
 4079     {
 4080       if (pstr->trans != NULL)
 4081         re_string_translate_buffer (pstr);
 4082     }
 4083     }
 4084   return REG_NOERROR;
 4085 }
 4086 
 4087 
 4088 /* Functions for matching context.  */
 4089 
 4090 /* Initialize MCTX.  */
 4091 
 4092 static reg_errcode_t
 4093 __attribute_warn_unused_result__
 4094 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
 4095 {
 4096   mctx->eflags = eflags;
 4097   mctx->match_last = -1;
 4098   if (n > 0)
 4099     {
 4100       /* Avoid overflow.  */
 4101       size_t max_object_size =
 4102     MAX (sizeof (struct re_backref_cache_entry),
 4103          sizeof (re_sub_match_top_t *));
 4104       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
 4105     return REG_ESPACE;
 4106 
 4107       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
 4108       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
 4109       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
 4110     return REG_ESPACE;
 4111     }
 4112   /* Already zero-ed by the caller.
 4113      else
 4114        mctx->bkref_ents = NULL;
 4115      mctx->nbkref_ents = 0;
 4116      mctx->nsub_tops = 0;  */
 4117   mctx->abkref_ents = n;
 4118   mctx->max_mb_elem_len = 1;
 4119   mctx->asub_tops = n;
 4120   return REG_NOERROR;
 4121 }
 4122 
 4123 /* Clean the entries which depend on the current input in MCTX.
 4124    This function must be invoked when the matcher changes the start index
 4125    of the input, or changes the input string.  */
 4126 
 4127 static void
 4128 match_ctx_clean (re_match_context_t *mctx)
 4129 {
 4130   Idx st_idx;
 4131   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
 4132     {
 4133       Idx sl_idx;
 4134       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
 4135       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
 4136     {
 4137       re_sub_match_last_t *last = top->lasts[sl_idx];
 4138       re_free (last->path.array);
 4139       re_free (last);
 4140     }
 4141       re_free (top->lasts);
 4142       if (top->path)
 4143     {
 4144       re_free (top->path->array);
 4145       re_free (top->path);
 4146     }
 4147       re_free (top);
 4148     }
 4149 
 4150   mctx->nsub_tops = 0;
 4151   mctx->nbkref_ents = 0;
 4152 }
 4153 
 4154 /* Free all the memory associated with MCTX.  */
 4155 
 4156 static void
 4157 match_ctx_free (re_match_context_t *mctx)
 4158 {
 4159   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
 4160   match_ctx_clean (mctx);
 4161   re_free (mctx->sub_tops);
 4162   re_free (mctx->bkref_ents);
 4163 }
 4164 
 4165 /* Add a new backreference entry to MCTX.
 4166    Note that we assume that caller never call this function with duplicate
 4167    entry, and call with STR_IDX which isn't smaller than any existing entry.
 4168 */
 4169 
 4170 static reg_errcode_t
 4171 __attribute_warn_unused_result__
 4172 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
 4173              Idx to)
 4174 {
 4175   if (mctx->nbkref_ents >= mctx->abkref_ents)
 4176     {
 4177       struct re_backref_cache_entry* new_entry;
 4178       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
 4179                   mctx->abkref_ents * 2);
 4180       if (__glibc_unlikely (new_entry == NULL))
 4181     {
 4182       re_free (mctx->bkref_ents);
 4183       return REG_ESPACE;
 4184     }
 4185       mctx->bkref_ents = new_entry;
 4186       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
 4187           sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
 4188       mctx->abkref_ents *= 2;
 4189     }
 4190   if (mctx->nbkref_ents > 0
 4191       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
 4192     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
 4193 
 4194   mctx->bkref_ents[mctx->nbkref_ents].node = node;
 4195   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
 4196   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
 4197   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
 4198 
 4199   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
 4200      If bit N is clear, means that this entry won't epsilon-transition to
 4201      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
 4202      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
 4203      such node.
 4204 
 4205      A backreference does not epsilon-transition unless it is empty, so set
 4206      to all zeros if FROM != TO.  */
 4207   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
 4208     = (from == to ? -1 : 0);
 4209 
 4210   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
 4211   if (mctx->max_mb_elem_len < to - from)
 4212     mctx->max_mb_elem_len = to - from;
 4213   return REG_NOERROR;
 4214 }
 4215 
 4216 /* Return the first entry with the same str_idx, or -1 if none is
 4217    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
 4218 
 4219 static Idx
 4220 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
 4221 {
 4222   Idx left, right, mid, last;
 4223   last = right = mctx->nbkref_ents;
 4224   for (left = 0; left < right;)
 4225     {
 4226       mid = (left + right) / 2;
 4227       if (mctx->bkref_ents[mid].str_idx < str_idx)
 4228     left = mid + 1;
 4229       else
 4230     right = mid;
 4231     }
 4232   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
 4233     return left;
 4234   else
 4235     return -1;
 4236 }
 4237 
 4238 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
 4239    at STR_IDX.  */
 4240 
 4241 static reg_errcode_t
 4242 __attribute_warn_unused_result__
 4243 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
 4244 {
 4245   DEBUG_ASSERT (mctx->sub_tops != NULL);
 4246   DEBUG_ASSERT (mctx->asub_tops > 0);
 4247   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
 4248     {
 4249       Idx new_asub_tops = mctx->asub_tops * 2;
 4250       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
 4251                            re_sub_match_top_t *,
 4252                            new_asub_tops);
 4253       if (__glibc_unlikely (new_array == NULL))
 4254     return REG_ESPACE;
 4255       mctx->sub_tops = new_array;
 4256       mctx->asub_tops = new_asub_tops;
 4257     }
 4258   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
 4259   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
 4260     return REG_ESPACE;
 4261   mctx->sub_tops[mctx->nsub_tops]->node = node;
 4262   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
 4263   return REG_NOERROR;
 4264 }
 4265 
 4266 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
 4267    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
 4268 
 4269 static re_sub_match_last_t *
 4270 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
 4271 {
 4272   re_sub_match_last_t *new_entry;
 4273   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
 4274     {
 4275       Idx new_alasts = 2 * subtop->alasts + 1;
 4276       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
 4277                             re_sub_match_last_t *,
 4278                             new_alasts);
 4279       if (__glibc_unlikely (new_array == NULL))
 4280     return NULL;
 4281       subtop->lasts = new_array;
 4282       subtop->alasts = new_alasts;
 4283     }
 4284   new_entry = calloc (1, sizeof (re_sub_match_last_t));
 4285   if (__glibc_likely (new_entry != NULL))
 4286     {
 4287       subtop->lasts[subtop->nlasts] = new_entry;
 4288       new_entry->node = node;
 4289       new_entry->str_idx = str_idx;
 4290       ++subtop->nlasts;
 4291     }
 4292   return new_entry;
 4293 }
 4294 
 4295 static void
 4296 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
 4297            re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
 4298 {
 4299   sctx->sifted_states = sifted_sts;
 4300   sctx->limited_states = limited_sts;
 4301   sctx->last_node = last_node;
 4302   sctx->last_str_idx = last_str_idx;
 4303   re_node_set_init_empty (&sctx->limits);
 4304 }