"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "lib/regexec.c" between
gcal-4.tar.gz and gcal-4.1.tar.gz

About: Gcal displays month/year calendar sheets, eternal holiday and fixed date lists in many ways.

regexec.c  (gcal-4):regexec.c  (gcal-4.1)
/* Extended regular expression matching and search library. /* Extended regular expression matching and search library.
Copyright (C) 2002-2015 Free Software Foundation, Inc. Copyright (C) 2002-2017 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version. version 3 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful, The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
skipping to change at line 221 skipping to change at line 221
least NMATCH elements, and we set them to the offsets of the least NMATCH elements, and we set them to the offsets of the
corresponding matched substrings. corresponding matched substrings.
EFLAGS specifies "execution flags" which affect matching: if EFLAGS specifies "execution flags" which affect matching: if
REG_NOTBOL is set, then ^ does not match at the beginning of the REG_NOTBOL is set, then ^ does not match at the beginning of the
string; if REG_NOTEOL is set, then $ does not match at the end. string; if REG_NOTEOL is set, then $ does not match at the end.
We return 0 if we find a match and REG_NOMATCH if not. */ We return 0 if we find a match and REG_NOMATCH if not. */
int int
regexec (preg, string, nmatch, pmatch, eflags) regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
const regex_t *_Restrict_ preg; size_t nmatch, regmatch_t pmatch[], int eflags)
const char *_Restrict_ string;
size_t nmatch;
regmatch_t pmatch[_Restrict_arr_];
int eflags;
{ {
reg_errcode_t err; reg_errcode_t err;
Idx start, length; Idx start, length;
re_dfa_t *dfa = preg->buffer; re_dfa_t *dfa = preg->buffer;
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
return REG_BADPAT; return REG_BADPAT;
if (eflags & REG_STARTEND) if (eflags & REG_STARTEND)
{ {
skipping to change at line 307 skipping to change at line 303
If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
and all groups is stored in REGS. (For the "_2" variants, the offsets are and all groups is stored in REGS. (For the "_2" variants, the offsets are
computed relative to the concatenation, not relative to the individual computed relative to the concatenation, not relative to the individual
strings.) strings.)
On success, re_match* functions return the length of the match, re_search* On success, re_match* functions return the length of the match, re_search*
return the position of the start of the match. Return value -1 means no return the position of the start of the match. Return value -1 means no
match was found and -2 indicates an internal error. */ match was found and -2 indicates an internal error. */
regoff_t regoff_t
re_match (bufp, string, length, start, regs) re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
struct re_pattern_buffer *bufp; Idx start, struct re_registers *regs)
const char *string;
Idx length, start;
struct re_registers *regs;
{ {
return re_search_stub (bufp, string, length, start, 0, length, regs, true); return re_search_stub (bufp, string, length, start, 0, length, regs, true);
} }
#ifdef _LIBC #ifdef _LIBC
weak_alias (__re_match, re_match) weak_alias (__re_match, re_match)
#endif #endif
regoff_t regoff_t
re_search (bufp, string, length, start, range, regs) re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
struct re_pattern_buffer *bufp; Idx start, regoff_t range, struct re_registers *regs)
const char *string;
Idx length, start;
regoff_t range;
struct re_registers *regs;
{ {
return re_search_stub (bufp, string, length, start, range, length, regs, return re_search_stub (bufp, string, length, start, range, length, regs,
false); false);
} }
#ifdef _LIBC #ifdef _LIBC
weak_alias (__re_search, re_search) weak_alias (__re_search, re_search)
#endif #endif
regoff_t regoff_t
re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
struct re_pattern_buffer *bufp; const char *string2, Idx length2, Idx start,
const char *string1, *string2; struct re_registers *regs, Idx stop)
Idx length1, length2, start, stop;
struct re_registers *regs;
{ {
return re_search_2_stub (bufp, string1, length1, string2, length2, return re_search_2_stub (bufp, string1, length1, string2, length2,
start, 0, regs, stop, true); start, 0, regs, stop, true);
} }
#ifdef _LIBC #ifdef _LIBC
weak_alias (__re_match_2, re_match_2) weak_alias (__re_match_2, re_match_2)
#endif #endif
regoff_t regoff_t
re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
struct re_pattern_buffer *bufp; const char *string2, Idx length2, Idx start, regoff_t range,
const char *string1, *string2; struct re_registers *regs, Idx stop)
Idx length1, length2, start, stop;
regoff_t range;
struct re_registers *regs;
{ {
return re_search_2_stub (bufp, string1, length1, string2, length2, return re_search_2_stub (bufp, string1, length1, string2, length2,
start, range, regs, stop, false); start, range, regs, stop, false);
} }
#ifdef _LIBC #ifdef _LIBC
weak_alias (__re_search_2, re_search_2) weak_alias (__re_search_2, re_search_2)
#endif #endif
static regoff_t static regoff_t
re_search_2_stub (struct re_pattern_buffer *bufp, internal_function
const char *string1, Idx length1, re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
const char *string2, Idx length2, Idx length1, const char *string2, Idx length2, Idx start,
Idx start, regoff_t range, struct re_registers *regs, regoff_t range, struct re_registers *regs,
Idx stop, bool ret_len) Idx stop, bool ret_len)
{ {
const char *str; const char *str;
regoff_t rval; regoff_t rval;
Idx len = length1 + length2; Idx len;
char *s = NULL; char *s = NULL;
if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) if (BE ((length1 < 0 || length2 < 0 || stop < 0
|| INT_ADD_WRAPV (length1, length2, &len)),
0))
return -2; return -2;
/* Concatenate the strings. */ /* Concatenate the strings. */
if (length2 > 0) if (length2 > 0)
if (length1 > 0) if (length1 > 0)
{ {
s = re_malloc (char, len); s = re_malloc (char, len);
if (BE (s == NULL, 0)) if (BE (s == NULL, 0))
return -2; return -2;
skipping to change at line 411 skipping to change at line 397
re_free (s); re_free (s);
return rval; return rval;
} }
/* The parameters have the same meaning as those of re_search. /* The parameters have the same meaning as those of re_search.
Additional parameters: Additional parameters:
If RET_LEN is true the length of the match is returned (re_match style); If RET_LEN is true the length of the match is returned (re_match style);
otherwise the position of the match is returned. */ otherwise the position of the match is returned. */
static regoff_t static regoff_t
re_search_stub (struct re_pattern_buffer *bufp, internal_function
const char *string, Idx length, re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
Idx start, regoff_t range, Idx stop, struct re_registers *regs, Idx start, regoff_t range, Idx stop, struct re_registers *regs,
bool ret_len) bool ret_len)
{ {
reg_errcode_t result; reg_errcode_t result;
regmatch_t *pmatch; regmatch_t *pmatch;
Idx nregs; Idx nregs;
regoff_t rval; regoff_t rval;
int eflags = 0; int eflags = 0;
re_dfa_t *dfa = bufp->buffer; re_dfa_t *dfa = bufp->buffer;
Idx last_start = start + range; Idx last_start = start + range;
skipping to change at line 501 skipping to change at line 487
else else
rval = pmatch[0].rm_so; rval = pmatch[0].rm_so;
} }
re_free (pmatch); re_free (pmatch);
out: out:
lock_unlock (dfa->lock); lock_unlock (dfa->lock);
return rval; return rval;
} }
static unsigned static unsigned
internal_function
re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
int regs_allocated) int regs_allocated)
{ {
int rval = REGS_REALLOCATE; int rval = REGS_REALLOCATE;
Idx i; Idx i;
Idx need_regs = nregs + 1; Idx need_regs = nregs + 1;
/* We need one extra element beyond 'num_regs' for the '-1' marker GNU code /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
uses. */ uses. */
/* Have the register data arrays been allocated? */ /* Have the register data arrays been allocated? */
skipping to change at line 579 skipping to change at line 566
be at least NUM_REGS * sizeof (regoff_t) bytes long. be at least NUM_REGS * sizeof (regoff_t) bytes long.
If NUM_REGS == 0, then subsequent matches should allocate their own If NUM_REGS == 0, then subsequent matches should allocate their own
register data. register data.
Unless this function is called, the first search or match using Unless this function is called, the first search or match using
PATTERN_BUFFER will allocate its own register data, without PATTERN_BUFFER will allocate its own register data, without
freeing the old data. */ freeing the old data. */
void void
re_set_registers (bufp, regs, num_regs, starts, ends) re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
struct re_pattern_buffer *bufp; __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
struct re_registers *regs;
__re_size_t num_regs;
regoff_t *starts, *ends;
{ {
if (num_regs) if (num_regs)
{ {
bufp->regs_allocated = REGS_REALLOCATE; bufp->regs_allocated = REGS_REALLOCATE;
regs->num_regs = num_regs; regs->num_regs = num_regs;
regs->start = starts; regs->start = starts;
regs->end = ends; regs->end = ends;
} }
else else
{ {
skipping to change at line 610 skipping to change at line 594
weak_alias (__re_set_registers, re_set_registers) weak_alias (__re_set_registers, re_set_registers)
#endif #endif
/* Entry points compatible with 4.2 BSD regex library. We don't define /* Entry points compatible with 4.2 BSD regex library. We don't define
them unless specifically requested. */ them unless specifically requested. */
#if defined _REGEX_RE_COMP || defined _LIBC #if defined _REGEX_RE_COMP || defined _LIBC
int int
# ifdef _LIBC # ifdef _LIBC
weak_function weak_function
# endif # endif
re_exec (s) re_exec (const char *s)
const char *s;
{ {
return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
} }
#endif /* _REGEX_RE_COMP */ #endif /* _REGEX_RE_COMP */
/* Internal entry point. */ /* Internal entry point. */
/* Searches for a compiled pattern PREG in the string STRING, whose /* Searches for a compiled pattern PREG in the string STRING, whose
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
meaning as with regexec. LAST_START is START + RANGE, where meaning as with regexec. LAST_START is START + RANGE, where
START and RANGE have the same meaning as with re_search. START and RANGE have the same meaning as with re_search.
Return REG_NOERROR if we find a match, and REG_NOMATCH if not, Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
otherwise return the error code. otherwise return the error code.
Note: We assume front end functions already check ranges. Note: We assume front end functions already check ranges.
(0 <= LAST_START && LAST_START <= LENGTH) */ (0 <= LAST_START && LAST_START <= LENGTH) */
static reg_errcode_t static reg_errcode_t
__attribute_warn_unused_result__ __attribute_warn_unused_result__ internal_function
re_search_internal (const regex_t *preg, re_search_internal (const regex_t *preg, const char *string, Idx length,
const char *string, Idx length, Idx start, Idx last_start, Idx stop, size_t nmatch,
Idx start, Idx last_start, Idx stop, regmatch_t pmatch[], int eflags)
size_t nmatch, regmatch_t pmatch[],
int eflags)
{ {
reg_errcode_t err; reg_errcode_t err;
const re_dfa_t *dfa = preg->buffer; const re_dfa_t *dfa = preg->buffer;
Idx left_lim, right_lim; Idx left_lim, right_lim;
int incr; int incr;
bool fl_longest_match; bool fl_longest_match;
int match_kind; int match_kind;
Idx match_first; Idx match_first;
Idx match_last = REG_MISSING; Idx match_last = -1;
Idx extra_nmatch; Idx extra_nmatch;
bool sb; bool sb;
int ch; int ch;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
re_match_context_t mctx = { .dfa = dfa }; re_match_context_t mctx = { .dfa = dfa };
#else #else
re_match_context_t mctx; re_match_context_t mctx;
#endif #endif
char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
&& start != last_start && !preg->can_be_null) && start != last_start && !preg->can_be_null)
skipping to change at line 851 skipping to change at line 832
yet isn't the head, of a multibyte character. */ yet isn't the head, of a multibyte character. */
if (!sb && !re_string_first_byte (&mctx.input, 0)) if (!sb && !re_string_first_byte (&mctx.input, 0))
continue; continue;
#endif #endif
/* It seems to be appropriate one, then use the matcher. */ /* It seems to be appropriate one, then use the matcher. */
/* We assume that the matching starts from 0. */ /* We assume that the matching starts from 0. */
mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
match_last = check_matching (&mctx, fl_longest_match, match_last = check_matching (&mctx, fl_longest_match,
start <= last_start ? &match_first : NULL); start <= last_start ? &match_first : NULL);
if (match_last != REG_MISSING) if (match_last != -1)
{ {
if (BE (match_last == REG_ERROR, 0)) if (BE (match_last == -2, 0))
{ {
err = REG_ESPACE; err = REG_ESPACE;
goto free_return; goto free_return;
} }
else else
{ {
mctx.match_last = match_last; mctx.match_last = match_last;
if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
{ {
re_dfastate_t *pstate = mctx.state_log[match_last]; re_dfastate_t *pstate = mctx.state_log[match_last];
skipping to change at line 875 skipping to change at line 856
match_last); match_last);
} }
if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
|| dfa->nbackref) || dfa->nbackref)
{ {
err = prune_impossible_nodes (&mctx); err = prune_impossible_nodes (&mctx);
if (err == REG_NOERROR) if (err == REG_NOERROR)
break; break;
if (BE (err != REG_NOMATCH, 0)) if (BE (err != REG_NOMATCH, 0))
goto free_return; goto free_return;
match_last = REG_MISSING; match_last = -1;
} }
else else
break; /* We found a match. */ break; /* We found a match. */
} }
} }
match_ctx_clean (&mctx); match_ctx_clean (&mctx);
} }
#ifdef DEBUG #ifdef DEBUG
assert (match_last != REG_MISSING); assert (match_last != -1);
assert (err == REG_NOERROR); assert (err == REG_NOERROR);
#endif #endif
/* Set pmatch[] if we need. */ /* Set pmatch[] if we need. */
if (nmatch > 0) if (nmatch > 0)
{ {
Idx reg_idx; Idx reg_idx;
/* Initialize registers. */ /* Initialize registers. */
for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
skipping to change at line 964 skipping to change at line 945
free_return: free_return:
re_free (mctx.state_log); re_free (mctx.state_log);
if (dfa->nbackref) if (dfa->nbackref)
match_ctx_free (&mctx); match_ctx_free (&mctx);
re_string_destruct (&mctx.input); re_string_destruct (&mctx.input);
return err; return err;
} }
static reg_errcode_t static reg_errcode_t
__attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
prune_impossible_nodes (re_match_context_t *mctx) prune_impossible_nodes (re_match_context_t *mctx)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
Idx halt_node, match_last; Idx halt_node, match_last;
reg_errcode_t ret; reg_errcode_t ret;
re_dfastate_t **sifted_states; re_dfastate_t **sifted_states;
re_dfastate_t **lim_states = NULL; re_dfastate_t **lim_states = NULL;
re_sift_context_t sctx; re_sift_context_t sctx;
#ifdef DEBUG #ifdef DEBUG
assert (mctx->state_log != NULL); assert (mctx->state_log != NULL);
skipping to change at line 1012 skipping to change at line 993
match_last); match_last);
ret = sift_states_backward (mctx, &sctx); ret = sift_states_backward (mctx, &sctx);
re_node_set_free (&sctx.limits); re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0)) if (BE (ret != REG_NOERROR, 0))
goto free_return; goto free_return;
if (sifted_states[0] != NULL || lim_states[0] != NULL) if (sifted_states[0] != NULL || lim_states[0] != NULL)
break; break;
do do
{ {
--match_last; --match_last;
if (! REG_VALID_INDEX (match_last)) if (match_last < 0)
{ {
ret = REG_NOMATCH; ret = REG_NOMATCH;
goto free_return; goto free_return;
} }
} while (mctx->state_log[match_last] == NULL } while (mctx->state_log[match_last] == NULL
|| !mctx->state_log[match_last]->halt); || !mctx->state_log[match_last]->halt);
halt_node = check_halt_state_context (mctx, halt_node = check_halt_state_context (mctx,
mctx->state_log[match_last], mctx->state_log[match_last],
match_last); match_last);
} }
skipping to change at line 1093 skipping to change at line 1074
} }
else else
/* Must not happen? */ /* Must not happen? */
return dfa->init_state; return dfa->init_state;
} }
else else
return dfa->init_state; return dfa->init_state;
} }
/* Check whether the regular expression match input string INPUT or not, /* Check whether the regular expression match input string INPUT or not,
and return the index where the matching end. Return REG_MISSING if and return the index where the matching end. Return -1 if
there is no match, and return REG_ERROR in case of an error. there is no match, and return -2 in case of an error.
FL_LONGEST_MATCH means we want the POSIX longest matching. FL_LONGEST_MATCH means we want the POSIX longest matching.
If P_MATCH_FIRST is not NULL, and the match fails, it is set to the If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
next place where we may want to try matching. next place where we may want to try matching.
Note that the matcher assumes that the matching starts from the current Note that the matcher assumes that the matching starts from the current
index of the buffer. */ index of the buffer. */
static Idx static Idx
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
check_matching (re_match_context_t *mctx, bool fl_longest_match, check_matching (re_match_context_t *mctx, bool fl_longest_match,
Idx *p_match_first) Idx *p_match_first)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err; reg_errcode_t err;
Idx match = 0; Idx match = 0;
Idx match_last = REG_MISSING; Idx match_last = -1;
Idx cur_str_idx = re_string_cur_idx (&mctx->input); Idx cur_str_idx = re_string_cur_idx (&mctx->input);
re_dfastate_t *cur_state; re_dfastate_t *cur_state;
bool at_init_state = p_match_first != NULL; bool at_init_state = p_match_first != NULL;
Idx next_start_idx = cur_str_idx; Idx next_start_idx = cur_str_idx;
err = REG_NOERROR; err = REG_NOERROR;
cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
/* An initial state must not be NULL (invalid). */ /* An initial state must not be NULL (invalid). */
if (BE (cur_state == NULL, 0)) if (BE (cur_state == NULL, 0))
{ {
assert (err == REG_ESPACE); assert (err == REG_ESPACE);
return REG_ERROR; return -2;
} }
if (mctx->state_log != NULL) if (mctx->state_log != NULL)
{ {
mctx->state_log[cur_str_idx] = cur_state; mctx->state_log[cur_str_idx] = cur_state;
/* Check OP_OPEN_SUBEXP in the initial state in case that we use them /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
later. E.g. Processing back references. */ later. E.g. Processing back references. */
if (BE (dfa->nbackref, 0)) if (BE (dfa->nbackref, 0))
{ {
skipping to change at line 1176 skipping to change at line 1157
if ((BE (next_char_idx >= mctx->input.bufs_len, 0) if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
&& mctx->input.bufs_len < mctx->input.len) && mctx->input.bufs_len < mctx->input.len)
|| (BE (next_char_idx >= mctx->input.valid_len, 0) || (BE (next_char_idx >= mctx->input.valid_len, 0)
&& mctx->input.valid_len < mctx->input.len)) && mctx->input.valid_len < mctx->input.len))
{ {
err = extend_buffers (mctx, next_char_idx + 1); err = extend_buffers (mctx, next_char_idx + 1);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
{ {
assert (err == REG_ESPACE); assert (err == REG_ESPACE);
return REG_ERROR; return -2;
} }
} }
cur_state = transit_state (&err, mctx, cur_state); cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL) if (mctx->state_log != NULL)
cur_state = merge_state_with_log (&err, mctx, cur_state); cur_state = merge_state_with_log (&err, mctx, cur_state);
if (cur_state == NULL) if (cur_state == NULL)
{ {
/* Reached the invalid state or an error. Try to recover a valid /* Reached the invalid state or an error. Try to recover a valid
state using the state log, if available and if we have not state using the state log, if available and if we have not
already found a valid (even if not the longest) match. */ already found a valid (even if not the longest) match. */
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
return REG_ERROR; return -2;
if (mctx->state_log == NULL if (mctx->state_log == NULL
|| (match && !fl_longest_match) || (match && !fl_longest_match)
|| (cur_state = find_recover_state (&err, mctx)) == NULL) || (cur_state = find_recover_state (&err, mctx)) == NULL)
break; break;
} }
if (BE (at_init_state, 0)) if (BE (at_init_state, 0))
{ {
if (old_state == cur_state) if (old_state == cur_state)
skipping to change at line 1273 skipping to change at line 1254
context = re_string_context_at (&mctx->input, idx, mctx->eflags); context = re_string_context_at (&mctx->input, idx, mctx->eflags);
for (i = 0; i < state->nodes.nelem; ++i) for (i = 0; i < state->nodes.nelem; ++i)
if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
return state->nodes.elems[i]; return state->nodes.elems[i];
return 0; return 0;
} }
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
corresponding to the DFA). corresponding to the DFA).
Return the destination node, and update EPS_VIA_NODES; Return the destination node, and update EPS_VIA_NODES;
return REG_MISSING in case of errors. */ return -1 in case of errors. */
static Idx static Idx
internal_function internal_function
proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
Idx *pidx, Idx node, re_node_set *eps_via_nodes, Idx *pidx, Idx node, re_node_set *eps_via_nodes,
struct re_fail_stack_t *fs) struct re_fail_stack_t *fs)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
Idx i; Idx i;
bool ok; bool ok;
if (IS_EPSILON_NODE (dfa->nodes[node].type)) if (IS_EPSILON_NODE (dfa->nodes[node].type))
{ {
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
re_node_set *edests = &dfa->edests[node]; re_node_set *edests = &dfa->edests[node];
Idx dest_node; Idx dest_node;
ok = re_node_set_insert (eps_via_nodes, node); ok = re_node_set_insert (eps_via_nodes, node);
if (BE (! ok, 0)) if (BE (! ok, 0))
return REG_ERROR; return -2;
/* Pick up a valid destination, or return REG_MISSING if none /* Pick up a valid destination, or return -1 if none
is found. */ is found. */
for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i) for (dest_node = -1, i = 0; i < edests->nelem; ++i)
{ {
Idx candidate = edests->elems[i]; Idx candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate)) if (!re_node_set_contains (cur_nodes, candidate))
continue; continue;
if (dest_node == REG_MISSING) if (dest_node == -1)
dest_node = candidate; dest_node = candidate;
else else
{ {
/* In order to avoid infinite loop like "(a*)*", return the second /* In order to avoid infinite loop like "(a*)*", return the second
epsilon-transition if the first was already considered. */ epsilon-transition if the first was already considered. */
if (re_node_set_contains (eps_via_nodes, dest_node)) if (re_node_set_contains (eps_via_nodes, dest_node))
return candidate; return candidate;
/* Otherwise, push the second epsilon-transition on the fail stack. */ /* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs, && push_fail_stack (fs, *pidx, candidate, nregs, regs,
eps_via_nodes)) eps_via_nodes))
return REG_ERROR; return -2;
/* We know we are going to exit. */ /* We know we are going to exit. */
break; break;
} }
} }
return dest_node; return dest_node;
} }
else else
{ {
Idx naccepted = 0; Idx naccepted = 0;
skipping to change at line 1338 skipping to change at line 1319
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
else else
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
if (type == OP_BACK_REF) if (type == OP_BACK_REF)
{ {
Idx subexp_idx = dfa->nodes[node].opr.idx + 1; Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
if (fs != NULL) if (fs != NULL)
{ {
if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
return REG_MISSING; return -1;
else if (naccepted) else if (naccepted)
{ {
char *buf = (char *) re_string_get_buffer (&mctx->input); char *buf = (char *) re_string_get_buffer (&mctx->input);
if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
naccepted) != 0) naccepted) != 0)
return REG_MISSING; return -1;
} }
} }
if (naccepted == 0) if (naccepted == 0)
{ {
Idx dest_node; Idx dest_node;
ok = re_node_set_insert (eps_via_nodes, node); ok = re_node_set_insert (eps_via_nodes, node);
if (BE (! ok, 0)) if (BE (! ok, 0))
return REG_ERROR; return -2;
dest_node = dfa->edests[node].elems[0]; dest_node = dfa->edests[node].elems[0];
if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
dest_node)) dest_node))
return dest_node; return dest_node;
} }
} }
if (naccepted != 0 if (naccepted != 0
|| check_node_accept (mctx, dfa->nodes + node, *pidx)) || check_node_accept (mctx, dfa->nodes + node, *pidx))
{ {
Idx dest_node = dfa->nexts[node]; Idx dest_node = dfa->nexts[node];
*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes, || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
dest_node))) dest_node)))
return REG_MISSING; return -1;
re_node_set_empty (eps_via_nodes); re_node_set_empty (eps_via_nodes);
return dest_node; return dest_node;
} }
} }
return REG_MISSING; return -1;
} }
static reg_errcode_t static reg_errcode_t
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
{ {
reg_errcode_t err; reg_errcode_t err;
Idx num = fs->num++; Idx num = fs->num++;
if (fs->num == fs->alloc) if (fs->num == fs->alloc)
skipping to change at line 1410 skipping to change at line 1391
err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
return err; return err;
} }
static Idx static Idx
internal_function internal_function
pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
regmatch_t *regs, re_node_set *eps_via_nodes) regmatch_t *regs, re_node_set *eps_via_nodes)
{ {
Idx num = --fs->num; Idx num = --fs->num;
assert (REG_VALID_INDEX (num)); assert (num >= 0);
*pidx = fs->stack[num].idx; *pidx = fs->stack[num].idx;
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
re_node_set_free (eps_via_nodes); re_node_set_free (eps_via_nodes);
re_free (fs->stack[num].regs); re_free (fs->stack[num].regs);
*eps_via_nodes = fs->stack[num].eps_via_nodes; *eps_via_nodes = fs->stack[num].eps_via_nodes;
return fs->stack[num].node; return fs->stack[num].node;
} }
/* Set the positions where the subexpressions are starts/ends to registers /* Set the positions where the subexpressions are starts/ends to registers
PMATCH. PMATCH.
skipping to change at line 1503 skipping to change at line 1484
if (prev_idx_match_malloced) if (prev_idx_match_malloced)
re_free (prev_idx_match); re_free (prev_idx_match);
return REG_NOERROR; return REG_NOERROR;
} }
} }
/* Proceed to next node. */ /* Proceed to next node. */
cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
&eps_via_nodes, fs); &eps_via_nodes, fs);
if (BE (! REG_VALID_INDEX (cur_node), 0)) if (BE (cur_node < 0, 0))
{ {
if (BE (cur_node == REG_ERROR, 0)) if (BE (cur_node == -2, 0))
{ {
re_node_set_free (&eps_via_nodes); re_node_set_free (&eps_via_nodes);
if (prev_idx_match_malloced) if (prev_idx_match_malloced)
re_free (prev_idx_match); re_free (prev_idx_match);
free_fail_stack_return (fs); free_fail_stack_return (fs);
return REG_ESPACE; return REG_ESPACE;
} }
if (fs) if (fs)
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
&eps_via_nodes); &eps_via_nodes);
skipping to change at line 1889 skipping to change at line 1870
re_node_set_init_empty (&except_nodes); re_node_set_init_empty (&except_nodes);
for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
{ {
Idx cur_node = inv_eclosure->elems[ecl_idx]; Idx cur_node = inv_eclosure->elems[ecl_idx];
if (cur_node == node) if (cur_node == node)
continue; continue;
if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
{ {
Idx edst1 = dfa->edests[cur_node].elems[0]; Idx edst1 = dfa->edests[cur_node].elems[0];
Idx edst2 = ((dfa->edests[cur_node].nelem > 1) Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
? dfa->edests[cur_node].elems[1] : REG_MISSING); ? dfa->edests[cur_node].elems[1] : -1);
if ((!re_node_set_contains (inv_eclosure, edst1) if ((!re_node_set_contains (inv_eclosure, edst1)
&& re_node_set_contains (dest_nodes, edst1)) && re_node_set_contains (dest_nodes, edst1))
|| (REG_VALID_NONZERO_INDEX (edst2) || (edst2 > 0
&& !re_node_set_contains (inv_eclosure, edst2) && !re_node_set_contains (inv_eclosure, edst2)
&& re_node_set_contains (dest_nodes, edst2))) && re_node_set_contains (dest_nodes, edst2)))
{ {
err = re_node_set_add_intersect (&except_nodes, candidates, err = re_node_set_add_intersect (&except_nodes, candidates,
dfa->inveclosures + cur_node); dfa->inveclosures + cur_node);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
{ {
re_node_set_free (&except_nodes); re_node_set_free (&except_nodes);
return err; return err;
} }
skipping to change at line 1972 skipping to change at line 1953
Idx node_idx; Idx node_idx;
/* Else, we are on the boundary: examine the nodes on the epsilon /* Else, we are on the boundary: examine the nodes on the epsilon
closure. */ closure. */
for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
{ {
Idx node = eclosures->elems[node_idx]; Idx node = eclosures->elems[node_idx];
switch (dfa->nodes[node].type) switch (dfa->nodes[node].type)
{ {
case OP_BACK_REF: case OP_BACK_REF:
if (bkref_idx != REG_MISSING) if (bkref_idx != -1)
{ {
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do do
{ {
Idx dst; Idx dst;
int cpos; int cpos;
if (ent->node != node) if (ent->node != node)
continue; continue;
skipping to change at line 2088 skipping to change at line 2069
Idx subexp_idx; Idx subexp_idx;
struct re_backref_cache_entry *ent; struct re_backref_cache_entry *ent;
ent = bkref_ents + limits->elems[lim_idx]; ent = bkref_ents + limits->elems[lim_idx];
if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
continue; /* This is unrelated limitation. */ continue; /* This is unrelated limitation. */
subexp_idx = dfa->nodes[ent->node].opr.idx; subexp_idx = dfa->nodes[ent->node].opr.idx;
if (ent->subexp_to == str_idx) if (ent->subexp_to == str_idx)
{ {
Idx ops_node = REG_MISSING; Idx ops_node = -1;
Idx cls_node = REG_MISSING; Idx cls_node = -1;
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
{ {
Idx node = dest_nodes->elems[node_idx]; Idx node = dest_nodes->elems[node_idx];
re_token_type_t type = dfa->nodes[node].type; re_token_type_t type = dfa->nodes[node].type;
if (type == OP_OPEN_SUBEXP if (type == OP_OPEN_SUBEXP
&& subexp_idx == dfa->nodes[node].opr.idx) && subexp_idx == dfa->nodes[node].opr.idx)
ops_node = node; ops_node = node;
else if (type == OP_CLOSE_SUBEXP else if (type == OP_CLOSE_SUBEXP
&& subexp_idx == dfa->nodes[node].opr.idx) && subexp_idx == dfa->nodes[node].opr.idx)
cls_node = node; cls_node = node;
} }
/* Check the limitation of the open subexpression. */ /* Check the limitation of the open subexpression. */
/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
if (REG_VALID_INDEX (ops_node)) if (ops_node >= 0)
{ {
err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
candidates); candidates);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
return err; return err;
} }
/* Check the limitation of the close subexpression. */ /* Check the limitation of the close subexpression. */
if (REG_VALID_INDEX (cls_node)) if (cls_node >= 0)
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
{ {
Idx node = dest_nodes->elems[node_idx]; Idx node = dest_nodes->elems[node_idx];
if (!re_node_set_contains (dfa->inveclosures + node, if (!re_node_set_contains (dfa->inveclosures + node,
cls_node) cls_node)
&& !re_node_set_contains (dfa->eclosures + node, && !re_node_set_contains (dfa->eclosures + node,
cls_node)) cls_node))
{ {
/* It is against this limitation. /* It is against this limitation.
Remove it form the current sifted state. */ Remove it form the current sifted state. */
skipping to change at line 2166 skipping to change at line 2147
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
Idx str_idx, const re_node_set *candidates) Idx str_idx, const re_node_set *candidates)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err; reg_errcode_t err;
Idx node_idx, node; Idx node_idx, node;
re_sift_context_t local_sctx; re_sift_context_t local_sctx;
Idx first_idx = search_cur_bkref_entry (mctx, str_idx); Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
if (first_idx == REG_MISSING) if (first_idx == -1)
return REG_NOERROR; return REG_NOERROR;
local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
{ {
Idx enabled_idx; Idx enabled_idx;
re_token_type_t type; re_token_type_t type;
struct re_backref_cache_entry *entry; struct re_backref_cache_entry *entry;
node = candidates->elems[node_idx]; node = candidates->elems[node_idx];
skipping to change at line 2567 skipping to change at line 2548
continue; continue;
/* The node can accepts 'naccepted' bytes. */ /* The node can accepts 'naccepted' bytes. */
dest_idx = re_string_cur_idx (&mctx->input) + naccepted; dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
: mctx->max_mb_elem_len); : mctx->max_mb_elem_len);
err = clean_state_log_if_needed (mctx, dest_idx); err = clean_state_log_if_needed (mctx, dest_idx);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
return err; return err;
#ifdef DEBUG #ifdef DEBUG
assert (dfa->nexts[cur_node_idx] != REG_MISSING); assert (dfa->nexts[cur_node_idx] != -1);
#endif #endif
new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
dest_state = mctx->state_log[dest_idx]; dest_state = mctx->state_log[dest_idx];
if (dest_state == NULL) if (dest_state == NULL)
dest_nodes = *new_nodes; dest_nodes = *new_nodes;
else else
{ {
err = re_node_set_init_union (&dest_nodes, err = re_node_set_init_union (&dest_nodes,
dest_state->entrance_nodes, new_nodes); dest_state->entrance_nodes, new_nodes);
skipping to change at line 2633 skipping to change at line 2614
/* 'node' is a backreference. /* 'node' is a backreference.
Check the substring which the substring matched. */ Check the substring which the substring matched. */
bkc_idx = mctx->nbkref_ents; bkc_idx = mctx->nbkref_ents;
err = get_subexp (mctx, node_idx, cur_str_idx); err = get_subexp (mctx, node_idx, cur_str_idx);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
goto free_return; goto free_return;
/* And add the epsilon closures (which is 'new_dest_nodes') of /* And add the epsilon closures (which is 'new_dest_nodes') of
the backreference to appropriate state_log. */ the backreference to appropriate state_log. */
#ifdef DEBUG #ifdef DEBUG
assert (dfa->nexts[node_idx] != REG_MISSING); assert (dfa->nexts[node_idx] != -1);
#endif #endif
for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
{ {
Idx subexp_len; Idx subexp_len;
re_dfastate_t *dest_state; re_dfastate_t *dest_state;
struct re_backref_cache_entry *bkref_ent; struct re_backref_cache_entry *bkref_ent;
bkref_ent = mctx->bkref_ents + bkc_idx; bkref_ent = mctx->bkref_ents + bkc_idx;
if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
continue; continue;
subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
skipping to change at line 2717 skipping to change at line 2698
static reg_errcode_t static reg_errcode_t
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
Idx subexp_num, sub_top_idx; Idx subexp_num, sub_top_idx;
const char *buf = (const char *) re_string_get_buffer (&mctx->input); const char *buf = (const char *) re_string_get_buffer (&mctx->input);
/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
if (cache_idx != REG_MISSING) if (cache_idx != -1)
{ {
const struct re_backref_cache_entry *entry const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx; = mctx->bkref_ents + cache_idx;
do do
if (entry->node == bkref_node) if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */ return REG_NOERROR; /* We already checked it. */
while (entry++->more); while (entry++->more);
} }
subexp_num = dfa->nodes[bkref_node].opr.idx; subexp_num = dfa->nodes[bkref_node].opr.idx;
skipping to change at line 2822 skipping to change at line 2803
if (buf [bkref_str_off++] != buf[sl_str - 1]) if (buf [bkref_str_off++] != buf[sl_str - 1])
break; /* We don't need to search this sub expression break; /* We don't need to search this sub expression
any more. */ any more. */
} }
if (mctx->state_log[sl_str] == NULL) if (mctx->state_log[sl_str] == NULL)
continue; continue;
/* Does this state have a ')' of the sub expression? */ /* Does this state have a ')' of the sub expression? */
nodes = &mctx->state_log[sl_str]->nodes; nodes = &mctx->state_log[sl_str]->nodes;
cls_node = find_subexp_node (dfa, nodes, subexp_num, cls_node = find_subexp_node (dfa, nodes, subexp_num,
OP_CLOSE_SUBEXP); OP_CLOSE_SUBEXP);
if (cls_node == REG_MISSING) if (cls_node == -1)
continue; /* No. */ continue; /* No. */
if (sub_top->path == NULL) if (sub_top->path == NULL)
{ {
sub_top->path = calloc (sizeof (state_array_t), sub_top->path = calloc (sizeof (state_array_t),
sl_str - sub_top->str_idx + 1); sl_str - sub_top->str_idx + 1);
if (sub_top->path == NULL) if (sub_top->path == NULL)
return REG_ESPACE; return REG_ESPACE;
} }
/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
in the current context? */ in the current context? */
skipping to change at line 2901 skipping to change at line 2882
{ {
Idx cls_idx; Idx cls_idx;
for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
{ {
Idx cls_node = nodes->elems[cls_idx]; Idx cls_node = nodes->elems[cls_idx];
const re_token_t *node = dfa->nodes + cls_node; const re_token_t *node = dfa->nodes + cls_node;
if (node->type == type if (node->type == type
&& node->opr.idx == subexp_idx) && node->opr.idx == subexp_idx)
return cls_node; return cls_node;
} }
return REG_MISSING; return -1;
} }
/* Check whether the node TOP_NODE at TOP_STR can arrive to the node /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
LAST_NODE at LAST_STR. We record the path onto PATH since it will be LAST_NODE at LAST_STR. We record the path onto PATH since it will be
heavily reused. heavily reused.
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
static reg_errcode_t static reg_errcode_t
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
skipping to change at line 3177 skipping to change at line 3158
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
return err; return err;
/* Create a new node set NEW_NODES with the nodes which are epsilon /* Create a new node set NEW_NODES with the nodes which are epsilon
closures of the node in CUR_NODES. */ closures of the node in CUR_NODES. */
for (idx = 0; idx < cur_nodes->nelem; ++idx) for (idx = 0; idx < cur_nodes->nelem; ++idx)
{ {
Idx cur_node = cur_nodes->elems[idx]; Idx cur_node = cur_nodes->elems[idx];
const re_node_set *eclosure = dfa->eclosures + cur_node; const re_node_set *eclosure = dfa->eclosures + cur_node;
outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
if (outside_node == REG_MISSING) if (outside_node == -1)
{ {
/* There are no problematic nodes, just merge them. */ /* There are no problematic nodes, just merge them. */
err = re_node_set_merge (&new_nodes, eclosure); err = re_node_set_merge (&new_nodes, eclosure);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
{ {
re_node_set_free (&new_nodes); re_node_set_free (&new_nodes);
return err; return err;
} }
} }
else else
skipping to change at line 3262 skipping to change at line 3243
static reg_errcode_t static reg_errcode_t
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
Idx cur_str, Idx subexp_num, int type) Idx cur_str, Idx subexp_num, int type)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err; reg_errcode_t err;
Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
struct re_backref_cache_entry *ent; struct re_backref_cache_entry *ent;
if (cache_idx_start == REG_MISSING) if (cache_idx_start == -1)
return REG_NOERROR; return REG_NOERROR;
restart: restart:
ent = mctx->bkref_ents + cache_idx_start; ent = mctx->bkref_ents + cache_idx_start;
do do
{ {
Idx to_idx, next_node; Idx to_idx, next_node;
/* Is this entry ENT is appropriate? */ /* Is this entry ENT is appropriate? */
if (!re_node_set_contains (cur_nodes, ent->node)) if (!re_node_set_contains (cur_nodes, ent->node))
skipping to change at line 3387 skipping to change at line 3368
} }
dests_node = dests_alloc->dests_node; dests_node = dests_alloc->dests_node;
dests_ch = dests_alloc->dests_ch; dests_ch = dests_alloc->dests_ch;
/* Initialize transition table. */ /* Initialize transition table. */
state->word_trtable = state->trtable = NULL; state->word_trtable = state->trtable = NULL;
/* At first, group all nodes belonging to 'state' into several /* At first, group all nodes belonging to 'state' into several
destinations. */ destinations. */
ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) if (BE (ndests <= 0, 0))
{ {
if (dests_node_malloced) if (dests_node_malloced)
free (dests_alloc); free (dests_alloc);
/* Return false in case of an error, true otherwise. */ /* Return false in case of an error, true otherwise. */
if (ndests == 0) if (ndests == 0)
{ {
state->trtable = (re_dfastate_t **) state->trtable = (re_dfastate_t **)
calloc (sizeof (re_dfastate_t *), SBC_MAX); calloc (sizeof (re_dfastate_t *), SBC_MAX);
if (BE (state->trtable == NULL, 0)) if (BE (state->trtable == NULL, 0))
return false; return false;
skipping to change at line 3449 skipping to change at line 3430
/* Then build the states for all destinations. */ /* Then build the states for all destinations. */
for (i = 0; i < ndests; ++i) for (i = 0; i < ndests; ++i)
{ {
Idx next_node; Idx next_node;
re_node_set_empty (&follows); re_node_set_empty (&follows);
/* Merge the follows of this destination states. */ /* Merge the follows of this destination states. */
for (j = 0; j < dests_node[i].nelem; ++j) for (j = 0; j < dests_node[i].nelem; ++j)
{ {
next_node = dfa->nexts[dests_node[i].elems[j]]; next_node = dfa->nexts[dests_node[i].elems[j]];
if (next_node != REG_MISSING) if (next_node != -1)
{ {
err = re_node_set_merge (&follows, dfa->eclosures + next_node); err = re_node_set_merge (&follows, dfa->eclosures + next_node);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
goto out_free; goto out_free;
} }
} }
dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
goto out_free; goto out_free;
/* If the new state has context constraint, /* If the new state has context constraint,
skipping to change at line 3760 skipping to change at line 3741
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
goto error_return; goto error_return;
++ndests; ++ndests;
bitset_empty (accepts); bitset_empty (accepts);
} }
} }
return ndests; return ndests;
error_return: error_return:
for (j = 0; j < ndests; ++j) for (j = 0; j < ndests; ++j)
re_node_set_free (dests_node + j); re_node_set_free (dests_node + j);
return REG_MISSING; return -1;
} }
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
Return the number of the bytes the node accepts. Return the number of the bytes the node accepts.
STR_IDX is the current index of the input string. STR_IDX is the current index of the input string.
This function handles the nodes which can accept one character, or This function handles the nodes which can accept one character, or
one collating element like '.', '[a-z]', opposite to the other nodes one collating element like '.', '[a-z]', opposite to the other nodes
can only accept one byte. */ can only accept one byte. */
# ifdef _LIBC
# include <locale/weight.h>
# endif
static int static int
internal_function internal_function
check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
const re_string_t *input, Idx str_idx) const re_string_t *input, Idx str_idx)
{ {
const re_token_t *node = dfa->nodes + node_idx; const re_token_t *node = dfa->nodes + node_idx;
int char_len, elem_len; int char_len, elem_len;
Idx i; Idx i;
if (BE (node->type == OP_UTF8_PERIOD, 0)) if (BE (node->type == OP_UTF8_PERIOD, 0))
skipping to change at line 3891 skipping to change at line 3876
} }
# ifdef _LIBC # ifdef _LIBC
nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0) if (nrules != 0)
{ {
unsigned int in_collseq = 0; unsigned int in_collseq = 0;
const int32_t *table, *indirect; const int32_t *table, *indirect;
const unsigned char *weights, *extra; const unsigned char *weights, *extra;
const char *collseqwc; const char *collseqwc;
/* This #include defines a local function! */
# include <locale/weight.h>
/* match with collating_symbol? */ /* match with collating_symbol? */
if (cset->ncoll_syms) if (cset->ncoll_syms)
extra = (const unsigned char *) extra = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
for (i = 0; i < cset->ncoll_syms; ++i) for (i = 0; i < cset->ncoll_syms; ++i)
{ {
const unsigned char *coll_sym = extra + cset->coll_syms[i]; const unsigned char *coll_sym = extra + cset->coll_syms[i];
/* Compare the length of input collating element and /* Compare the length of input collating element and
the length of current collating element. */ the length of current collating element. */
skipping to change at line 3949 skipping to change at line 3932
{ {
const unsigned char *cp = pin; const unsigned char *cp = pin;
table = (const int32_t *) table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
weights = (const unsigned char *) weights = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
extra = (const unsigned char *) extra = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
int32_t idx = findidx (&cp, elem_len); int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
if (idx > 0) if (idx > 0)
for (i = 0; i < cset->nequiv_classes; ++i) for (i = 0; i < cset->nequiv_classes; ++i)
{ {
int32_t equiv_class_idx = cset->equiv_classes[i]; int32_t equiv_class_idx = cset->equiv_classes[i];
size_t weight_len = weights[idx & 0xffffff]; size_t weight_len = weights[idx & 0xffffff];
if (weight_len == weights[equiv_class_idx & 0xffffff] if (weight_len == weights[equiv_class_idx & 0xffffff]
&& (idx >> 24) == (equiv_class_idx >> 24)) && (idx >> 24) == (equiv_class_idx >> 24))
{ {
Idx cnt = 0; Idx cnt = 0;
skipping to change at line 4187 skipping to change at line 4170
} }
/* Functions for matching context. */ /* Functions for matching context. */
/* Initialize MCTX. */ /* Initialize MCTX. */
static reg_errcode_t static reg_errcode_t
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
{ {
mctx->eflags = eflags; mctx->eflags = eflags;
mctx->match_last = REG_MISSING; mctx->match_last = -1;
if (n > 0) if (n > 0)
{ {
/* Avoid overflow. */ /* Avoid overflow. */
size_t max_object_size = size_t max_object_size =
MAX (sizeof (struct re_backref_cache_entry), MAX (sizeof (struct re_backref_cache_entry),
sizeof (re_sub_match_top_t *)); sizeof (re_sub_match_top_t *));
if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
return REG_ESPACE; return REG_ESPACE;
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
skipping to change at line 4308 skipping to change at line 4291
to all zeros if FROM != TO. */ to all zeros if FROM != TO. */
mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
= (from == to ? -1 : 0); = (from == to ? -1 : 0);
mctx->bkref_ents[mctx->nbkref_ents++].more = 0; mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
if (mctx->max_mb_elem_len < to - from) if (mctx->max_mb_elem_len < to - from)
mctx->max_mb_elem_len = to - from; mctx->max_mb_elem_len = to - from;
return REG_NOERROR; return REG_NOERROR;
} }
/* Return the first entry with the same str_idx, or REG_MISSING if none is /* Return the first entry with the same str_idx, or -1 if none is
found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
static Idx static Idx
internal_function internal_function
search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
{ {
Idx left, right, mid, last; Idx left, right, mid, last;
last = right = mctx->nbkref_ents; last = right = mctx->nbkref_ents;
for (left = 0; left < right;) for (left = 0; left < right;)
{ {
mid = (left + right) / 2; mid = (left + right) / 2;
if (mctx->bkref_ents[mid].str_idx < str_idx) if (mctx->bkref_ents[mid].str_idx < str_idx)
left = mid + 1; left = mid + 1;
else else
right = mid; right = mid;
} }
if (left < last && mctx->bkref_ents[left].str_idx == str_idx) if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
return left; return left;
else else
return REG_MISSING; return -1;
} }
/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
at STR_IDX. */ at STR_IDX. */
static reg_errcode_t static reg_errcode_t
internal_function __attribute_warn_unused_result__ internal_function __attribute_warn_unused_result__
match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
{ {
#ifdef DEBUG #ifdef DEBUG
 End of changes. 62 change blocks. 
101 lines changed or deleted 84 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS