regcomp.c (perl-5.30.2.tar.xz) | : | regcomp.c (perl-5.30.3.tar.xz) | ||
---|---|---|---|---|
skipping to change at line 109 | skipping to change at line 109 | |||
/* this is a chain of data about sub patterns we are processing that | /* this is a chain of data about sub patterns we are processing that | |||
need to be handled separately/specially in study_chunk. Its so | need to be handled separately/specially in study_chunk. Its so | |||
we can simulate recursion without losing state. */ | we can simulate recursion without losing state. */ | |||
struct scan_frame; | struct scan_frame; | |||
typedef struct scan_frame { | typedef struct scan_frame { | |||
regnode *last_regnode; /* last node to process in this frame */ | regnode *last_regnode; /* last node to process in this frame */ | |||
regnode *next_regnode; /* next node to process when last is reached */ | regnode *next_regnode; /* next node to process when last is reached */ | |||
U32 prev_recursed_depth; | U32 prev_recursed_depth; | |||
I32 stopparen; /* what stopparen do we use */ | I32 stopparen; /* what stopparen do we use */ | |||
bool in_gosub; /* this or an outer frame is for GOSUB */ | ||||
struct scan_frame *this_prev_frame; /* this previous frame */ | struct scan_frame *this_prev_frame; /* this previous frame */ | |||
struct scan_frame *prev_frame; /* previous frame */ | struct scan_frame *prev_frame; /* previous frame */ | |||
struct scan_frame *next_frame; /* next frame */ | struct scan_frame *next_frame; /* next frame */ | |||
} scan_frame; | } scan_frame; | |||
/* Certain characters are output as a sequence with the first being a | /* Certain characters are output as a sequence with the first being a | |||
* backslash. */ | * backslash. */ | |||
#define isBACKSLASHED_PUNCT(c) strchr("-[]\\^", c) | #define isBACKSLASHED_PUNCT(c) strchr("-[]\\^", c) | |||
skipping to change at line 4422 | skipping to change at line 4423 | |||
S_unwind_scan_frames(pTHX_ const void *p) | S_unwind_scan_frames(pTHX_ const void *p) | |||
{ | { | |||
scan_frame *f= (scan_frame *)p; | scan_frame *f= (scan_frame *)p; | |||
do { | do { | |||
scan_frame *n= f->next_frame; | scan_frame *n= f->next_frame; | |||
Safefree(f); | Safefree(f); | |||
f= n; | f= n; | |||
} while (f); | } while (f); | |||
} | } | |||
/* Follow the next-chain of the current node and optimize away | ||||
all the NOTHINGs from it. | ||||
*/ | ||||
STATIC void | ||||
S_rck_elide_nothing(pTHX_ regnode *node) | ||||
{ | ||||
dVAR; | ||||
PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING; | ||||
if (OP(node) != CURLYX) { | ||||
const int max = (reg_off_by_arg[OP(node)] | ||||
? I32_MAX | ||||
/* I32 may be smaller than U16 on CRAYs! */ | ||||
: (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); | ||||
int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node)); | ||||
int noff; | ||||
regnode *n = node; | ||||
/* Skip NOTHING and LONGJMP. */ | ||||
while ( | ||||
(n = regnext(n)) | ||||
&& ( | ||||
(PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) | ||||
|| ((OP(n) == LONGJMP) && (noff = ARG(n))) | ||||
) | ||||
&& off + noff < max | ||||
) { | ||||
off += noff; | ||||
} | ||||
if (reg_off_by_arg[OP(node)]) | ||||
ARG(node) = off; | ||||
else | ||||
NEXT_OFF(node) = off; | ||||
} | ||||
return; | ||||
} | ||||
/* the return from this sub is the minimum length that could possibly match */ | /* the return from this sub is the minimum length that could possibly match */ | |||
STATIC SSize_t | STATIC SSize_t | |||
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, | S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, | |||
SSize_t *minlenp, SSize_t *deltap, | SSize_t *minlenp, SSize_t *deltap, | |||
regnode *last, | regnode *last, | |||
scan_data_t *data, | scan_data_t *data, | |||
I32 stopparen, | I32 stopparen, | |||
U32 recursed_depth, | U32 recursed_depth, | |||
regnode_ssc *and_withp, | regnode_ssc *and_withp, | |||
U32 flags, U32 depth) | U32 flags, U32 depth, bool was_mutate_ok) | |||
/* scanp: Start here (read-write). */ | /* scanp: Start here (read-write). */ | |||
/* deltap: Write maxlen-minlen here. */ | /* deltap: Write maxlen-minlen here. */ | |||
/* last: Stop before this one. */ | /* last: Stop before this one. */ | |||
/* data: string data about the pattern */ | /* data: string data about the pattern */ | |||
/* stopparen: treat close N as END */ | /* stopparen: treat close N as END */ | |||
/* recursed: which subroutines have we recursed into */ | /* recursed: which subroutines have we recursed into */ | |||
/* and_withp: Valid if flags & SCF_DO_STCLASS_OR */ | /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */ | |||
{ | { | |||
dVAR; | dVAR; | |||
/* There must be at least this number of characters to match */ | /* There must be at least this number of characters to match */ | |||
skipping to change at line 4509 | skipping to change at line 4548 | |||
} | } | |||
} | } | |||
Perl_re_printf( aTHX_ "\n"); | Perl_re_printf( aTHX_ "\n"); | |||
} | } | |||
); | ); | |||
while ( scan && OP(scan) != END && scan < last ){ | while ( scan && OP(scan) != END && scan < last ){ | |||
UV min_subtract = 0; /* How mmany chars to subtract from the minimum | UV min_subtract = 0; /* How mmany chars to subtract from the minimum | |||
node length to get a real minimum (because | node length to get a real minimum (because | |||
the folded version may be shorter) */ | the folded version may be shorter) */ | |||
bool unfolded_multi_char = FALSE; | bool unfolded_multi_char = FALSE; | |||
/* avoid mutating ops if we are anywhere within the recursed or | ||||
* enframed handling for a GOSUB: the outermost level will handle it. | ||||
*/ | ||||
bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub); | ||||
/* Peephole optimizer: */ | /* Peephole optimizer: */ | |||
DEBUG_STUDYDATA("Peep", data, depth, is_inf); | DEBUG_STUDYDATA("Peep", data, depth, is_inf); | |||
DEBUG_PEEP("Peep", scan, depth, flags); | DEBUG_PEEP("Peep", scan, depth, flags); | |||
/* The reason we do this here is that we need to deal with things like | /* The reason we do this here is that we need to deal with things like | |||
* /(?:f)(?:o)(?:o)/ which cant be dealt with by the normal EXACT | * /(?:f)(?:o)(?:o)/ which cant be dealt with by the normal EXACT | |||
* parsing code, as each (?:..) is handled by a different invocation of | * parsing code, as each (?:..) is handled by a different invocation of | |||
* reg() -- Yves | * reg() -- Yves | |||
*/ | */ | |||
JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); | if (mutate_ok) | |||
JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); | ||||
/* Follow the next-chain of the current node and optimize | /* Follow the next-chain of the current node and optimize | |||
away all the NOTHINGs from it. */ | away all the NOTHINGs from it. | |||
if (OP(scan) != CURLYX) { | */ | |||
const int max = (reg_off_by_arg[OP(scan)] | rck_elide_nothing(scan); | |||
? I32_MAX | ||||
/* I32 may be smaller than U16 on CRAYs! */ | ||||
: (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); | ||||
int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan)); | ||||
int noff; | ||||
regnode *n = scan; | ||||
/* Skip NOTHING and LONGJMP. */ | ||||
while ((n = regnext(n)) | ||||
&& ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) | ||||
|| ((OP(n) == LONGJMP) && (noff = ARG(n)))) | ||||
&& off + noff < max) | ||||
off += noff; | ||||
if (reg_off_by_arg[OP(scan)]) | ||||
ARG(scan) = off; | ||||
else | ||||
NEXT_OFF(scan) = off; | ||||
} | ||||
/* The principal pseudo-switch. Cannot be a switch, since we | /* The principal pseudo-switch. Cannot be a switch, since we | |||
look into several different things. */ | look into several different things. */ | |||
if ( OP(scan) == DEFINEP ) { | if ( OP(scan) == DEFINEP ) { | |||
SSize_t minlen = 0; | SSize_t minlen = 0; | |||
SSize_t deltanext = 0; | SSize_t deltanext = 0; | |||
SSize_t fake_last_close = 0; | SSize_t fake_last_close = 0; | |||
I32 f = SCF_IN_DEFINE; | I32 f = SCF_IN_DEFINE; | |||
StructCopy(&zero_scan_data, &data_fake, scan_data_t); | StructCopy(&zero_scan_data, &data_fake, scan_data_t); | |||
skipping to change at line 4568 | skipping to change at line 4594 | |||
next = regnext(scan); | next = regnext(scan); | |||
scan = NEXTOPER(NEXTOPER(scan)); | scan = NEXTOPER(NEXTOPER(scan)); | |||
DEBUG_PEEP("scan", scan, depth, flags); | DEBUG_PEEP("scan", scan, depth, flags); | |||
DEBUG_PEEP("next", next, depth, flags); | DEBUG_PEEP("next", next, depth, flags); | |||
/* we suppose the run is continuous, last=next... | /* we suppose the run is continuous, last=next... | |||
* NOTE we dont use the return here! */ | * NOTE we dont use the return here! */ | |||
/* DEFINEP study_chunk() recursion */ | /* DEFINEP study_chunk() recursion */ | |||
(void)study_chunk(pRExC_state, &scan, &minlen, | (void)study_chunk(pRExC_state, &scan, &minlen, | |||
&deltanext, next, &data_fake, stopparen, | &deltanext, next, &data_fake, stopparen, | |||
recursed_depth, NULL, f, depth+1); | recursed_depth, NULL, f, depth+1, mutate_ok); | |||
scan = next; | scan = next; | |||
} else | } else | |||
if ( | if ( | |||
OP(scan) == BRANCH || | OP(scan) == BRANCH || | |||
OP(scan) == BRANCHJ || | OP(scan) == BRANCHJ || | |||
OP(scan) == IFTHEN | OP(scan) == IFTHEN | |||
) { | ) { | |||
next = regnext(scan); | next = regnext(scan); | |||
code = OP(scan); | code = OP(scan); | |||
skipping to change at line 4636 | skipping to change at line 4662 | |||
data_fake.start_class = &this_class; | data_fake.start_class = &this_class; | |||
f = SCF_DO_STCLASS_AND; | f = SCF_DO_STCLASS_AND; | |||
} | } | |||
if (flags & SCF_WHILEM_VISITED_POS) | if (flags & SCF_WHILEM_VISITED_POS) | |||
f |= SCF_WHILEM_VISITED_POS; | f |= SCF_WHILEM_VISITED_POS; | |||
/* we suppose the run is continuous, last=next...*/ | /* we suppose the run is continuous, last=next...*/ | |||
/* recurse study_chunk() for each BRANCH in an alternation */ | /* recurse study_chunk() for each BRANCH in an alternation */ | |||
minnext = study_chunk(pRExC_state, &scan, minlenp, | minnext = study_chunk(pRExC_state, &scan, minlenp, | |||
&deltanext, next, &data_fake, stopparen, | &deltanext, next, &data_fake, stopparen, | |||
recursed_depth, NULL, f, depth+1); | recursed_depth, NULL, f, depth+1, | |||
mutate_ok); | ||||
if (min1 > minnext) | if (min1 > minnext) | |||
min1 = minnext; | min1 = minnext; | |||
if (deltanext == SSize_t_MAX) { | if (deltanext == SSize_t_MAX) { | |||
is_inf = is_inf_internal = 1; | is_inf = is_inf_internal = 1; | |||
max1 = SSize_t_MAX; | max1 = SSize_t_MAX; | |||
} else if (max1 < minnext + deltanext) | } else if (max1 < minnext + deltanext) | |||
max1 = minnext + deltanext; | max1 = minnext + deltanext; | |||
scan = next; | scan = next; | |||
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) | if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) | |||
skipping to change at line 4703 | skipping to change at line 4730 | |||
/* Switch to OR mode: cache the old value of | /* Switch to OR mode: cache the old value of | |||
* data->start_class */ | * data->start_class */ | |||
INIT_AND_WITHP; | INIT_AND_WITHP; | |||
StructCopy(data->start_class, and_withp, regnode_ssc); | StructCopy(data->start_class, and_withp, regnode_ssc); | |||
flags &= ~SCF_DO_STCLASS_AND; | flags &= ~SCF_DO_STCLASS_AND; | |||
StructCopy(&accum, data->start_class, regnode_ssc); | StructCopy(&accum, data->start_class, regnode_ssc); | |||
flags |= SCF_DO_STCLASS_OR; | flags |= SCF_DO_STCLASS_OR; | |||
} | } | |||
} | } | |||
if (PERL_ENABLE_TRIE_OPTIMISATION && | if (PERL_ENABLE_TRIE_OPTIMISATION | |||
OP( startbranch ) == BRANCH ) | && OP(startbranch) == BRANCH | |||
{ | && mutate_ok | |||
) { | ||||
/* demq. | /* demq. | |||
Assuming this was/is a branch we are dealing with: 'scan' | Assuming this was/is a branch we are dealing with: 'scan' | |||
now points at the item that follows the branch sequence, | now points at the item that follows the branch sequence, | |||
whatever it is. We now start at the beginning of the | whatever it is. We now start at the beginning of the | |||
sequence and look for subsequences of | sequence and look for subsequences of | |||
BRANCH->EXACT=>x1 | BRANCH->EXACT=>x1 | |||
BRANCH->EXACT=>x2 | BRANCH->EXACT=>x2 | |||
tail | tail | |||
skipping to change at line 5158 | skipping to change at line 5186 | |||
} else { | } else { | |||
newframe= RExC_frame_last->next_frame; | newframe= RExC_frame_last->next_frame; | |||
} | } | |||
RExC_frame_last= newframe; | RExC_frame_last= newframe; | |||
newframe->next_regnode = regnext(scan); | newframe->next_regnode = regnext(scan); | |||
newframe->last_regnode = last; | newframe->last_regnode = last; | |||
newframe->stopparen = stopparen; | newframe->stopparen = stopparen; | |||
newframe->prev_recursed_depth = recursed_depth; | newframe->prev_recursed_depth = recursed_depth; | |||
newframe->this_prev_frame= frame; | newframe->this_prev_frame= frame; | |||
newframe->in_gosub = ( | ||||
(frame && frame->in_gosub) || OP(scan) == GOSUB | ||||
); | ||||
DEBUG_STUDYDATA("frame-new", data, depth, is_inf); | DEBUG_STUDYDATA("frame-new", data, depth, is_inf); | |||
DEBUG_PEEP("fnew", scan, depth, flags); | DEBUG_PEEP("fnew", scan, depth, flags); | |||
frame = newframe; | frame = newframe; | |||
scan = start; | scan = start; | |||
stopparen = paren; | stopparen = paren; | |||
last = end; | last = end; | |||
depth = depth + 1; | depth = depth + 1; | |||
recursed_depth= my_recursed_depth; | recursed_depth= my_recursed_depth; | |||
skipping to change at line 5315 | skipping to change at line 5346 | |||
} | } | |||
if (flags & SCF_DO_SUBSTR) | if (flags & SCF_DO_SUBSTR) | |||
data->pos_min++; | data->pos_min++; | |||
min++; | min++; | |||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | |||
case STAR: | case STAR: | |||
next = NEXTOPER(scan); | next = NEXTOPER(scan); | |||
/* This temporary node can now be turned into EXACTFU, and | /* This temporary node can now be turned into EXACTFU, and | |||
* must, as regexec.c doesn't handle it */ | * must, as regexec.c doesn't handle it */ | |||
if (OP(next) == EXACTFU_S_EDGE) { | if (OP(next) == EXACTFU_S_EDGE && mutate_ok) { | |||
OP(next) = EXACTFU; | OP(next) = EXACTFU; | |||
} | } | |||
if ( STR_LEN(next) == 1 | if ( STR_LEN(next) == 1 | |||
&& isALPHA_A(* STRING(next)) | && isALPHA_A(* STRING(next)) | |||
&& ( OP(next) == EXACTFAA | && ( OP(next) == EXACTFAA | |||
|| ( OP(next) == EXACTFU | || ( OP(next) == EXACTFU | |||
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))) | && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))) | |||
{ | && mutate_ok | |||
) { | ||||
/* These differ in just one bit */ | /* These differ in just one bit */ | |||
U8 mask = ~ ('A' ^ 'a'); | U8 mask = ~ ('A' ^ 'a'); | |||
assert(isALPHA_A(* STRING(next))); | assert(isALPHA_A(* STRING(next))); | |||
/* Then replace it by an ANYOFM node, with | /* Then replace it by an ANYOFM node, with | |||
* the mask set to the complement of the | * the mask set to the complement of the | |||
* bit that differs between upper and lower | * bit that differs between upper and lower | |||
* case, and the lowest code point of the | * case, and the lowest code point of the | |||
* pair (which the '&' forces) */ | * pair (which the '&' forces) */ | |||
skipping to change at line 5411 | skipping to change at line 5443 | |||
if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY)) | if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY)) | |||
f &= ~SCF_WHILEM_VISITED_POS; | f &= ~SCF_WHILEM_VISITED_POS; | |||
/* This will finish on WHILEM, setting scan, or on NULL: */ | /* This will finish on WHILEM, setting scan, or on NULL: */ | |||
/* recurse study_chunk() on loop bodies */ | /* recurse study_chunk() on loop bodies */ | |||
minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, | minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, | |||
last, data, stopparen, recursed_depth, NULL, | last, data, stopparen, recursed_depth, NULL, | |||
(mincount == 0 | (mincount == 0 | |||
? (f & ~SCF_DO_SUBSTR) | ? (f & ~SCF_DO_SUBSTR) | |||
: f) | : f) | |||
,depth+1); | , depth+1, mutate_ok); | |||
if (flags & SCF_DO_STCLASS) | if (flags & SCF_DO_STCLASS) | |||
data->start_class = oclass; | data->start_class = oclass; | |||
if (mincount == 0 || minnext == 0) { | if (mincount == 0 || minnext == 0) { | |||
if (flags & SCF_DO_STCLASS_OR) { | if (flags & SCF_DO_STCLASS_OR) { | |||
ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class); | ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class); | |||
} | } | |||
else if (flags & SCF_DO_STCLASS_AND) { | else if (flags & SCF_DO_STCLASS_AND) { | |||
/* Switch to OR mode: cache the old value of | /* Switch to OR mode: cache the old value of | |||
* data->start_class */ | * data->start_class */ | |||
skipping to change at line 5457 | skipping to change at line 5489 | |||
count */ | count */ | |||
{ | { | |||
_WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP), | _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP), | |||
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), | Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), | |||
"Quantifier unexpected on zero-length expression " | "Quantifier unexpected on zero-length expression " | |||
"in regex m/%" UTF8f "/", | "in regex m/%" UTF8f "/", | |||
UTF8fARG(UTF, RExC_precomp_end - RExC_precomp, | UTF8fARG(UTF, RExC_precomp_end - RExC_precomp, | |||
RExC_precomp))); | RExC_precomp))); | |||
} | } | |||
if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext ) | ||||
|| min >= SSize_t_MAX - minnext * mincount ) | ||||
{ | ||||
FAIL("Regexp out of space"); | ||||
} | ||||
min += minnext * mincount; | min += minnext * mincount; | |||
is_inf_internal |= deltanext == SSize_t_MAX | is_inf_internal |= deltanext == SSize_t_MAX | |||
|| (maxcount == REG_INFTY && minnext + deltanext > 0); | || (maxcount == REG_INFTY && minnext + deltanext > 0); | |||
is_inf |= is_inf_internal; | is_inf |= is_inf_internal; | |||
if (is_inf) { | if (is_inf) { | |||
delta = SSize_t_MAX; | delta = SSize_t_MAX; | |||
} else { | } else { | |||
delta += (minnext + deltanext) * maxcount | delta += (minnext + deltanext) * maxcount | |||
- minnext * mincount; | - minnext * mincount; | |||
} | } | |||
/* Try powerful optimization CURLYX => CURLYN. */ | /* Try powerful optimization CURLYX => CURLYN. */ | |||
if ( OP(oscan) == CURLYX && data | if ( OP(oscan) == CURLYX && data | |||
&& data->flags & SF_IN_PAR | && data->flags & SF_IN_PAR | |||
&& !(data->flags & SF_HAS_EVAL) | && !(data->flags & SF_HAS_EVAL) | |||
&& !deltanext && minnext == 1 ) { | && !deltanext && minnext == 1 | |||
&& mutate_ok | ||||
) { | ||||
/* Try to optimize to CURLYN. */ | /* Try to optimize to CURLYN. */ | |||
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; | regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; | |||
regnode * const nxt1 = nxt; | regnode * const nxt1 = nxt; | |||
#ifdef DEBUGGING | #ifdef DEBUGGING | |||
regnode *nxt2; | regnode *nxt2; | |||
#endif | #endif | |||
/* Skip open. */ | /* Skip open. */ | |||
nxt = regnext(nxt); | nxt = regnext(nxt); | |||
if (!REGNODE_SIMPLE(OP(nxt)) | if (!REGNODE_SIMPLE(OP(nxt)) | |||
skipping to change at line 5521 | skipping to change at line 5561 | |||
#endif | #endif | |||
} | } | |||
nogo: | nogo: | |||
/* Try optimization CURLYX => CURLYM. */ | /* Try optimization CURLYX => CURLYM. */ | |||
if ( OP(oscan) == CURLYX && data | if ( OP(oscan) == CURLYX && data | |||
&& !(data->flags & SF_HAS_PAR) | && !(data->flags & SF_HAS_PAR) | |||
&& !(data->flags & SF_HAS_EVAL) | && !(data->flags & SF_HAS_EVAL) | |||
&& !deltanext /* atom is fixed width */ | && !deltanext /* atom is fixed width */ | |||
&& minnext != 0 /* CURLYM can't handle zero width */ | && minnext != 0 /* CURLYM can't handle zero width */ | |||
/* Nor characters whose fold at run-time may be | /* Nor characters whose fold at run-time may be | |||
* multi-character */ | * multi-character */ | |||
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN) | && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN) | |||
&& mutate_ok | ||||
) { | ) { | |||
/* XXXX How to optimize if data == 0? */ | /* XXXX How to optimize if data == 0? */ | |||
/* Optimize to a simpler form. */ | /* Optimize to a simpler form. */ | |||
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */ | regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */ | |||
regnode *nxt2; | regnode *nxt2; | |||
OP(oscan) = CURLYM; | OP(oscan) = CURLYM; | |||
while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/ | while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/ | |||
&& (OP(nxt2) != WHILEM)) | && (OP(nxt2) != WHILEM)) | |||
nxt = nxt2; | nxt = nxt2; | |||
skipping to change at line 5577 | skipping to change at line 5617 | |||
else | else | |||
OP(nxt) = NOTHING; /* Cannot beautify */ | OP(nxt) = NOTHING; /* Cannot beautify */ | |||
} | } | |||
nxt1 = nnxt; | nxt1 = nnxt; | |||
} | } | |||
#endif | #endif | |||
/* Optimize again: */ | /* Optimize again: */ | |||
/* recurse study_chunk() on optimised CURLYX => CURLYM */ | /* recurse study_chunk() on optimised CURLYX => CURLYM */ | |||
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, | study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, | |||
NULL, stopparen, recursed_depth, NULL, 0, | NULL, stopparen, recursed_depth, NULL, 0, | |||
depth+1); | depth+1, mutate_ok); | |||
} | } | |||
else | else | |||
oscan->flags = 0; | oscan->flags = 0; | |||
} | } | |||
else if ((OP(oscan) == CURLYX) | else if ((OP(oscan) == CURLYX) | |||
&& (flags & SCF_WHILEM_VISITED_POS) | && (flags & SCF_WHILEM_VISITED_POS) | |||
/* See the comment on a similar expression above. | /* See the comment on a similar expression above. | |||
However, this time it's not a subexpression | However, this time it's not a subexpression | |||
we care about, but the expression itself. */ | we care about, but the expression itself. */ | |||
&& (maxcount == REG_INFTY) | && (maxcount == REG_INFTY) | |||
skipping to change at line 5707 | skipping to change at line 5747 | |||
? SSize_t_MAX | ? SSize_t_MAX | |||
: data->pos_min + data->pos_delta - last_chrs; | : data->pos_min + data->pos_delta - last_chrs; | |||
} | } | |||
data->cur_is_floating = 1; /* float */ | data->cur_is_floating = 1; /* float */ | |||
} | } | |||
SvREFCNT_dec(last_str); | SvREFCNT_dec(last_str); | |||
} | } | |||
if (data && (fl & SF_HAS_EVAL)) | if (data && (fl & SF_HAS_EVAL)) | |||
data->flags |= SF_HAS_EVAL; | data->flags |= SF_HAS_EVAL; | |||
optimize_curly_tail: | optimize_curly_tail: | |||
if (OP(oscan) != CURLYX) { | rck_elide_nothing(oscan); | |||
while (PL_regkind[OP(next = regnext(oscan))] == NOTHING | ||||
&& NEXT_OFF(next)) | ||||
NEXT_OFF(oscan) += NEXT_OFF(next); | ||||
} | ||||
continue; | continue; | |||
default: | default: | |||
#ifdef DEBUGGING | #ifdef DEBUGGING | |||
Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d", | Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d", | |||
OP(scan)); | OP(scan)); | |||
#endif | #endif | |||
case REF: | case REF: | |||
case CLUMP: | case CLUMP: | |||
if (flags & SCF_DO_SUBSTR) { | if (flags & SCF_DO_SUBSTR) { | |||
skipping to change at line 5986 | skipping to change at line 6022 | |||
f |= SCF_DO_STCLASS_AND; | f |= SCF_DO_STCLASS_AND; | |||
} | } | |||
if (flags & SCF_WHILEM_VISITED_POS) | if (flags & SCF_WHILEM_VISITED_POS) | |||
f |= SCF_WHILEM_VISITED_POS; | f |= SCF_WHILEM_VISITED_POS; | |||
next = regnext(scan); | next = regnext(scan); | |||
nscan = NEXTOPER(NEXTOPER(scan)); | nscan = NEXTOPER(NEXTOPER(scan)); | |||
/* recurse study_chunk() for lookahead body */ | /* recurse study_chunk() for lookahead body */ | |||
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, | minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, | |||
last, &data_fake, stopparen, | last, &data_fake, stopparen, | |||
recursed_depth, NULL, f, depth+1); | recursed_depth, NULL, f, depth+1, | |||
mutate_ok); | ||||
if (scan->flags) { | if (scan->flags) { | |||
if ( deltanext < 0 | if ( deltanext < 0 | |||
|| deltanext > (I32) U8_MAX | || deltanext > (I32) U8_MAX | |||
|| minnext > (I32)U8_MAX | || minnext > (I32)U8_MAX | |||
|| minnext + deltanext > (I32)U8_MAX) | || minnext + deltanext > (I32)U8_MAX) | |||
{ | { | |||
FAIL2("Lookbehind longer than %" UVuf " not implemented", | FAIL2("Lookbehind longer than %" UVuf " not implemented", | |||
(UV)U8_MAX); | (UV)U8_MAX); | |||
} | } | |||
skipping to change at line 6091 | skipping to change at line 6128 | |||
} | } | |||
if (flags & SCF_WHILEM_VISITED_POS) | if (flags & SCF_WHILEM_VISITED_POS) | |||
f |= SCF_WHILEM_VISITED_POS; | f |= SCF_WHILEM_VISITED_POS; | |||
next = regnext(scan); | next = regnext(scan); | |||
nscan = NEXTOPER(NEXTOPER(scan)); | nscan = NEXTOPER(NEXTOPER(scan)); | |||
/* positive lookahead study_chunk() recursion */ | /* positive lookahead study_chunk() recursion */ | |||
*minnextp = study_chunk(pRExC_state, &nscan, minnextp, | *minnextp = study_chunk(pRExC_state, &nscan, minnextp, | |||
&deltanext, last, &data_fake, | &deltanext, last, &data_fake, | |||
stopparen, recursed_depth, NULL, | stopparen, recursed_depth, NULL, | |||
f, depth+1); | f, depth+1, mutate_ok); | |||
if (scan->flags) { | if (scan->flags) { | |||
assert(0); /* This code has never been tested since this | assert(0); /* This code has never been tested since this | |||
is normally not compiled */ | is normally not compiled */ | |||
if ( deltanext < 0 | if ( deltanext < 0 | |||
|| deltanext > (I32) U8_MAX | || deltanext > (I32) U8_MAX | |||
|| *minnextp > (I32)U8_MAX | || *minnextp > (I32)U8_MAX | |||
|| *minnextp + deltanext > (I32)U8_MAX) | || *minnextp + deltanext > (I32)U8_MAX) | |||
{ | { | |||
FAIL2("Lookbehind longer than %" UVuf " not implemented", | FAIL2("Lookbehind longer than %" UVuf " not implemented", | |||
(UV)U8_MAX); | (UV)U8_MAX); | |||
skipping to change at line 6259 | skipping to change at line 6296 | |||
if (trie->jump[word]) { | if (trie->jump[word]) { | |||
if (!nextbranch) | if (!nextbranch) | |||
nextbranch = trie_node + trie->jump[0]; | nextbranch = trie_node + trie->jump[0]; | |||
scan= trie_node + trie->jump[word]; | scan= trie_node + trie->jump[word]; | |||
/* We go from the jump point to the branch that follows | /* We go from the jump point to the branch that follows | |||
it. Note this means we need the vestigal unused | it. Note this means we need the vestigal unused | |||
branches even though they arent otherwise used. */ | branches even though they arent otherwise used. */ | |||
/* optimise study_chunk() for TRIE */ | /* optimise study_chunk() for TRIE */ | |||
minnext = study_chunk(pRExC_state, &scan, minlenp, | minnext = study_chunk(pRExC_state, &scan, minlenp, | |||
&deltanext, (regnode *)nextbranch, &data_fake, | &deltanext, (regnode *)nextbranch, &data_fake, | |||
stopparen, recursed_depth, NULL, f, depth+1); | stopparen, recursed_depth, NULL, f, depth+1, | |||
mutate_ok); | ||||
} | } | |||
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) | if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) | |||
nextbranch= regnext((regnode*)nextbranch); | nextbranch= regnext((regnode*)nextbranch); | |||
if (min1 > (SSize_t)(minnext + trie->minlen)) | if (min1 > (SSize_t)(minnext + trie->minlen)) | |||
min1 = minnext + trie->minlen; | min1 = minnext + trie->minlen; | |||
if (deltanext == SSize_t_MAX) { | if (deltanext == SSize_t_MAX) { | |||
is_inf = is_inf_internal = 1; | is_inf = is_inf_internal = 1; | |||
max1 = SSize_t_MAX; | max1 = SSize_t_MAX; | |||
} else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen)) | } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen)) | |||
skipping to change at line 7695 | skipping to change at line 7733 | |||
if (reg(pRExC_state, 0, &flags, 1)) { | if (reg(pRExC_state, 0, &flags, 1)) { | |||
/* Success!, But we may need to redo the parse knowing how many parens | /* Success!, But we may need to redo the parse knowing how many parens | |||
* there actually are */ | * there actually are */ | |||
if (IN_PARENS_PASS) { | if (IN_PARENS_PASS) { | |||
flags |= RESTART_PARSE; | flags |= RESTART_PARSE; | |||
} | } | |||
/* We have that number in RExC_npar */ | /* We have that number in RExC_npar */ | |||
RExC_total_parens = RExC_npar; | RExC_total_parens = RExC_npar; | |||
/* XXX For backporting, use long jumps if there is any possibility of | ||||
* overflow */ | ||||
if (RExC_size > U16_MAX && ! RExC_use_BRANCHJ) { | ||||
RExC_use_BRANCHJ = TRUE; | ||||
flags |= RESTART_PARSE; | ||||
} | ||||
} | } | |||
else if (! MUST_RESTART(flags)) { | else if (! MUST_RESTART(flags)) { | |||
ReREFCNT_dec(Rx); | ReREFCNT_dec(Rx); | |||
Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile, flags=%#" UVxf, (UV) flags); | Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile, flags=%#" UVxf, (UV) flags); | |||
} | } | |||
/* Here, we either have success, or we have to redo the parse for some reason */ | /* Here, we either have success, or we have to redo the parse for some reason */ | |||
if (MUST_RESTART(flags)) { | if (MUST_RESTART(flags)) { | |||
/* It's possible to write a regexp in ascii that represents Unicode | /* It's possible to write a regexp in ascii that represents Unicode | |||
skipping to change at line 8030 | skipping to change at line 8075 | |||
DEBUG_RExC_seen(); | DEBUG_RExC_seen(); | |||
/* | /* | |||
* MAIN ENTRY FOR study_chunk() FOR m/PATTERN/ | * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/ | |||
* (NO top level branches) | * (NO top level branches) | |||
*/ | */ | |||
minlen = study_chunk(pRExC_state, &first, &minlen, &fake, | minlen = study_chunk(pRExC_state, &first, &minlen, &fake, | |||
scan + RExC_size, /* Up to end */ | scan + RExC_size, /* Up to end */ | |||
&data, -1, 0, NULL, | &data, -1, 0, NULL, | |||
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag | SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag | |||
| (restudied ? SCF_TRIE_DOING_RESTUDY : 0), | | (restudied ? SCF_TRIE_DOING_RESTUDY : 0), | |||
0); | 0, TRUE); | |||
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk")); | CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk")); | |||
if ( RExC_total_parens == 1 && !data.cur_is_floating | if ( RExC_total_parens == 1 && !data.cur_is_floating | |||
&& data.last_start_min == 0 && data.last_end > 0 | && data.last_start_min == 0 && data.last_end > 0 | |||
&& !RExC_seen_zerolen | && !RExC_seen_zerolen | |||
&& !(RExC_seen & REG_VERBARG_SEEN) | && !(RExC_seen & REG_VERBARG_SEEN) | |||
&& !(RExC_seen & REG_GPOS_SEEN) | && !(RExC_seen & REG_GPOS_SEEN) | |||
){ | ){ | |||
RExC_rx->extflags |= RXf_CHECK_ALL; | RExC_rx->extflags |= RXf_CHECK_ALL; | |||
skipping to change at line 8156 | skipping to change at line 8201 | |||
DEBUG_RExC_seen(); | DEBUG_RExC_seen(); | |||
/* | /* | |||
* MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../ | * MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../ | |||
* (patterns WITH top level branches) | * (patterns WITH top level branches) | |||
*/ | */ | |||
minlen = study_chunk(pRExC_state, | minlen = study_chunk(pRExC_state, | |||
&scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL, | &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL, | |||
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied | SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied | |||
? SCF_TRIE_DOING_RESTUDY | ? SCF_TRIE_DOING_RESTUDY | |||
: 0), | : 0), | |||
0); | 0, TRUE); | |||
CHECK_RESTUDY_GOTO_butfirst(NOOP); | CHECK_RESTUDY_GOTO_butfirst(NOOP); | |||
RExC_rx->check_substr = NULL; | RExC_rx->check_substr = NULL; | |||
RExC_rx->check_utf8 = NULL; | RExC_rx->check_utf8 = NULL; | |||
RExC_rx->substrs->data[0].substr = NULL; | RExC_rx->substrs->data[0].substr = NULL; | |||
RExC_rx->substrs->data[0].utf8_substr = NULL; | RExC_rx->substrs->data[0].utf8_substr = NULL; | |||
RExC_rx->substrs->data[1].substr = NULL; | RExC_rx->substrs->data[1].substr = NULL; | |||
RExC_rx->substrs->data[1].utf8_substr = NULL; | RExC_rx->substrs->data[1].utf8_substr = NULL; | |||
End of changes. 25 change blocks. | ||||
47 lines changed or deleted | 92 lines changed or added |