tin  2.4.5
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.4.5.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pcre_exec.c
Go to the documentation of this file.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Written by Philip Hazel
9  Copyright (c) 1997-2006 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15  * Redistributions of source code must retain the above copyright notice,
16  this list of conditions and the following disclaimer.
17 
18  * Redistributions in binary form must reproduce the above copyright
19  notice, this list of conditions and the following disclaimer in the
20  documentation and/or other materials provided with the distribution.
21 
22  * Neither the name of the University of Cambridge nor the names of its
23  contributors may be used to endorse or promote products derived from
24  this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44 
45 #define NLBLOCK md /* Block containing newline information */
46 #define PSSTART start_subject /* Field containing processed string start */
47 #define PSEND end_subject /* Field containing processed string end */
48 
49 #include "pcre_internal.h"
50 
51 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53 
54 #define EPTR_WORK_SIZE (1000)
55 
56 /* Flag bits for the match() function */
57 
58 #define match_condassert 0x01 /* Called to check a condition assertion */
59 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
60 #define match_tail_recursed 0x04 /* Tail recursive call */
61 
62 /* Non-error returns from the match() function. Error returns are externally
63 defined PCRE_ERROR_xxx codes, which are all negative. */
64 
65 #define MATCH_MATCH 1
66 #define MATCH_NOMATCH 0
67 
68 /* Maximum number of ints of offset to save on the stack for recursive calls.
69 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
70 because the offset vector is always a multiple of 3 long. */
71 
72 #define REC_STACK_SAVE_MAX 30
73 
/* Lower and upper repeat bounds for the common repeat operators. In the table
of upper bounds, a value of 0 stands for "no limit". */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
78 
79 
80 
#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Write a run of characters to stdout in readable form: printable characters
appear as themselves, anything else as a hexadecimal \x escape. When the
characters come from the subject string, the run is cut short at the end of
the subject.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject
  md          pointer to matching data block, if is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
int remaining = length;
unsigned int ch;

/* md is consulted only for subject text; never run past the subject's end. */

if (is_subject && md->end_subject - p < remaining)
  remaining = md->end_subject - p;

for (; remaining > 0; remaining--)
  {
  ch = *p++;
  if (isprint(ch))
    printf("%c", ch);
  else
    printf("\\x%02x", ch);
  }
}
#endif
107 
108 
109 
110 /*************************************************
111 * Match a back-reference *
112 *************************************************/
113 
114 /* If a back reference hasn't been set, the length that is passed is greater
115 than the number of characters left in the string, so the match fails.
116 
117 Arguments:
118  offset index into the offset vector
119  eptr points into the subject
120  length length to be matched
121  md points to match data block
122  ims the ims flags
123 
124 Returns: TRUE if matched
125 */
126 
127 static BOOL
128 match_ref(int offset, register USPTR eptr, int length, match_data *md,
129  unsigned long int ims)
130 {
131 USPTR p = md->start_subject + md->offset_vector[offset];
132 
133 #ifdef DEBUG
134 if (eptr >= md->end_subject)
135  printf("matching subject <null>");
136 else
137  {
138  printf("matching subject ");
139  pchars(eptr, length, TRUE, md);
140  }
141 printf(" against backref ");
142 pchars(p, length, FALSE, md);
143 printf("\n");
144 #endif
145 
146 /* Always fail if not enough characters left */
147 
148 if (length > md->end_subject - eptr) return FALSE;
149 
150 /* Separate the caselesss case for speed */
151 
152 if ((ims & PCRE_CASELESS) != 0)
153  {
154  while (length-- > 0)
155  if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
156  }
157 else
158  { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
159 
160 return TRUE;
161 }
162 
163 
164 
165 /***************************************************************************
166 ****************************************************************************
167  RECURSION IN THE match() FUNCTION
168 
169 The match() function is highly recursive, though not every recursive call
170 increases the recursive depth. Nevertheless, some regular expressions can cause
171 it to recurse to a great depth. I was writing for Unix, so I just let it call
172 itself recursively. This uses the stack for saving everything that has to be
173 saved for a recursive call. On Unix, the stack can be large, and this works
174 fine.
175 
176 It turns out that on some non-Unix-like systems there are problems with
177 programs that use a lot of stack. (This despite the fact that every last chip
178 has oodles of memory these days, and techniques for extending the stack have
179 been known for decades.) So....
180 
181 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
182 calls by keeping local variables that need to be preserved in blocks of memory
183 obtained from malloc() instead of on the stack. Macros are used to
184 achieve this so that the actual code doesn't look very different to what it
185 always used to.
186 ****************************************************************************
187 ***************************************************************************/
188 
189 
190 /* These versions of the macros use the stack, as normal. There are debugging
191 versions and production versions. */
192 
193 #ifndef NO_RECURSE
194 #define REGISTER register
#ifdef DEBUG

/* Tracing variants: report the source line of every call into match() and of
every return from it. RRETURN's message deliberately has no newline; the
"to line" text printed by the caller's RMATCH completes it. */

#define RMATCH(rv,a_eptr,a_ecode,a_top,a_md,a_ims,a_eptrb,a_flags) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rv = match(a_eptr,a_ecode,a_top,a_md,a_ims,a_eptrb,a_flags,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }

#define RRETURN(rv) \
  { \
  printf("match() returned %d from line %d ", rv, __LINE__); \
  return rv; \
  }

#else

/* Production variants: a plain recursive call, one level deeper, and a plain
return. */

#define RMATCH(rv,a_eptr,a_ecode,a_top,a_md,a_ims,a_eptrb,a_flags) \
  rv = match(a_eptr,a_ecode,a_top,a_md,a_ims,a_eptrb,a_flags,rdepth+1)

#define RRETURN(rv) return rv

#endif
212 
213 #else
214 
215 
216 /* These versions of the macros manage a private stack on the heap. Note
217 that the rd argument of RMATCH isn't actually used. It's the md argument of
218 match(), which never changes. */
219 
220 #define REGISTER
221 
/* Heap-based stand-in for a recursive call to match(). A new frame is obtained
from pcre_stack_malloc, loaded with the "arguments", chained to the current
frame, and control jumps to HEAP_RECURSE. When that level finishes, RRETURN
longjmps back to the setjmp point saved in the calling frame, and the result is
picked up from frame->Xresult.

If the new frame cannot be allocated, this level gives up with
PCRE_ERROR_NOMEMORY (via RRETURN, so the error propagates like any other
negative result) instead of writing through a null pointer. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = ra;\
    newframe->Xecode = rb;\
    newframe->Xoffset_top = rc;\
    newframe->Xims = re;\
    newframe->Xeptrb = rf;\
    newframe->Xflags = rg;\
    newframe->Xrdepth = frame->Xrdepth + 1;\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }
246 
/* Heap-based stand-in for "return". The finished frame is unlinked and handed
back to pcre_stack_free. At the top level (no calling frame) this is a real
return from match(); otherwise the result is stored in the calling frame, that
frame is published in md->thisframe, and control longjmps to the point its
RMATCH saved. */

#define RRETURN(ra)\
  {\
  heapframe *finished = frame;\
  frame = finished->Xprevframe;\
  (pcre_stack_free)(finished);\
  if (frame == NULL) return ra;\
  frame->Xresult = ra;\
  md->thisframe = frame;\
  longjmp(frame->Xwhere, 1);\
  }
260 
261 
262 /* Structure for remembering the local variables in a private frame */
263 
264 typedef struct heapframe {
265  struct heapframe *Xprevframe;
266 
267  /* Function arguments that may change */
268 
269  const uschar *Xeptr;
270  const uschar *Xecode;
271  int Xoffset_top;
272  long int Xims;
273  eptrblock *Xeptrb;
274  int Xflags;
275  unsigned int Xrdepth;
276 
277  /* Function local variables */
278 
279  const uschar *Xcallpat;
280  const uschar *Xcharptr;
281  const uschar *Xdata;
282  const uschar *Xnext;
283  const uschar *Xpp;
284  const uschar *Xprev;
285  const uschar *Xsaved_eptr;
286 
287  recursion_info Xnew_recursive;
288 
289  BOOL Xcur_is_word;
290  BOOL Xcondition;
291  BOOL Xprev_is_word;
292 
293  unsigned long int Xoriginal_ims;
294 
295 #ifdef SUPPORT_UCP
296  int Xprop_type;
297  int Xprop_value;
298  int Xprop_fail_result;
299  int Xprop_category;
300  int Xprop_chartype;
301  int Xprop_script;
302 #endif
303 
304  int Xctype;
305  unsigned int Xfc;
306  int Xfi;
307  int Xlength;
308  int Xmax;
309  int Xmin;
310  int Xnumber;
311  int Xoffset;
312  int Xop;
313  int Xsave_capture_last;
314  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
315  int Xstacksave[REC_STACK_SAVE_MAX];
316 
317  eptrblock Xnewptrb;
318 
319  /* Place to pass back result, and where to jump back to */
320 
321  int Xresult;
322  jmp_buf Xwhere;
323 
324 } heapframe;
325 
326 #endif
327 
328 
329 /***************************************************************************
330 ***************************************************************************/
331 
332 
333 
334 /*************************************************
335 * Match from current position *
336 *************************************************/
337 
338 /* This function is called recursively in many circumstances. Whenever it
339 returns a negative (error) response, the outer incarnation must also return the
340 same response.
341 
342 Performance note: It might be tempting to extract commonly used fields from the
343 md structure (e.g. utf8, end_subject) into individual variables to improve
344 performance. Tests using gcc on a SPARC disproved this; in the first case, it
345 made performance worse.
346 
347 Arguments:
348  eptr pointer to current character in subject
349  ecode pointer to current position in compiled code
350  offset_top current top pointer
351  md pointer to "static" info for the match
352  ims current /i, /m, and /s options
353  eptrb pointer to chain of blocks containing eptr at start of
354  brackets - for testing for empty matches
355  flags can contain
356  match_condassert - this is an assertion condition
357  match_cbegroup - this is the start of an unlimited repeat
358  group that can match an empty string
359  match_tail_recursed - this is a tail_recursed group
360  rdepth the recursion depth
361 
362 Returns: MATCH_MATCH if matched ) these values are >= 0
363  MATCH_NOMATCH if failed to match )
364  a negative PCRE_ERROR_xxx value if aborted by an error condition
365  (e.g. stopped by repeated call or recursion limit)
366 */
367 
368 static int
369 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
370  int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
371  int flags, unsigned int rdepth)
372 {
373 /* These variables do not need to be preserved over recursion in this function,
374 so they can be ordinary variables in all cases. Mark some of them with
375 "register" because they are used a lot in loops. */
376 
377 register int rrc; /* Returns from recursive calls */
378 register int i; /* Used for loops not involving calls to RMATCH() */
379 register unsigned int c; /* Character values not kept over RMATCH() calls */
380 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
381 
382 BOOL minimize, possessive; /* Quantifier options */
383 
384 /* When recursion is not being used, all "local" variables that have to be
385 preserved over calls to RMATCH() are part of a "frame" which is obtained from
386 heap storage. Set up the top-level frame here; others are obtained from the
387 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
388 
389 #ifdef NO_RECURSE
390 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
391 frame->Xprevframe = NULL; /* Marks the top level */
392 
393 /* Copy in the original argument variables */
394 
395 frame->Xeptr = eptr;
396 frame->Xecode = ecode;
397 frame->Xoffset_top = offset_top;
398 frame->Xims = ims;
399 frame->Xeptrb = eptrb;
400 frame->Xflags = flags;
401 frame->Xrdepth = rdepth;
402 
403 /* This is where control jumps back to in order to effect "recursion" */
404 
405 HEAP_RECURSE:
406 
407 /* Macros make the argument variables come from the current frame */
408 
409 #define eptr frame->Xeptr
410 #define ecode frame->Xecode
411 #define offset_top frame->Xoffset_top
412 #define ims frame->Xims
413 #define eptrb frame->Xeptrb
414 #define flags frame->Xflags
415 #define rdepth frame->Xrdepth
416 
417 /* Ditto for the local variables */
418 
419 #ifdef SUPPORT_UTF8
420 #define charptr frame->Xcharptr
421 #endif
422 #define callpat frame->Xcallpat
423 #define data frame->Xdata
424 #define next frame->Xnext
425 #define pp frame->Xpp
426 #define prev frame->Xprev
427 #define saved_eptr frame->Xsaved_eptr
428 
429 #define new_recursive frame->Xnew_recursive
430 
431 #define cur_is_word frame->Xcur_is_word
432 #define condition frame->Xcondition
433 #define prev_is_word frame->Xprev_is_word
434 
435 #define original_ims frame->Xoriginal_ims
436 
437 #ifdef SUPPORT_UCP
438 #define prop_type frame->Xprop_type
439 #define prop_value frame->Xprop_value
440 #define prop_fail_result frame->Xprop_fail_result
441 #define prop_category frame->Xprop_category
442 #define prop_chartype frame->Xprop_chartype
443 #define prop_script frame->Xprop_script
444 #endif
445 
446 #define ctype frame->Xctype
447 #define fc frame->Xfc
448 #define fi frame->Xfi
449 #define length frame->Xlength
450 #define max frame->Xmax
451 #define min frame->Xmin
452 #define number frame->Xnumber
453 #define offset frame->Xoffset
454 #define op frame->Xop
455 #define save_capture_last frame->Xsave_capture_last
456 #define save_offset1 frame->Xsave_offset1
457 #define save_offset2 frame->Xsave_offset2
458 #define save_offset3 frame->Xsave_offset3
459 #define stacksave frame->Xstacksave
460 
461 #define newptrb frame->Xnewptrb
462 
463 /* When recursion is being used, local variables are allocated on the stack and
464 get preserved during recursion in the normal way. In this environment, fi and
465 i, and fc and c, can be the same variables. */
466 
467 #else /* NO_RECURSE not defined */
468 #define fi i
469 #define fc c
470 
471 
472 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
473 const uschar *charptr; /* in small blocks of the code. My normal */
474 #endif /* style of coding would have declared */
475 const uschar *callpat; /* them within each of those blocks. */
476 const uschar *data; /* However, in order to accommodate the */
477 const uschar *next; /* version of this code that uses an */
478 USPTR pp; /* external "stack" implemented on the */
479 const uschar *prev; /* heap, it is easier to declare them all */
480 USPTR saved_eptr; /* here, so the declarations can be cut */
481  /* out in a block. The only declarations */
482 recursion_info new_recursive; /* within blocks below are for variables */
483  /* that do not have to be preserved over */
484 BOOL cur_is_word; /* a recursive call to RMATCH(). */
485 BOOL condition;
486 BOOL prev_is_word;
487 
488 unsigned long int original_ims;
489 
490 #ifdef SUPPORT_UCP
491 int prop_type;
492 int prop_value;
493 int prop_fail_result;
494 int prop_category;
495 int prop_chartype;
496 int prop_script;
497 #endif
498 
499 int ctype;
500 int length;
501 int max;
502 int min;
503 int number;
504 int offset;
505 int op;
506 int save_capture_last;
507 int save_offset1, save_offset2, save_offset3;
508 int stacksave[REC_STACK_SAVE_MAX];
509 
510 eptrblock newptrb;
511 #endif /* NO_RECURSE */
512 
513 /* These statements are here to stop the compiler complaining about uninitialized
514 variables. */
515 
516 #ifdef SUPPORT_UCP
517 prop_value = 0;
518 prop_fail_result = 0;
519 #endif
520 
521 
522 /* This label is used for tail recursion, which is used in a few cases even
523 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
524 used. Thanks to Ian Taylor for noticing this possibility and sending the
525 original patch. */
526 
527 TAIL_RECURSE:
528 
529 /* OK, now we can get on with the real code of the function. Recursive calls
530 are specified by the macro RMATCH and RRETURN is used to return. When
531 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
532 and a "return", respectively (possibly with some debugging if DEBUG is
533 defined). However, RMATCH isn't like a function call because it's quite a
534 complicated macro. It has to be used in one particular way. This shouldn't,
535 however, impact performance when true recursion is being used. */
536 
537 /* First check that we haven't called match() too many times, or that we
538 haven't exceeded the recursive call limit. */
539 
542 
543 original_ims = ims; /* Save for resetting on ')' */
544 
545 #ifdef SUPPORT_UTF8
546 utf8 = md->utf8; /* Local copy of the flag */
547 #else
548 utf8 = FALSE;
549 #endif
550 
551 /* At the start of a group with an unlimited repeat that may match an empty
552 string, the match_cbegroup flag is set. When this is the case, add the current
553 subject pointer to the chain of such remembered pointers, to be checked when we
554 hit the closing ket, in order to break infinite loops that match no characters.
555 When match() is called in other circumstances, don't add to the chain. If this
556 is a tail recursion, use a block from the workspace, as the one on the stack is
557 already used. */
558 
559 if ((flags & match_cbegroup) != 0)
560  {
561  eptrblock *p;
562  if ((flags & match_tail_recursed) != 0)
563  {
565  p = md->eptrchain + md->eptrn++;
566  }
567  else p = &newptrb;
568  p->epb_saved_eptr = eptr;
569  p->epb_prev = eptrb;
570  eptrb = p;
571  }
572 
573 /* Now start processing the opcodes. */
574 
575 for (;;)
576  {
577  minimize = possessive = FALSE;
578  op = *ecode;
579 
580  /* For partial matching, remember if we ever hit the end of the subject after
581  matching at least one subject character. */
582 
583  if (md->partial &&
584  eptr >= md->end_subject &&
585  eptr > md->start_match)
586  md->hitend = TRUE;
587 
588  switch(op)
589  {
590  /* Handle a capturing bracket. If there is space in the offset vector, save
591  the current subject position in the working slot at the top of the vector.
592  We mustn't change the current values of the data slot, because they may be
593  set from a previous iteration of this group, and be referred to by a
594  reference inside the group.
595 
596  If the bracket fails to match, we need to restore this value and also the
597  values of the final offsets, in case they were set by a previous iteration
598  of the same bracket.
599 
600  If there isn't enough space in the offset vector, treat this as if it were
601  a non-capturing bracket. Don't worry about setting the flag for the error
602  case here; that is handled in the code for KET. */
603 
604  case OP_CBRA:
605  case OP_SCBRA:
606  number = GET2(ecode, 1+LINK_SIZE);
607  offset = number << 1;
608 
609 #ifdef DEBUG
610  printf("start bracket %d\n", number);
611  printf("subject=");
612  pchars(eptr, 16, TRUE, md);
613  printf("\n");
614 #endif
615 
616  if (offset < md->offset_max)
617  {
618  save_offset1 = md->offset_vector[offset];
619  save_offset2 = md->offset_vector[offset+1];
620  save_offset3 = md->offset_vector[md->offset_end - number];
621  save_capture_last = md->capture_last;
622 
623  DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
624  md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
625 
626  flags = (op == OP_SCBRA)? match_cbegroup : 0;
627  do
628  {
629  RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
630  ims, eptrb, flags);
631  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
632  md->capture_last = save_capture_last;
633  ecode += GET(ecode, 1);
634  }
635  while (*ecode == OP_ALT);
636 
637  DPRINTF(("bracket %d failed\n", number));
638 
639  md->offset_vector[offset] = save_offset1;
640  md->offset_vector[offset+1] = save_offset2;
641  md->offset_vector[md->offset_end - number] = save_offset3;
642 
644  }
645 
646  /* Insufficient room for saving captured contents. Treat as a non-capturing
647  bracket. */
648 
649  DPRINTF(("insufficient capture room: treat as non-capturing\n"));
650 
651  /* Non-capturing bracket. Loop for all the alternatives. When we get to the
652  final alternative within the brackets, we would return the result of a
653  recursive call to match() whatever happened. We can reduce stack usage by
654  turning this into a tail recursion. */
655 
656  case OP_BRA:
657  case OP_SBRA:
658  DPRINTF(("start non-capturing bracket\n"));
659  flags = (op >= OP_SBRA)? match_cbegroup : 0;
660  for (;;)
661  {
662  if (ecode[GET(ecode, 1)] != OP_ALT)
663  {
664  ecode += _pcre_OP_lengths[*ecode];
665  flags |= match_tail_recursed;
666  DPRINTF(("bracket 0 tail recursion\n"));
667  goto TAIL_RECURSE;
668  }
669 
670  /* For non-final alternatives, continue the loop for a NOMATCH result;
671  otherwise return. */
672 
673  RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
674  eptrb, flags);
675  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676  ecode += GET(ecode, 1);
677  }
678  /* Control never reaches here. */
679 
680  /* Conditional group: compilation checked that there are no more than
681  two branches. If the condition is false, skipping the first branch takes us
682  past the end if there is only one branch, but that's OK because that is
683  exactly what going to the ket would do. As there is only one branch to be
684  obeyed, we can use tail recursion to avoid using another stack frame. */
685 
686  case OP_COND:
687  case OP_SCOND:
688  if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
689  {
690  offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
691  condition = md->recursive != NULL &&
692  (offset == RREF_ANY || offset == md->recursive->group_num);
693  ecode += condition? 3 : GET(ecode, 1);
694  }
695 
696  else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
697  {
698  offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
699  condition = offset < offset_top && md->offset_vector[offset] >= 0;
700  ecode += condition? 3 : GET(ecode, 1);
701  }
702 
703  else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
704  {
705  condition = FALSE;
706  ecode += GET(ecode, 1);
707  }
708 
709  /* The condition is an assertion. Call match() to evaluate it - setting
710  the final argument match_condassert causes it to stop at the end of an
711  assertion. */
712 
713  else
714  {
715  RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
717  if (rrc == MATCH_MATCH)
718  {
719  condition = TRUE;
720  ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
721  while (*ecode == OP_ALT) ecode += GET(ecode, 1);
722  }
723  else if (rrc != MATCH_NOMATCH)
724  {
725  RRETURN(rrc); /* Need braces because of following else */
726  }
727  else
728  {
729  condition = FALSE;
730  ecode += GET(ecode, 1);
731  }
732  }
733 
734  /* We are now at the branch that is to be obeyed. As there is only one,
735  we can use tail recursion to avoid using another stack frame. If the second
736  alternative doesn't exist, we can just plough on. */
737 
738  if (condition || *ecode == OP_ALT)
739  {
740  ecode += 1 + LINK_SIZE;
741  flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
742  goto TAIL_RECURSE;
743  }
744  else
745  {
746  ecode += 1 + LINK_SIZE;
747  }
748  break;
749 
750 
751  /* End of the pattern. If we are in a top-level recursion, we should
752  restore the offsets appropriately and continue from after the call. */
753 
754  case OP_END:
755  if (md->recursive != NULL && md->recursive->group_num == 0)
756  {
757  recursion_info *rec = md->recursive;
758  DPRINTF(("End of pattern in a (?0) recursion\n"));
759  md->recursive = rec->prevrec;
761  rec->saved_max * sizeof(int));
762  md->start_match = rec->save_start;
763  ims = original_ims;
764  ecode = rec->after_call;
765  break;
766  }
767 
768  /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
769  string - backtracking will then try other alternatives, if any. */
770 
771  if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
772  md->end_match_ptr = eptr; /* Record where we ended */
773  md->end_offset_top = offset_top; /* and how many extracts were taken */
775 
776  /* Change option settings */
777 
778  case OP_OPT:
779  ims = ecode[1];
780  ecode += 2;
781  DPRINTF(("ims set to %02lx\n", ims));
782  break;
783 
784  /* Assertion brackets. Check the alternative branches in turn - the
785  matching won't pass the KET for an assertion. If any one branch matches,
786  the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
787  start of each branch to move the current point backwards, so the code at
788  this level is identical to the lookahead case. */
789 
790  case OP_ASSERT:
791  case OP_ASSERTBACK:
792  do
793  {
794  RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
795  if (rrc == MATCH_MATCH) break;
796  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797  ecode += GET(ecode, 1);
798  }
799  while (*ecode == OP_ALT);
800  if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
801 
802  /* If checking an assertion for a condition, return MATCH_MATCH. */
803 
804  if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
805 
806  /* Continue from after the assertion, updating the offsets high water
807  mark, since extracts may have been taken during the assertion. */
808 
809  do ecode += GET(ecode,1); while (*ecode == OP_ALT);
810  ecode += 1 + LINK_SIZE;
811  offset_top = md->end_offset_top;
812  continue;
813 
814  /* Negative assertion: all branches must fail to match */
815 
816  case OP_ASSERT_NOT:
817  case OP_ASSERTBACK_NOT:
818  do
819  {
820  RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
821  if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
822  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
823  ecode += GET(ecode,1);
824  }
825  while (*ecode == OP_ALT);
826 
827  if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
828 
829  ecode += 1 + LINK_SIZE;
830  continue;
831 
832  /* Move the subject pointer back. This occurs only at the start of
833  each branch of a lookbehind assertion. If we are too close to the start to
834  move back, this match function fails. When working with UTF-8 we move
835  back a number of characters, not bytes. */
836 
837  case OP_REVERSE:
838 #ifdef SUPPORT_UTF8
839  if (utf8)
840  {
841  i = GET(ecode, 1);
842  while (i-- > 0)
843  {
844  eptr--;
845  if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
846  BACKCHAR(eptr)
847  }
848  }
849  else
850 #endif
851 
852  /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
853 
854  {
855  eptr -= GET(ecode, 1);
856  if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
857  }
858 
859  /* Skip to next op code */
860 
861  ecode += 1 + LINK_SIZE;
862  break;
863 
864  /* The callout item calls an external function, if one is provided, passing
865  details of the match so far. This is mainly for debugging, though the
866  function is able to force a failure. */
867 
868  case OP_CALLOUT:
869  if (pcre_callout != NULL)
870  {
872  cb.version = 1; /* Version 1 of the callout block */
873  cb.callout_number = ecode[1];
874  cb.offset_vector = md->offset_vector;
875  cb.subject = (PCRE_SPTR)md->start_subject;
877  cb.start_match = md->start_match - md->start_subject;
878  cb.current_position = eptr - md->start_subject;
879  cb.pattern_position = GET(ecode, 2);
880  cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
881  cb.capture_top = offset_top/2;
882  cb.capture_last = md->capture_last;
883  cb.callout_data = md->callout_data;
884  if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
885  if (rrc < 0) RRETURN(rrc);
886  }
887  ecode += 2 + 2*LINK_SIZE;
888  break;
889 
890  /* Recursion either matches the current regex, or some subexpression. The
891  offset data is the offset to the starting bracket from the start of the
892  whole pattern. (This is so that it works from duplicated subpatterns.)
893 
894  If there are any capturing brackets started but not finished, we have to
895  save their starting points and reinstate them after the recursion. However,
896  we don't know how many such there are (offset_top records the completed
897  total) so we just have to save all the potential data. There may be up to
898  65535 such values, which is too large to put on the stack, but using malloc
899  for small numbers seems expensive. As a compromise, the stack is used when
900  there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
901  is used. A problem is what to do if the malloc fails ... there is no way of
902  returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
903  values on the stack, and accept that the rest may be wrong.
904 
905  There are also other values that have to be saved. We use a chained
906  sequence of blocks that actually live on the stack. Thanks to Robin Houston
907  for the original version of this logic. */
908 
909  case OP_RECURSE:
910  {
911  callpat = md->start_code + GET(ecode, 1);
912  new_recursive.group_num = (callpat == md->start_code)? 0 :
913  GET2(callpat, 1 + LINK_SIZE);
914 
915  /* Add to "recursing stack" */
916 
917  new_recursive.prevrec = md->recursive;
918  md->recursive = &new_recursive;
919 
920  /* Find where to continue from afterwards */
921 
922  ecode += 1 + LINK_SIZE;
923  new_recursive.after_call = ecode;
924 
925  /* Now save the offset data. */
926 
927  new_recursive.saved_max = md->offset_end;
928  if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
929  new_recursive.offset_save = stacksave;
930  else
931  {
932  new_recursive.offset_save =
933  (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
934  if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
935  }
936 
937  memcpy(new_recursive.offset_save, md->offset_vector,
938  new_recursive.saved_max * sizeof(int));
939  new_recursive.save_start = md->start_match;
940  md->start_match = eptr;
941 
942  /* OK, now we can do the recursion. For each top-level alternative we
943  restore the offset and recursion data. */
944 
945  DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
946  flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
947  do
948  {
949  RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
950  md, ims, eptrb, flags);
951  if (rrc == MATCH_MATCH)
952  {
953  DPRINTF(("Recursion matched\n"));
954  md->recursive = new_recursive.prevrec;
955  if (new_recursive.offset_save != stacksave)
956  (pcre_free)(new_recursive.offset_save);
958  }
959  else if (rrc != MATCH_NOMATCH)
960  {
961  DPRINTF(("Recursion gave error %d\n", rrc));
962  RRETURN(rrc);
963  }
964 
965  md->recursive = &new_recursive;
966  memcpy(md->offset_vector, new_recursive.offset_save,
967  new_recursive.saved_max * sizeof(int));
968  callpat += GET(callpat, 1);
969  }
970  while (*callpat == OP_ALT);
971 
972  DPRINTF(("Recursion didn't match\n"));
973  md->recursive = new_recursive.prevrec;
974  if (new_recursive.offset_save != stacksave)
975  (pcre_free)(new_recursive.offset_save);
977  }
978  /* Control never reaches here */
979 
980  /* "Once" brackets are like assertion brackets except that after a match,
981  the point in the subject string is not moved back. Thus there can never be
982  a move back into the brackets. Friedl calls these "atomic" subpatterns.
983  Check the alternative branches in turn - the matching won't pass the KET
984  for this kind of subpattern. If any one branch matches, we carry on as at
985  the end of a normal bracket, leaving the subject pointer. */
986 
987  case OP_ONCE:
988  prev = ecode;
989  saved_eptr = eptr;
990 
991  do
992  {
993  RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
994  eptrb, 0);
995  if (rrc == MATCH_MATCH) break;
996  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
997  ecode += GET(ecode,1);
998  }
999  while (*ecode == OP_ALT);
1000 
1001  /* If hit the end of the group (which could be repeated), fail */
1002 
1003  if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1004 
1005  /* Continue as from after the assertion, updating the offsets high water
1006  mark, since extracts may have been taken. */
1007 
1008  do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1009 
1010  offset_top = md->end_offset_top;
1011  eptr = md->end_match_ptr;
1012 
1013  /* For a non-repeating ket, just continue at this level. This also
1014  happens for a repeating ket if no characters were matched in the group.
1015  This is the forcible breaking of infinite loops as implemented in Perl
1016  5.005. If there is an options reset, it will get obeyed in the normal
1017  course of events. */
1018 
1019  if (*ecode == OP_KET || eptr == saved_eptr)
1020  {
1021  ecode += 1+LINK_SIZE;
1022  break;
1023  }
1024 
1025  /* The repeating kets try the rest of the pattern or restart from the
1026  preceding bracket, in the appropriate order. The second "call" of match()
1027  uses tail recursion, to avoid using another stack frame. We need to reset
1028  any options that changed within the bracket before re-running it, so
1029  check the next opcode. */
1030 
1031  if (ecode[1+LINK_SIZE] == OP_OPT)
1032  {
1033  ims = (ims & ~PCRE_IMS) | ecode[4];
1034  DPRINTF(("ims set to %02lx at group repeat\n", ims));
1035  }
1036 
1037  if (*ecode == OP_KETRMIN)
1038  {
1039  RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1040  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1041  ecode = prev;
1042  flags = match_tail_recursed;
1043  goto TAIL_RECURSE;
1044  }
1045  else /* OP_KETRMAX */
1046  {
1047  RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1048  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1049  ecode += 1 + LINK_SIZE;
1050  flags = match_tail_recursed;
1051  goto TAIL_RECURSE;
1052  }
1053  /* Control never gets here */
1054 
1055  /* An alternation is the end of a branch; scan along to find the end of the
1056  bracketed group and go to there. */
1057 
1058  case OP_ALT:
1059  do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1060  break;
1061 
1062  /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1063  that it may occur zero times. It may repeat infinitely, or not at all -
1064  i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1065  repeat limits are compiled as a number of copies, with the optional ones
1066  preceded by BRAZERO or BRAMINZERO. */
1067 
1068  case OP_BRAZERO:
1069  {
1070  next = ecode+1;
1071  RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1072  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1073  do next += GET(next,1); while (*next == OP_ALT);
1074  ecode = next + 1 + LINK_SIZE;
1075  }
1076  break;
1077 
1078  case OP_BRAMINZERO:
1079  {
1080  next = ecode+1;
1081  do next += GET(next, 1); while (*next == OP_ALT);
1082  RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1083  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1084  ecode++;
1085  }
1086  break;
1087 
1088  /* End of a group, repeated or non-repeating. */
1089 
1090  case OP_KET:
1091  case OP_KETRMIN:
1092  case OP_KETRMAX:
1093  prev = ecode - GET(ecode, 1);
1094 
1095  /* If this was a group that remembered the subject start, in order to break
1096  infinite repeats of empty string matches, retrieve the subject start from
1097  the chain. Otherwise, set it NULL. */
1098 
1099  if (*prev >= OP_SBRA)
1100  {
1101  saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1102  eptrb = eptrb->epb_prev; /* Backup to previous group */
1103  }
1104  else saved_eptr = NULL;
1105 
1106  /* If we are at the end of an assertion group, stop matching and return
1107  MATCH_MATCH, but record the current high water mark for use by positive
1108  assertions. Do this also for the "once" (atomic) groups. */
1109 
1110  if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1111  *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1112  *prev == OP_ONCE)
1113  {
1114  md->end_match_ptr = eptr; /* For ONCE */
1115  md->end_offset_top = offset_top;
1117  }
1118 
1119  /* For capturing groups we have to check the group number back at the start
1120  and if necessary complete handling an extraction by setting the offsets and
1121  bumping the high water mark. Note that whole-pattern recursion is coded as
1122  a recurse into group 0, so it won't be picked up here. Instead, we catch it
1123  when the OP_END is reached. Other recursion is handled here. */
1124 
1125  if (*prev == OP_CBRA || *prev == OP_SCBRA)
1126  {
1127  number = GET2(prev, 1+LINK_SIZE);
1128  offset = number << 1;
1129 
1130 #ifdef DEBUG
1131  printf("end bracket %d", number);
1132  printf("\n");
1133 #endif
1134 
1135  md->capture_last = number;
1136  if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1137  {
1138  md->offset_vector[offset] =
1139  md->offset_vector[md->offset_end - number];
1140  md->offset_vector[offset+1] = eptr - md->start_subject;
1141  if (offset_top <= offset) offset_top = offset + 2;
1142  }
1143 
1144  /* Handle a recursively called group. Restore the offsets
1145  appropriately and continue from after the call. */
1146 
1147  if (md->recursive != NULL && md->recursive->group_num == number)
1148  {
1149  recursion_info *rec = md->recursive;
1150  DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1151  md->recursive = rec->prevrec;
1152  md->start_match = rec->save_start;
1153  memcpy(md->offset_vector, rec->offset_save,
1154  rec->saved_max * sizeof(int));
1155  ecode = rec->after_call;
1156  ims = original_ims;
1157  break;
1158  }
1159  }
1160 
1161  /* For both capturing and non-capturing groups, reset the value of the ims
1162  flags, in case they got changed during the group. */
1163 
1164  ims = original_ims;
1165  DPRINTF(("ims reset to %02lx\n", ims));
1166 
1167  /* For a non-repeating ket, just continue at this level. This also
1168  happens for a repeating ket if no characters were matched in the group.
1169  This is the forcible breaking of infinite loops as implemented in Perl
1170  5.005. If there is an options reset, it will get obeyed in the normal
1171  course of events. */
1172 
1173  if (*ecode == OP_KET || eptr == saved_eptr)
1174  {
1175  ecode += 1 + LINK_SIZE;
1176  break;
1177  }
1178 
1179  /* The repeating kets try the rest of the pattern or restart from the
1180  preceding bracket, in the appropriate order. In the second case, we can use
1181  tail recursion to avoid using another stack frame. */
1182 
1183  flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1184 
1185  if (*ecode == OP_KETRMIN)
1186  {
1187  RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1188  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1189  ecode = prev;
1190  flags |= match_tail_recursed;
1191  goto TAIL_RECURSE;
1192  }
1193  else /* OP_KETRMAX */
1194  {
1195  RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1196  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1197  ecode += 1 + LINK_SIZE;
1198  flags = match_tail_recursed;
1199  goto TAIL_RECURSE;
1200  }
1201  /* Control never gets here */
1202 
1203  /* Start of subject unless notbol, or after internal newline if multiline */
1204 
1205  case OP_CIRC:
1206  if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1207  if ((ims & PCRE_MULTILINE) != 0)
1208  {
1209  if (eptr != md->start_subject &&
1210  (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1212  ecode++;
1213  break;
1214  }
1215  /* ... else fall through */
1216 
1217  /* Start of subject assertion */
1218 
1219  case OP_SOD:
1220  if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1221  ecode++;
1222  break;
1223 
1224  /* Start of match assertion */
1225 
1226  case OP_SOM:
1227  if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1228  ecode++;
1229  break;
1230 
1231  /* Assert before internal newline if multiline, or before a terminating
1232  newline unless endonly is set, else end of subject unless noteol is set. */
1233 
1234  case OP_DOLL:
1235  if ((ims & PCRE_MULTILINE) != 0)
1236  {
1237  if (eptr < md->end_subject)
1238  { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1239  else
1240  { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1241  ecode++;
1242  break;
1243  }
1244  else
1245  {
1246  if (md->noteol) RRETURN(MATCH_NOMATCH);
1247  if (!md->endonly)
1248  {
1249  if (eptr != md->end_subject &&
1250  (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1252  ecode++;
1253  break;
1254  }
1255  }
1256  /* ... else fall through for endonly */
1257 
1258  /* End of subject assertion (\z) */
1259 
1260  case OP_EOD:
1261  if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1262  ecode++;
1263  break;
1264 
1265  /* End of subject or ending \n assertion (\Z) */
1266 
1267  case OP_EODN:
1268  if (eptr != md->end_subject &&
1269  (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1271  ecode++;
1272  break;
1273 
1274  /* Word boundary assertions */
1275 
1276  case OP_NOT_WORD_BOUNDARY:
1277  case OP_WORD_BOUNDARY:
1278  {
1279 
1280  /* Find out if the previous and current characters are "word" characters.
1281  It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1282  be "non-word" characters. */
1283 
1284 #ifdef SUPPORT_UTF8
1285  if (utf8)
1286  {
1287  if (eptr == md->start_subject) prev_is_word = FALSE; else
1288  {
1289  const uschar *lastptr = eptr - 1;
1290  while((*lastptr & 0xc0) == 0x80) lastptr--;
1291  GETCHAR(c, lastptr);
1292  prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1293  }
1294  if (eptr >= md->end_subject) cur_is_word = FALSE; else
1295  {
1296  GETCHAR(c, eptr);
1297  cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1298  }
1299  }
1300  else
1301 #endif
1302 
1303  /* More streamlined when not in UTF-8 mode */
1304 
1305  {
1306  prev_is_word = (eptr != md->start_subject) &&
1307  ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1308  cur_is_word = (eptr < md->end_subject) &&
1309  ((md->ctypes[*eptr] & ctype_word) != 0);
1310  }
1311 
1312  /* Now see if the situation is what we want */
1313 
1314  if ((*ecode++ == OP_WORD_BOUNDARY)?
1315  cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1317  }
1318  break;
1319 
1320  /* Match a single character type; inline for speed */
1321 
1322  case OP_ANY:
1323  if ((ims & PCRE_DOTALL) == 0)
1324  {
1325  if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1326  }
1327  if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1328  if (utf8)
1329  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1330  ecode++;
1331  break;
1332 
1333  /* Match a single byte, even in UTF-8 mode. This opcode really does match
1334  any byte, even newline, independent of the setting of PCRE_DOTALL. */
1335 
1336  case OP_ANYBYTE:
1337  if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1338  ecode++;
1339  break;
1340 
1341  case OP_NOT_DIGIT:
1342  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1343  GETCHARINCTEST(c, eptr);
1344  if (
1345 #ifdef SUPPORT_UTF8
1346  c < 256 &&
1347 #endif
1348  (md->ctypes[c] & ctype_digit) != 0
1349  )
1351  ecode++;
1352  break;
1353 
1354  case OP_DIGIT:
1355  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1356  GETCHARINCTEST(c, eptr);
1357  if (
1358 #ifdef SUPPORT_UTF8
1359  c >= 256 ||
1360 #endif
1361  (md->ctypes[c] & ctype_digit) == 0
1362  )
1364  ecode++;
1365  break;
1366 
1367  case OP_NOT_WHITESPACE:
1368  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1369  GETCHARINCTEST(c, eptr);
1370  if (
1371 #ifdef SUPPORT_UTF8
1372  c < 256 &&
1373 #endif
1374  (md->ctypes[c] & ctype_space) != 0
1375  )
1377  ecode++;
1378  break;
1379 
1380  case OP_WHITESPACE:
1381  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1382  GETCHARINCTEST(c, eptr);
1383  if (
1384 #ifdef SUPPORT_UTF8
1385  c >= 256 ||
1386 #endif
1387  (md->ctypes[c] & ctype_space) == 0
1388  )
1390  ecode++;
1391  break;
1392 
1393  case OP_NOT_WORDCHAR:
1394  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1395  GETCHARINCTEST(c, eptr);
1396  if (
1397 #ifdef SUPPORT_UTF8
1398  c < 256 &&
1399 #endif
1400  (md->ctypes[c] & ctype_word) != 0
1401  )
1403  ecode++;
1404  break;
1405 
1406  case OP_WORDCHAR:
1407  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1408  GETCHARINCTEST(c, eptr);
1409  if (
1410 #ifdef SUPPORT_UTF8
1411  c >= 256 ||
1412 #endif
1413  (md->ctypes[c] & ctype_word) == 0
1414  )
1416  ecode++;
1417  break;
1418 
1419  case OP_ANYNL:
1420  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1421  GETCHARINCTEST(c, eptr);
1422  switch(c)
1423  {
1424  default: RRETURN(MATCH_NOMATCH);
1425  case 0x000d:
1426  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1427  break;
1428  case 0x000a:
1429  case 0x000b:
1430  case 0x000c:
1431  case 0x0085:
1432  case 0x2028:
1433  case 0x2029:
1434  break;
1435  }
1436  ecode++;
1437  break;
1438 
1439 #ifdef SUPPORT_UCP
1440  /* Check the next character by Unicode property. We will get here only
1441  if the support is in the binary; otherwise a compile-time error occurs. */
1442 
1443  case OP_PROP:
1444  case OP_NOTPROP:
1445  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446  GETCHARINCTEST(c, eptr);
1447  {
1448  int chartype, script;
1449  int category = _pcre_ucp_findprop(c, &chartype, &script);
1450 
1451  switch(ecode[1])
1452  {
1453  case PT_ANY:
1454  if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1455  break;
1456 
1457  case PT_LAMP:
1458  if ((chartype == ucp_Lu ||
1459  chartype == ucp_Ll ||
1460  chartype == ucp_Lt) == (op == OP_NOTPROP))
1462  break;
1463 
1464  case PT_GC:
1465  if ((ecode[2] != category) == (op == OP_PROP))
1467  break;
1468 
1469  case PT_PC:
1470  if ((ecode[2] != chartype) == (op == OP_PROP))
1472  break;
1473 
1474  case PT_SC:
1475  if ((ecode[2] != script) == (op == OP_PROP))
1477  break;
1478 
1479  default:
1481  }
1482 
1483  ecode += 3;
1484  }
1485  break;
1486 
1487  /* Match an extended Unicode sequence. We will get here only if the support
1488  is in the binary; otherwise a compile-time error occurs. */
1489 
1490  case OP_EXTUNI:
1491  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1492  GETCHARINCTEST(c, eptr);
1493  {
1494  int chartype, script;
1495  int category = _pcre_ucp_findprop(c, &chartype, &script);
1496  if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1497  while (eptr < md->end_subject)
1498  {
1499  int len = 1;
1500  if (!utf8) c = *eptr; else
1501  {
1502  GETCHARLEN(c, eptr, len);
1503  }
1504  category = _pcre_ucp_findprop(c, &chartype, &script);
1505  if (category != ucp_M) break;
1506  eptr += len;
1507  }
1508  }
1509  ecode++;
1510  break;
1511 #endif
1512 
1513 
1514  /* Match a back reference, possibly repeatedly. Look past the end of the
1515  item to see if there is repeat information following. The code is similar
1516  to that for character classes, but repeated for efficiency. Then obey
1517  similar code to character type repeats - written out again for speed.
1518  However, if the referenced string is the empty string, always treat
1519  it as matched, any number of times (otherwise there could be infinite
1520  loops). */
1521 
1522  case OP_REF:
1523  {
1524  offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1525  ecode += 3; /* Advance past item */
1526 
1527  /* If the reference is unset, set the length to be longer than the amount
1528  of subject left; this ensures that every attempt at a match fails. We
1529  can't just fail here, because of the possibility of quantifiers with zero
1530  minima. */
1531 
1532  length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1533  md->end_subject - eptr + 1 :
1534  md->offset_vector[offset+1] - md->offset_vector[offset];
1535 
1536  /* Set up for repetition, or handle the non-repeated case */
1537 
1538  switch (*ecode)
1539  {
1540  case OP_CRSTAR:
1541  case OP_CRMINSTAR:
1542  case OP_CRPLUS:
1543  case OP_CRMINPLUS:
1544  case OP_CRQUERY:
1545  case OP_CRMINQUERY:
1546  c = *ecode++ - OP_CRSTAR;
1547  minimize = (c & 1) != 0;
1548  min = rep_min[c]; /* Pick up values from tables; */
1549  max = rep_max[c]; /* zero for max => infinity */
1550  if (max == 0) max = INT_MAX;
1551  break;
1552 
1553  case OP_CRRANGE:
1554  case OP_CRMINRANGE:
1555  minimize = (*ecode == OP_CRMINRANGE);
1556  min = GET2(ecode, 1);
1557  max = GET2(ecode, 3);
1558  if (max == 0) max = INT_MAX;
1559  ecode += 5;
1560  break;
1561 
1562  default: /* No repeat follows */
1563  if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1564  eptr += length;
1565  continue; /* With the main loop */
1566  }
1567 
1568  /* If the length of the reference is zero, just continue with the
1569  main loop. */
1570 
1571  if (length == 0) continue;
1572 
1573  /* First, ensure the minimum number of matches are present. We get back
1574  the length of the reference string explicitly rather than passing the
1575  address of eptr, so that eptr can be a register variable. */
1576 
1577  for (i = 1; i <= min; i++)
1578  {
1579  if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1580  eptr += length;
1581  }
1582 
1583  /* If min = max, continue at the same level without recursion.
1584  They are not both allowed to be zero. */
1585 
1586  if (min == max) continue;
1587 
1588  /* If minimizing, keep trying and advancing the pointer */
1589 
1590  if (minimize)
1591  {
1592  for (fi = min;; fi++)
1593  {
1594  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1595  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1596  if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1598  eptr += length;
1599  }
1600  /* Control never gets here */
1601  }
1602 
1603  /* If maximizing, find the longest string and work backwards */
1604 
1605  else
1606  {
1607  pp = eptr;
1608  for (i = min; i < max; i++)
1609  {
1610  if (!match_ref(offset, eptr, length, md, ims)) break;
1611  eptr += length;
1612  }
1613  while (eptr >= pp)
1614  {
1615  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1616  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617  eptr -= length;
1618  }
1620  }
1621  }
1622  /* Control never gets here */
1623 
1624 
1625 
1626  /* Match a bit-mapped character class, possibly repeatedly. This op code is
1627  used when all the characters in the class have values in the range 0-255,
1628  and either the matching is caseful, or the characters are in the range
1629  0-127 when UTF-8 processing is enabled. The only difference between
1630  OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1631  encountered.
1632 
1633  First, look past the end of the item to see if there is repeat information
1634  following. Then obey similar code to character type repeats - written out
1635  again for speed. */
1636 
1637  case OP_NCLASS:
1638  case OP_CLASS:
1639  {
1640  data = ecode + 1; /* Save for matching */
1641  ecode += 33; /* Advance past the item */
1642 
1643  switch (*ecode)
1644  {
1645  case OP_CRSTAR:
1646  case OP_CRMINSTAR:
1647  case OP_CRPLUS:
1648  case OP_CRMINPLUS:
1649  case OP_CRQUERY:
1650  case OP_CRMINQUERY:
1651  c = *ecode++ - OP_CRSTAR;
1652  minimize = (c & 1) != 0;
1653  min = rep_min[c]; /* Pick up values from tables; */
1654  max = rep_max[c]; /* zero for max => infinity */
1655  if (max == 0) max = INT_MAX;
1656  break;
1657 
1658  case OP_CRRANGE:
1659  case OP_CRMINRANGE:
1660  minimize = (*ecode == OP_CRMINRANGE);
1661  min = GET2(ecode, 1);
1662  max = GET2(ecode, 3);
1663  if (max == 0) max = INT_MAX;
1664  ecode += 5;
1665  break;
1666 
1667  default: /* No repeat follows */
1668  min = max = 1;
1669  break;
1670  }
1671 
1672  /* First, ensure the minimum number of matches are present. */
1673 
1674 #ifdef SUPPORT_UTF8
1675  /* UTF-8 mode */
1676  if (utf8)
1677  {
1678  for (i = 1; i <= min; i++)
1679  {
1680  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1681  GETCHARINC(c, eptr);
1682  if (c > 255)
1683  {
1684  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1685  }
1686  else
1687  {
1688  if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1689  }
1690  }
1691  }
1692  else
1693 #endif
1694  /* Not UTF-8 mode */
1695  {
1696  for (i = 1; i <= min; i++)
1697  {
1698  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1699  c = *eptr++;
1700  if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1701  }
1702  }
1703 
1704  /* If max == min we can continue with the main loop without the
1705  need to recurse. */
1706 
1707  if (min == max) continue;
1708 
1709  /* If minimizing, keep testing the rest of the expression and advancing
1710  the pointer while it matches the class. */
1711 
1712  if (minimize)
1713  {
1714 #ifdef SUPPORT_UTF8
1715  /* UTF-8 mode */
1716  if (utf8)
1717  {
1718  for (fi = min;; fi++)
1719  {
1720  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1721  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1722  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1723  GETCHARINC(c, eptr);
1724  if (c > 255)
1725  {
1726  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1727  }
1728  else
1729  {
1730  if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1731  }
1732  }
1733  }
1734  else
1735 #endif
1736  /* Not UTF-8 mode */
1737  {
1738  for (fi = min;; fi++)
1739  {
1740  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1741  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1742  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1743  c = *eptr++;
1744  if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1745  }
1746  }
1747  /* Control never gets here */
1748  }
1749 
1750  /* If maximizing, find the longest possible run, then work backwards. */
1751 
1752  else
1753  {
1754  pp = eptr;
1755 
1756 #ifdef SUPPORT_UTF8
1757  /* UTF-8 mode */
1758  if (utf8)
1759  {
1760  for (i = min; i < max; i++)
1761  {
1762  int len = 1;
1763  if (eptr >= md->end_subject) break;
1764  GETCHARLEN(c, eptr, len);
1765  if (c > 255)
1766  {
1767  if (op == OP_CLASS) break;
1768  }
1769  else
1770  {
1771  if ((data[c/8] & (1 << (c&7))) == 0) break;
1772  }
1773  eptr += len;
1774  }
1775  for (;;)
1776  {
1777  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1778  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1779  if (eptr-- == pp) break; /* Stop if tried at original pos */
1780  BACKCHAR(eptr);
1781  }
1782  }
1783  else
1784 #endif
1785  /* Not UTF-8 mode */
1786  {
1787  for (i = min; i < max; i++)
1788  {
1789  if (eptr >= md->end_subject) break;
1790  c = *eptr;
1791  if ((data[c/8] & (1 << (c&7))) == 0) break;
1792  eptr++;
1793  }
1794  while (eptr >= pp)
1795  {
1796  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1797  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1798  eptr--;
1799  }
1800  }
1801 
1803  }
1804  }
1805  /* Control never gets here */
1806 
1807 
1808  /* Match an extended character class. This opcode is encountered only
1809  in UTF-8 mode, because that's the only time it is compiled. */
1810 
1811 #ifdef SUPPORT_UTF8
1812  case OP_XCLASS:
1813  {
1814  data = ecode + 1 + LINK_SIZE; /* Save for matching */
1815  ecode += GET(ecode, 1); /* Advance past the item */
1816 
1817  switch (*ecode)
1818  {
1819  case OP_CRSTAR:
1820  case OP_CRMINSTAR:
1821  case OP_CRPLUS:
1822  case OP_CRMINPLUS:
1823  case OP_CRQUERY:
1824  case OP_CRMINQUERY:
1825  c = *ecode++ - OP_CRSTAR;
1826  minimize = (c & 1) != 0;
1827  min = rep_min[c]; /* Pick up values from tables; */
1828  max = rep_max[c]; /* zero for max => infinity */
1829  if (max == 0) max = INT_MAX;
1830  break;
1831 
1832  case OP_CRRANGE:
1833  case OP_CRMINRANGE:
1834  minimize = (*ecode == OP_CRMINRANGE);
1835  min = GET2(ecode, 1);
1836  max = GET2(ecode, 3);
1837  if (max == 0) max = INT_MAX;
1838  ecode += 5;
1839  break;
1840 
1841  default: /* No repeat follows */
1842  min = max = 1;
1843  break;
1844  }
1845 
1846  /* First, ensure the minimum number of matches are present. */
1847 
1848  for (i = 1; i <= min; i++)
1849  {
1850  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1851  GETCHARINC(c, eptr);
1852  if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1853  }
1854 
1855  /* If max == min we can continue with the main loop without the
1856  need to recurse. */
1857 
1858  if (min == max) continue;
1859 
1860  /* If minimizing, keep testing the rest of the expression and advancing
1861  the pointer while it matches the class. */
1862 
1863  if (minimize)
1864  {
1865  for (fi = min;; fi++)
1866  {
1867  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1868  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1869  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1870  GETCHARINC(c, eptr);
1871  if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1872  }
1873  /* Control never gets here */
1874  }
1875 
1876  /* If maximizing, find the longest possible run, then work backwards. */
1877 
1878  else
1879  {
1880  pp = eptr;
1881  for (i = min; i < max; i++)
1882  {
1883  int len = 1;
1884  if (eptr >= md->end_subject) break;
1885  GETCHARLEN(c, eptr, len);
1886  if (!_pcre_xclass(c, data)) break;
1887  eptr += len;
1888  }
1889  for(;;)
1890  {
1891  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1892  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1893  if (eptr-- == pp) break; /* Stop if tried at original pos */
1894  BACKCHAR(eptr)
1895  }
1897  }
1898 
1899  /* Control never gets here */
1900  }
1901 #endif /* End of XCLASS */
1902 
1903  /* Match a single character, casefully */
1904 
1905  case OP_CHAR:
1906 #ifdef SUPPORT_UTF8
1907  if (utf8)
1908  {
1909  length = 1;
1910  ecode++;
1911  GETCHARLEN(fc, ecode, length);
1912  if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1913  while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1914  }
1915  else
1916 #endif
1917 
1918  /* Non-UTF-8 mode */
1919  {
1920  if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1921  if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1922  ecode += 2;
1923  }
1924  break;
1925 
1926  /* Match a single character, caselessly */
1927 
1928  case OP_CHARNC:
1929 #ifdef SUPPORT_UTF8
1930  if (utf8)
1931  {
1932  length = 1;
1933  ecode++;
1934  GETCHARLEN(fc, ecode, length);
1935 
1936  if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1937 
1938  /* If the pattern character's value is < 128, we have only one byte, and
1939  can use the fast lookup table. */
1940 
1941  if (fc < 128)
1942  {
1943  if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1944  }
1945 
1946  /* Otherwise we must pick up the subject character */
1947 
1948  else
1949  {
1950  unsigned int dc;
1951  GETCHARINC(dc, eptr);
1952  ecode += length;
1953 
1954  /* If we have Unicode property support, we can use it to test the other
1955  case of the character, if there is one. */
1956 
1957  if (fc != dc)
1958  {
1959 #ifdef SUPPORT_UCP
1960  if (dc != _pcre_ucp_othercase(fc))
1961 #endif
1963  }
1964  }
1965  }
1966  else
1967 #endif /* SUPPORT_UTF8 */
1968 
1969  /* Non-UTF-8 mode */
1970  {
1971  if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1972  if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1973  ecode += 2;
1974  }
1975  break;
1976 
1977  /* Match a single character repeatedly. */
1978 
1979  case OP_EXACT:
1980  min = max = GET2(ecode, 1);
1981  ecode += 3;
1982  goto REPEATCHAR;
1983 
1984  case OP_POSUPTO:
1985  possessive = TRUE;
1986  /* Fall through */
1987 
1988  case OP_UPTO:
1989  case OP_MINUPTO:
1990  min = 0;
1991  max = GET2(ecode, 1);
1992  minimize = *ecode == OP_MINUPTO;
1993  ecode += 3;
1994  goto REPEATCHAR;
1995 
1996  case OP_POSSTAR:
1997  possessive = TRUE;
1998  min = 0;
1999  max = INT_MAX;
2000  ecode++;
2001  goto REPEATCHAR;
2002 
2003  case OP_POSPLUS:
2004  possessive = TRUE;
2005  min = 1;
2006  max = INT_MAX;
2007  ecode++;
2008  goto REPEATCHAR;
2009 
2010  case OP_POSQUERY:
2011  possessive = TRUE;
2012  min = 0;
2013  max = 1;
2014  ecode++;
2015  goto REPEATCHAR;
2016 
2017  case OP_STAR:
2018  case OP_MINSTAR:
2019  case OP_PLUS:
2020  case OP_MINPLUS:
2021  case OP_QUERY:
2022  case OP_MINQUERY:
2023  c = *ecode++ - OP_STAR;
2024  minimize = (c & 1) != 0;
2025  min = rep_min[c]; /* Pick up values from tables; */
2026  max = rep_max[c]; /* zero for max => infinity */
2027  if (max == 0) max = INT_MAX;
2028 
2029  /* Common code for all repeated single-character matches. We can give
2030  up quickly if there are fewer than the minimum number of characters left in
2031  the subject. */
2032 
2033  REPEATCHAR:
2034 #ifdef SUPPORT_UTF8
2035  if (utf8)
2036  {
2037  length = 1;
2038  charptr = ecode;
2039  GETCHARLEN(fc, ecode, length);
2040  if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2041  ecode += length;
2042 
2043  /* Handle multibyte character matching specially here. There is
2044  support for caseless matching if UCP support is present. */
2045 
2046  if (length > 1)
2047  {
2048  int oclength = 0;
2049  uschar occhars[8];
2050 
2051 #ifdef SUPPORT_UCP
2052  unsigned int othercase;
2053  if ((ims & PCRE_CASELESS) != 0 &&
2054  (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2055  oclength = _pcre_ord2utf8(othercase, occhars);
2056 #endif /* SUPPORT_UCP */
2057 
2058  for (i = 1; i <= min; i++)
2059  {
2060  if (memcmp(eptr, charptr, length) == 0) eptr += length;
2061  /* Need braces because of following else */
2062  else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2063  else
2064  {
2065  if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2066  eptr += oclength;
2067  }
2068  }
2069 
2070  if (min == max) continue;
2071 
2072  if (minimize)
2073  {
2074  for (fi = min;; fi++)
2075  {
2076  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2077  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2078  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2079  if (memcmp(eptr, charptr, length) == 0) eptr += length;
2080  /* Need braces because of following else */
2081  else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2082  else
2083  {
2084  if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2085  eptr += oclength;
2086  }
2087  }
2088  /* Control never gets here */
2089  }
2090 
2091  else /* Maximize */
2092  {
2093  pp = eptr;
2094  for (i = min; i < max; i++)
2095  {
2096  if (eptr > md->end_subject - length) break;
2097  if (memcmp(eptr, charptr, length) == 0) eptr += length;
2098  else if (oclength == 0) break;
2099  else
2100  {
2101  if (memcmp(eptr, occhars, oclength) != 0) break;
2102  eptr += oclength;
2103  }
2104  }
2105 
2106  if (possessive) continue;
2107  while (eptr >= pp)
2108  {
2109  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2110  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2111  eptr -= length;
2112  }
2114  }
2115  /* Control never gets here */
2116  }
2117 
2118  /* If the length of a UTF-8 character is 1, we fall through here, and
2119  obey the code as for non-UTF-8 characters below, though in this case the
2120  value of fc will always be < 128. */
2121  }
2122  else
2123 #endif /* SUPPORT_UTF8 */
2124 
2125  /* When not in UTF-8 mode, load a single-byte character. */
2126  {
2127  if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2128  fc = *ecode++;
2129  }
2130 
2131  /* The value of fc at this point is always less than 256, though we may or
2132  may not be in UTF-8 mode. The code is duplicated for the caseless and
2133  caseful cases, for speed, since matching characters is likely to be quite
2134  common. First, ensure the minimum number of matches are present. If min =
2135  max, continue at the same level without recursing. Otherwise, if
2136  minimizing, keep trying the rest of the expression and advancing one
2137  matching character if failing, up to the maximum. Alternatively, if
2138  maximizing, find the maximum number of characters and work backwards. */
2139 
2140  DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2141  max, eptr));
2142 
2143  if ((ims & PCRE_CASELESS) != 0)
2144  {
2145  fc = md->lcc[fc];
2146  for (i = 1; i <= min; i++)
2147  if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2148  if (min == max) continue;
2149  if (minimize)
2150  {
2151  for (fi = min;; fi++)
2152  {
2153  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2154  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2155  if (fi >= max || eptr >= md->end_subject ||
2156  fc != md->lcc[*eptr++])
2158  }
2159  /* Control never gets here */
2160  }
2161  else /* Maximize */
2162  {
2163  pp = eptr;
2164  for (i = min; i < max; i++)
2165  {
2166  if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2167  eptr++;
2168  }
2169  if (possessive) continue;
2170  while (eptr >= pp)
2171  {
2172  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2173  eptr--;
2174  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2175  }
2177  }
2178  /* Control never gets here */
2179  }
2180 
2181  /* Caseful comparisons (includes all multi-byte characters) */
2182 
2183  else
2184  {
2185  for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2186  if (min == max) continue;
2187  if (minimize)
2188  {
2189  for (fi = min;; fi++)
2190  {
2191  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2192  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2193  if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2195  }
2196  /* Control never gets here */
2197  }
2198  else /* Maximize */
2199  {
2200  pp = eptr;
2201  for (i = min; i < max; i++)
2202  {
2203  if (eptr >= md->end_subject || fc != *eptr) break;
2204  eptr++;
2205  }
2206  if (possessive) continue;
2207  while (eptr >= pp)
2208  {
2209  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2210  eptr--;
2211  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212  }
2214  }
2215  }
2216  /* Control never gets here */
2217 
2218  /* Match a negated single one-byte character. The pattern character is one
2219  byte, but the subject character we are checking can be multibyte. */
2220 
2221  case OP_NOT:
2222  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2223  ecode++;
2224  GETCHARINCTEST(c, eptr);
2225  if ((ims & PCRE_CASELESS) != 0)
2226  {
2227 #ifdef SUPPORT_UTF8
2228  if (c < 256)
2229 #endif
2230  c = md->lcc[c];
2231  if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2232  }
2233  else
2234  {
2235  if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2236  }
2237  break;
2238 
2239  /* Match a negated single one-byte character repeatedly. This is almost a
2240  repeat of the code for a repeated single character, but I haven't found a
2241  nice way of commoning these up that doesn't require a test of the
2242  positive/negative option for each character match. Maybe that wouldn't add
2243  very much to the time taken, but character matching *is* what this is all
2244  about... */
2245 
2246  case OP_NOTEXACT:
2247  min = max = GET2(ecode, 1);
2248  ecode += 3;
2249  goto REPEATNOTCHAR;
2250 
2251  case OP_NOTUPTO:
2252  case OP_NOTMINUPTO:
2253  min = 0;
2254  max = GET2(ecode, 1);
2255  minimize = *ecode == OP_NOTMINUPTO;
2256  ecode += 3;
2257  goto REPEATNOTCHAR;
2258 
2259  case OP_NOTPOSSTAR:
2260  possessive = TRUE;
2261  min = 0;
2262  max = INT_MAX;
2263  ecode++;
2264  goto REPEATNOTCHAR;
2265 
2266  case OP_NOTPOSPLUS:
2267  possessive = TRUE;
2268  min = 1;
2269  max = INT_MAX;
2270  ecode++;
2271  goto REPEATNOTCHAR;
2272 
2273  case OP_NOTPOSQUERY:
2274  possessive = TRUE;
2275  min = 0;
2276  max = 1;
2277  ecode++;
2278  goto REPEATNOTCHAR;
2279 
2280  case OP_NOTPOSUPTO:
2281  possessive = TRUE;
2282  min = 0;
2283  max = GET2(ecode, 1);
2284  ecode += 3;
2285  goto REPEATNOTCHAR;
2286 
2287  case OP_NOTSTAR:
2288  case OP_NOTMINSTAR:
2289  case OP_NOTPLUS:
2290  case OP_NOTMINPLUS:
2291  case OP_NOTQUERY:
2292  case OP_NOTMINQUERY:
2293  c = *ecode++ - OP_NOTSTAR;
2294  minimize = (c & 1) != 0;
2295  min = rep_min[c]; /* Pick up values from tables; */
2296  max = rep_max[c]; /* zero for max => infinity */
2297  if (max == 0) max = INT_MAX;
2298 
2299  /* Common code for all repeated single-byte matches. We can give up quickly
2300  if there are fewer than the minimum number of bytes left in the
2301  subject. */
2302 
2303  REPEATNOTCHAR:
2304  if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2305  fc = *ecode++;
2306 
2307  /* The code is duplicated for the caseless and caseful cases, for speed,
2308  since matching characters is likely to be quite common. First, ensure the
2309  minimum number of matches are present. If min = max, continue at the same
2310  level without recursing. Otherwise, if minimizing, keep trying the rest of
2311  the expression and advancing one matching character if failing, up to the
2312  maximum. Alternatively, if maximizing, find the maximum number of
2313  characters and work backwards. */
2314 
2315  DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2316  max, eptr));
2317 
2318  if ((ims & PCRE_CASELESS) != 0)
2319  {
2320  fc = md->lcc[fc];
2321 
2322 #ifdef SUPPORT_UTF8
2323  /* UTF-8 mode */
2324  if (utf8)
2325  {
2326  register unsigned int d;
2327  for (i = 1; i <= min; i++)
2328  {
2329  GETCHARINC(d, eptr);
2330  if (d < 256) d = md->lcc[d];
2331  if (fc == d) RRETURN(MATCH_NOMATCH);
2332  }
2333  }
2334  else
2335 #endif
2336 
2337  /* Not UTF-8 mode */
2338  {
2339  for (i = 1; i <= min; i++)
2340  if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2341  }
2342 
2343  if (min == max) continue;
2344 
2345  if (minimize)
2346  {
2347 #ifdef SUPPORT_UTF8
2348  /* UTF-8 mode */
2349  if (utf8)
2350  {
2351  register unsigned int d;
2352  for (fi = min;; fi++)
2353  {
2354  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2355  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356  GETCHARINC(d, eptr);
2357  if (d < 256) d = md->lcc[d];
2358  if (fi >= max || eptr >= md->end_subject || fc == d)
2360  }
2361  }
2362  else
2363 #endif
2364  /* Not UTF-8 mode */
2365  {
2366  for (fi = min;; fi++)
2367  {
2368  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2369  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370  if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2372  }
2373  }
2374  /* Control never gets here */
2375  }
2376 
2377  /* Maximize case */
2378 
2379  else
2380  {
2381  pp = eptr;
2382 
2383 #ifdef SUPPORT_UTF8
2384  /* UTF-8 mode */
2385  if (utf8)
2386  {
2387  register unsigned int d;
2388  for (i = min; i < max; i++)
2389  {
2390  int len = 1;
2391  if (eptr >= md->end_subject) break;
2392  GETCHARLEN(d, eptr, len);
2393  if (d < 256) d = md->lcc[d];
2394  if (fc == d) break;
2395  eptr += len;
2396  }
2397  if (possessive) continue;
2398  for(;;)
2399  {
2400  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2401  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2402  if (eptr-- == pp) break; /* Stop if tried at original pos */
2403  BACKCHAR(eptr);
2404  }
2405  }
2406  else
2407 #endif
2408  /* Not UTF-8 mode */
2409  {
2410  for (i = min; i < max; i++)
2411  {
2412  if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2413  eptr++;
2414  }
2415  if (possessive) continue;
2416  while (eptr >= pp)
2417  {
2418  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2419  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2420  eptr--;
2421  }
2422  }
2423 
2425  }
2426  /* Control never gets here */
2427  }
2428 
2429  /* Caseful comparisons */
2430 
2431  else
2432  {
2433 #ifdef SUPPORT_UTF8
2434  /* UTF-8 mode */
2435  if (utf8)
2436  {
2437  register unsigned int d;
2438  for (i = 1; i <= min; i++)
2439  {
2440  GETCHARINC(d, eptr);
2441  if (fc == d) RRETURN(MATCH_NOMATCH);
2442  }
2443  }
2444  else
2445 #endif
2446  /* Not UTF-8 mode */
2447  {
2448  for (i = 1; i <= min; i++)
2449  if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2450  }
2451 
2452  if (min == max) continue;
2453 
2454  if (minimize)
2455  {
2456 #ifdef SUPPORT_UTF8
2457  /* UTF-8 mode */
2458  if (utf8)
2459  {
2460  register unsigned int d;
2461  for (fi = min;; fi++)
2462  {
2463  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2464  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2465  GETCHARINC(d, eptr);
2466  if (fi >= max || eptr >= md->end_subject || fc == d)
2468  }
2469  }
2470  else
2471 #endif
2472  /* Not UTF-8 mode */
2473  {
2474  for (fi = min;; fi++)
2475  {
2476  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2477  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2478  if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2480  }
2481  }
2482  /* Control never gets here */
2483  }
2484 
2485  /* Maximize case */
2486 
2487  else
2488  {
2489  pp = eptr;
2490 
2491 #ifdef SUPPORT_UTF8
2492  /* UTF-8 mode */
2493  if (utf8)
2494  {
2495  register unsigned int d;
2496  for (i = min; i < max; i++)
2497  {
2498  int len = 1;
2499  if (eptr >= md->end_subject) break;
2500  GETCHARLEN(d, eptr, len);
2501  if (fc == d) break;
2502  eptr += len;
2503  }
2504  if (possessive) continue;
2505  for(;;)
2506  {
2507  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2508  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2509  if (eptr-- == pp) break; /* Stop if tried at original pos */
2510  BACKCHAR(eptr);
2511  }
2512  }
2513  else
2514 #endif
2515  /* Not UTF-8 mode */
2516  {
2517  for (i = min; i < max; i++)
2518  {
2519  if (eptr >= md->end_subject || fc == *eptr) break;
2520  eptr++;
2521  }
2522  if (possessive) continue;
2523  while (eptr >= pp)
2524  {
2525  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2526  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527  eptr--;
2528  }
2529  }
2530 
2532  }
2533  }
2534  /* Control never gets here */
2535 
2536  /* Match a single character type repeatedly; several different opcodes
2537  share code. This is very similar to the code for single characters, but we
2538  repeat it in the interests of efficiency. */
2539 
2540  case OP_TYPEEXACT:
2541  min = max = GET2(ecode, 1);
2542  minimize = TRUE;
2543  ecode += 3;
2544  goto REPEATTYPE;
2545 
2546  case OP_TYPEUPTO:
2547  case OP_TYPEMINUPTO:
2548  min = 0;
2549  max = GET2(ecode, 1);
2550  minimize = *ecode == OP_TYPEMINUPTO;
2551  ecode += 3;
2552  goto REPEATTYPE;
2553 
2554  case OP_TYPEPOSSTAR:
2555  possessive = TRUE;
2556  min = 0;
2557  max = INT_MAX;
2558  ecode++;
2559  goto REPEATTYPE;
2560 
2561  case OP_TYPEPOSPLUS:
2562  possessive = TRUE;
2563  min = 1;
2564  max = INT_MAX;
2565  ecode++;
2566  goto REPEATTYPE;
2567 
2568  case OP_TYPEPOSQUERY:
2569  possessive = TRUE;
2570  min = 0;
2571  max = 1;
2572  ecode++;
2573  goto REPEATTYPE;
2574 
2575  case OP_TYPEPOSUPTO:
2576  possessive = TRUE;
2577  min = 0;
2578  max = GET2(ecode, 1);
2579  ecode += 3;
2580  goto REPEATTYPE;
2581 
2582  case OP_TYPESTAR:
2583  case OP_TYPEMINSTAR:
2584  case OP_TYPEPLUS:
2585  case OP_TYPEMINPLUS:
2586  case OP_TYPEQUERY:
2587  case OP_TYPEMINQUERY:
2588  c = *ecode++ - OP_TYPESTAR;
2589  minimize = (c & 1) != 0;
2590  min = rep_min[c]; /* Pick up values from tables; */
2591  max = rep_max[c]; /* zero for max => infinity */
2592  if (max == 0) max = INT_MAX;
2593 
2594  /* Common code for all repeated single character type matches. Note that
2595  in UTF-8 mode, '.' matches a character of any length, but for the other
2596  character types, the valid characters are all one-byte long. */
2597 
2598  REPEATTYPE:
2599  ctype = *ecode++; /* Code for the character type */
2600 
2601 #ifdef SUPPORT_UCP
2602  if (ctype == OP_PROP || ctype == OP_NOTPROP)
2603  {
2604  prop_fail_result = ctype == OP_NOTPROP;
2605  prop_type = *ecode++;
2606  prop_value = *ecode++;
2607  }
2608  else prop_type = -1;
2609 #endif
2610 
2611  /* First, ensure the minimum number of matches are present. Use inline
2612  code for maximizing the speed, and do the type test once at the start
2613  (i.e. keep it out of the loop). Also we can test that there are at least
2614  the minimum number of bytes before we start. This isn't as effective in
2615  UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2616  is tidier. Also separate the UCP code, which can be the same for both UTF-8
2617  and single-bytes. */
2618 
2619  if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2620  if (min > 0)
2621  {
2622 #ifdef SUPPORT_UCP
2623  if (prop_type >= 0)
2624  {
2625  switch(prop_type)
2626  {
2627  case PT_ANY:
2628  if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2629  for (i = 1; i <= min; i++)
2630  {
2631  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2632  GETCHARINC(c, eptr);
2633  }
2634  break;
2635 
2636  case PT_LAMP:
2637  for (i = 1; i <= min; i++)
2638  {
2639  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2640  GETCHARINC(c, eptr);
2641  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2642  if ((prop_chartype == ucp_Lu ||
2643  prop_chartype == ucp_Ll ||
2644  prop_chartype == ucp_Lt) == prop_fail_result)
2646  }
2647  break;
2648 
2649  case PT_GC:
2650  for (i = 1; i <= min; i++)
2651  {
2652  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2653  GETCHARINC(c, eptr);
2654  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2655  if ((prop_category == prop_value) == prop_fail_result)
2657  }
2658  break;
2659 
2660  case PT_PC:
2661  for (i = 1; i <= min; i++)
2662  {
2663  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2664  GETCHARINC(c, eptr);
2665  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2666  if ((prop_chartype == prop_value) == prop_fail_result)
2668  }
2669  break;
2670 
2671  case PT_SC:
2672  for (i = 1; i <= min; i++)
2673  {
2674  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2675  GETCHARINC(c, eptr);
2676  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2677  if ((prop_script == prop_value) == prop_fail_result)
2679  }
2680  break;
2681 
2682  default:
2684  }
2685  }
2686 
2687  /* Match extended Unicode sequences. We will get here only if the
2688  support is in the binary; otherwise a compile-time error occurs. */
2689 
2690  else if (ctype == OP_EXTUNI)
2691  {
2692  for (i = 1; i <= min; i++)
2693  {
2694  GETCHARINCTEST(c, eptr);
2695  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2696  if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2697  while (eptr < md->end_subject)
2698  {
2699  int len = 1;
2700  if (!utf8) c = *eptr; else
2701  {
2702  GETCHARLEN(c, eptr, len);
2703  }
2704  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2705  if (prop_category != ucp_M) break;
2706  eptr += len;
2707  }
2708  }
2709  }
2710 
2711  else
2712 #endif /* SUPPORT_UCP */
2713 
2714 /* Handle all other cases when the coding is UTF-8 */
2715 
2716 #ifdef SUPPORT_UTF8
2717  if (utf8) switch(ctype)
2718  {
2719  case OP_ANY:
2720  for (i = 1; i <= min; i++)
2721  {
2722  if (eptr >= md->end_subject ||
2723  ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2725  eptr++;
2726  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2727  }
2728  break;
2729 
2730  case OP_ANYBYTE:
2731  eptr += min;
2732  break;
2733 
2734  case OP_ANYNL:
2735  for (i = 1; i <= min; i++)
2736  {
2737  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2738  GETCHARINC(c, eptr);
2739  switch(c)
2740  {
2741  default: RRETURN(MATCH_NOMATCH);
2742  case 0x000d:
2743  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2744  break;
2745  case 0x000a:
2746  case 0x000b:
2747  case 0x000c:
2748  case 0x0085:
2749  case 0x2028:
2750  case 0x2029:
2751  break;
2752  }
2753  }
2754  break;
2755 
2756  case OP_NOT_DIGIT:
2757  for (i = 1; i <= min; i++)
2758  {
2759  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2760  GETCHARINC(c, eptr);
2761  if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
2763  }
2764  break;
2765 
2766  case OP_DIGIT:
2767  for (i = 1; i <= min; i++)
2768  {
2769  if (eptr >= md->end_subject ||
2770  *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
2772  /* No need to skip more bytes - we know it's a 1-byte character */
2773  }
2774  break;
2775 
2776  case OP_NOT_WHITESPACE:
2777  for (i = 1; i <= min; i++)
2778  {
2779  if (eptr >= md->end_subject ||
2780  (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
2782  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2783  }
2784  break;
2785 
2786  case OP_WHITESPACE:
2787  for (i = 1; i <= min; i++)
2788  {
2789  if (eptr >= md->end_subject ||
2790  *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
2792  /* No need to skip more bytes - we know it's a 1-byte character */
2793  }
2794  break;
2795 
2796  case OP_NOT_WORDCHAR:
2797  for (i = 1; i <= min; i++)
2798  {
2799  if (eptr >= md->end_subject ||
2800  (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
2802  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2803  }
2804  break;
2805 
2806  case OP_WORDCHAR:
2807  for (i = 1; i <= min; i++)
2808  {
2809  if (eptr >= md->end_subject ||
2810  *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
2812  /* No need to skip more bytes - we know it's a 1-byte character */
2813  }
2814  break;
2815 
2816  default:
2818  } /* End switch(ctype) */
2819 
2820  else
2821 #endif /* SUPPORT_UTF8 */
2822 
2823  /* Code for the non-UTF-8 case for minimum matching of operators other
2824  than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2825  number of bytes present, as this was tested above. */
2826 
2827  switch(ctype)
2828  {
2829  case OP_ANY:
2830  if ((ims & PCRE_DOTALL) == 0)
2831  {
2832  for (i = 1; i <= min; i++)
2833  {
2834  if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2835  eptr++;
2836  }
2837  }
2838  else eptr += min;
2839  break;
2840 
2841  case OP_ANYBYTE:
2842  eptr += min;
2843  break;
2844 
2845  /* Because of the CRLF case, we can't assume the minimum number of
2846  bytes are present in this case. */
2847 
2848  case OP_ANYNL:
2849  for (i = 1; i <= min; i++)
2850  {
2851  if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2852  switch(*eptr++)
2853  {
2854  default: RRETURN(MATCH_NOMATCH);
2855  case 0x000d:
2856  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2857  break;
2858  case 0x000a:
2859  case 0x000b:
2860  case 0x000c:
2861  case 0x0085:
2862  break;
2863  }
2864  }
2865  break;
2866 
2867  case OP_NOT_DIGIT:
2868  for (i = 1; i <= min; i++)
2869  if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
2870  break;
2871 
2872  case OP_DIGIT:
2873  for (i = 1; i <= min; i++)
2874  if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
2875  break;
2876 
2877  case OP_NOT_WHITESPACE:
2878  for (i = 1; i <= min; i++)
2879  if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
2880  break;
2881 
2882  case OP_WHITESPACE:
2883  for (i = 1; i <= min; i++)
2884  if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
2885  break;
2886 
2887  case OP_NOT_WORDCHAR:
2888  for (i = 1; i <= min; i++)
2889  if ((md->ctypes[*eptr++] & ctype_word) != 0)
2891  break;
2892 
2893  case OP_WORDCHAR:
2894  for (i = 1; i <= min; i++)
2895  if ((md->ctypes[*eptr++] & ctype_word) == 0)
2897  break;
2898 
2899  default:
2901  }
2902  }
2903 
2904  /* If min = max, continue at the same level without recursing */
2905 
2906  if (min == max) continue;
2907 
2908  /* If minimizing, we have to test the rest of the pattern before each
2909  subsequent match. Again, separate the UTF-8 case for speed, and also
2910  separate the UCP cases. */
2911 
2912  if (minimize)
2913  {
2914 #ifdef SUPPORT_UCP
2915  if (prop_type >= 0)
2916  {
2917  switch(prop_type)
2918  {
2919  case PT_ANY:
2920  for (fi = min;; fi++)
2921  {
2922  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2923  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2924  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925  GETCHARINC(c, eptr);
2926  if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2927  }
2928  /* Control never gets here */
2929 
2930  case PT_LAMP:
2931  for (fi = min;; fi++)
2932  {
2933  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2934  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2935  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2936  GETCHARINC(c, eptr);
2937  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2938  if ((prop_chartype == ucp_Lu ||
2939  prop_chartype == ucp_Ll ||
2940  prop_chartype == ucp_Lt) == prop_fail_result)
2942  }
2943  /* Control never gets here */
2944 
2945  case PT_GC:
2946  for (fi = min;; fi++)
2947  {
2948  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2949  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2950  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951  GETCHARINC(c, eptr);
2952  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2953  if ((prop_category == prop_value) == prop_fail_result)
2955  }
2956  /* Control never gets here */
2957 
2958  case PT_PC:
2959  for (fi = min;; fi++)
2960  {
2961  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2962  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2963  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2964  GETCHARINC(c, eptr);
2965  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2966  if ((prop_chartype == prop_value) == prop_fail_result)
2968  }
2969  /* Control never gets here */
2970 
2971  case PT_SC:
2972  for (fi = min;; fi++)
2973  {
2974  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2975  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2976  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2977  GETCHARINC(c, eptr);
2978  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2979  if ((prop_script == prop_value) == prop_fail_result)
2981  }
2982  /* Control never gets here */
2983 
2984  default:
2986  }
2987  }
2988 
2989  /* Match extended Unicode sequences. We will get here only if the
2990  support is in the binary; otherwise a compile-time error occurs. */
2991 
2992  else if (ctype == OP_EXTUNI)
2993  {
2994  for (fi = min;; fi++)
2995  {
2996  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2997  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2998  if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2999  GETCHARINCTEST(c, eptr);
3000  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3001  if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3002  while (eptr < md->end_subject)
3003  {
3004  int len = 1;
3005  if (!utf8) c = *eptr; else
3006  {
3007  GETCHARLEN(c, eptr, len);
3008  }
3009  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3010  if (prop_category != ucp_M) break;
3011  eptr += len;
3012  }
3013  }
3014  }
3015 
3016  else
3017 #endif /* SUPPORT_UCP */
3018 
3019 #ifdef SUPPORT_UTF8
3020  /* UTF-8 mode */
3021  if (utf8)
3022  {
3023  for (fi = min;; fi++)
3024  {
3025  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3026  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027  if (fi >= max || eptr >= md->end_subject ||
3028  (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3029  IS_NEWLINE(eptr)))
3031 
3032  GETCHARINC(c, eptr);
3033  switch(ctype)
3034  {
3035  case OP_ANY: /* This is the DOTALL case */
3036  break;
3037 
3038  case OP_ANYBYTE:
3039  break;
3040 
3041  case OP_ANYNL:
3042  switch(c)
3043  {
3044  default: RRETURN(MATCH_NOMATCH);
3045  case 0x000d:
3046  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3047  break;
3048  case 0x000a:
3049  case 0x000b:
3050  case 0x000c:
3051  case 0x0085:
3052  case 0x2028:
3053  case 0x2029:
3054  break;
3055  }
3056  break;
3057 
3058  case OP_NOT_DIGIT:
3059  if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3061  break;
3062 
3063  case OP_DIGIT:
3064  if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3066  break;
3067 
3068  case OP_NOT_WHITESPACE:
3069  if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3071  break;
3072 
3073  case OP_WHITESPACE:
3074  if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3076  break;
3077 
3078  case OP_NOT_WORDCHAR:
3079  if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3081  break;
3082 
3083  case OP_WORDCHAR:
3084  if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3086  break;
3087 
3088  default:
3090  }
3091  }
3092  }
3093  else
3094 #endif
3095  /* Not UTF-8 mode */
3096  {
3097  for (fi = min;; fi++)
3098  {
3099  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3100  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3101  if (fi >= max || eptr >= md->end_subject ||
3102  ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3104 
3105  c = *eptr++;
3106  switch(ctype)
3107  {
3108  case OP_ANY: /* This is the DOTALL case */
3109  break;
3110 
3111  case OP_ANYBYTE:
3112  break;
3113 
3114  case OP_ANYNL:
3115  switch(c)
3116  {
3117  default: RRETURN(MATCH_NOMATCH);
3118  case 0x000d:
3119  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3120  break;
3121  case 0x000a:
3122  case 0x000b:
3123  case 0x000c:
3124  case 0x0085:
3125  break;
3126  }
3127  break;
3128 
3129  case OP_NOT_DIGIT:
3130  if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3131  break;
3132 
3133  case OP_DIGIT:
3134  if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3135  break;
3136 
3137  case OP_NOT_WHITESPACE:
3138  if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3139  break;
3140 
3141  case OP_WHITESPACE:
3142  if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3143  break;
3144 
3145  case OP_NOT_WORDCHAR:
3146  if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3147  break;
3148 
3149  case OP_WORDCHAR:
3150  if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3151  break;
3152 
3153  default:
3155  }
3156  }
3157  }
3158  /* Control never gets here */
3159  }
3160 
3161  /* If maximizing, it is worth using inline code for speed, doing the type
3162  test once at the start (i.e. keep it out of the loop). Again, keep the
3163  UTF-8 and UCP stuff separate. */
3164 
3165  else
3166  {
3167  pp = eptr; /* Remember where we started */
3168 
3169 #ifdef SUPPORT_UCP
3170  if (prop_type >= 0)
3171  {
3172  switch(prop_type)
3173  {
3174  case PT_ANY:
3175  for (i = min; i < max; i++)
3176  {
3177  int len = 1;
3178  if (eptr >= md->end_subject) break;
3179  GETCHARLEN(c, eptr, len);
3180  if (prop_fail_result) break;
3181  eptr+= len;
3182  }
3183  break;
3184 
3185  case PT_LAMP:
3186  for (i = min; i < max; i++)
3187  {
3188  int len = 1;
3189  if (eptr >= md->end_subject) break;
3190  GETCHARLEN(c, eptr, len);
3191  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3192  if ((prop_chartype == ucp_Lu ||
3193  prop_chartype == ucp_Ll ||
3194  prop_chartype == ucp_Lt) == prop_fail_result)
3195  break;
3196  eptr+= len;
3197  }
3198  break;
3199 
3200  case PT_GC:
3201  for (i = min; i < max; i++)
3202  {
3203  int len = 1;
3204  if (eptr >= md->end_subject) break;
3205  GETCHARLEN(c, eptr, len);
3206  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3207  if ((prop_category == prop_value) == prop_fail_result)
3208  break;
3209  eptr+= len;
3210  }
3211  break;
3212 
3213  case PT_PC:
3214  for (i = min; i < max; i++)
3215  {
3216  int len = 1;
3217  if (eptr >= md->end_subject) break;
3218  GETCHARLEN(c, eptr, len);
3219  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3220  if ((prop_chartype == prop_value) == prop_fail_result)
3221  break;
3222  eptr+= len;
3223  }
3224  break;
3225 
3226  case PT_SC:
3227  for (i = min; i < max; i++)
3228  {
3229  int len = 1;
3230  if (eptr >= md->end_subject) break;
3231  GETCHARLEN(c, eptr, len);
3232  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3233  if ((prop_script == prop_value) == prop_fail_result)
3234  break;
3235  eptr+= len;
3236  }
3237  break;
3238  }
3239 
3240  /* eptr is now past the end of the maximum run */
3241 
3242  if (possessive) continue;
3243  for(;;)
3244  {
3245  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3246  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3247  if (eptr-- == pp) break; /* Stop if tried at original pos */
3248  BACKCHAR(eptr);
3249  }
3250  }
3251 
3252  /* Match extended Unicode sequences. We will get here only if the
3253  support is in the binary; otherwise a compile-time error occurs. */
3254 
3255  else if (ctype == OP_EXTUNI)
3256  {
3257  for (i = min; i < max; i++)
3258  {
3259  if (eptr >= md->end_subject) break;
3260  GETCHARINCTEST(c, eptr);
3261  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3262  if (prop_category == ucp_M) break;
3263  while (eptr < md->end_subject)
3264  {
3265  int len = 1;
3266  if (!utf8) c = *eptr; else
3267  {
3268  GETCHARLEN(c, eptr, len);
3269  }
3270  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3271  if (prop_category != ucp_M) break;
3272  eptr += len;
3273  }
3274  }
3275 
3276  /* eptr is now past the end of the maximum run */
3277 
3278  if (possessive) continue;
3279  for(;;)
3280  {
3281  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3282  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3283  if (eptr-- == pp) break; /* Stop if tried at original pos */
3284  for (;;) /* Move back over one extended */
3285  {
3286  int len = 1;
3287  BACKCHAR(eptr);
3288  if (!utf8) c = *eptr; else
3289  {
3290  GETCHARLEN(c, eptr, len);
3291  }
3292  prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3293  if (prop_category != ucp_M) break;
3294  eptr--;
3295  }
3296  }
3297  }
3298 
3299  else
3300 #endif /* SUPPORT_UCP */
3301 
3302 #ifdef SUPPORT_UTF8
3303  /* UTF-8 mode */
3304 
3305  if (utf8)
3306  {
3307  switch(ctype)
3308  {
3309  case OP_ANY:
3310 
3311  /* Special code is required for UTF8, but when the maximum is
3312  unlimited we don't need it, so we repeat the non-UTF8 code. This is
3313  probably worth it, because .* is quite a common idiom. */
3314 
3315  if (max < INT_MAX)
3316  {
3317  if ((ims & PCRE_DOTALL) == 0)
3318  {
3319  for (i = min; i < max; i++)
3320  {
3321  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3322  eptr++;
3323  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3324  }
3325  }
3326  else
3327  {
3328  for (i = min; i < max; i++)
3329  {
3330  if (eptr >= md->end_subject) break;
3331  eptr++;
3332  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3333  }
3334  }
3335  }
3336 
3337  /* Handle unlimited UTF-8 repeat */
3338 
3339  else
3340  {
3341  if ((ims & PCRE_DOTALL) == 0)
3342  {
3343  for (i = min; i < max; i++)
3344  {
3345  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3346  eptr++;
3347  }
3348  break;
3349  }
3350  else
3351  {
3352  c = max - min;
3353  if (c > (unsigned int)(md->end_subject - eptr))
3354  c = md->end_subject - eptr;
3355  eptr += c;
3356  }
3357  }
3358  break;
3359 
3360  /* The byte case is the same as non-UTF8 */
3361 
3362  case OP_ANYBYTE:
3363  c = max - min;
3364  if (c > (unsigned int)(md->end_subject - eptr))
3365  c = md->end_subject - eptr;
3366  eptr += c;
3367  break;
3368 
3369  case OP_ANYNL:
3370  for (i = min; i < max; i++)
3371  {
3372  int len = 1;
3373  if (eptr >= md->end_subject) break;
3374  GETCHARLEN(c, eptr, len);
3375  if (c == 0x000d)
3376  {
3377  if (++eptr >= md->end_subject) break;
3378  if (*eptr == 0x000a) eptr++;
3379  }
3380  else
3381  {
3382  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3383  c != 0x0085 && c != 0x2028 && c != 0x2029)
3384  break;
3385  eptr += len;
3386  }
3387  }
3388  break;
3389 
3390  case OP_NOT_DIGIT:
3391  for (i = min; i < max; i++)
3392  {
3393  int len = 1;
3394  if (eptr >= md->end_subject) break;
3395  GETCHARLEN(c, eptr, len);
3396  if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3397  eptr+= len;
3398  }
3399  break;
3400 
3401  case OP_DIGIT:
3402  for (i = min; i < max; i++)
3403  {
3404  int len = 1;
3405  if (eptr >= md->end_subject) break;
3406  GETCHARLEN(c, eptr, len);
3407  if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3408  eptr+= len;
3409  }
3410  break;
3411 
3412  case OP_NOT_WHITESPACE:
3413  for (i = min; i < max; i++)
3414  {
3415  int len = 1;
3416  if (eptr >= md->end_subject) break;
3417  GETCHARLEN(c, eptr, len);
3418  if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3419  eptr+= len;
3420  }
3421  break;
3422 
3423  case OP_WHITESPACE:
3424  for (i = min; i < max; i++)
3425  {
3426  int len = 1;
3427  if (eptr >= md->end_subject) break;
3428  GETCHARLEN(c, eptr, len);
3429  if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3430  eptr+= len;
3431  }
3432  break;
3433 
3434  case OP_NOT_WORDCHAR:
3435  for (i = min; i < max; i++)
3436  {
3437  int len = 1;
3438  if (eptr >= md->end_subject) break;
3439  GETCHARLEN(c, eptr, len);
3440  if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3441  eptr+= len;
3442  }
3443  break;
3444 
3445  case OP_WORDCHAR:
3446  for (i = min; i < max; i++)
3447  {
3448  int len = 1;
3449  if (eptr >= md->end_subject) break;
3450  GETCHARLEN(c, eptr, len);
3451  if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3452  eptr+= len;
3453  }
3454  break;
3455 
3456  default:
3458  }
3459 
3460  /* eptr is now past the end of the maximum run */
3461 
3462  if (possessive) continue;
3463  for(;;)
3464  {
3465  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3466  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3467  if (eptr-- == pp) break; /* Stop if tried at original pos */
3468  BACKCHAR(eptr);
3469  }
3470  }
3471  else
3472 #endif
3473 
3474  /* Not UTF-8 mode */
3475  {
3476  switch(ctype)
3477  {
3478  case OP_ANY:
3479  if ((ims & PCRE_DOTALL) == 0)
3480  {
3481  for (i = min; i < max; i++)
3482  {
3483  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3484  eptr++;
3485  }
3486  break;
3487  }
3488  /* For DOTALL case, fall through and treat as \C */
3489 
3490  case OP_ANYBYTE:
3491  c = max - min;
3492  if (c > (unsigned int)(md->end_subject - eptr))
3493  c = md->end_subject - eptr;
3494  eptr += c;
3495  break;
3496 
3497  case OP_ANYNL:
3498  for (i = min; i < max; i++)
3499  {
3500  if (eptr >= md->end_subject) break;
3501  c = *eptr;
3502  if (c == 0x000d)
3503  {
3504  if (++eptr >= md->end_subject) break;
3505  if (*eptr == 0x000a) eptr++;
3506  }
3507  else
3508  {
3509  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3510  break;
3511  eptr++;
3512  }
3513  }
3514  break;
3515 
3516  case OP_NOT_DIGIT:
3517  for (i = min; i < max; i++)
3518  {
3519  if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3520  break;
3521  eptr++;
3522  }
3523  break;
3524 
3525  case OP_DIGIT:
3526  for (i = min; i < max; i++)
3527  {
3528  if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3529  break;
3530  eptr++;
3531  }
3532  break;
3533 
3534  case OP_NOT_WHITESPACE:
3535  for (i = min; i < max; i++)
3536  {
3537  if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3538  break;
3539  eptr++;
3540  }
3541  break;
3542 
3543  case OP_WHITESPACE:
3544  for (i = min; i < max; i++)
3545  {
3546  if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3547  break;
3548  eptr++;
3549  }
3550  break;
3551 
3552  case OP_NOT_WORDCHAR:
3553  for (i = min; i < max; i++)
3554  {
3555  if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3556  break;
3557  eptr++;
3558  }
3559  break;
3560 
3561  case OP_WORDCHAR:
3562  for (i = min; i < max; i++)
3563  {
3564  if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3565  break;
3566  eptr++;
3567  }
3568  break;
3569 
3570  default:
3572  }
3573 
3574  /* eptr is now past the end of the maximum run */
3575 
3576  if (possessive) continue;
3577  while (eptr >= pp)
3578  {
3579  RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3580  eptr--;
3581  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3582  }
3583  }
3584 
3585  /* Get here if we can't make it match with any permitted repetitions */
3586 
3588  }
3589  /* Control never gets here */
3590 
3591  /* There's been some horrible disaster. Arrival here can only mean there is
3592  something seriously wrong in the code above or the OP_xxx definitions. */
3593 
3594  default:
3595  DPRINTF(("Unknown opcode %d\n", *ecode));
3597  }
3598 
3599  /* Do not stick any code in here without much thought; it is assumed
3600  that "continue" in the code above comes out to here to repeat the main
3601  loop. */
3602 
3603  } /* End of main loop */
3604 /* Control never reaches here */
3605 }
3606 
3607 
3608 /***************************************************************************
3609 ****************************************************************************
3610  RECURSION IN THE match() FUNCTION
3611 
3612 Undefine all the macros that were defined above to handle this. */
3613 
/* Cleanup after match(): the names below are #undef'd only when NO_RECURSE is
defined. Presumably they are macros only in that build (the comment before the
final two #undefs says those two are macros "in both cases") -- confirm against
the #define block earlier in the file. */
3614 #ifdef NO_RECURSE
3615 #undef eptr
3616 #undef ecode
3617 #undef offset_top
3618 #undef ims
3619 #undef eptrb
3620 #undef flags
3621 
3622 #undef callpat
3623 #undef charptr
3624 #undef data
3625 #undef next
3626 #undef pp
3627 #undef prev
3628 #undef saved_eptr
3629 
3630 #undef new_recursive
3631 
3632 #undef cur_is_word
3633 #undef condition
3634 #undef prev_is_word
3635 
3636 #undef original_ims
3637 
3638 #undef ctype
3639 #undef length
3640 #undef max
3641 #undef min
3642 #undef number
3643 #undef offset
3644 #undef op
3645 #undef save_capture_last
3646 #undef save_offset1
3647 #undef save_offset2
3648 #undef save_offset3
3649 #undef stacksave
3650 
3651 #undef newptrb
3652 
3653 #endif
3654 
3655 /* These two are defined as macros in both cases */
/* (so they are removed outside the NO_RECURSE conditional). */
3656 
3657 #undef fc
3658 #undef fi
3659 
3660 /***************************************************************************
3661 ***************************************************************************/
3662 
3663 
3664 
3665 /*************************************************
3666 * Execute a Regular Expression *
3667 *************************************************/
3668 
3669 /* This function applies a compiled re to a subject string and picks out
3670 portions of the string if it matches. Two elements in the vector are set for
3671 each substring: the offsets to the start and end of the substring.
3672 
3673 Arguments:
3674  argument_re points to the compiled expression
3675  extra_data points to extra data or is NULL
3676  subject points to the subject string
3677  length length of subject string (may contain binary zeros)
3678  start_offset where to start in the subject string
3679  options option bits
3680  offsets points to a vector of ints to be filled in with offsets
3681  offsetcount the number of elements in the vector
3682 
3683 Returns: > 0 => success; value is the number of elements filled in
3684  = 0 => success, but offsets is not big enough
3685  -1 => failed to match
3686  < -1 => some kind of unexpected problem
3687 */
3688 
3689 PCRE_DATA_SCOPE int
3690 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3691  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3692  int offsetcount)
3693 {
3694 int rc, resetcount, ocount;
3695 int first_byte = -1;
3696 int req_byte = -1;
3697 int req_byte2 = -1;
3698 int newline;
3699 unsigned long int ims;
3700 BOOL using_temporary_offsets = FALSE;
3701 BOOL anchored;
3702 BOOL startline;
3703 BOOL firstline;
3704 BOOL first_byte_caseless = FALSE;
3705 BOOL req_byte_caseless = FALSE;
3706 BOOL utf8;
3707 match_data match_block;
3708 match_data *md = &match_block;
3709 const uschar *tables;
3710 const uschar *start_bits = NULL;
3711 USPTR start_match = (USPTR)subject + start_offset;
3712 USPTR end_subject;
3713 USPTR req_byte_ptr = start_match - 1;
3714 eptrblock eptrchain[EPTR_WORK_SIZE];
3715 
3716 pcre_study_data internal_study;
3717 const pcre_study_data *study;
3718 
3719 real_pcre internal_re;
3720 const real_pcre *external_re = (const real_pcre *)argument_re;
3721 const real_pcre *re = external_re;
3722 
3723 /* Plausibility checks */
3724 
3725 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3726 if (re == NULL || subject == NULL ||
3727  (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3728 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3729 
3730 /* Fish out the optional data from the extra_data structure, first setting
3731 the default values. */
3732 
3733 study = NULL;
3734 md->match_limit = MATCH_LIMIT;
3735 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3736 md->callout_data = NULL;
3737 
3738 /* The table pointer is always in native byte order. */
3739 
3740 tables = external_re->tables;
3741 
3742 if (extra_data != NULL)
3743  {
3744  register unsigned int flags = extra_data->flags;
3745  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3746  study = (const pcre_study_data *)extra_data->study_data;
3747  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3748  md->match_limit = extra_data->match_limit;
3749  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3750  md->match_limit_recursion = extra_data->match_limit_recursion;
3751  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3752  md->callout_data = extra_data->callout_data;
3753  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3754  }
3755 
3756 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3757 is a feature that makes it possible to save compiled regex and re-use them
3758 in other programs later. */
3759 
3760 if (tables == NULL) tables = _pcre_default_tables;
3761 
3762 /* Check that the first field in the block is the magic number. If it is not,
3763 test for a regex that was compiled on a host of opposite endianness. If this is
3764 the case, flipped values are put in internal_re and internal_study if there was
3765 study data too. */
3766 
3767 if (re->magic_number != MAGIC_NUMBER)
3768  {
3769  re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
3770  if (re == NULL) return PCRE_ERROR_BADMAGIC;
3771  if (study != NULL) study = &internal_study;
3772  }
3773 
3774 /* Set up other data */
3775 
3776 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3777 startline = (re->options & PCRE_STARTLINE) != 0;
3778 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3779 
3780 /* The code starts after the real_pcre block and the capture name table. */
3781 
3782 md->start_code = (const uschar *)external_re + re->name_table_offset +
3783  re->name_count * re->name_entry_size;
3784 
3785 md->start_subject = (USPTR)subject;
3786 md->start_offset = start_offset;
3787 md->end_subject = md->start_subject + length;
3788 end_subject = md->end_subject;
3789 
3790 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3791 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3792 
3793 md->notbol = (options & PCRE_NOTBOL) != 0;
3794 md->noteol = (options & PCRE_NOTEOL) != 0;
3795 md->notempty = (options & PCRE_NOTEMPTY) != 0;
3796 md->partial = (options & PCRE_PARTIAL) != 0;
3797 md->hitend = FALSE;
3798 
3799 md->recursive = NULL; /* No recursion at top level */
3800 md->eptrchain = eptrchain; /* Make workspace generally available */
3801 
3802 md->lcc = tables + lcc_offset;
3803 md->ctypes = tables + ctypes_offset;
3804 
3805 /* Handle different types of newline. The two bits give four cases. If nothing
3806 is set at run time, whatever was used at compile time applies. */
3807 
3808 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3810  {
3811  case 0: newline = NEWLINE; break; /* Compile-time default */
3812  case PCRE_NEWLINE_CR: newline = '\r'; break;
3813  case PCRE_NEWLINE_LF: newline = '\n'; break;
3814  case PCRE_NEWLINE_CR+
3815  PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3816  case PCRE_NEWLINE_ANY: newline = -1; break;
3817  default: return PCRE_ERROR_BADNEWLINE;
3818  }
3819 
3820 if (newline < 0)
3821  {
3822  md->nltype = NLTYPE_ANY;
3823  }
3824 else
3825  {
3826  md->nltype = NLTYPE_FIXED;
3827  if (newline > 255)
3828  {
3829  md->nllen = 2;
3830  md->nl[0] = (newline >> 8) & 255;
3831  md->nl[1] = newline & 255;
3832  }
3833  else
3834  {
3835  md->nllen = 1;
3836  md->nl[0] = newline;
3837  }
3838  }
3839 
3840 /* Partial matching is supported only for a restricted set of regexes at the
3841 moment. */
3842 
3843 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3844  return PCRE_ERROR_BADPARTIAL;
3845 
3846 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3847 back the character offset. */
3848 
3849 #ifdef SUPPORT_UTF8
3850 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3851  {
3852  if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3853  return PCRE_ERROR_BADUTF8;
3854  if (start_offset > 0 && start_offset < length)
3855  {
3856  int tb = ((uschar *)subject)[start_offset];
3857  if (tb > 127)
3858  {
3859  tb &= 0xc0;
3860  if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
3861  }
3862  }
3863  }
3864 #endif
3865 
3866 /* The ims options can vary during the matching as a result of the presence
3867 of (?ims) items in the pattern. They are kept in a local variable so that
3868 restoring at the exit of a group is easy. */
3869 
3871 
3872 /* If the expression has got more back references than the offsets supplied can
3873 hold, we get a temporary chunk of working store to use during the matching.
3874 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3875 of 3. */
3876 
3877 ocount = offsetcount - (offsetcount % 3);
3878 
3879 if (re->top_backref > 0 && re->top_backref >= ocount/3)
3880  {
3881  ocount = re->top_backref * 3 + 3;
3882  md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3883  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3884  using_temporary_offsets = TRUE;
3885  DPRINTF(("Got memory to hold back references\n"));
3886  }
3887 else md->offset_vector = offsets;
3888 
3889 md->offset_end = ocount;
3890 md->offset_max = (2*ocount)/3;
3891 md->offset_overflow = FALSE;
3892 md->capture_last = -1;
3893 
3894 /* Compute the minimum number of offsets that we need to reset each time. Doing
3895 this makes a huge difference to execution time when there aren't many brackets
3896 in the pattern. */
3897 
3898 resetcount = 2 + re->top_bracket * 2;
3899 if (resetcount > offsetcount) resetcount = ocount;
3900 
3901 /* Reset the working variable associated with each extraction. These should
3902 never be used unless previously set, but they get saved and restored, and so we
3903 initialize them to avoid reading uninitialized locations. */
3904 
3905 if (md->offset_vector != NULL)
3906  {
3907  register int *iptr = md->offset_vector + ocount;
3908  register int *iend = iptr - resetcount/2 + 1;
3909  while (--iptr >= iend) *iptr = -1;
3910  }
3911 
3912 /* Set up the first character to match, if available. The first_byte value is
3913 never set for an anchored regular expression, but the anchoring may be forced
3914 at run time, so we have to test for anchoring. The first char may be unset for
3915 an unanchored pattern, of course. If there's no first char and the pattern was
3916 studied, there may be a bitmap of possible first characters. */
3917 
3918 if (!anchored)
3919  {
3920  if ((re->options & PCRE_FIRSTSET) != 0)
3921  {
3922  first_byte = re->first_byte & 255;
3923  if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3924  first_byte = md->lcc[first_byte];
3925  }
3926  else
3927  if (!startline && study != NULL &&
3928  (study->options & PCRE_STUDY_MAPPED) != 0)
3929  start_bits = study->start_bits;
3930  }
3931 
3932 /* For anchored or unanchored matches, there may be a "last known required
3933 character" set. */
3934 
3935 if ((re->options & PCRE_REQCHSET) != 0)
3936  {
3937  req_byte = re->req_byte & 255;
3938  req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3939  req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
3940  }
3941 
3942 
3943 /* ==========================================================================*/
3944 
3945 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3946 the loop runs just once. */
3947 
3948 for(;;)
3949  {
3950  USPTR save_end_subject = end_subject;
3951 
3952  /* Reset the maximum number of extractions we might see. */
3953 
3954  if (md->offset_vector != NULL)
3955  {
3956  register int *iptr = md->offset_vector;
3957  register int *iend = iptr + resetcount;
3958  while (iptr < iend) *iptr++ = -1;
3959  }
3960 
3961  /* Advance to a unique first char if possible. If firstline is TRUE, the
3962  start of the match is constrained to the first line of a multiline string.
3963  That is, the match must be before or at the first newline. Implement this by
3964  temporarily adjusting end_subject so that we stop scanning at a newline. If
3965  the match fails at the newline, later code breaks this loop. */
3966 
3967  if (firstline)
3968  {
3969  USPTR t = start_match;
3970  while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3971  end_subject = t;
3972  }
3973 
3974  /* Now test for a unique first byte */
3975 
3976  if (first_byte >= 0)
3977  {
3978  if (first_byte_caseless)
3979  while (start_match < end_subject &&
3980  md->lcc[*start_match] != first_byte)
3981  start_match++;
3982  else
3983  while (start_match < end_subject && *start_match != first_byte)
3984  start_match++;
3985  }
3986 
3987  /* Or to just after a linebreak for a multiline match if possible */
3988 
3989  else if (startline)
3990  {
3991  if (start_match > md->start_subject + start_offset)
3992  {
3993  while (start_match <= end_subject && !WAS_NEWLINE(start_match))
3994  start_match++;
3995  }
3996  }
3997 
3998  /* Or to a non-unique first char after study */
3999 
4000  else if (start_bits != NULL)
4001  {
4002  while (start_match < end_subject)
4003  {
4004  register unsigned int c = *start_match;
4005  if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4006  }
4007  }
4008 
4009  /* Restore fudged end_subject */
4010 
4011  end_subject = save_end_subject;
4012 
4013 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4014  printf(">>>> Match against: ");
4015  pchars(start_match, end_subject - start_match, TRUE, md);
4016  printf("\n");
4017 #endif
4018 
4019  /* If req_byte is set, we know that that character must appear in the subject
4020  for the match to succeed. If the first character is set, req_byte must be
4021  later in the subject; otherwise the test starts at the match point. This
4022  optimization can save a huge amount of backtracking in patterns with nested
4023  unlimited repeats that aren't going to match. Writing separate code for
4024  cased/caseless versions makes it go faster, as does using an autoincrement
4025  and backing off on a match.
4026 
4027  HOWEVER: when the subject string is very, very long, searching to its end can
4028  take a long time, and give bad performance on quite ordinary patterns. This
4029  showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4030  string... so we don't do this when the string is sufficiently long.
4031 
4032  ALSO: this processing is disabled when partial matching is requested.
4033  */
4034 
4035  if (req_byte >= 0 &&
4036  end_subject - start_match < REQ_BYTE_MAX &&
4037  !md->partial)
4038  {
4039  register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4040 
4041  /* We don't need to repeat the search if we haven't yet reached the
4042  place we found it at last time. */
4043 
4044  if (p > req_byte_ptr)
4045  {
4046  if (req_byte_caseless)
4047  {
4048  while (p < end_subject)
4049  {
4050  register int pp = *p++;
4051  if (pp == req_byte || pp == req_byte2) { p--; break; }
4052  }
4053  }
4054  else
4055  {
4056  while (p < end_subject)
4057  {
4058  if (*p++ == req_byte) { p--; break; }
4059  }
4060  }
4061 
4062  /* If we can't find the required character, break the matching loop,
4063  forcing a match failure. */
4064 
4065  if (p >= end_subject)
4066  {
4067  rc = MATCH_NOMATCH;
4068  break;
4069  }
4070 
4071  /* If we have found the required character, save the point where we
4072  found it, so that we don't search again next time round the loop if
4073  the start hasn't passed this character yet. */
4074 
4075  req_byte_ptr = p;
4076  }
4077  }
4078 
4079  /* OK, we can now run the match. */
4080 
4081  md->start_match = start_match;
4082  md->match_call_count = 0;
4083  md->eptrn = 0; /* Next free eptrchain slot */
4084  rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4085 
4086  /* Any return other than MATCH_NOMATCH breaks the loop. */
4087 
4088  if (rc != MATCH_NOMATCH) break;
4089 
4090  /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4091  newline in the subject (though it may continue over the newline). Therefore,
4092  if we have just failed to match, starting at a newline, do not continue. */
4093 
4094  if (firstline && IS_NEWLINE(start_match)) break;
4095 
4096  /* Advance the match position by one character. */
4097 
4098  start_match++;
4099 #ifdef SUPPORT_UTF8
4100  if (utf8)
4101  while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4102  start_match++;
4103 #endif
4104 
4105  /* Break the loop if the pattern is anchored or if we have passed the end of
4106  the subject. */
4107 
4108  if (anchored || start_match > end_subject) break;
4109 
4110  /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4111  are now at a LF, advance the match position by one more character. */
4112 
4113  if (start_match[-1] == '\r' &&
4114  (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4115  start_match < end_subject &&
4116  *start_match == '\n')
4117  start_match++;
4118 
4119  } /* End of for(;;) "bumpalong" loop */
4120 
4121 /* ==========================================================================*/
4122 
4123 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4124 conditions is true:
4125 
4126 (1) The pattern is anchored;
4127 
4128 (2) We are past the end of the subject;
4129 
4130 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4131  this option requests that a match occur at or before the first newline in
4132  the subject.
4133 
4134 When we have a match and the offset vector is big enough to deal with any
4135 backreferences, captured substring offsets will already be set up. In the case
4136 where we had to get some local store to hold offsets for backreference
4137 processing, copy those that we can. In this case there need not be overflow if
4138 certain parts of the pattern were not used, even though there are more
4139 capturing parentheses than vector slots. */
4140 
4141 if (rc == MATCH_MATCH)
4142  {
4143  if (using_temporary_offsets)
4144  {
4145  if (offsetcount >= 4)
4146  {
4147  memcpy(offsets + 2, md->offset_vector + 2,
4148  (offsetcount - 2) * sizeof(int));
4149  DPRINTF(("Copied offsets from temporary memory\n"));
4150  }
4151  if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4152  DPRINTF(("Freeing temporary memory\n"));
4153  (pcre_free)(md->offset_vector);
4154  }
4155 
4156  /* Set the return code to the number of captured strings, or 0 if there are
4157  too many to fit into the vector. */
4158 
4159  rc = md->offset_overflow? 0 : md->end_offset_top/2;
4160 
4161  /* If there is space, set up the whole thing as substring 0. */
4162 
4163  if (offsetcount < 2) rc = 0; else
4164  {
4165  offsets[0] = start_match - md->start_subject;
4166  offsets[1] = md->end_match_ptr - md->start_subject;
4167  }
4168 
4169  DPRINTF((">>>> returning %d\n", rc));
4170  return rc;
4171  }
4172 
4173 /* Control gets here if there has been an error, or if the overall match
4174 attempt has failed at all permitted starting positions. */
4175 
4176 if (using_temporary_offsets)
4177  {
4178  DPRINTF(("Freeing temporary memory\n"));
4179  (pcre_free)(md->offset_vector);
4180  }
4181 
4182 if (rc != MATCH_NOMATCH)
4183  {
4184  DPRINTF((">>>> error: returning %d\n", rc));
4185  return rc;
4186  }
4187 else if (md->partial && md->hitend)
4188  {
4189  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4190  return PCRE_ERROR_PARTIAL;
4191  }
4192 else
4193  {
4194  DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4195  return PCRE_ERROR_NOMATCH;
4196  }
4197 }
4198 
4199 /* End of pcre_exec.c */
#define TRUE
Definition: bool.h:74
#define FALSE
Definition: bool.h:70
#define PCRE_ERROR_UNKNOWN_OPCODE
Definition: pcre.h:129
#define PCRE_ERROR_BADUTF8_OFFSET
Definition: pcre.h:136
#define PCRE_NO_UTF8_CHECK
Definition: pcre.h:111
#define PCRE_FIRSTLINE
Definition: pcre.h:116
#define PCRE_ERROR_PARTIAL
Definition: pcre.h:137
#define PCRE_NOTBOL
Definition: pcre.h:105
#define PCRE_UTF8
Definition: pcre.h:109
#define PCRE_EXTRA_CALLOUT_DATA
Definition: pcre.h:183
#define PCRE_NEWLINE_ANY
Definition: pcre.h:121
#define PCRE_ERROR_BADNEWLINE
Definition: pcre.h:148
#define PCRE_EXTRA_STUDY_DATA
Definition: pcre.h:181
void *(* pcre_malloc)(size_t)
Definition: pcre_globals.c:75
#define PCRE_ERROR_BADOPTION
Definition: pcre.h:127
#define PCRE_CASELESS
Definition: pcre.h:98
#define PCRE_EXTRA_MATCH_LIMIT
Definition: pcre.h:182
void *(* pcre_stack_malloc)(size_t)
Definition: pcre_globals.c:77
#define PCRE_EXTRA_TABLES
Definition: pcre.h:184
#define PCRE_PARTIAL
Definition: pcre.h:113
#define PCRE_DATA_SCOPE
Definition: pcre.h:81
#define PCRE_NOTEOL
Definition: pcre.h:106
#define PCRE_ERROR_BADMAGIC
Definition: pcre.h:128
#define PCRE_MULTILINE
Definition: pcre.h:99
void(* pcre_free)(void *)
Definition: pcre_globals.c:76
#define PCRE_ERROR_INTERNAL
Definition: pcre.h:139
#define PCRE_ERROR_RECURSIONLIMIT
Definition: pcre.h:146
#define PCRE_NOTEMPTY
Definition: pcre.h:108
#define PCRE_NEWLINE_LF
Definition: pcre.h:119
#define PCRE_DOTALL
Definition: pcre.h:100
#define PCRE_ANCHORED
Definition: pcre.h:102
#define PCRE_ERROR_NULL
Definition: pcre.h:126
#define PCRE_ERROR_BADCOUNT
Definition: pcre.h:140
#define PCRE_NEWLINE_CR
Definition: pcre.h:118
#define PCRE_SPTR
Definition: pcre.h:197
#define PCRE_ERROR_MATCHLIMIT
Definition: pcre.h:133
#define PCRE_ERROR_NOMEMORY
Definition: pcre.h:131
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION
Definition: pcre.h:185
#define PCRE_ERROR_NOMATCH
Definition: pcre.h:125
#define PCRE_DOLLAR_ENDONLY
Definition: pcre.h:103
#define PCRE_ERROR_BADPARTIAL
Definition: pcre.h:138
#define PCRE_ERROR_BADUTF8
Definition: pcre.h:135
#define PCRE_ERROR_NULLWSLIMIT
Definition: pcre.h:147
int(* pcre_callout)(pcre_callout_block *)
Definition: pcre_globals.c:79
#define match_cbegroup
Definition: pcre_exec.c:59
static BOOL match_ref(int offset, register const unsigned char *eptr, int length, match_data *md, unsigned long int ims)
Definition: pcre_exec.c:128
static const char rep_min[]
Definition: pcre_exec.c:76
#define RRETURN(ra)
Definition: pcre_exec.c:210
#define match_condassert
Definition: pcre_exec.c:58
#define MATCH_MATCH
Definition: pcre_exec.c:65
#define RMATCH(rx, ra, rb, rc, rd, re, rf, rg)
Definition: pcre_exec.c:208
#define REGISTER
Definition: pcre_exec.c:194
#define REC_STACK_SAVE_MAX
Definition: pcre_exec.c:72
int pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, const char *subject, int length, int start_offset, int options, int *offsets, int offsetcount)
Definition: pcre_exec.c:3690
#define EPTR_WORK_SIZE
Definition: pcre_exec.c:54
#define fc
#define MATCH_NOMATCH
Definition: pcre_exec.c:66
#define match_tail_recursed
Definition: pcre_exec.c:60
#define fi
static const char rep_max[]
Definition: pcre_exec.c:77
static int match(register const unsigned char *eptr, register const uschar *ecode, int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, int flags, unsigned int rdepth)
Definition: pcre_exec.c:369
int BOOL
#define PT_PC
#define REQ_BYTE_MAX
#define PT_ANY
#define PT_GC
#define PUBLIC_EXEC_OPTIONS
#define ctype_digit
#define PCRE_NEWLINE_BITS
#define PCRE_IMS
#define ctype_word
#define NLTYPE_FIXED
#define PCRE_STUDY_MAPPED
#define MAGIC_NUMBER
#define USPTR
#define ctype_space
int _pcre_ord2utf8(int, uschar *)
Definition: pcre_ord2utf8.c:63
#define NOTACHAR
#define PCRE_REQCHSET
unsigned int _pcre_ucp_othercase(const unsigned int)
#define PCRE_NOPARTIAL
int _pcre_ucp_findprop(const unsigned int, int *, int *)
#define RREF_ANY
#define DPRINTF(p)
Definition: pcre_internal.h:66
#define PT_SC
real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, const pcre_study_data *, pcre_study_data *)
#define IS_NEWLINE(p)
#define PCRE_FIRSTSET
#define WAS_NEWLINE(p)
@ OP_END
@ OP_ANYNL
@ OP_CHAR
@ OP_CRMINQUERY
@ OP_SBRA
@ OP_ONCE
@ OP_NOTPROP
@ OP_NOTPLUS
@ OP_TYPEMINPLUS
@ OP_TYPEQUERY
@ OP_SCOND
@ OP_ASSERTBACK
@ OP_CLASS
@ OP_TYPEPLUS
@ OP_NOT_WORDCHAR
@ OP_CRMINPLUS
@ OP_CRRANGE
@ OP_DOLL
@ OP_ASSERT_NOT
@ OP_NOT
@ OP_ASSERT
@ OP_TYPEPOSSTAR
@ OP_TYPEMINUPTO
@ OP_TYPEPOSPLUS
@ OP_POSSTAR
@ OP_NOTUPTO
@ OP_TYPESTAR
@ OP_BRAMINZERO
@ OP_CRQUERY
@ OP_ASSERTBACK_NOT
@ OP_OPT
@ OP_RREF
@ OP_DIGIT
@ OP_EXACT
@ OP_TYPEEXACT
@ OP_PLUS
@ OP_WHITESPACE
@ OP_CRMINSTAR
@ OP_NOT_WORD_BOUNDARY
@ OP_KET
@ OP_NOT_DIGIT
@ OP_CALLOUT
@ OP_CRMINRANGE
@ OP_RECURSE
@ OP_BRA
@ OP_CHARNC
@ OP_CREF
@ OP_POSUPTO
@ OP_NOTPOSUPTO
@ OP_REVERSE
@ OP_NCLASS
@ OP_KETRMIN
@ OP_COND
@ OP_MINPLUS
@ OP_TYPEPOSUPTO
@ OP_WORDCHAR
@ OP_MINQUERY
@ OP_EODN
@ OP_ALT
@ OP_UPTO
@ OP_QUERY
@ OP_PROP
@ OP_NOTPOSSTAR
@ OP_KETRMAX
@ OP_NOTMINPLUS
@ OP_BRAZERO
@ OP_ANYBYTE
@ OP_TYPEMINQUERY
@ OP_NOT_WHITESPACE
@ OP_NOTMINSTAR
@ OP_NOTSTAR
@ OP_SCBRA
@ OP_MINUPTO
@ OP_CRSTAR
@ OP_POSQUERY
@ OP_MINSTAR
@ OP_STAR
@ OP_DEF
@ OP_TYPEMINSTAR
@ OP_NOTMINUPTO
@ OP_NOTMINQUERY
@ OP_CRPLUS
@ OP_TYPEPOSQUERY
@ OP_POSPLUS
@ OP_REF
@ OP_SOD
@ OP_NOTPOSQUERY
@ OP_TYPEUPTO
@ OP_SOM
@ OP_ANY
@ OP_XCLASS
@ OP_NOTQUERY
@ OP_CBRA
@ OP_EXTUNI
@ OP_EOD
@ OP_NOTEXACT
@ OP_WORD_BOUNDARY
@ OP_NOTPOSPLUS
@ OP_CIRC
#define PT_LAMP
BOOL _pcre_xclass(int, const uschar *)
Definition: pcre_xclass.c:64
#define GETCHARINCTEST(c, eptr)
unsigned char uschar
#define fcc_offset
#define NLTYPE_ANY
#define memmove(a, b, c)
#define GETCHAR(c, eptr)
#define GETCHARLEN(c, eptr, len)
#define lcc_offset
#define GETCHARINC(c, eptr)
const uschar _pcre_default_tables[]
#define REQ_CASELESS
#define ctypes_offset
#define BACKCHAR(eptr)
int _pcre_valid_utf8(const uschar *, int)
#define GET2(a, n)
#define PCRE_STARTLINE
static BOOL utf8
Definition: pcregrep.c:147
static BOOL number
Definition: pcregrep.c:143
static char * newline
Definition: pcregrep.c:112
#define _pcre_OP_lengths
Definition: pcretest.c:90
static int pchars(unsigned char *p, int length, FILE *f)
Definition: pcretest.c:375
static int offset
Definition: read.c:62
struct eptrblock * epb_prev
const unsigned char * epb_saved_eptr
unsigned long int match_limit_recursion
int * offset_vector
const uschar * start_code
const uschar * lcc
unsigned long int match_call_count
void * callout_data
int end_offset_top
uschar nl[4]
unsigned long int match_limit
eptrblock * eptrchain
const unsigned char * end_subject
const unsigned char * start_subject
const uschar * ctypes
const unsigned char * end_match_ptr
recursion_info * recursive
const unsigned char * start_match
BOOL offset_overflow
int current_position
Definition: pcre.h:226
int * offset_vector
Definition: pcre.h:222
void * callout_data
Definition: pcre.h:229
int next_item_length
Definition: pcre.h:232
int capture_last
Definition: pcre.h:228
int subject_length
Definition: pcre.h:224
int pattern_position
Definition: pcre.h:231
const char * subject
Definition: pcre.h:223
int callout_number
Definition: pcre.h:221
void * study_data
Definition: pcre.h:206
unsigned long int flags
Definition: pcre.h:205
unsigned long int match_limit
Definition: pcre.h:207
unsigned long int match_limit_recursion
Definition: pcre.h:210
const unsigned char * tables
Definition: pcre.h:209
void * callout_data
Definition: pcre.h:208
pcre_uint32 options
uschar start_bits[32]
pcre_uint32 magic_number
pcre_uint32 options
pcre_uint16 name_entry_size
pcre_uint16 name_count
pcre_uint16 name_table_offset
pcre_uint16 first_byte
pcre_uint16 req_byte
pcre_uint16 top_backref
const unsigned char * tables
pcre_uint16 top_bracket
struct recursion_info * prevrec
const unsigned char * save_start
const uschar * after_call
@ ucp_Lu
Definition: ucp.h:37
@ ucp_Lt
Definition: ucp.h:36
@ ucp_Ll
Definition: ucp.h:33
@ ucp_M
Definition: ucp.h:18