"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.4.1/pcre/pcre_exec.c" (28 Aug 2013, 126511 Bytes) of archive /linux/misc/tin-2.4.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pcre_exec.c", see the Fossies "Dox" file reference documentation.

    1 /*************************************************
    2 *      Perl-Compatible Regular Expressions       *
    3 *************************************************/
    4 
    5 /* PCRE is a library of functions to support regular expressions whose syntax
    6 and semantics are as close as possible to those of the Perl 5 language.
    7 
    8                        Written by Philip Hazel
    9            Copyright (c) 1997-2006 University of Cambridge
   10 
   11 -----------------------------------------------------------------------------
   12 Redistribution and use in source and binary forms, with or without
   13 modification, are permitted provided that the following conditions are met:
   14 
   15     * Redistributions of source code must retain the above copyright notice,
   16       this list of conditions and the following disclaimer.
   17 
   18     * Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in the
   20       documentation and/or other materials provided with the distribution.
   21 
   22     * Neither the name of the University of Cambridge nor the names of its
   23       contributors may be used to endorse or promote products derived from
   24       this software without specific prior written permission.
   25 
   26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36 POSSIBILITY OF SUCH DAMAGE.
   37 -----------------------------------------------------------------------------
   38 */
   39 
   40 
   41 /* This module contains pcre_exec(), the externally visible function that does
   42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
   43 possible. There are also some static supporting functions. */
   44 
   45 #define NLBLOCK md             /* Block containing newline information */
   46 #define PSSTART start_subject  /* Field containing processed string start */
   47 #define PSEND   end_subject    /* Field containing processed string end */
   48 
   49 #include "pcre_internal.h"
   50 
   51 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
   52 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. When the workspace is used up, match() gives up with PCRE_ERROR_NULLWSLIMIT. */
   53 
   54 #define EPTR_WORK_SIZE (1000)      /* Number of eptrblock slots available to tail recursions */
   55 
   56 /* Flag bits for the match() function */
   57 
   58 #define match_condassert     0x01  /* Called to check a condition assertion */
   59 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
   60 #define match_tail_recursed  0x04  /* Tail recursive call */
   61 
   62 /* Non-error returns from the match() function. Error returns are externally
   63 defined PCRE_ERROR_xxx codes, which are all negative. */
   64 
   65 #define MATCH_MATCH        1       /* Matched */
   66 #define MATCH_NOMATCH      0       /* Failed to match */
   67 
   68 /* Maximum number of ints of offset to save on the stack for recursive calls.
   69 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
   70 because the offset vector is always a multiple of 3 long. */
   71 
   72 #define REC_STACK_SAVE_MAX 30      /* Also the dimension of stacksave[] in match() */
   73 
   74 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
   75 
   76 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* NOTE(review): values fit the order *, *?, +, +?, ?, ?? -- confirm at the use sites */
   77 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* Same indexing as rep_min; 0 here means unlimited */
   78 
   79 
   80 
   81 #ifdef DEBUG
   82 /*************************************************
   83 *        Debugging function to print chars       *
   84 *************************************************/
   85 
   86 /* Print a sequence of chars in printable format, stopping at the end of the
   87 subject if requested.
   88 
   89 Arguments:
   90   p           points to characters
   91   length      number to print
   92   is_subject  TRUE if printing from within md->start_subject
   93   md          pointer to matching data block, if is_subject is TRUE
   94 
   95 Returns:     nothing
   96 */
   97 
   98 static void
   99 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
  100 {
  101 unsigned int c;   /* Byte currently being printed */
  102 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;   /* Clamp the count so printing stops at the end of the subject */
  103 while (length-- > 0)
  104   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);   /* Bytes that fail isprint() are shown as \xhh */
  105 }
  106 #endif
  107 
  108 
  109 
  110 /*************************************************
  111 *          Match a back-reference                *
  112 *************************************************/
  113 
  114 /* If a back reference hasn't been set, the length that is passed is greater
  115 than the number of characters left in the string, so the match fails.
  116 
  117 Arguments:
  118   offset      index into the offset vector
  119   eptr        points into the subject
  120   length      length to be matched
  121   md          points to match data block
  122   ims         the ims flags
  123 
  124 Returns:      TRUE if matched
  125 */
  126 
  127 static BOOL
  128 match_ref(int offset, register USPTR eptr, int length, match_data *md,
  129   unsigned long int ims)
  130 {
  131 USPTR p = md->start_subject + md->offset_vector[offset];   /* Text to compare against: subject start plus the saved offset */
  132 
  133 #ifdef DEBUG   /* Trace both strings that are about to be compared */
  134 if (eptr >= md->end_subject)
  135   printf("matching subject <null>");
  136 else
  137   {
  138   printf("matching subject ");
  139   pchars(eptr, length, TRUE, md);
  140   }
  141 printf(" against backref ");
  142 pchars(p, length, FALSE, md);
  143 printf("\n");
  144 #endif
  145 
  146 /* Always fail if not enough characters left. This test must stay ahead of the comparison loops so that they never read past end_subject. */
  147 
  148 if (length > md->end_subject - eptr) return FALSE;
  149 
  150 /* Separate the caseless case for speed */
  151 
  152 if ((ims & PCRE_CASELESS) != 0)
  153   {
  154   while (length-- > 0)
  155     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;   /* Both bytes are mapped through md->lcc before comparing (presumably a lower-casing table -- confirm) */
  156   }
  157 else
  158   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }   /* Exact byte-for-byte comparison */
  159 
  160 return TRUE;
  161 }
  162 
  163 
  164 
  165 /***************************************************************************
  166 ****************************************************************************
  167                    RECURSION IN THE match() FUNCTION
  168 
  169 The match() function is highly recursive, though not every recursive call
  170 increases the recursive depth. Nevertheless, some regular expressions can cause
  171 it to recurse to a great depth. I was writing for Unix, so I just let it call
  172 itself recursively. This uses the stack for saving everything that has to be
  173 saved for a recursive call. On Unix, the stack can be large, and this works
  174 fine.
  175 
  176 It turns out that on some non-Unix-like systems there are problems with
  177 programs that use a lot of stack. (This despite the fact that every last chip
  178 has oodles of memory these days, and techniques for extending the stack have
  179 been known for decades.) So....
  180 
  181 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
  182 calls by keeping local variables that need to be preserved in blocks of memory
  183 obtained from malloc() instead of on the stack. Macros are used to
  184 achieve this so that the actual code doesn't look very different to what it
  185 always used to.
  186 ****************************************************************************
  187 ***************************************************************************/
  188 
  189 
  190 /* These versions of the macros use the stack, as normal. There are debugging
  191 versions and production versions. */
  192 
  193 #ifndef NO_RECURSE
  194 #define REGISTER register   /* Used on the eptr and ecode arguments of match() */
  195 #ifdef DEBUG   /* Tracing versions: report the source line of every call and return */
  196 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  197   { \
  198   printf("match() called in line %d\n", __LINE__); \
  199   rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1);   /* rx receives the result; the depth argument grows by one */ \
  200   printf("to line %d\n", __LINE__); \
  201   }
  202 #define RRETURN(ra) \
  203   { \
  204   printf("match() returned %d from line %d ", ra, __LINE__);   /* Note that ra is evaluated twice in this version */ \
  205   return ra; \
  206   }
  207 #else   /* Production versions: a plain recursive call and a plain return */
  208 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  209   rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
  210 #define RRETURN(ra) return ra
  211 #endif
  212 
  213 #else
  214 
  215 
  216 /* These versions of the macros manage a private stack on the heap. Note
  217 that the rd argument of RMATCH isn't actually used. It's the md argument of
  218 match(), which never changes.
      
      RMATCH obtains a new frame, records the resume point in the current frame
      with setjmp(), fills the new frame with the "call" arguments and jumps to
      HEAP_RECURSE. When the matching RRETURN longjmps back, the result is picked
      up from the frame's Xresult field and stored in rx. If no memory can be
      obtained for the new frame, the current level is abandoned via
      RRETURN(PCRE_ERROR_NOMEMORY) instead of dereferencing a NULL pointer. */
  219 
  220 #define REGISTER
  221 
  222 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  223   {\
  224   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
        if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  225   if (setjmp(frame->Xwhere) == 0)\
  226     {\
  227     newframe->Xeptr = ra;\
  228     newframe->Xecode = rb;\
  229     newframe->Xoffset_top = rc;\
  230     newframe->Xims = re;\
  231     newframe->Xeptrb = rf;\
  232     newframe->Xflags = rg;\
  233     newframe->Xrdepth = frame->Xrdepth + 1;\
  234     newframe->Xprevframe = frame;\
  235     frame = newframe;\
  236     DPRINTF(("restarting from line %d\n", __LINE__));\
  237     goto HEAP_RECURSE;\
  238     }\
  239   else\
  240     {\
  241     DPRINTF(("longjumped back to line %d\n", __LINE__));\
  242     frame = md->thisframe;\
  243     rx = frame->Xresult;\
  244     }\
  245   }
  246 
  247 #define RRETURN(ra)\
  248   {\
  249   heapframe *newframe = frame;   /* The frame that is finishing */\
  250   frame = newframe->Xprevframe;   /* Step back to the frame that "called" it */\
  251   (pcre_stack_free)(newframe);   /* Give the finished frame back */\
  252   if (frame != NULL)   /* A NULL previous frame marks the top level */\
  253     {\
  254     frame->Xresult = ra;   /* Leave the result where RMATCH picks it up */\
  255     md->thisframe = frame;   /* RMATCH reloads frame from here after the jump */\
  256     longjmp(frame->Xwhere, 1);   /* Resume after the setjmp() in the matching RMATCH */\
  257     }\
  258   return ra;   /* Top level: a genuine return from match() */\
  259   }
  260 
  261 
  262 /* Structure for remembering the local variables in a private frame. Each X-prefixed member backs the like-named argument or local of match(), which reaches it through a macro when NO_RECURSE is defined (e.g. eptr is frame->Xeptr). */
  263 
  264 typedef struct heapframe {
  265   struct heapframe *Xprevframe;   /* Frame that "called" this one; NULL at the top level */
  266 
  267   /* Function arguments that may change */
  268 
  269   const uschar *Xeptr;
  270   const uschar *Xecode;
  271   int Xoffset_top;
  272   unsigned long int Xims;   /* Unsigned, to agree with the ims argument of match() and with Xoriginal_ims */
  273   eptrblock *Xeptrb;
  274   int Xflags;
  275   unsigned int Xrdepth;   /* RMATCH sets this to the caller's depth plus one */
  276 
  277   /* Function local variables */
  278 
  279   const uschar *Xcallpat;
  280   const uschar *Xcharptr;   /* Accessed by match() only when SUPPORT_UTF8 is defined */
  281   const uschar *Xdata;
  282   const uschar *Xnext;
  283   const uschar *Xpp;
  284   const uschar *Xprev;
  285   const uschar *Xsaved_eptr;
  286 
  287   recursion_info Xnew_recursive;
  288 
  289   BOOL Xcur_is_word;
  290   BOOL Xcondition;
  291   BOOL Xprev_is_word;
  292 
  293   unsigned long int Xoriginal_ims;
  294 
  295 #ifdef SUPPORT_UCP
  296   int Xprop_type;
  297   int Xprop_value;
  298   int Xprop_fail_result;
  299   int Xprop_category;
  300   int Xprop_chartype;
  301   int Xprop_script;
  302 #endif
  303 
  304   int Xctype;
  305   unsigned int Xfc;   /* Kept separate from c here; in the recursive build fc is just an alias for c */
  306   int Xfi;   /* Likewise the frame-preserved counterpart of i */
  307   int Xlength;
  308   int Xmax;
  309   int Xmin;
  310   int Xnumber;
  311   int Xoffset;
  312   int Xop;
  313   int Xsave_capture_last;
  314   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  315   int Xstacksave[REC_STACK_SAVE_MAX];
  316 
  317   eptrblock Xnewptrb;
  318 
  319   /* Place to pass back result, and where to jump back to */
  320 
  321   int  Xresult;   /* Written by RRETURN, read by RMATCH */
  322   jmp_buf Xwhere;   /* Set by setjmp() in RMATCH, target of longjmp() in RRETURN */
  323 
  324 } heapframe;
  325 
  326 #endif
  327 
  328 
  329 /***************************************************************************
  330 ***************************************************************************/
  331 
  332 
  333 
  334 /*************************************************
  335 *         Match from current position            *
  336 *************************************************/
  337 
  338 /* This function is called recursively in many circumstances. Whenever it
  339 returns a negative (error) response, the outer incarnation must also return the
  340 same response.
  341 
  342 Performance note: It might be tempting to extract commonly used fields from the
  343 md structure (e.g. utf8, end_subject) into individual variables to improve
  344 performance. Tests using gcc on a SPARC disproved this; in the first case, it
  345 made performance worse.
  346 
  347 Arguments:
  348    eptr        pointer to current character in subject
  349    ecode       pointer to current position in compiled code
  350    offset_top  current top pointer
  351    md          pointer to "static" info for the match
  352    ims         current /i, /m, and /s options
  353    eptrb       pointer to chain of blocks containing eptr at start of
  354                  brackets - for testing for empty matches
  355    flags       can contain
  356                  match_condassert - this is an assertion condition
  357                  match_cbegroup - this is the start of an unlimited repeat
  358                    group that can match an empty string
  359                  match_tail_recursed - this is a tail_recursed group
  360    rdepth      the recursion depth
  361 
  362 Returns:       MATCH_MATCH if matched            )  these values are >= 0
  363                MATCH_NOMATCH if failed to match  )
  364                a negative PCRE_ERROR_xxx value if aborted by an error condition
  365                  (e.g. stopped by repeated call or recursion limit)
  366 */
  367 
  368 static int
  369 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
  370   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
  371   int flags, unsigned int rdepth)
  372 {
  373 /* These variables do not need to be preserved over recursion in this function,
  374 so they can be ordinary variables in all cases. Mark some of them with
  375 "register" because they are used a lot in loops. */
  376 
  377 register int  rrc;         /* Returns from recursive calls */
  378 register int  i;           /* Used for loops not involving calls to RMATCH() */
  379 register unsigned int c;   /* Character values not kept over RMATCH() calls */
  380 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
  381 
  382 BOOL minimize, possessive; /* Quantifier options */
  383 
  384 /* When recursion is not being used, all "local" variables that have to be
  385 preserved over calls to RMATCH() are part of a "frame" which is obtained from
  386 heap storage. Set up the top-level frame here; others are obtained from the
  387 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
  388 
  389 #ifdef NO_RECURSE
  390 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
  391 frame->Xprevframe = NULL;            /* Marks the top level */
  392 
  393 /* Copy in the original argument variables */
  394 
  395 frame->Xeptr = eptr;
  396 frame->Xecode = ecode;
  397 frame->Xoffset_top = offset_top;
  398 frame->Xims = ims;
  399 frame->Xeptrb = eptrb;
  400 frame->Xflags = flags;
  401 frame->Xrdepth = rdepth;
  402 
  403 /* This is where control jumps back to in order to effect "recursion" */
  404 
  405 HEAP_RECURSE:
  406 
  407 /* Macros make the argument variables come from the current frame */
  408 
  409 #define eptr               frame->Xeptr
  410 #define ecode              frame->Xecode
  411 #define offset_top         frame->Xoffset_top
  412 #define ims                frame->Xims
  413 #define eptrb              frame->Xeptrb
  414 #define flags              frame->Xflags
  415 #define rdepth             frame->Xrdepth
  416 
  417 /* Ditto for the local variables */
  418 
  419 #ifdef SUPPORT_UTF8
  420 #define charptr            frame->Xcharptr
  421 #endif
  422 #define callpat            frame->Xcallpat
  423 #define data               frame->Xdata
  424 #define next               frame->Xnext
  425 #define pp                 frame->Xpp
  426 #define prev               frame->Xprev
  427 #define saved_eptr         frame->Xsaved_eptr
  428 
  429 #define new_recursive      frame->Xnew_recursive
  430 
  431 #define cur_is_word        frame->Xcur_is_word
  432 #define condition          frame->Xcondition
  433 #define prev_is_word       frame->Xprev_is_word
  434 
  435 #define original_ims       frame->Xoriginal_ims
  436 
  437 #ifdef SUPPORT_UCP
  438 #define prop_type          frame->Xprop_type
  439 #define prop_value         frame->Xprop_value
  440 #define prop_fail_result   frame->Xprop_fail_result
  441 #define prop_category      frame->Xprop_category
  442 #define prop_chartype      frame->Xprop_chartype
  443 #define prop_script        frame->Xprop_script
  444 #endif
  445 
  446 #define ctype              frame->Xctype
  447 #define fc                 frame->Xfc
  448 #define fi                 frame->Xfi
  449 #define length             frame->Xlength
  450 #define max                frame->Xmax
  451 #define min                frame->Xmin
  452 #define number             frame->Xnumber
  453 #define offset             frame->Xoffset
  454 #define op                 frame->Xop
  455 #define save_capture_last  frame->Xsave_capture_last
  456 #define save_offset1       frame->Xsave_offset1
  457 #define save_offset2       frame->Xsave_offset2
  458 #define save_offset3       frame->Xsave_offset3
  459 #define stacksave          frame->Xstacksave
  460 
  461 #define newptrb            frame->Xnewptrb
  462 
  463 /* When recursion is being used, local variables are allocated on the stack and
  464 get preserved during recursion in the normal way. In this environment, fi and
  465 i, and fc and c, can be the same variables. */
  466 
  467 #else         /* NO_RECURSE not defined */
  468 #define fi i
  469 #define fc c
  470 
  471 
  472 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
  473 const uschar *charptr;             /* in small blocks of the code. My normal */
  474 #endif                             /* style of coding would have declared    */
  475 const uschar *callpat;             /* them within each of those blocks.      */
  476 const uschar *data;                /* However, in order to accommodate the   */
  477 const uschar *next;                /* version of this code that uses an      */
  478 USPTR         pp;                  /* external "stack" implemented on the    */
  479 const uschar *prev;                /* heap, it is easier to declare them all */
  480 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
  481                                    /* out in a block. The only declarations  */
  482 recursion_info new_recursive;      /* within blocks below are for variables  */
  483                                    /* that do not have to be preserved over  */
  484 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
  485 BOOL condition;
  486 BOOL prev_is_word;
  487 
  488 unsigned long int original_ims;
  489 
  490 #ifdef SUPPORT_UCP
  491 int prop_type;
  492 int prop_value;
  493 int prop_fail_result;
  494 int prop_category;
  495 int prop_chartype;
  496 int prop_script;
  497 #endif
  498 
  499 int ctype;
  500 int length;
  501 int max;
  502 int min;
  503 int number;
  504 int offset;
  505 int op;
  506 int save_capture_last;
  507 int save_offset1, save_offset2, save_offset3;
  508 int stacksave[REC_STACK_SAVE_MAX];
  509 
  510 eptrblock newptrb;
  511 #endif     /* NO_RECURSE */
  512 
  513 /* These statements are here to stop the compiler complaining about uninitialized
  514 variables. */
  515 
  516 #ifdef SUPPORT_UCP
  517 prop_value = 0;
  518 prop_fail_result = 0;
  519 #endif
  520 
  521 
  522 /* This label is used for tail recursion, which is used in a few cases even
  523 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
  524 used. Thanks to Ian Taylor for noticing this possibility and sending the
  525 original patch. */
  526 
  527 TAIL_RECURSE:
  528 
  529 /* OK, now we can get on with the real code of the function. Recursive calls
  530 are specified by the macro RMATCH and RRETURN is used to return. When
  531 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
  532 and a "return", respectively (possibly with some debugging if DEBUG is
  533 defined). However, RMATCH isn't like a function call because it's quite a
  534 complicated macro. It has to be used in one particular way. This shouldn't,
  535 however, impact performance when true recursion is being used. */
  536 
  537 /* First check that we haven't called match() too many times, or that we
  538 haven't exceeded the recursive call limit. */
  539 
  540 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
  541 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
  542 
  543 original_ims = ims;    /* Save for resetting on ')' */
  544 
  545 #ifdef SUPPORT_UTF8
  546 utf8 = md->utf8;       /* Local copy of the flag */
  547 #else
  548 utf8 = FALSE;
  549 #endif
  550 
  551 /* At the start of a group with an unlimited repeat that may match an empty
  552 string, the match_cbegroup flag is set. When this is the case, add the current
  553 subject pointer to the chain of such remembered pointers, to be checked when we
  554 hit the closing ket, in order to break infinite loops that match no characters.
  555 When match() is called in other circumstances, don't add to the chain. If this
  556 is a tail recursion, use a block from the workspace, as the one on the stack is
  557 already used. */
  558 
  559 if ((flags & match_cbegroup) != 0)
  560   {
  561   eptrblock *p;
  562   if ((flags & match_tail_recursed) != 0)
  563     {
  564     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
  565     p = md->eptrchain + md->eptrn++;
  566     }
  567   else p = &newptrb;
  568   p->epb_saved_eptr = eptr;
  569   p->epb_prev = eptrb;
  570   eptrb = p;
  571   }
  572 
  573 /* Now start processing the opcodes. */
  574 
  575 for (;;)
  576   {
  577   minimize = possessive = FALSE;
  578   op = *ecode;
  579 
  580   /* For partial matching, remember if we ever hit the end of the subject after
  581   matching at least one subject character. */
  582 
  583   if (md->partial &&
  584       eptr >= md->end_subject &&
  585       eptr > md->start_match)
  586     md->hitend = TRUE;
  587 
  588   switch(op)
  589     {
  590     /* Handle a capturing bracket. If there is space in the offset vector, save
  591     the current subject position in the working slot at the top of the vector.
  592     We mustn't change the current values of the data slot, because they may be
  593     set from a previous iteration of this group, and be referred to by a
  594     reference inside the group.
  595 
  596     If the bracket fails to match, we need to restore this value and also the
  597     values of the final offsets, in case they were set by a previous iteration
  598     of the same bracket.
  599 
  600     If there isn't enough space in the offset vector, treat this as if it were
  601     a non-capturing bracket. Don't worry about setting the flag for the error
  602     case here; that is handled in the code for KET. */
  603 
  604     case OP_CBRA:
  605     case OP_SCBRA:
  606     number = GET2(ecode, 1+LINK_SIZE);
  607     offset = number << 1;
  608 
  609 #ifdef DEBUG
  610     printf("start bracket %d\n", number);
  611     printf("subject=");
  612     pchars(eptr, 16, TRUE, md);
  613     printf("\n");
  614 #endif
  615 
  616     if (offset < md->offset_max)
  617       {
  618       save_offset1 = md->offset_vector[offset];
  619       save_offset2 = md->offset_vector[offset+1];
  620       save_offset3 = md->offset_vector[md->offset_end - number];
  621       save_capture_last = md->capture_last;
  622 
  623       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
  624       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
  625 
  626       flags = (op == OP_SCBRA)? match_cbegroup : 0;
  627       do
  628         {
  629         RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
  630           ims, eptrb, flags);
  631         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  632         md->capture_last = save_capture_last;
  633         ecode += GET(ecode, 1);
  634         }
  635       while (*ecode == OP_ALT);
  636 
  637       DPRINTF(("bracket %d failed\n", number));
  638 
  639       md->offset_vector[offset] = save_offset1;
  640       md->offset_vector[offset+1] = save_offset2;
  641       md->offset_vector[md->offset_end - number] = save_offset3;
  642 
  643       RRETURN(MATCH_NOMATCH);
  644       }
  645 
  646     /* Insufficient room for saving captured contents. Treat as a non-capturing
  647     bracket. */
  648 
  649     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
  650 
  651     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
  652     final alternative within the brackets, we would return the result of a
  653     recursive call to match() whatever happened. We can reduce stack usage by
  654     turning this into a tail recursion. */
  655 
  656     case OP_BRA:
  657     case OP_SBRA:
  658     DPRINTF(("start non-capturing bracket\n"));
  659     flags = (op >= OP_SBRA)? match_cbegroup : 0;
  660     for (;;)
  661       {
  662       if (ecode[GET(ecode, 1)] != OP_ALT)
  663         {
  664         ecode += _pcre_OP_lengths[*ecode];
  665         flags |= match_tail_recursed;
  666         DPRINTF(("bracket 0 tail recursion\n"));
  667         goto TAIL_RECURSE;
  668         }
  669 
  670       /* For non-final alternatives, continue the loop for a NOMATCH result;
  671       otherwise return. */
  672 
  673       RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
  674         eptrb, flags);
  675       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  676       ecode += GET(ecode, 1);
  677       }
  678     /* Control never reaches here. */
  679 
  680     /* Conditional group: compilation checked that there are no more than
  681     two branches. If the condition is false, skipping the first branch takes us
  682     past the end if there is only one branch, but that's OK because that is
  683     exactly what going to the ket would do. As there is only one branch to be
  684     obeyed, we can use tail recursion to avoid using another stack frame. */
  685 
  686     case OP_COND:
  687     case OP_SCOND:
  688     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
  689       {
  690       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
  691       condition = md->recursive != NULL &&
  692         (offset == RREF_ANY || offset == md->recursive->group_num);
  693       ecode += condition? 3 : GET(ecode, 1);
  694       }
  695 
  696     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
  697       {
  698       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
  699       condition = offset < offset_top && md->offset_vector[offset] >= 0;
  700       ecode += condition? 3 : GET(ecode, 1);
  701       }
  702 
  703     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
  704       {
  705       condition = FALSE;
  706       ecode += GET(ecode, 1);
  707       }
  708 
  709     /* The condition is an assertion. Call match() to evaluate it - setting
  710     the final argument match_condassert causes it to stop at the end of an
  711     assertion. */
  712 
  713     else
  714       {
  715       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
  716           match_condassert);
  717       if (rrc == MATCH_MATCH)
  718         {
  719         condition = TRUE;
  720         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
  721         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
  722         }
  723       else if (rrc != MATCH_NOMATCH)
  724         {
  725         RRETURN(rrc);         /* Need braces because of following else */
  726         }
  727       else
  728         {
  729         condition = FALSE;
  730         ecode += GET(ecode, 1);
  731         }
  732       }
  733 
  734     /* We are now at the branch that is to be obeyed. As there is only one,
  735     we can use tail recursion to avoid using another stack frame. If the second
  736     alternative doesn't exist, we can just plough on. */
  737 
  738     if (condition || *ecode == OP_ALT)
  739       {
  740       ecode += 1 + LINK_SIZE;
  741       flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
  742       goto TAIL_RECURSE;
  743       }
  744     else
  745       {
  746       ecode += 1 + LINK_SIZE;
  747       }
  748     break;
  749 
  750 
  751     /* End of the pattern. If we are in a top-level recursion, we should
  752     restore the offsets appropriately and continue from after the call. */
  753 
  754     case OP_END:
  755     if (md->recursive != NULL && md->recursive->group_num == 0)
  756       {
  757       recursion_info *rec = md->recursive;
  758       DPRINTF(("End of pattern in a (?0) recursion\n"));
  759       md->recursive = rec->prevrec;
  760       memmove(md->offset_vector, rec->offset_save,
  761         rec->saved_max * sizeof(int));
  762       md->start_match = rec->save_start;
  763       ims = original_ims;
  764       ecode = rec->after_call;
  765       break;
  766       }
  767 
  768     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
  769     string - backtracking will then try other alternatives, if any. */
  770 
  771     if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
  772     md->end_match_ptr = eptr;          /* Record where we ended */
  773     md->end_offset_top = offset_top;   /* and how many extracts were taken */
  774     RRETURN(MATCH_MATCH);
  775 
  776     /* Change option settings */
  777 
  778     case OP_OPT:
  779     ims = ecode[1];
  780     ecode += 2;
  781     DPRINTF(("ims set to %02lx\n", ims));
  782     break;
  783 
  784     /* Assertion brackets. Check the alternative branches in turn - the
  785     matching won't pass the KET for an assertion. If any one branch matches,
  786     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
  787     start of each branch to move the current point backwards, so the code at
  788     this level is identical to the lookahead case. */
  789 
  790     case OP_ASSERT:
  791     case OP_ASSERTBACK:
  792     do
  793       {
  794       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
  795       if (rrc == MATCH_MATCH) break;
  796       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  797       ecode += GET(ecode, 1);
  798       }
  799     while (*ecode == OP_ALT);
  800     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
  801 
  802     /* If checking an assertion for a condition, return MATCH_MATCH. */
  803 
  804     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
  805 
  806     /* Continue from after the assertion, updating the offsets high water
  807     mark, since extracts may have been taken during the assertion. */
  808 
  809     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
  810     ecode += 1 + LINK_SIZE;
  811     offset_top = md->end_offset_top;
  812     continue;
  813 
  814     /* Negative assertion: all branches must fail to match */
  815 
  816     case OP_ASSERT_NOT:
  817     case OP_ASSERTBACK_NOT:
  818     do
  819       {
  820       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
  821       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
  822       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  823       ecode += GET(ecode,1);
  824       }
  825     while (*ecode == OP_ALT);
  826 
  827     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
  828 
  829     ecode += 1 + LINK_SIZE;
  830     continue;
  831 
  832     /* Move the subject pointer back. This occurs only at the start of
  833     each branch of a lookbehind assertion. If we are too close to the start to
  834     move back, this match function fails. When working with UTF-8 we move
  835     back a number of characters, not bytes. */
  836 
  837     case OP_REVERSE:
  838 #ifdef SUPPORT_UTF8
  839     if (utf8)
  840       {
  841       i = GET(ecode, 1);
  842       while (i-- > 0)
  843         {
  844         eptr--;
  845         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
  846         BACKCHAR(eptr)
  847         }
  848       }
  849     else
  850 #endif
  851 
  852     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
  853 
  854       {
  855       eptr -= GET(ecode, 1);
  856       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
  857       }
  858 
  859     /* Skip to next op code */
  860 
  861     ecode += 1 + LINK_SIZE;
  862     break;
  863 
  864     /* The callout item calls an external function, if one is provided, passing
  865     details of the match so far. This is mainly for debugging, though the
  866     function is able to force a failure. */
  867 
  868     case OP_CALLOUT:
  869     if (pcre_callout != NULL)
  870       {
  871       pcre_callout_block cb;
  872       cb.version          = 1;   /* Version 1 of the callout block */
  873       cb.callout_number   = ecode[1];
  874       cb.offset_vector    = md->offset_vector;
  875       cb.subject          = (PCRE_SPTR)md->start_subject;
  876       cb.subject_length   = md->end_subject - md->start_subject;
  877       cb.start_match      = md->start_match - md->start_subject;
  878       cb.current_position = eptr - md->start_subject;
  879       cb.pattern_position = GET(ecode, 2);
  880       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
  881       cb.capture_top      = offset_top/2;
  882       cb.capture_last     = md->capture_last;
  883       cb.callout_data     = md->callout_data;
  884       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
  885       if (rrc < 0) RRETURN(rrc);
  886       }
  887     ecode += 2 + 2*LINK_SIZE;
  888     break;
  889 
  890     /* Recursion either matches the current regex, or some subexpression. The
  891     offset data is the offset to the starting bracket from the start of the
  892     whole pattern. (This is so that it works from duplicated subpatterns.)
  893 
  894     If there are any capturing brackets started but not finished, we have to
  895     save their starting points and reinstate them after the recursion. However,
  896     we don't know how many such there are (offset_top records the completed
  897     total) so we just have to save all the potential data. There may be up to
  898     65535 such values, which is too large to put on the stack, but using malloc
  899     for small numbers seems expensive. As a compromise, the stack is used when
  900     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
  901     is used. If the malloc fails, no attempt is made to carry on with a
  902     partial save on the stack; this level of match() gives up immediately by
  903     doing RRETURN(PCRE_ERROR_NOMEMORY).
  904 
  905     There are also other values that have to be saved. We use a chained
  906     sequence of blocks that actually live on the stack. Thanks to Robin Houston
  907     for the original version of this logic. */
  908 
  909     case OP_RECURSE:
  910       {
  911       callpat = md->start_code + GET(ecode, 1);
  912       new_recursive.group_num = (callpat == md->start_code)? 0 :
  913         GET2(callpat, 1 + LINK_SIZE);
  914 
  915       /* Add to "recursing stack" */
  916 
  917       new_recursive.prevrec = md->recursive;
  918       md->recursive = &new_recursive;
  919 
  920       /* Find where to continue from afterwards */
  921 
  922       ecode += 1 + LINK_SIZE;
  923       new_recursive.after_call = ecode;
  924 
  925       /* Now save the offset data. */
  926 
  927       new_recursive.saved_max = md->offset_end;
  928       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
  929         new_recursive.offset_save = stacksave;
  930       else
  931         {
  932         new_recursive.offset_save =
  933           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
  934         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
  935         }
  936 
  937       memcpy(new_recursive.offset_save, md->offset_vector,
  938             new_recursive.saved_max * sizeof(int));
  939       new_recursive.save_start = md->start_match;
  940       md->start_match = eptr;
  941 
  942       /* OK, now we can do the recursion. For each top-level alternative we
  943       restore the offset and recursion data. */
  944 
  945       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
  946       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
  947       do
  948         {
  949         RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
  950           md, ims, eptrb, flags);
  951         if (rrc == MATCH_MATCH)
  952           {
  953           DPRINTF(("Recursion matched\n"));
  954           md->recursive = new_recursive.prevrec;
  955           if (new_recursive.offset_save != stacksave)
  956             (pcre_free)(new_recursive.offset_save);
  957           RRETURN(MATCH_MATCH);
  958           }
  959         else if (rrc != MATCH_NOMATCH)
  960           {
  961           DPRINTF(("Recursion gave error %d\n", rrc));
  962           RRETURN(rrc);
  963           }
  964 
  965         md->recursive = &new_recursive;
  966         memcpy(md->offset_vector, new_recursive.offset_save,
  967             new_recursive.saved_max * sizeof(int));
  968         callpat += GET(callpat, 1);
  969         }
  970       while (*callpat == OP_ALT);
  971 
  972       DPRINTF(("Recursion didn't match\n"));
  973       md->recursive = new_recursive.prevrec;
  974       if (new_recursive.offset_save != stacksave)
  975         (pcre_free)(new_recursive.offset_save);
  976       RRETURN(MATCH_NOMATCH);
  977       }
  978     /* Control never reaches here */
  979 
  980     /* "Once" brackets are like assertion brackets except that after a match,
  981     the point in the subject string is not moved back. Thus there can never be
  982     a move back into the brackets. Friedl calls these "atomic" subpatterns.
  983     Check the alternative branches in turn - the matching won't pass the KET
  984     for this kind of subpattern. If any one branch matches, we carry on as at
  985     the end of a normal bracket, leaving the subject pointer. */
  986 
  987     case OP_ONCE:
  988     prev = ecode;
  989     saved_eptr = eptr;
  990 
  991     do
  992       {
  993       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
  994         eptrb, 0);
  995       if (rrc == MATCH_MATCH) break;
  996       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  997       ecode += GET(ecode,1);
  998       }
  999     while (*ecode == OP_ALT);
 1000 
 1001     /* If we hit the end of the group (which could be repeated), fail */
 1002 
 1003     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
 1004 
 1005     /* Continue as from after the assertion, updating the offsets high water
 1006     mark, since extracts may have been taken. */
 1007 
 1008     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
 1009 
 1010     offset_top = md->end_offset_top;
 1011     eptr = md->end_match_ptr;
 1012 
 1013     /* For a non-repeating ket, just continue at this level. This also
 1014     happens for a repeating ket if no characters were matched in the group.
 1015     This is the forcible breaking of infinite loops as implemented in Perl
 1016     5.005. If there is an options reset, it will get obeyed in the normal
 1017     course of events. */
 1018 
 1019     if (*ecode == OP_KET || eptr == saved_eptr)
 1020       {
 1021       ecode += 1+LINK_SIZE;
 1022       break;
 1023       }
 1024 
 1025     /* The repeating kets try the rest of the pattern or restart from the
 1026     preceding bracket, in the appropriate order. The second "call" of match()
 1027     uses tail recursion, to avoid using another stack frame. We need to reset
 1028     any options that changed within the bracket before re-running it, so
 1029     check the next opcode. */
 1030 
 1031     if (ecode[1+LINK_SIZE] == OP_OPT)
 1032       {
 1033       ims = (ims & ~PCRE_IMS) | ecode[4];
 1034       DPRINTF(("ims set to %02lx at group repeat\n", ims));
 1035       }
 1036 
 1037     if (*ecode == OP_KETRMIN)
 1038       {
 1039       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
 1040       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1041       ecode = prev;
 1042       flags = match_tail_recursed;
 1043       goto TAIL_RECURSE;
 1044       }
 1045     else  /* OP_KETRMAX */
 1046       {
 1047       RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
 1048       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1049       ecode += 1 + LINK_SIZE;
 1050       flags = match_tail_recursed;
 1051       goto TAIL_RECURSE;
 1052       }
 1053     /* Control never gets here */
 1054 
 1055     /* An alternation is the end of a branch; scan along to find the end of the
 1056     bracketed group and go to there. */
 1057 
 1058     case OP_ALT:
 1059     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 1060     break;
 1061 
 1062     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
 1063     that it may occur zero times. It may repeat infinitely, or not at all -
 1064     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
 1065     repeat limits are compiled as a number of copies, with the optional ones
 1066     preceded by BRAZERO or BRAMINZERO. */
 1067 
 1068     case OP_BRAZERO:
 1069       {
 1070       next = ecode+1;
 1071       RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
 1072       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1073       do next += GET(next,1); while (*next == OP_ALT);
 1074       ecode = next + 1 + LINK_SIZE;
 1075       }
 1076     break;
 1077 
 1078     case OP_BRAMINZERO:
 1079       {
 1080       next = ecode+1;
 1081       do next += GET(next, 1); while (*next == OP_ALT);
 1082       RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
 1083       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1084       ecode++;
 1085       }
 1086     break;
 1087 
 1088     /* End of a group, repeated or non-repeating. */
 1089 
 1090     case OP_KET:
 1091     case OP_KETRMIN:
 1092     case OP_KETRMAX:
 1093     prev = ecode - GET(ecode, 1);
 1094 
 1095     /* If this was a group that remembered the subject start, in order to break
 1096     infinite repeats of empty string matches, retrieve the subject start from
 1097     the chain. Otherwise, set it NULL. */
 1098 
 1099     if (*prev >= OP_SBRA)
 1100       {
 1101       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
 1102       eptrb = eptrb->epb_prev;              /* Backup to previous group */
 1103       }
 1104     else saved_eptr = NULL;
 1105 
 1106     /* If we are at the end of an assertion group, stop matching and return
 1107     MATCH_MATCH, but record the current high water mark for use by positive
 1108     assertions. Do this also for the "once" (atomic) groups. */
 1109 
 1110     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
 1111         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
 1112         *prev == OP_ONCE)
 1113       {
 1114       md->end_match_ptr = eptr;      /* For ONCE */
 1115       md->end_offset_top = offset_top;
 1116       RRETURN(MATCH_MATCH);
 1117       }
 1118 
 1119     /* For capturing groups we have to check the group number back at the start
 1120     and if necessary complete handling an extraction by setting the offsets and
 1121     bumping the high water mark. Note that whole-pattern recursion is coded as
 1122     a recurse into group 0, so it won't be picked up here. Instead, we catch it
 1123     when the OP_END is reached. Other recursion is handled here. */
 1124 
 1125     if (*prev == OP_CBRA || *prev == OP_SCBRA)
 1126       {
 1127       number = GET2(prev, 1+LINK_SIZE);
 1128       offset = number << 1;
 1129 
 1130 #ifdef DEBUG
 1131       printf("end bracket %d", number);
 1132       printf("\n");
 1133 #endif
 1134 
 1135       md->capture_last = number;
 1136       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
 1137         {
 1138         md->offset_vector[offset] =
 1139           md->offset_vector[md->offset_end - number];
 1140         md->offset_vector[offset+1] = eptr - md->start_subject;
 1141         if (offset_top <= offset) offset_top = offset + 2;
 1142         }
 1143 
 1144       /* Handle a recursively called group. Restore the offsets
 1145       appropriately and continue from after the call. */
 1146 
 1147       if (md->recursive != NULL && md->recursive->group_num == number)
 1148         {
 1149         recursion_info *rec = md->recursive;
 1150         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
 1151         md->recursive = rec->prevrec;
 1152         md->start_match = rec->save_start;
 1153         memcpy(md->offset_vector, rec->offset_save,
 1154           rec->saved_max * sizeof(int));
 1155         ecode = rec->after_call;
 1156         ims = original_ims;
 1157         break;
 1158         }
 1159       }
 1160 
 1161     /* For both capturing and non-capturing groups, reset the value of the ims
 1162     flags, in case they got changed during the group. */
 1163 
 1164     ims = original_ims;
 1165     DPRINTF(("ims reset to %02lx\n", ims));
 1166 
 1167     /* For a non-repeating ket, just continue at this level. This also
 1168     happens for a repeating ket if no characters were matched in the group.
 1169     This is the forcible breaking of infinite loops as implemented in Perl
 1170     5.005. If there is an options reset, it will get obeyed in the normal
 1171     course of events. */
 1172 
 1173     if (*ecode == OP_KET || eptr == saved_eptr)
 1174       {
 1175       ecode += 1 + LINK_SIZE;
 1176       break;
 1177       }
 1178 
 1179     /* The repeating kets try the rest of the pattern or restart from the
 1180     preceding bracket, in the appropriate order. In the second case, we can use
 1181     tail recursion to avoid using another stack frame. */
 1182 
 1183     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
 1184 
 1185     if (*ecode == OP_KETRMIN)
 1186       {
 1187       RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
 1188       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1189       ecode = prev;
 1190       flags |= match_tail_recursed;
 1191       goto TAIL_RECURSE;
 1192       }
 1193     else  /* OP_KETRMAX */
 1194       {
 1195       RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
 1196       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1197       ecode += 1 + LINK_SIZE;
 1198       flags = match_tail_recursed;
 1199       goto TAIL_RECURSE;
 1200       }
 1201     /* Control never gets here */
 1202 
 1203     /* Start of subject unless notbol, or after internal newline if multiline */
 1204 
 1205     case OP_CIRC:
 1206     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
 1207     if ((ims & PCRE_MULTILINE) != 0)
 1208       {
 1209       if (eptr != md->start_subject &&
 1210           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
 1211         RRETURN(MATCH_NOMATCH);
 1212       ecode++;
 1213       break;
 1214       }
 1215     /* ... else fall through */
 1216 
 1217     /* Start of subject assertion */
 1218 
 1219     case OP_SOD:
 1220     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
 1221     ecode++;
 1222     break;
 1223 
 1224     /* Start of match assertion */
 1225 
 1226     case OP_SOM:
 1227     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
 1228     ecode++;
 1229     break;
 1230 
 1231     /* Assert before internal newline if multiline, or before a terminating
 1232     newline unless endonly is set, else end of subject unless noteol is set. */
 1233 
 1234     case OP_DOLL:
 1235     if ((ims & PCRE_MULTILINE) != 0)
 1236       {
 1237       if (eptr < md->end_subject)
 1238         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
 1239       else
 1240         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
 1241       ecode++;
 1242       break;
 1243       }
 1244     else
 1245       {
 1246       if (md->noteol) RRETURN(MATCH_NOMATCH);
 1247       if (!md->endonly)
 1248         {
 1249         if (eptr != md->end_subject &&
 1250             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
 1251           RRETURN(MATCH_NOMATCH);
 1252         ecode++;
 1253         break;
 1254         }
 1255       }
 1256     /* ... else fall through for endonly */
 1257 
 1258     /* End of subject assertion (\z) */
 1259 
 1260     case OP_EOD:
 1261     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
 1262     ecode++;
 1263     break;
 1264 
 1265     /* End of subject or ending \n assertion (\Z) */
 1266 
 1267     case OP_EODN:
 1268     if (eptr != md->end_subject &&
 1269         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
 1270       RRETURN(MATCH_NOMATCH);
 1271     ecode++;
 1272     break;
 1273 
 1274     /* Word boundary assertions */
 1275 
 1276     case OP_NOT_WORD_BOUNDARY:
 1277     case OP_WORD_BOUNDARY:
 1278       {
 1279 
 1280       /* Find out if the previous and current characters are "word" characters.
 1281       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
 1282       be "non-word" characters. */
 1283 
 1284 #ifdef SUPPORT_UTF8
 1285       if (utf8)
 1286         {
 1287         if (eptr == md->start_subject) prev_is_word = FALSE; else
 1288           {
 1289           const uschar *lastptr = eptr - 1;
 1290           while((*lastptr & 0xc0) == 0x80) lastptr--;
 1291           GETCHAR(c, lastptr);
 1292           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
 1293           }
 1294         if (eptr >= md->end_subject) cur_is_word = FALSE; else
 1295           {
 1296           GETCHAR(c, eptr);
 1297           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
 1298           }
 1299         }
 1300       else
 1301 #endif
 1302 
 1303       /* More streamlined when not in UTF-8 mode */
 1304 
 1305         {
 1306         prev_is_word = (eptr != md->start_subject) &&
 1307           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
 1308         cur_is_word = (eptr < md->end_subject) &&
 1309           ((md->ctypes[*eptr] & ctype_word) != 0);
 1310         }
 1311 
 1312       /* Now see if the situation is what we want */
 1313 
 1314       if ((*ecode++ == OP_WORD_BOUNDARY)?
 1315            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
 1316         RRETURN(MATCH_NOMATCH);
 1317       }
 1318     break;
 1319 
 1320     /* Match a single character type; inline for speed */
 1321 
 1322     case OP_ANY:
 1323     if ((ims & PCRE_DOTALL) == 0)
 1324       {
 1325       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
 1326       }
 1327     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1328     if (utf8)
 1329       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 1330     ecode++;
 1331     break;
 1332 
 1333     /* Match a single byte, even in UTF-8 mode. This opcode really does match
 1334     any byte, even newline, independent of the setting of PCRE_DOTALL. */
 1335 
 1336     case OP_ANYBYTE:
 1337     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1338     ecode++;
 1339     break;
 1340 
 1341     case OP_NOT_DIGIT:
 1342     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1343     GETCHARINCTEST(c, eptr);
 1344     if (
 1345 #ifdef SUPPORT_UTF8
 1346        c < 256 &&
 1347 #endif
 1348        (md->ctypes[c] & ctype_digit) != 0
 1349        )
 1350       RRETURN(MATCH_NOMATCH);
 1351     ecode++;
 1352     break;
 1353 
 1354     case OP_DIGIT:
 1355     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1356     GETCHARINCTEST(c, eptr);
 1357     if (
 1358 #ifdef SUPPORT_UTF8
 1359        c >= 256 ||
 1360 #endif
 1361        (md->ctypes[c] & ctype_digit) == 0
 1362        )
 1363       RRETURN(MATCH_NOMATCH);
 1364     ecode++;
 1365     break;
 1366 
 1367     case OP_NOT_WHITESPACE:
 1368     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1369     GETCHARINCTEST(c, eptr);
 1370     if (
 1371 #ifdef SUPPORT_UTF8
 1372        c < 256 &&
 1373 #endif
 1374        (md->ctypes[c] & ctype_space) != 0
 1375        )
 1376       RRETURN(MATCH_NOMATCH);
 1377     ecode++;
 1378     break;
 1379 
 1380     case OP_WHITESPACE:
 1381     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1382     GETCHARINCTEST(c, eptr);
 1383     if (
 1384 #ifdef SUPPORT_UTF8
 1385        c >= 256 ||
 1386 #endif
 1387        (md->ctypes[c] & ctype_space) == 0
 1388        )
 1389       RRETURN(MATCH_NOMATCH);
 1390     ecode++;
 1391     break;
 1392 
 1393     case OP_NOT_WORDCHAR:
 1394     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1395     GETCHARINCTEST(c, eptr);
 1396     if (
 1397 #ifdef SUPPORT_UTF8
 1398        c < 256 &&
 1399 #endif
 1400        (md->ctypes[c] & ctype_word) != 0
 1401        )
 1402       RRETURN(MATCH_NOMATCH);
 1403     ecode++;
 1404     break;
 1405 
 1406     case OP_WORDCHAR:
 1407     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1408     GETCHARINCTEST(c, eptr);
 1409     if (
 1410 #ifdef SUPPORT_UTF8
 1411        c >= 256 ||
 1412 #endif
 1413        (md->ctypes[c] & ctype_word) == 0
 1414        )
 1415       RRETURN(MATCH_NOMATCH);
 1416     ecode++;
 1417     break;
 1418 
 1419     case OP_ANYNL:
 1420     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1421     GETCHARINCTEST(c, eptr);
 1422     switch(c)
 1423       {
 1424       default: RRETURN(MATCH_NOMATCH);
 1425       case 0x000d:
 1426       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
 1427       break;
 1428       case 0x000a:
 1429       case 0x000b:
 1430       case 0x000c:
 1431       case 0x0085:
 1432       case 0x2028:
 1433       case 0x2029:
 1434       break;
 1435       }
 1436     ecode++;
 1437     break;
 1438 
 1439 #ifdef SUPPORT_UCP
 1440     /* Check the next character by Unicode property. We will get here only
 1441     if the support is in the binary; otherwise a compile-time error occurs. */
 1442 
 1443     case OP_PROP:
 1444     case OP_NOTPROP:
 1445     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1446     GETCHARINCTEST(c, eptr);
 1447       {
 1448       int chartype, script;
 1449       int category = _pcre_ucp_findprop(c, &chartype, &script);
 1450 
 1451       switch(ecode[1])
 1452         {
 1453         case PT_ANY:
 1454         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
 1455         break;
 1456 
 1457         case PT_LAMP:
 1458         if ((chartype == ucp_Lu ||
 1459              chartype == ucp_Ll ||
 1460              chartype == ucp_Lt) == (op == OP_NOTPROP))
 1461           RRETURN(MATCH_NOMATCH);
 1462          break;
 1463 
 1464         case PT_GC:
 1465         if ((ecode[2] != category) == (op == OP_PROP))
 1466           RRETURN(MATCH_NOMATCH);
 1467         break;
 1468 
 1469         case PT_PC:
 1470         if ((ecode[2] != chartype) == (op == OP_PROP))
 1471           RRETURN(MATCH_NOMATCH);
 1472         break;
 1473 
 1474         case PT_SC:
 1475         if ((ecode[2] != script) == (op == OP_PROP))
 1476           RRETURN(MATCH_NOMATCH);
 1477         break;
 1478 
 1479         default:
 1480         RRETURN(PCRE_ERROR_INTERNAL);
 1481         }
 1482 
 1483       ecode += 3;
 1484       }
 1485     break;
 1486 
 1487     /* Match an extended Unicode sequence. We will get here only if the support
 1488     is in the binary; otherwise a compile-time error occurs. */
 1489 
 1490     case OP_EXTUNI:
 1491     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1492     GETCHARINCTEST(c, eptr);
 1493       {
 1494       int chartype, script;
 1495       int category = _pcre_ucp_findprop(c, &chartype, &script);
 1496       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
 1497       while (eptr < md->end_subject)
 1498         {
 1499         int len = 1;
 1500         if (!utf8) c = *eptr; else
 1501           {
 1502           GETCHARLEN(c, eptr, len);
 1503           }
 1504         category = _pcre_ucp_findprop(c, &chartype, &script);
 1505         if (category != ucp_M) break;
 1506         eptr += len;
 1507         }
 1508       }
 1509     ecode++;
 1510     break;
 1511 #endif
 1512 
 1513 
 1514     /* Match a back reference, possibly repeatedly. Look past the end of the
 1515     item to see if there is repeat information following. The code is similar
 1516     to that for character classes, but repeated for efficiency. Then obey
 1517     similar code to character type repeats - written out again for speed.
 1518     However, if the referenced string is the empty string, always treat
 1519     it as matched, any number of times (otherwise there could be infinite
 1520     loops). */
 1521 
 1522     case OP_REF:
 1523       {
 1524       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
 1525       ecode += 3;                                 /* Advance past item */
 1526 
 1527       /* If the reference is unset, set the length to be longer than the amount
 1528       of subject left; this ensures that every attempt at a match fails. We
 1529       can't just fail here, because of the possibility of quantifiers with zero
 1530       minima. */
 1531 
 1532       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
 1533         md->end_subject - eptr + 1 :
 1534         md->offset_vector[offset+1] - md->offset_vector[offset];
 1535 
 1536       /* Set up for repetition, or handle the non-repeated case */
 1537 
 1538       switch (*ecode)
 1539         {
 1540         case OP_CRSTAR:
 1541         case OP_CRMINSTAR:
 1542         case OP_CRPLUS:
 1543         case OP_CRMINPLUS:
 1544         case OP_CRQUERY:
 1545         case OP_CRMINQUERY:
 1546         c = *ecode++ - OP_CRSTAR;
 1547         minimize = (c & 1) != 0;
 1548         min = rep_min[c];                 /* Pick up values from tables; */
 1549         max = rep_max[c];                 /* zero for max => infinity */
 1550         if (max == 0) max = INT_MAX;
 1551         break;
 1552 
 1553         case OP_CRRANGE:
 1554         case OP_CRMINRANGE:
 1555         minimize = (*ecode == OP_CRMINRANGE);
 1556         min = GET2(ecode, 1);
 1557         max = GET2(ecode, 3);
 1558         if (max == 0) max = INT_MAX;
 1559         ecode += 5;
 1560         break;
 1561 
 1562         default:               /* No repeat follows */
 1563         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
 1564         eptr += length;
 1565         continue;              /* With the main loop */
 1566         }
 1567 
 1568       /* If the length of the reference is zero, just continue with the
 1569       main loop. */
 1570 
 1571       if (length == 0) continue;
 1572 
 1573       /* First, ensure the minimum number of matches are present. We get back
 1574       the length of the reference string explicitly rather than passing the
 1575       address of eptr, so that eptr can be a register variable. */
 1576 
 1577       for (i = 1; i <= min; i++)
 1578         {
 1579         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
 1580         eptr += length;
 1581         }
 1582 
 1583       /* If min = max, continue at the same level without recursion.
 1584       They are not both allowed to be zero. */
 1585 
 1586       if (min == max) continue;
 1587 
 1588       /* If minimizing, keep trying and advancing the pointer */
 1589 
 1590       if (minimize)
 1591         {
 1592         for (fi = min;; fi++)
 1593           {
 1594           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1595           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1596           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
 1597             RRETURN(MATCH_NOMATCH);
 1598           eptr += length;
 1599           }
 1600         /* Control never gets here */
 1601         }
 1602 
 1603       /* If maximizing, find the longest string and work backwards */
 1604 
 1605       else
 1606         {
 1607         pp = eptr;
 1608         for (i = min; i < max; i++)
 1609           {
 1610           if (!match_ref(offset, eptr, length, md, ims)) break;
 1611           eptr += length;
 1612           }
 1613         while (eptr >= pp)
 1614           {
 1615           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1616           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1617           eptr -= length;
 1618           }
 1619         RRETURN(MATCH_NOMATCH);
 1620         }
 1621       }
 1622     /* Control never gets here */
 1623 
 1624 
 1625 
 1626     /* Match a bit-mapped character class, possibly repeatedly. This op code is
 1627     used when all the characters in the class have values in the range 0-255,
 1628     and either the matching is caseful, or the characters are in the range
 1629     0-127 when UTF-8 processing is enabled. The only difference between
 1630     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
 1631     encountered.
 1632 
 1633     First, look past the end of the item to see if there is repeat information
 1634     following. Then obey similar code to character type repeats - written out
 1635     again for speed. */
 1636 
 1637     case OP_NCLASS:
 1638     case OP_CLASS:
 1639       {
 1640       data = ecode + 1;                /* Save for matching */
 1641       ecode += 33;                     /* Advance past the item */
 1642 
 1643       switch (*ecode)
 1644         {
 1645         case OP_CRSTAR:
 1646         case OP_CRMINSTAR:
 1647         case OP_CRPLUS:
 1648         case OP_CRMINPLUS:
 1649         case OP_CRQUERY:
 1650         case OP_CRMINQUERY:
 1651         c = *ecode++ - OP_CRSTAR;
 1652         minimize = (c & 1) != 0;
 1653         min = rep_min[c];                 /* Pick up values from tables; */
 1654         max = rep_max[c];                 /* zero for max => infinity */
 1655         if (max == 0) max = INT_MAX;
 1656         break;
 1657 
 1658         case OP_CRRANGE:
 1659         case OP_CRMINRANGE:
 1660         minimize = (*ecode == OP_CRMINRANGE);
 1661         min = GET2(ecode, 1);
 1662         max = GET2(ecode, 3);
 1663         if (max == 0) max = INT_MAX;
 1664         ecode += 5;
 1665         break;
 1666 
 1667         default:               /* No repeat follows */
 1668         min = max = 1;
 1669         break;
 1670         }
 1671 
 1672       /* First, ensure the minimum number of matches are present. */
 1673 
 1674 #ifdef SUPPORT_UTF8
 1675       /* UTF-8 mode */
 1676       if (utf8)
 1677         {
 1678         for (i = 1; i <= min; i++)
 1679           {
 1680           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1681           GETCHARINC(c, eptr);
 1682           if (c > 255)
 1683             {
 1684             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
 1685             }
 1686           else
 1687             {
 1688             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 1689             }
 1690           }
 1691         }
 1692       else
 1693 #endif
 1694       /* Not UTF-8 mode */
 1695         {
 1696         for (i = 1; i <= min; i++)
 1697           {
 1698           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1699           c = *eptr++;
 1700           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 1701           }
 1702         }
 1703 
 1704       /* If max == min we can continue with the main loop without the
 1705       need to recurse. */
 1706 
 1707       if (min == max) continue;
 1708 
 1709       /* If minimizing, keep testing the rest of the expression and advancing
 1710       the pointer while it matches the class. */
 1711 
 1712       if (minimize)
 1713         {
 1714 #ifdef SUPPORT_UTF8
 1715         /* UTF-8 mode */
 1716         if (utf8)
 1717           {
 1718           for (fi = min;; fi++)
 1719             {
 1720             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1721             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1722             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1723             GETCHARINC(c, eptr);
 1724             if (c > 255)
 1725               {
 1726               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
 1727               }
 1728             else
 1729               {
 1730               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 1731               }
 1732             }
 1733           }
 1734         else
 1735 #endif
 1736         /* Not UTF-8 mode */
 1737           {
 1738           for (fi = min;; fi++)
 1739             {
 1740             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1741             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1742             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1743             c = *eptr++;
 1744             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 1745             }
 1746           }
 1747         /* Control never gets here */
 1748         }
 1749 
 1750       /* If maximizing, find the longest possible run, then work backwards. */
 1751 
 1752       else
 1753         {
 1754         pp = eptr;
 1755 
 1756 #ifdef SUPPORT_UTF8
 1757         /* UTF-8 mode */
 1758         if (utf8)
 1759           {
 1760           for (i = min; i < max; i++)
 1761             {
 1762             int len = 1;
 1763             if (eptr >= md->end_subject) break;
 1764             GETCHARLEN(c, eptr, len);
 1765             if (c > 255)
 1766               {
 1767               if (op == OP_CLASS) break;
 1768               }
 1769             else
 1770               {
 1771               if ((data[c/8] & (1 << (c&7))) == 0) break;
 1772               }
 1773             eptr += len;
 1774             }
 1775           for (;;)
 1776             {
 1777             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1778             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1779             if (eptr-- == pp) break;        /* Stop if tried at original pos */
 1780             BACKCHAR(eptr);
 1781             }
 1782           }
 1783         else
 1784 #endif
 1785           /* Not UTF-8 mode */
 1786           {
 1787           for (i = min; i < max; i++)
 1788             {
 1789             if (eptr >= md->end_subject) break;
 1790             c = *eptr;
 1791             if ((data[c/8] & (1 << (c&7))) == 0) break;
 1792             eptr++;
 1793             }
 1794           while (eptr >= pp)
 1795             {
 1796             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1797             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1798             eptr--;
 1799             }
 1800           }
 1801 
 1802         RRETURN(MATCH_NOMATCH);
 1803         }
 1804       }
 1805     /* Control never gets here */
 1806 
 1807 
 1808     /* Match an extended character class. This opcode is encountered only
 1809     in UTF-8 mode, because that's the only time it is compiled. */
 1810 
 1811 #ifdef SUPPORT_UTF8
 1812     case OP_XCLASS:
 1813       {
 1814       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
 1815       ecode += GET(ecode, 1);                      /* Advance past the item */
 1816 
 1817       switch (*ecode)
 1818         {
 1819         case OP_CRSTAR:
 1820         case OP_CRMINSTAR:
 1821         case OP_CRPLUS:
 1822         case OP_CRMINPLUS:
 1823         case OP_CRQUERY:
 1824         case OP_CRMINQUERY:
 1825         c = *ecode++ - OP_CRSTAR;
 1826         minimize = (c & 1) != 0;
 1827         min = rep_min[c];                 /* Pick up values from tables; */
 1828         max = rep_max[c];                 /* zero for max => infinity */
 1829         if (max == 0) max = INT_MAX;
 1830         break;
 1831 
 1832         case OP_CRRANGE:
 1833         case OP_CRMINRANGE:
 1834         minimize = (*ecode == OP_CRMINRANGE);
 1835         min = GET2(ecode, 1);
 1836         max = GET2(ecode, 3);
 1837         if (max == 0) max = INT_MAX;
 1838         ecode += 5;
 1839         break;
 1840 
 1841         default:               /* No repeat follows */
 1842         min = max = 1;
 1843         break;
 1844         }
 1845 
 1846       /* First, ensure the minimum number of matches are present. */
 1847 
 1848       for (i = 1; i <= min; i++)
 1849         {
 1850         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1851         GETCHARINC(c, eptr);
 1852         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
 1853         }
 1854 
 1855       /* If max == min we can continue with the main loop without the
 1856       need to recurse. */
 1857 
 1858       if (min == max) continue;
 1859 
 1860       /* If minimizing, keep testing the rest of the expression and advancing
 1861       the pointer while it matches the class. */
 1862 
 1863       if (minimize)
 1864         {
 1865         for (fi = min;; fi++)
 1866           {
 1867           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1868           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1869           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 1870           GETCHARINC(c, eptr);
 1871           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
 1872           }
 1873         /* Control never gets here */
 1874         }
 1875 
 1876       /* If maximizing, find the longest possible run, then work backwards. */
 1877 
 1878       else
 1879         {
 1880         pp = eptr;
 1881         for (i = min; i < max; i++)
 1882           {
 1883           int len = 1;
 1884           if (eptr >= md->end_subject) break;
 1885           GETCHARLEN(c, eptr, len);
 1886           if (!_pcre_xclass(c, data)) break;
 1887           eptr += len;
 1888           }
 1889         for(;;)
 1890           {
 1891           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 1892           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1893           if (eptr-- == pp) break;        /* Stop if tried at original pos */
 1894           BACKCHAR(eptr)
 1895           }
 1896         RRETURN(MATCH_NOMATCH);
 1897         }
 1898 
 1899       /* Control never gets here */
 1900       }
 1901 #endif    /* End of XCLASS */
 1902 
 1903     /* Match a single character, casefully */
 1904 
 1905     case OP_CHAR:
 1906 #ifdef SUPPORT_UTF8
 1907     if (utf8)
 1908       {
 1909       length = 1;
 1910       ecode++;
 1911       GETCHARLEN(fc, ecode, length);
 1912       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
 1913       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
 1914       }
 1915     else
 1916 #endif
 1917 
 1918     /* Non-UTF-8 mode */
 1919       {
 1920       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
 1921       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
 1922       ecode += 2;
 1923       }
 1924     break;
 1925 
 1926     /* Match a single character, caselessly */
 1927 
 1928     case OP_CHARNC:
 1929 #ifdef SUPPORT_UTF8
 1930     if (utf8)
 1931       {
 1932       length = 1;
 1933       ecode++;
 1934       GETCHARLEN(fc, ecode, length);
 1935 
 1936       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
 1937 
 1938       /* If the pattern character's value is < 128, we have only one byte, and
 1939       can use the fast lookup table. */
 1940 
 1941       if (fc < 128)
 1942         {
 1943         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
 1944         }
 1945 
 1946       /* Otherwise we must pick up the subject character */
 1947 
 1948       else
 1949         {
 1950         unsigned int dc;
 1951         GETCHARINC(dc, eptr);
 1952         ecode += length;
 1953 
 1954         /* If we have Unicode property support, we can use it to test the other
 1955         case of the character, if there is one. */
 1956 
 1957         if (fc != dc)
 1958           {
 1959 #ifdef SUPPORT_UCP
 1960           if (dc != _pcre_ucp_othercase(fc))
 1961 #endif
 1962             RRETURN(MATCH_NOMATCH);
 1963           }
 1964         }
 1965       }
 1966     else
 1967 #endif   /* SUPPORT_UTF8 */
 1968 
 1969     /* Non-UTF-8 mode */
 1970       {
 1971       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
 1972       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
 1973       ecode += 2;
 1974       }
 1975     break;
 1976 
 1977     /* Match a single character repeatedly. */
 1978 
 1979     case OP_EXACT:
 1980     min = max = GET2(ecode, 1);
 1981     ecode += 3;
 1982     goto REPEATCHAR;
 1983 
 1984     case OP_POSUPTO:
 1985     possessive = TRUE;
 1986     /* Fall through */
 1987 
 1988     case OP_UPTO:
 1989     case OP_MINUPTO:
 1990     min = 0;
 1991     max = GET2(ecode, 1);
 1992     minimize = *ecode == OP_MINUPTO;
 1993     ecode += 3;
 1994     goto REPEATCHAR;
 1995 
 1996     case OP_POSSTAR:
 1997     possessive = TRUE;
 1998     min = 0;
 1999     max = INT_MAX;
 2000     ecode++;
 2001     goto REPEATCHAR;
 2002 
 2003     case OP_POSPLUS:
 2004     possessive = TRUE;
 2005     min = 1;
 2006     max = INT_MAX;
 2007     ecode++;
 2008     goto REPEATCHAR;
 2009 
 2010     case OP_POSQUERY:
 2011     possessive = TRUE;
 2012     min = 0;
 2013     max = 1;
 2014     ecode++;
 2015     goto REPEATCHAR;
 2016 
 2017     case OP_STAR:
 2018     case OP_MINSTAR:
 2019     case OP_PLUS:
 2020     case OP_MINPLUS:
 2021     case OP_QUERY:
 2022     case OP_MINQUERY:
 2023     c = *ecode++ - OP_STAR;
 2024     minimize = (c & 1) != 0;
 2025     min = rep_min[c];                 /* Pick up values from tables; */
 2026     max = rep_max[c];                 /* zero for max => infinity */
 2027     if (max == 0) max = INT_MAX;
 2028 
 2029     /* Common code for all repeated single-character matches. We can give
 2030     up quickly if there are fewer than the minimum number of characters left in
 2031     the subject. */
 2032 
 2033     REPEATCHAR:
 2034 #ifdef SUPPORT_UTF8
 2035     if (utf8)
 2036       {
 2037       length = 1;
 2038       charptr = ecode;
 2039       GETCHARLEN(fc, ecode, length);
 2040       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
 2041       ecode += length;
 2042 
 2043       /* Handle multibyte character matching specially here. There is
 2044       support for caseless matching if UCP support is present. */
 2045 
 2046       if (length > 1)
 2047         {
 2048         int oclength = 0;
 2049         uschar occhars[8];
 2050 
 2051 #ifdef SUPPORT_UCP
 2052         unsigned int othercase;
 2053         if ((ims & PCRE_CASELESS) != 0 &&
 2054             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
 2055           oclength = _pcre_ord2utf8(othercase, occhars);
 2056 #endif  /* SUPPORT_UCP */
 2057 
 2058         for (i = 1; i <= min; i++)
 2059           {
 2060           if (memcmp(eptr, charptr, length) == 0) eptr += length;
 2061           /* Need braces because of following else */
 2062           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
 2063           else
 2064             {
 2065             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
 2066             eptr += oclength;
 2067             }
 2068           }
 2069 
 2070         if (min == max) continue;
 2071 
 2072         if (minimize)
 2073           {
 2074           for (fi = min;; fi++)
 2075             {
 2076             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2077             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2078             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2079             if (memcmp(eptr, charptr, length) == 0) eptr += length;
 2080             /* Need braces because of following else */
 2081             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
 2082             else
 2083               {
 2084               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
 2085               eptr += oclength;
 2086               }
 2087             }
 2088           /* Control never gets here */
 2089           }
 2090 
 2091         else  /* Maximize */
 2092           {
 2093           pp = eptr;
 2094           for (i = min; i < max; i++)
 2095             {
 2096             if (eptr > md->end_subject - length) break;
 2097             if (memcmp(eptr, charptr, length) == 0) eptr += length;
 2098             else if (oclength == 0) break;
 2099             else
 2100               {
 2101               if (memcmp(eptr, occhars, oclength) != 0) break;
 2102               eptr += oclength;
 2103               }
 2104             }
 2105 
 2106           if (possessive) continue;
 2107           while (eptr >= pp)
 2108            {
 2109            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2110            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2111            eptr -= length;
 2112            }
 2113           RRETURN(MATCH_NOMATCH);
 2114           }
 2115         /* Control never gets here */
 2116         }
 2117 
 2118       /* If the length of a UTF-8 character is 1, we fall through here, and
 2119       obey the code as for non-UTF-8 characters below, though in this case the
 2120       value of fc will always be < 128. */
 2121       }
 2122     else
 2123 #endif  /* SUPPORT_UTF8 */
 2124 
 2125     /* When not in UTF-8 mode, load a single-byte character. */
 2126       {
 2127       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
 2128       fc = *ecode++;
 2129       }
 2130 
 2131     /* The value of fc at this point is always less than 256, though we may or
 2132     may not be in UTF-8 mode. The code is duplicated for the caseless and
 2133     caseful cases, for speed, since matching characters is likely to be quite
 2134     common. First, ensure the minimum number of matches are present. If min =
 2135     max, continue at the same level without recursing. Otherwise, if
 2136     minimizing, keep trying the rest of the expression and advancing one
 2137     matching character if failing, up to the maximum. Alternatively, if
 2138     maximizing, find the maximum number of characters and work backwards. */
 2139 
 2140     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
 2141       max, eptr));
 2142 
 2143     if ((ims & PCRE_CASELESS) != 0)
 2144       {
 2145       fc = md->lcc[fc];
 2146       for (i = 1; i <= min; i++)
 2147         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
 2148       if (min == max) continue;
 2149       if (minimize)
 2150         {
 2151         for (fi = min;; fi++)
 2152           {
 2153           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2154           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2155           if (fi >= max || eptr >= md->end_subject ||
 2156               fc != md->lcc[*eptr++])
 2157             RRETURN(MATCH_NOMATCH);
 2158           }
 2159         /* Control never gets here */
 2160         }
 2161       else  /* Maximize */
 2162         {
 2163         pp = eptr;
 2164         for (i = min; i < max; i++)
 2165           {
 2166           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
 2167           eptr++;
 2168           }
 2169         if (possessive) continue;
 2170         while (eptr >= pp)
 2171           {
 2172           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2173           eptr--;
 2174           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2175           }
 2176         RRETURN(MATCH_NOMATCH);
 2177         }
 2178       /* Control never gets here */
 2179       }
 2180 
 2181     /* Caseful comparisons (includes all multi-byte characters) */
 2182 
 2183     else
 2184       {
 2185       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
 2186       if (min == max) continue;
 2187       if (minimize)
 2188         {
 2189         for (fi = min;; fi++)
 2190           {
 2191           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2192           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2193           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
 2194             RRETURN(MATCH_NOMATCH);
 2195           }
 2196         /* Control never gets here */
 2197         }
 2198       else  /* Maximize */
 2199         {
 2200         pp = eptr;
 2201         for (i = min; i < max; i++)
 2202           {
 2203           if (eptr >= md->end_subject || fc != *eptr) break;
 2204           eptr++;
 2205           }
 2206         if (possessive) continue;
 2207         while (eptr >= pp)
 2208           {
 2209           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2210           eptr--;
 2211           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2212           }
 2213         RRETURN(MATCH_NOMATCH);
 2214         }
 2215       }
 2216     /* Control never gets here */
 2217 
 2218     /* Match a negated single one-byte pattern character. The subject
 2219     character it is compared against can be multibyte. */
 2220 
 2221     case OP_NOT:
 2222     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2223     ecode++;
 2224     GETCHARINCTEST(c, eptr);
 2225     if ((ims & PCRE_CASELESS) != 0)
 2226       {
 2227 #ifdef SUPPORT_UTF8
 2228       if (c < 256)
 2229 #endif
 2230       c = md->lcc[c];
 2231       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
 2232       }
 2233     else
 2234       {
 2235       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
 2236       }
 2237     break;
 2238 
 2239     /* Match a negated single one-byte character repeatedly. This is almost a
 2240     repeat of the code for a repeated single character, but I haven't found a
 2241     nice way of commoning these up that doesn't require a test of the
 2242     positive/negative option for each character match. Maybe that wouldn't add
 2243     very much to the time taken, but character matching *is* what this is all
 2244     about... */
 2245 
 2246     case OP_NOTEXACT:
 2247     min = max = GET2(ecode, 1);
 2248     ecode += 3;
 2249     goto REPEATNOTCHAR;
 2250 
 2251     case OP_NOTUPTO:
 2252     case OP_NOTMINUPTO:
 2253     min = 0;
 2254     max = GET2(ecode, 1);
 2255     minimize = *ecode == OP_NOTMINUPTO;
 2256     ecode += 3;
 2257     goto REPEATNOTCHAR;
 2258 
 2259     case OP_NOTPOSSTAR:
 2260     possessive = TRUE;
 2261     min = 0;
 2262     max = INT_MAX;
 2263     ecode++;
 2264     goto REPEATNOTCHAR;
 2265 
 2266     case OP_NOTPOSPLUS:
 2267     possessive = TRUE;
 2268     min = 1;
 2269     max = INT_MAX;
 2270     ecode++;
 2271     goto REPEATNOTCHAR;
 2272 
 2273     case OP_NOTPOSQUERY:
 2274     possessive = TRUE;
 2275     min = 0;
 2276     max = 1;
 2277     ecode++;
 2278     goto REPEATNOTCHAR;
 2279 
 2280     case OP_NOTPOSUPTO:
 2281     possessive = TRUE;
 2282     min = 0;
 2283     max = GET2(ecode, 1);
 2284     ecode += 3;
 2285     goto REPEATNOTCHAR;
 2286 
 2287     case OP_NOTSTAR:
 2288     case OP_NOTMINSTAR:
 2289     case OP_NOTPLUS:
 2290     case OP_NOTMINPLUS:
 2291     case OP_NOTQUERY:
 2292     case OP_NOTMINQUERY:
 2293     c = *ecode++ - OP_NOTSTAR;
 2294     minimize = (c & 1) != 0;
 2295     min = rep_min[c];                 /* Pick up values from tables; */
 2296     max = rep_max[c];                 /* zero for max => infinity */
 2297     if (max == 0) max = INT_MAX;
 2298 
 2299     /* Common code for all repeated single-byte matches. We can give up quickly
 2300     if there are fewer than the minimum number of bytes left in the
 2301     subject. */
 2302 
 2303     REPEATNOTCHAR:
 2304     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
 2305     fc = *ecode++;
 2306 
 2307     /* The code is duplicated for the caseless and caseful cases, for speed,
 2308     since matching characters is likely to be quite common. First, ensure the
 2309     minimum number of matches are present. If min = max, continue at the same
 2310     level without recursing. Otherwise, if minimizing, keep trying the rest of
 2311     the expression and advancing one matching character if failing, up to the
 2312     maximum. Alternatively, if maximizing, find the maximum number of
 2313     characters and work backwards. */
 2314 
 2315     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
 2316       max, eptr));
 2317 
 2318     if ((ims & PCRE_CASELESS) != 0)
 2319       {
 2320       fc = md->lcc[fc];
 2321 
 2322 #ifdef SUPPORT_UTF8
 2323       /* UTF-8 mode */
 2324       if (utf8)
 2325         {
 2326         register unsigned int d;
 2327         for (i = 1; i <= min; i++)
 2328           {
 2329           GETCHARINC(d, eptr);
 2330           if (d < 256) d = md->lcc[d];
 2331           if (fc == d) RRETURN(MATCH_NOMATCH);
 2332           }
 2333         }
 2334       else
 2335 #endif
 2336 
 2337       /* Not UTF-8 mode */
 2338         {
 2339         for (i = 1; i <= min; i++)
 2340           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
 2341         }
 2342 
 2343       if (min == max) continue;
 2344 
 2345       if (minimize)
 2346         {
 2347 #ifdef SUPPORT_UTF8
 2348         /* UTF-8 mode */
 2349         if (utf8)
 2350           {
 2351           register unsigned int d;
 2352           for (fi = min;; fi++)
 2353             {
 2354             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2355             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2356             GETCHARINC(d, eptr);
 2357             if (d < 256) d = md->lcc[d];
 2358             if (fi >= max || eptr >= md->end_subject || fc == d)
 2359               RRETURN(MATCH_NOMATCH);
 2360             }
 2361           }
 2362         else
 2363 #endif
 2364         /* Not UTF-8 mode */
 2365           {
 2366           for (fi = min;; fi++)
 2367             {
 2368             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2369             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2370             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
 2371               RRETURN(MATCH_NOMATCH);
 2372             }
 2373           }
 2374         /* Control never gets here */
 2375         }
 2376 
 2377       /* Maximize case */
 2378 
 2379       else
 2380         {
 2381         pp = eptr;
 2382 
 2383 #ifdef SUPPORT_UTF8
 2384         /* UTF-8 mode */
 2385         if (utf8)
 2386           {
 2387           register unsigned int d;
 2388           for (i = min; i < max; i++)
 2389             {
 2390             int len = 1;
 2391             if (eptr >= md->end_subject) break;
 2392             GETCHARLEN(d, eptr, len);
 2393             if (d < 256) d = md->lcc[d];
 2394             if (fc == d) break;
 2395             eptr += len;
 2396             }
 2397         if (possessive) continue;
 2398         for(;;)
 2399             {
 2400             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2401             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2402             if (eptr-- == pp) break;        /* Stop if tried at original pos */
 2403             BACKCHAR(eptr);
 2404             }
 2405           }
 2406         else
 2407 #endif
 2408         /* Not UTF-8 mode */
 2409           {
 2410           for (i = min; i < max; i++)
 2411             {
 2412             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
 2413             eptr++;
 2414             }
 2415           if (possessive) continue;
 2416           while (eptr >= pp)
 2417             {
 2418             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2419             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2420             eptr--;
 2421             }
 2422           }
 2423 
 2424         RRETURN(MATCH_NOMATCH);
 2425         }
 2426       /* Control never gets here */
 2427       }
 2428 
 2429     /* Caseful comparisons */
 2430 
 2431     else
 2432       {
 2433 #ifdef SUPPORT_UTF8
 2434       /* UTF-8 mode */
 2435       if (utf8)
 2436         {
 2437         register unsigned int d;
 2438         for (i = 1; i <= min; i++)
 2439           {
 2440           GETCHARINC(d, eptr);
 2441           if (fc == d) RRETURN(MATCH_NOMATCH);
 2442           }
 2443         }
 2444       else
 2445 #endif
 2446       /* Not UTF-8 mode */
 2447         {
 2448         for (i = 1; i <= min; i++)
 2449           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
 2450         }
 2451 
 2452       if (min == max) continue;
 2453 
 2454       if (minimize)
 2455         {
 2456 #ifdef SUPPORT_UTF8
 2457         /* UTF-8 mode */
 2458         if (utf8)
 2459           {
 2460           register unsigned int d;
 2461           for (fi = min;; fi++)
 2462             {
 2463             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2464             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2465             GETCHARINC(d, eptr);
 2466             if (fi >= max || eptr >= md->end_subject || fc == d)
 2467               RRETURN(MATCH_NOMATCH);
 2468             }
 2469           }
 2470         else
 2471 #endif
 2472         /* Not UTF-8 mode */
 2473           {
 2474           for (fi = min;; fi++)
 2475             {
 2476             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2477             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2478             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
 2479               RRETURN(MATCH_NOMATCH);
 2480             }
 2481           }
 2482         /* Control never gets here */
 2483         }
 2484 
 2485       /* Maximize case */
 2486 
 2487       else
 2488         {
 2489         pp = eptr;
 2490 
 2491 #ifdef SUPPORT_UTF8
 2492         /* UTF-8 mode */
 2493         if (utf8)
 2494           {
 2495           register unsigned int d;
 2496           for (i = min; i < max; i++)
 2497             {
 2498             int len = 1;
 2499             if (eptr >= md->end_subject) break;
 2500             GETCHARLEN(d, eptr, len);
 2501             if (fc == d) break;
 2502             eptr += len;
 2503             }
 2504           if (possessive) continue;
 2505           for(;;)
 2506             {
 2507             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2508             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2509             if (eptr-- == pp) break;        /* Stop if tried at original pos */
 2510             BACKCHAR(eptr);
 2511             }
 2512           }
 2513         else
 2514 #endif
 2515         /* Not UTF-8 mode */
 2516           {
 2517           for (i = min; i < max; i++)
 2518             {
 2519             if (eptr >= md->end_subject || fc == *eptr) break;
 2520             eptr++;
 2521             }
 2522           if (possessive) continue;
 2523           while (eptr >= pp)
 2524             {
 2525             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2526             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2527             eptr--;
 2528             }
 2529           }
 2530 
 2531         RRETURN(MATCH_NOMATCH);
 2532         }
 2533       }
 2534     /* Control never gets here */
 2535 
 2536     /* Match a single character type repeatedly; several different opcodes
 2537     share code. This is very similar to the code for single characters, but we
 2538     repeat it in the interests of efficiency. */
 2539 
 2540     case OP_TYPEEXACT:
 2541     min = max = GET2(ecode, 1);
 2542     minimize = TRUE;
 2543     ecode += 3;
 2544     goto REPEATTYPE;
 2545 
 2546     case OP_TYPEUPTO:
 2547     case OP_TYPEMINUPTO:
 2548     min = 0;
 2549     max = GET2(ecode, 1);
 2550     minimize = *ecode == OP_TYPEMINUPTO;
 2551     ecode += 3;
 2552     goto REPEATTYPE;
 2553 
 2554     case OP_TYPEPOSSTAR:
 2555     possessive = TRUE;
 2556     min = 0;
 2557     max = INT_MAX;
 2558     ecode++;
 2559     goto REPEATTYPE;
 2560 
 2561     case OP_TYPEPOSPLUS:
 2562     possessive = TRUE;
 2563     min = 1;
 2564     max = INT_MAX;
 2565     ecode++;
 2566     goto REPEATTYPE;
 2567 
 2568     case OP_TYPEPOSQUERY:
 2569     possessive = TRUE;
 2570     min = 0;
 2571     max = 1;
 2572     ecode++;
 2573     goto REPEATTYPE;
 2574 
 2575     case OP_TYPEPOSUPTO:
 2576     possessive = TRUE;
 2577     min = 0;
 2578     max = GET2(ecode, 1);
 2579     ecode += 3;
 2580     goto REPEATTYPE;
 2581 
 2582     case OP_TYPESTAR:
 2583     case OP_TYPEMINSTAR:
 2584     case OP_TYPEPLUS:
 2585     case OP_TYPEMINPLUS:
 2586     case OP_TYPEQUERY:
 2587     case OP_TYPEMINQUERY:
 2588     c = *ecode++ - OP_TYPESTAR;
 2589     minimize = (c & 1) != 0;
 2590     min = rep_min[c];                 /* Pick up values from tables; */
 2591     max = rep_max[c];                 /* zero for max => infinity */
 2592     if (max == 0) max = INT_MAX;
 2593 
 2594     /* Common code for all repeated single character type matches. Note that
 2595     in UTF-8 mode, '.' matches a character of any length, but for the other
 2596     character types, the valid characters are all one-byte long. */
 2597 
 2598     REPEATTYPE:
 2599     ctype = *ecode++;      /* Code for the character type */
 2600 
 2601 #ifdef SUPPORT_UCP
 2602     if (ctype == OP_PROP || ctype == OP_NOTPROP)
 2603       {
 2604       prop_fail_result = ctype == OP_NOTPROP;
 2605       prop_type = *ecode++;
 2606       prop_value = *ecode++;
 2607       }
 2608     else prop_type = -1;
 2609 #endif
 2610 
 2611     /* First, ensure the minimum number of matches are present. Use inline
 2612     code for maximizing the speed, and do the type test once at the start
 2613     (i.e. keep it out of the loop). Also we can test that there are at least
 2614     the minimum number of bytes before we start. This isn't as effective in
 2615     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
 2616     is tidier. Also separate the UCP code, which can be the same for both UTF-8
 2617     and single-bytes. */
 2618 
 2619     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
 2620     if (min > 0)
 2621       {
 2622 #ifdef SUPPORT_UCP
 2623       if (prop_type >= 0)
 2624         {
 2625         switch(prop_type)
 2626           {
 2627           case PT_ANY:
 2628           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
 2629           for (i = 1; i <= min; i++)
 2630             {
 2631             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2632             GETCHARINC(c, eptr);
 2633             }
 2634           break;
 2635 
 2636           case PT_LAMP:
 2637           for (i = 1; i <= min; i++)
 2638             {
 2639             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2640             GETCHARINC(c, eptr);
 2641             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2642             if ((prop_chartype == ucp_Lu ||
 2643                  prop_chartype == ucp_Ll ||
 2644                  prop_chartype == ucp_Lt) == prop_fail_result)
 2645               RRETURN(MATCH_NOMATCH);
 2646             }
 2647           break;
 2648 
 2649           case PT_GC:
 2650           for (i = 1; i <= min; i++)
 2651             {
 2652             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2653             GETCHARINC(c, eptr);
 2654             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2655             if ((prop_category == prop_value) == prop_fail_result)
 2656               RRETURN(MATCH_NOMATCH);
 2657             }
 2658           break;
 2659 
 2660           case PT_PC:
 2661           for (i = 1; i <= min; i++)
 2662             {
 2663             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2664             GETCHARINC(c, eptr);
 2665             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2666             if ((prop_chartype == prop_value) == prop_fail_result)
 2667               RRETURN(MATCH_NOMATCH);
 2668             }
 2669           break;
 2670 
 2671           case PT_SC:
 2672           for (i = 1; i <= min; i++)
 2673             {
 2674             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2675             GETCHARINC(c, eptr);
 2676             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2677             if ((prop_script == prop_value) == prop_fail_result)
 2678               RRETURN(MATCH_NOMATCH);
 2679             }
 2680           break;
 2681 
 2682           default:
 2683           RRETURN(PCRE_ERROR_INTERNAL);
 2684           }
 2685         }
 2686 
 2687       /* Match extended Unicode sequences. We will get here only if the
 2688       support is in the binary; otherwise a compile-time error occurs. */
 2689 
 2690       else if (ctype == OP_EXTUNI)
 2691         {
 2692         for (i = 1; i <= min; i++)
 2693           {
 2694           GETCHARINCTEST(c, eptr);
 2695           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2696           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
 2697           while (eptr < md->end_subject)
 2698             {
 2699             int len = 1;
 2700             if (!utf8) c = *eptr; else
 2701               {
 2702               GETCHARLEN(c, eptr, len);
 2703               }
 2704             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2705             if (prop_category != ucp_M) break;
 2706             eptr += len;
 2707             }
 2708           }
 2709         }
 2710 
 2711       else
 2712 #endif     /* SUPPORT_UCP */
 2713 
 2714 /* Handle all other cases when the coding is UTF-8 */
 2715 
 2716 #ifdef SUPPORT_UTF8
 2717       if (utf8) switch(ctype)
 2718         {
 2719         case OP_ANY:
 2720         for (i = 1; i <= min; i++)
 2721           {
 2722           if (eptr >= md->end_subject ||
 2723                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
 2724             RRETURN(MATCH_NOMATCH);
 2725           eptr++;
 2726           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 2727           }
 2728         break;
 2729 
 2730         case OP_ANYBYTE:
 2731         eptr += min;
 2732         break;
 2733 
 2734         case OP_ANYNL:
 2735         for (i = 1; i <= min; i++)
 2736           {
 2737           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2738           GETCHARINC(c, eptr);
 2739           switch(c)
 2740             {
 2741             default: RRETURN(MATCH_NOMATCH);
 2742             case 0x000d:
 2743             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
 2744             break;
 2745             case 0x000a:
 2746             case 0x000b:
 2747             case 0x000c:
 2748             case 0x0085:
 2749             case 0x2028:
 2750             case 0x2029:
 2751             break;
 2752             }
 2753           }
 2754         break;
 2755 
 2756         case OP_NOT_DIGIT:
 2757         for (i = 1; i <= min; i++)
 2758           {
 2759           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2760           GETCHARINC(c, eptr);
 2761           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
 2762             RRETURN(MATCH_NOMATCH);
 2763           }
 2764         break;
 2765 
 2766         case OP_DIGIT:
 2767         for (i = 1; i <= min; i++)
 2768           {
 2769           if (eptr >= md->end_subject ||
 2770              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
 2771             RRETURN(MATCH_NOMATCH);
 2772           /* No need to skip more bytes - we know it's a 1-byte character */
 2773           }
 2774         break;
 2775 
 2776         case OP_NOT_WHITESPACE:
 2777         for (i = 1; i <= min; i++)
 2778           {
 2779           if (eptr >= md->end_subject ||
 2780              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
 2781             RRETURN(MATCH_NOMATCH);
 2782           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 2783           }
 2784         break;
 2785 
 2786         case OP_WHITESPACE:
 2787         for (i = 1; i <= min; i++)
 2788           {
 2789           if (eptr >= md->end_subject ||
 2790              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
 2791             RRETURN(MATCH_NOMATCH);
 2792           /* No need to skip more bytes - we know it's a 1-byte character */
 2793           }
 2794         break;
 2795 
 2796         case OP_NOT_WORDCHAR:
 2797         for (i = 1; i <= min; i++)
 2798           {
 2799           if (eptr >= md->end_subject ||
 2800              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
 2801             RRETURN(MATCH_NOMATCH);
 2802           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 2803           }
 2804         break;
 2805 
 2806         case OP_WORDCHAR:
 2807         for (i = 1; i <= min; i++)
 2808           {
 2809           if (eptr >= md->end_subject ||
 2810              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
 2811             RRETURN(MATCH_NOMATCH);
 2812           /* No need to skip more bytes - we know it's a 1-byte character */
 2813           }
 2814         break;
 2815 
 2816         default:
 2817         RRETURN(PCRE_ERROR_INTERNAL);
 2818         }  /* End switch(ctype) */
 2819 
 2820       else
 2821 #endif     /* SUPPORT_UTF8 */
 2822 
 2823       /* Code for the non-UTF-8 case for minimum matching of operators other
 2824       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
 2825       number of bytes present, as this was tested above. */
 2826 
 2827       switch(ctype)
 2828         {
 2829         case OP_ANY:
 2830         if ((ims & PCRE_DOTALL) == 0)
 2831           {
 2832           for (i = 1; i <= min; i++)
 2833             {
 2834             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
 2835             eptr++;
 2836             }
 2837           }
 2838         else eptr += min;
 2839         break;
 2840 
 2841         case OP_ANYBYTE:
 2842         eptr += min;
 2843         break;
 2844 
 2845         /* Because of the CRLF case, we can't assume the minimum number of
 2846         bytes are present in this case. */
 2847 
 2848         case OP_ANYNL:
 2849         for (i = 1; i <= min; i++)
 2850           {
 2851           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2852           switch(*eptr++)
 2853             {
 2854             default: RRETURN(MATCH_NOMATCH);
 2855             case 0x000d:
 2856             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
 2857             break;
 2858             case 0x000a:
 2859             case 0x000b:
 2860             case 0x000c:
 2861             case 0x0085:
 2862             break;
 2863             }
 2864           }
 2865         break;
 2866 
 2867         case OP_NOT_DIGIT:
 2868         for (i = 1; i <= min; i++)
 2869           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
 2870         break;
 2871 
 2872         case OP_DIGIT:
 2873         for (i = 1; i <= min; i++)
 2874           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
 2875         break;
 2876 
 2877         case OP_NOT_WHITESPACE:
 2878         for (i = 1; i <= min; i++)
 2879           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
 2880         break;
 2881 
 2882         case OP_WHITESPACE:
 2883         for (i = 1; i <= min; i++)
 2884           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
 2885         break;
 2886 
 2887         case OP_NOT_WORDCHAR:
 2888         for (i = 1; i <= min; i++)
 2889           if ((md->ctypes[*eptr++] & ctype_word) != 0)
 2890             RRETURN(MATCH_NOMATCH);
 2891         break;
 2892 
 2893         case OP_WORDCHAR:
 2894         for (i = 1; i <= min; i++)
 2895           if ((md->ctypes[*eptr++] & ctype_word) == 0)
 2896             RRETURN(MATCH_NOMATCH);
 2897         break;
 2898 
 2899         default:
 2900         RRETURN(PCRE_ERROR_INTERNAL);
 2901         }
 2902       }
 2903 
 2904     /* If min = max, continue at the same level without recursing */
 2905 
 2906     if (min == max) continue;
 2907 
 2908     /* If minimizing, we have to test the rest of the pattern before each
 2909     subsequent match. Again, separate the UTF-8 case for speed, and also
 2910     separate the UCP cases. */
 2911 
 2912     if (minimize)
 2913       {
 2914 #ifdef SUPPORT_UCP
 2915       if (prop_type >= 0)
 2916         {
 2917         switch(prop_type)
 2918           {
 2919           case PT_ANY:
 2920           for (fi = min;; fi++)
 2921             {
 2922             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2923             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2924             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2925             GETCHARINC(c, eptr);
 2926             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
 2927             }
 2928           /* Control never gets here */
 2929 
 2930           case PT_LAMP:
 2931           for (fi = min;; fi++)
 2932             {
 2933             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2934             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2935             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2936             GETCHARINC(c, eptr);
 2937             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2938             if ((prop_chartype == ucp_Lu ||
 2939                  prop_chartype == ucp_Ll ||
 2940                  prop_chartype == ucp_Lt) == prop_fail_result)
 2941               RRETURN(MATCH_NOMATCH);
 2942             }
 2943           /* Control never gets here */
 2944 
 2945           case PT_GC:
 2946           for (fi = min;; fi++)
 2947             {
 2948             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2949             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2950             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2951             GETCHARINC(c, eptr);
 2952             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2953             if ((prop_category == prop_value) == prop_fail_result)
 2954               RRETURN(MATCH_NOMATCH);
 2955             }
 2956           /* Control never gets here */
 2957 
 2958           case PT_PC:
 2959           for (fi = min;; fi++)
 2960             {
 2961             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2962             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2963             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2964             GETCHARINC(c, eptr);
 2965             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2966             if ((prop_chartype == prop_value) == prop_fail_result)
 2967               RRETURN(MATCH_NOMATCH);
 2968             }
 2969           /* Control never gets here */
 2970 
 2971           case PT_SC:
 2972           for (fi = min;; fi++)
 2973             {
 2974             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2975             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2976             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2977             GETCHARINC(c, eptr);
 2978             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 2979             if ((prop_script == prop_value) == prop_fail_result)
 2980               RRETURN(MATCH_NOMATCH);
 2981             }
 2982           /* Control never gets here */
 2983 
 2984           default:
 2985           RRETURN(PCRE_ERROR_INTERNAL);
 2986           }
 2987         }
 2988 
 2989       /* Match extended Unicode sequences. We will get here only if the
 2990       support is in the binary; otherwise a compile-time error occurs. */
 2991 
 2992       else if (ctype == OP_EXTUNI)
 2993         {
 2994         for (fi = min;; fi++)
 2995           {
 2996           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 2997           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2998           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
 2999           GETCHARINCTEST(c, eptr);
 3000           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3001           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
 3002           while (eptr < md->end_subject)
 3003             {
 3004             int len = 1;
 3005             if (!utf8) c = *eptr; else
 3006               {
 3007               GETCHARLEN(c, eptr, len);
 3008               }
 3009             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3010             if (prop_category != ucp_M) break;
 3011             eptr += len;
 3012             }
 3013           }
 3014         }
 3015 
 3016       else
 3017 #endif     /* SUPPORT_UCP */
 3018 
 3019 #ifdef SUPPORT_UTF8
 3020       /* UTF-8 mode */
 3021       if (utf8)
 3022         {
 3023         for (fi = min;; fi++)
 3024           {
 3025           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 3026           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3027           if (fi >= max || eptr >= md->end_subject ||
 3028                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
 3029                 IS_NEWLINE(eptr)))
 3030             RRETURN(MATCH_NOMATCH);
 3031 
 3032           GETCHARINC(c, eptr);
 3033           switch(ctype)
 3034             {
 3035             case OP_ANY:        /* This is the DOTALL case */
 3036             break;
 3037 
 3038             case OP_ANYBYTE:
 3039             break;
 3040 
 3041             case OP_ANYNL:
 3042             switch(c)
 3043               {
 3044               default: RRETURN(MATCH_NOMATCH);
 3045               case 0x000d:
 3046               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
 3047               break;
 3048               case 0x000a:
 3049               case 0x000b:
 3050               case 0x000c:
 3051               case 0x0085:
 3052               case 0x2028:
 3053               case 0x2029:
 3054               break;
 3055               }
 3056             break;
 3057 
 3058             case OP_NOT_DIGIT:
 3059             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
 3060               RRETURN(MATCH_NOMATCH);
 3061             break;
 3062 
 3063             case OP_DIGIT:
 3064             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
 3065               RRETURN(MATCH_NOMATCH);
 3066             break;
 3067 
 3068             case OP_NOT_WHITESPACE:
 3069             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
 3070               RRETURN(MATCH_NOMATCH);
 3071             break;
 3072 
 3073             case OP_WHITESPACE:
 3074             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
 3075               RRETURN(MATCH_NOMATCH);
 3076             break;
 3077 
 3078             case OP_NOT_WORDCHAR:
 3079             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
 3080               RRETURN(MATCH_NOMATCH);
 3081             break;
 3082 
 3083             case OP_WORDCHAR:
 3084             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
 3085               RRETURN(MATCH_NOMATCH);
 3086             break;
 3087 
 3088             default:
 3089             RRETURN(PCRE_ERROR_INTERNAL);
 3090             }
 3091           }
 3092         }
 3093       else
 3094 #endif
 3095       /* Not UTF-8 mode */
 3096         {
 3097         for (fi = min;; fi++)
 3098           {
 3099           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 3100           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3101           if (fi >= max || eptr >= md->end_subject ||
 3102                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
 3103             RRETURN(MATCH_NOMATCH);
 3104 
 3105           c = *eptr++;
 3106           switch(ctype)
 3107             {
 3108             case OP_ANY:   /* This is the DOTALL case */
 3109             break;
 3110 
 3111             case OP_ANYBYTE:
 3112             break;
 3113 
 3114             case OP_ANYNL:
 3115             switch(c)
 3116               {
 3117               default: RRETURN(MATCH_NOMATCH);
 3118               case 0x000d:
 3119               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
 3120               break;
 3121               case 0x000a:
 3122               case 0x000b:
 3123               case 0x000c:
 3124               case 0x0085:
 3125               break;
 3126               }
 3127             break;
 3128 
 3129             case OP_NOT_DIGIT:
 3130             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
 3131             break;
 3132 
 3133             case OP_DIGIT:
 3134             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
 3135             break;
 3136 
 3137             case OP_NOT_WHITESPACE:
 3138             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
 3139             break;
 3140 
 3141             case OP_WHITESPACE:
 3142             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
 3143             break;
 3144 
 3145             case OP_NOT_WORDCHAR:
 3146             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
 3147             break;
 3148 
 3149             case OP_WORDCHAR:
 3150             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
 3151             break;
 3152 
 3153             default:
 3154             RRETURN(PCRE_ERROR_INTERNAL);
 3155             }
 3156           }
 3157         }
 3158       /* Control never gets here */
 3159       }
 3160 
 3161     /* If maximizing, it is worth using inline code for speed, doing the type
 3162     test once at the start (i.e. keep it out of the loop). Again, keep the
 3163     UTF-8 and UCP stuff separate. */
 3164 
 3165     else
 3166       {
 3167       pp = eptr;  /* Remember where we started */
 3168 
 3169 #ifdef SUPPORT_UCP
 3170       if (prop_type >= 0)
 3171         {
 3172         switch(prop_type)
 3173           {
 3174           case PT_ANY:
 3175           for (i = min; i < max; i++)
 3176             {
 3177             int len = 1;
 3178             if (eptr >= md->end_subject) break;
 3179             GETCHARLEN(c, eptr, len);
 3180             if (prop_fail_result) break;
 3181             eptr+= len;
 3182             }
 3183           break;
 3184 
 3185           case PT_LAMP:
 3186           for (i = min; i < max; i++)
 3187             {
 3188             int len = 1;
 3189             if (eptr >= md->end_subject) break;
 3190             GETCHARLEN(c, eptr, len);
 3191             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3192             if ((prop_chartype == ucp_Lu ||
 3193                  prop_chartype == ucp_Ll ||
 3194                  prop_chartype == ucp_Lt) == prop_fail_result)
 3195               break;
 3196             eptr+= len;
 3197             }
 3198           break;
 3199 
 3200           case PT_GC:
 3201           for (i = min; i < max; i++)
 3202             {
 3203             int len = 1;
 3204             if (eptr >= md->end_subject) break;
 3205             GETCHARLEN(c, eptr, len);
 3206             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3207             if ((prop_category == prop_value) == prop_fail_result)
 3208               break;
 3209             eptr+= len;
 3210             }
 3211           break;
 3212 
 3213           case PT_PC:
 3214           for (i = min; i < max; i++)
 3215             {
 3216             int len = 1;
 3217             if (eptr >= md->end_subject) break;
 3218             GETCHARLEN(c, eptr, len);
 3219             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3220             if ((prop_chartype == prop_value) == prop_fail_result)
 3221               break;
 3222             eptr+= len;
 3223             }
 3224           break;
 3225 
 3226           case PT_SC:
 3227           for (i = min; i < max; i++)
 3228             {
 3229             int len = 1;
 3230             if (eptr >= md->end_subject) break;
 3231             GETCHARLEN(c, eptr, len);
 3232             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3233             if ((prop_script == prop_value) == prop_fail_result)
 3234               break;
 3235             eptr+= len;
 3236             }
 3237           break;
 3238           }
 3239 
 3240         /* eptr is now past the end of the maximum run */
 3241 
 3242         if (possessive) continue;
 3243         for(;;)
 3244           {
 3245           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 3246           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3247           if (eptr-- == pp) break;        /* Stop if tried at original pos */
 3248           BACKCHAR(eptr);
 3249           }
 3250         }
 3251 
 3252       /* Match extended Unicode sequences. We will get here only if the
 3253       support is in the binary; otherwise a compile-time error occurs. */
 3254 
 3255       else if (ctype == OP_EXTUNI)
 3256         {
 3257         for (i = min; i < max; i++)
 3258           {
 3259           if (eptr >= md->end_subject) break;
 3260           GETCHARINCTEST(c, eptr);
 3261           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3262           if (prop_category == ucp_M) break;
 3263           while (eptr < md->end_subject)
 3264             {
 3265             int len = 1;
 3266             if (!utf8) c = *eptr; else
 3267               {
 3268               GETCHARLEN(c, eptr, len);
 3269               }
 3270             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3271             if (prop_category != ucp_M) break;
 3272             eptr += len;
 3273             }
 3274           }
 3275 
 3276         /* eptr is now past the end of the maximum run */
 3277 
 3278         if (possessive) continue;
 3279         for(;;)
 3280           {
 3281           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 3282           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3283           if (eptr-- == pp) break;        /* Stop if tried at original pos */
 3284           for (;;)                        /* Move back over one extended */
 3285             {
 3286             int len = 1;
 3287             BACKCHAR(eptr);
 3288             if (!utf8) c = *eptr; else
 3289               {
 3290               GETCHARLEN(c, eptr, len);
 3291               }
 3292             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
 3293             if (prop_category != ucp_M) break;
 3294             eptr--;
 3295             }
 3296           }
 3297         }
 3298 
 3299       else
 3300 #endif   /* SUPPORT_UCP */
 3301 
 3302 #ifdef SUPPORT_UTF8
 3303       /* UTF-8 mode */
 3304 
 3305       if (utf8)
 3306         {
 3307         switch(ctype)
 3308           {
 3309           case OP_ANY:
 3310 
 3311           /* Special code is required for UTF8, but when the maximum is
 3312           unlimited we don't need it, so we repeat the non-UTF8 code. This is
 3313           probably worth it, because .* is quite a common idiom. */
 3314 
 3315           if (max < INT_MAX)
 3316             {
 3317             if ((ims & PCRE_DOTALL) == 0)
 3318               {
 3319               for (i = min; i < max; i++)
 3320                 {
 3321                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
 3322                 eptr++;
 3323                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 3324                 }
 3325               }
 3326             else
 3327               {
 3328               for (i = min; i < max; i++)
 3329                 {
 3330                 if (eptr >= md->end_subject) break;
 3331                 eptr++;
 3332                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 3333                 }
 3334               }
 3335             }
 3336 
 3337           /* Handle unlimited UTF-8 repeat */
 3338 
 3339           else
 3340             {
 3341             if ((ims & PCRE_DOTALL) == 0)
 3342               {
 3343               for (i = min; i < max; i++)
 3344                 {
 3345                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
 3346                 eptr++;
 3347                 }
 3348               break;
 3349               }
 3350             else
 3351               {
 3352               c = max - min;
 3353               if (c > (unsigned int)(md->end_subject - eptr))
 3354                 c = md->end_subject - eptr;
 3355               eptr += c;
 3356               }
 3357             }
 3358           break;
 3359 
 3360           /* The byte case is the same as non-UTF8 */
 3361 
 3362           case OP_ANYBYTE:
 3363           c = max - min;
 3364           if (c > (unsigned int)(md->end_subject - eptr))
 3365             c = md->end_subject - eptr;
 3366           eptr += c;
 3367           break;
 3368 
 3369           case OP_ANYNL:
 3370           for (i = min; i < max; i++)
 3371             {
 3372             int len = 1;
 3373             if (eptr >= md->end_subject) break;
 3374             GETCHARLEN(c, eptr, len);
 3375             if (c == 0x000d)
 3376               {
 3377               if (++eptr >= md->end_subject) break;
 3378               if (*eptr == 0x000a) eptr++;
 3379               }
 3380             else
 3381               {
 3382               if (c != 0x000a && c != 0x000b && c != 0x000c &&
 3383                   c != 0x0085 && c != 0x2028 && c != 0x2029)
 3384                 break;
 3385               eptr += len;
 3386               }
 3387             }
 3388           break;
 3389 
 3390           case OP_NOT_DIGIT:
 3391           for (i = min; i < max; i++)
 3392             {
 3393             int len = 1;
 3394             if (eptr >= md->end_subject) break;
 3395             GETCHARLEN(c, eptr, len);
 3396             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
 3397             eptr+= len;
 3398             }
 3399           break;
 3400 
 3401           case OP_DIGIT:
 3402           for (i = min; i < max; i++)
 3403             {
 3404             int len = 1;
 3405             if (eptr >= md->end_subject) break;
 3406             GETCHARLEN(c, eptr, len);
 3407             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
 3408             eptr+= len;
 3409             }
 3410           break;
 3411 
 3412           case OP_NOT_WHITESPACE:
 3413           for (i = min; i < max; i++)
 3414             {
 3415             int len = 1;
 3416             if (eptr >= md->end_subject) break;
 3417             GETCHARLEN(c, eptr, len);
 3418             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
 3419             eptr+= len;
 3420             }
 3421           break;
 3422 
 3423           case OP_WHITESPACE:
 3424           for (i = min; i < max; i++)
 3425             {
 3426             int len = 1;
 3427             if (eptr >= md->end_subject) break;
 3428             GETCHARLEN(c, eptr, len);
 3429             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
 3430             eptr+= len;
 3431             }
 3432           break;
 3433 
 3434           case OP_NOT_WORDCHAR:
 3435           for (i = min; i < max; i++)
 3436             {
 3437             int len = 1;
 3438             if (eptr >= md->end_subject) break;
 3439             GETCHARLEN(c, eptr, len);
 3440             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
 3441             eptr+= len;
 3442             }
 3443           break;
 3444 
 3445           case OP_WORDCHAR:
 3446           for (i = min; i < max; i++)
 3447             {
 3448             int len = 1;
 3449             if (eptr >= md->end_subject) break;
 3450             GETCHARLEN(c, eptr, len);
 3451             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
 3452             eptr+= len;
 3453             }
 3454           break;
 3455 
 3456           default:
 3457           RRETURN(PCRE_ERROR_INTERNAL);
 3458           }
 3459 
 3460         /* eptr is now past the end of the maximum run */
 3461 
 3462         if (possessive) continue;
 3463         for(;;)
 3464           {
 3465           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 3466           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3467           if (eptr-- == pp) break;        /* Stop if tried at original pos */
 3468           BACKCHAR(eptr);
 3469           }
 3470         }
 3471       else
 3472 #endif
 3473 
 3474       /* Not UTF-8 mode */
 3475         {
 3476         switch(ctype)
 3477           {
 3478           case OP_ANY:
 3479           if ((ims & PCRE_DOTALL) == 0)
 3480             {
 3481             for (i = min; i < max; i++)
 3482               {
 3483               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
 3484               eptr++;
 3485               }
 3486             break;
 3487             }
 3488           /* For DOTALL case, fall through and treat as \C */
 3489 
 3490           case OP_ANYBYTE:
 3491           c = max - min;
 3492           if (c > (unsigned int)(md->end_subject - eptr))
 3493             c = md->end_subject - eptr;
 3494           eptr += c;
 3495           break;
 3496 
 3497           case OP_ANYNL:
 3498           for (i = min; i < max; i++)
 3499             {
 3500             if (eptr >= md->end_subject) break;
 3501             c = *eptr;
 3502             if (c == 0x000d)
 3503               {
 3504               if (++eptr >= md->end_subject) break;
 3505               if (*eptr == 0x000a) eptr++;
 3506               }
 3507             else
 3508               {
 3509               if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
 3510                 break;
 3511               eptr++;
 3512               }
 3513             }
 3514           break;
 3515 
 3516           case OP_NOT_DIGIT:
 3517           for (i = min; i < max; i++)
 3518             {
 3519             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
 3520               break;
 3521             eptr++;
 3522             }
 3523           break;
 3524 
 3525           case OP_DIGIT:
 3526           for (i = min; i < max; i++)
 3527             {
 3528             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
 3529               break;
 3530             eptr++;
 3531             }
 3532           break;
 3533 
 3534           case OP_NOT_WHITESPACE:
 3535           for (i = min; i < max; i++)
 3536             {
 3537             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
 3538               break;
 3539             eptr++;
 3540             }
 3541           break;
 3542 
 3543           case OP_WHITESPACE:
 3544           for (i = min; i < max; i++)
 3545             {
 3546             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
 3547               break;
 3548             eptr++;
 3549             }
 3550           break;
 3551 
 3552           case OP_NOT_WORDCHAR:
 3553           for (i = min; i < max; i++)
 3554             {
 3555             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
 3556               break;
 3557             eptr++;
 3558             }
 3559           break;
 3560 
 3561           case OP_WORDCHAR:
 3562           for (i = min; i < max; i++)
 3563             {
 3564             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
 3565               break;
 3566             eptr++;
 3567             }
 3568           break;
 3569 
 3570           default:
 3571           RRETURN(PCRE_ERROR_INTERNAL);
 3572           }
 3573 
 3574         /* eptr is now past the end of the maximum run */
 3575 
 3576         if (possessive) continue;
 3577         while (eptr >= pp)
 3578           {
 3579           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
 3580           eptr--;
 3581           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3582           }
 3583         }
 3584 
 3585       /* Get here if we can't make it match with any permitted repetitions */
 3586 
 3587       RRETURN(MATCH_NOMATCH);
 3588       }
 3589     /* Control never gets here */
 3590 
 3591     /* There's been some horrible disaster. Arrival here can only mean there is
 3592     something seriously wrong in the code above or the OP_xxx definitions. */
 3593 
 3594     default:
 3595     DPRINTF(("Unknown opcode %d\n", *ecode));
 3596     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
 3597     }
 3598 
 3599   /* Do not stick any code in here without much thought; it is assumed
 3600   that "continue" in the code above comes out to here to repeat the main
 3601   loop. */
 3602 
 3603   }             /* End of main loop */
 3604 /* Control never reaches here */
 3605 }
 3606 
 3607 
 3608 /***************************************************************************
 3609 ****************************************************************************
 3610                    RECURSION IN THE match() FUNCTION
 3611 
 3612 Undefine all the macros that were defined above to handle this. */
 3613 
 3614 #ifdef NO_RECURSE
      /* These names are macros only when NO_RECURSE is defined. pcre_exec(), which
      follows, uses several of them (length, ims, flags, pp) as ordinary
      identifiers, so the macros must not survive past this point. */
 3615 #undef eptr
 3616 #undef ecode
 3617 #undef offset_top
 3618 #undef ims
 3619 #undef eptrb
 3620 #undef flags
 3621 
 3622 #undef callpat
 3623 #undef charptr
 3624 #undef data
 3625 #undef next
 3626 #undef pp
 3627 #undef prev
 3628 #undef saved_eptr
 3629 
 3630 #undef new_recursive
 3631 
 3632 #undef cur_is_word
 3633 #undef condition
 3634 #undef prev_is_word
 3635 
 3636 #undef original_ims
 3637 
 3638 #undef ctype
 3639 #undef length
 3640 #undef max
 3641 #undef min
 3642 #undef number
 3643 #undef offset
 3644 #undef op
 3645 #undef save_capture_last
 3646 #undef save_offset1
 3647 #undef save_offset2
 3648 #undef save_offset3
 3649 #undef stacksave
 3650 
 3651 #undef newptrb
 3652 
 3653 #endif  /* NO_RECURSE */
 3654 
 3655 /* These two are defined as macros in both cases */
 3656 
 3657 #undef fc
 3658 #undef fi
 3659 
 3660 /***************************************************************************
 3661 ***************************************************************************/
 3662 
 3663 
 3664 
 3665 /*************************************************
 3666 *         Execute a Regular Expression           *
 3667 *************************************************/
 3668 
 3669 /* This function applies a compiled re to a subject string and picks out
 3670 portions of the string if it matches. Two elements in the vector are set for
 3671 each substring: the offsets to the start and end of the substring.
 3672 
      Matching is attempted at successive start positions (the "bumpalong" loop),
      beginning at subject + start_offset, until match() returns something other
      than MATCH_NOMATCH, the pattern is anchored, the start position has passed
      the end of the subject, or (with PCRE_FIRSTLINE) an attempt that started at
      a newline has failed.

 3673 Arguments:
 3674   argument_re     points to the compiled expression
 3675   extra_data      points to extra data or is NULL
 3676   subject         points to the subject string
 3677   length          length of subject string (may contain binary zeros)
 3678   start_offset    where to start in the subject string
 3679   options         option bits
 3680   offsets         points to a vector of ints to be filled in with offsets
                        (may be NULL only when offsetcount is 0)
 3681   offsetcount     the number of elements in the vector
 3682 
 3683 Returns:          > 0 => success; value is the number of captured strings
                               (md->end_offset_top/2)
 3684                   = 0 => success, but offsets is not big enough (overflow
                               was flagged, or offsetcount < 2)
 3685                    -1 => failed to match
 3686                  < -1 => some kind of unexpected problem

      Negative values produced directly by this function:
        PCRE_ERROR_BADOPTION       options has bits outside PUBLIC_EXEC_OPTIONS
        PCRE_ERROR_NULL            argument_re or subject is NULL, or offsets is
                                   NULL while offsetcount > 0
        PCRE_ERROR_BADCOUNT        offsetcount < 0
        PCRE_ERROR_BADMAGIC        wrong magic number and _pcre_try_flipped()
                                   returned NULL
        PCRE_ERROR_BADNEWLINE      unrecognized combination of newline bits
        PCRE_ERROR_BADPARTIAL      PCRE_PARTIAL requested but the pattern is
                                   flagged PCRE_NOPARTIAL
        PCRE_ERROR_BADUTF8         (SUPPORT_UTF8) subject failed _pcre_valid_utf8()
        PCRE_ERROR_BADUTF8_OFFSET  (SUPPORT_UTF8) start_offset points at a
                                   10xxxxxx byte
        PCRE_ERROR_NOMEMORY        pcre_malloc() failed for the temporary vector
        PCRE_ERROR_PARTIAL         no match, PCRE_PARTIAL set and md->hitend set
        PCRE_ERROR_NOMATCH         no match at any permitted start position
      Any other value returned by match() is passed back unchanged.

      NOTE(review): length and start_offset are not range-checked here; the code
      relies on the caller passing 0 <= start_offset <= length.
 3687 */
 3688 
 3689 PCRE_DATA_SCOPE int
 3690 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
 3691   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
 3692   int offsetcount)
 3693 {
 3694 int rc, resetcount, ocount;
 3695 int first_byte = -1;      /* < 0 means "no known first byte" */
 3696 int req_byte = -1;        /* < 0 means "no known required byte" */
 3697 int req_byte2 = -1;       /* case-flipped req_byte, taken from the fcc table */
 3698 int newline;
 3699 unsigned long int ims;
 3700 BOOL using_temporary_offsets = FALSE;   /* TRUE once md->offset_vector is malloc'd */
 3701 BOOL anchored;
 3702 BOOL startline;
 3703 BOOL firstline;
 3704 BOOL first_byte_caseless = FALSE;
 3705 BOOL req_byte_caseless = FALSE;
 3706 BOOL utf8;
 3707 match_data match_block;
 3708 match_data *md = &match_block;
 3709 const uschar *tables;
 3710 const uschar *start_bits = NULL;
 3711 USPTR start_match = (USPTR)subject + start_offset;
 3712 USPTR end_subject;
      /* Place where req_byte was last found. It starts one before start_match so
      that the "p > req_byte_ptr" test in the loop below cannot suppress the very
      first search. */
 3713 USPTR req_byte_ptr = start_match - 1;
 3714 eptrblock eptrchain[EPTR_WORK_SIZE];
 3715 
 3716 pcre_study_data internal_study;
 3717 const pcre_study_data *study;
 3718 
 3719 real_pcre internal_re;
 3720 const real_pcre *external_re = (const real_pcre *)argument_re;
 3721 const real_pcre *re = external_re;
 3722 
 3723 /* Plausibility checks */
 3724 
 3725 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
 3726 if (re == NULL || subject == NULL ||
 3727    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 3728 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 3729 
 3730 /* Fish out the optional data from the extra_data structure, first setting
 3731 the default values. */
 3732 
 3733 study = NULL;
 3734 md->match_limit = MATCH_LIMIT;
 3735 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
 3736 md->callout_data = NULL;
 3737 
 3738 /* The table pointer is always in native byte order. */
 3739 
 3740 tables = external_re->tables;
 3741 
 3742 if (extra_data != NULL)
 3743   {
      /* Each field of extra_data is used only if its flag bit is set. */
 3744   register unsigned int flags = extra_data->flags;
 3745   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
 3746     study = (const pcre_study_data *)extra_data->study_data;
 3747   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
 3748     md->match_limit = extra_data->match_limit;
 3749   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
 3750     md->match_limit_recursion = extra_data->match_limit_recursion;
 3751   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
 3752     md->callout_data = extra_data->callout_data;
 3753   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
 3754   }
 3755 
 3756 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
 3757 is a feature that makes it possible to save compiled regex and re-use them
 3758 in other programs later. */
 3759 
 3760 if (tables == NULL) tables = _pcre_default_tables;
 3761 
 3762 /* Check that the first field in the block is the magic number. If it is not,
 3763 test for a regex that was compiled on a host of opposite endianness. If this is
 3764 the case, flipped values are put in internal_re and internal_study if there was
 3765 study data too. */
 3766 
 3767 if (re->magic_number != MAGIC_NUMBER)
 3768   {
 3769   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
 3770   if (re == NULL) return PCRE_ERROR_BADMAGIC;
 3771   if (study != NULL) study = &internal_study;
 3772   }
 3773 
 3774 /* Set up other data */
 3775 
 3776 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;   /* compile or run time */
 3777 startline = (re->options & PCRE_STARTLINE) != 0;
 3778 firstline = (re->options & PCRE_FIRSTLINE) != 0;
 3779 
 3780 /* The code starts after the real_pcre block and the capture name table. */
 3781 
      /* The base address is external_re (the block as supplied by the caller);
      the offsets are read through re, which may be the byte-flipped copy held in
      internal_re. */
 3782 md->start_code = (const uschar *)external_re + re->name_table_offset +
 3783   re->name_count * re->name_entry_size;
 3784 
 3785 md->start_subject = (USPTR)subject;
 3786 md->start_offset = start_offset;
 3787 md->end_subject = md->start_subject + length;
 3788 end_subject = md->end_subject;
 3789 
 3790 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 3791 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
 3792 
 3793 md->notbol = (options & PCRE_NOTBOL) != 0;
 3794 md->noteol = (options & PCRE_NOTEOL) != 0;
 3795 md->notempty = (options & PCRE_NOTEMPTY) != 0;
 3796 md->partial = (options & PCRE_PARTIAL) != 0;
 3797 md->hitend = FALSE;
 3798 
 3799 md->recursive = NULL;                   /* No recursion at top level */
 3800 md->eptrchain = eptrchain;              /* Make workspace generally available */
 3801 
 3802 md->lcc = tables + lcc_offset;
 3803 md->ctypes = tables + ctypes_offset;
 3804 
 3805 /* Handle different types of newline. The two bits give four cases. If nothing
 3806 is set at run time, whatever was used at compile time applies. */
 3807 
      /* The local "newline" is either -1 (any newline), a single byte value, or
      two bytes packed as (first << 8) | second; it is unpacked into md->nl[] and
      md->nllen just below. */
 3808 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
 3809        PCRE_NEWLINE_BITS)
 3810   {
 3811   case 0: newline = NEWLINE; break;   /* Compile-time default */
 3812   case PCRE_NEWLINE_CR: newline = '\r'; break;
 3813   case PCRE_NEWLINE_LF: newline = '\n'; break;
 3814   case PCRE_NEWLINE_CR+
 3815        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
 3816   case PCRE_NEWLINE_ANY: newline = -1; break;
 3817   default: return PCRE_ERROR_BADNEWLINE;
 3818   }
 3819 
 3820 if (newline < 0)
 3821   {
 3822   md->nltype = NLTYPE_ANY;      /* md->nllen and md->nl[] are not set in this case */
 3823   }
 3824 else
 3825   {
 3826   md->nltype = NLTYPE_FIXED;
 3827   if (newline > 255)            /* two bytes packed into one int */
 3828     {
 3829     md->nllen = 2;
 3830     md->nl[0] = (newline >> 8) & 255;
 3831     md->nl[1] = newline & 255;
 3832     }
 3833   else
 3834     {
 3835     md->nllen = 1;
 3836     md->nl[0] = newline;
 3837     }
 3838   }
 3839 
 3840 /* Partial matching is supported only for a restricted set of regexes at the
 3841 moment. */
 3842 
 3843 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
 3844   return PCRE_ERROR_BADPARTIAL;
 3845 
 3846 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
 3847 back the character offset. */
 3848 
 3849 #ifdef SUPPORT_UTF8
 3850 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
 3851   {
      /* Any non-negative result is treated as "invalid". NOTE(review): presumably
      the value is the offset of the offending byte - confirm against
      _pcre_valid_utf8(). */
 3852   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
 3853     return PCRE_ERROR_BADUTF8;
 3854   if (start_offset > 0 && start_offset < length)
 3855     {
 3856     int tb = ((uschar *)subject)[start_offset];
 3857     if (tb > 127)
 3858       {
      /* Top two bits of a byte >= 0x80 are either 10 or 11; 10xxxxxx (0x80 after
      masking) is the same byte pattern the bumpalong loop skips over in UTF-8
      mode, so starting there is rejected. */
 3859       tb &= 0xc0;
 3860       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
 3861       }
 3862     }
 3863   }
 3864 #endif
 3865 
 3866 /* The ims options can vary during the matching as a result of the presence
 3867 of (?ims) items in the pattern. They are kept in a local variable so that
 3868 restoring at the exit of a group is easy. */
 3869 
 3870 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
 3871 
 3872 /* If the expression has got more back references than the offsets supplied can
 3873 hold, we get a temporary chunk of working store to use during the matching.
 3874 Otherwise, we can use the vector supplied, rounding down its size to a multiple
 3875 of 3. */
 3876 
 3877 ocount = offsetcount - (offsetcount % 3);
 3878 
 3879 if (re->top_backref > 0 && re->top_backref >= ocount/3)
 3880   {
 3881   ocount = re->top_backref * 3 + 3;
 3882   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
 3883   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
 3884   using_temporary_offsets = TRUE;       /* must be freed on every exit path below */
 3885   DPRINTF(("Got memory to hold back references\n"));
 3886   }
 3887 else md->offset_vector = offsets;
 3888 
      /* NOTE(review): offset_max is two-thirds of the vector; the top third looks
      like the per-extraction working area initialized below - confirm against
      match(). */
 3889 md->offset_end = ocount;
 3890 md->offset_max = (2*ocount)/3;
 3891 md->offset_overflow = FALSE;
 3892 md->capture_last = -1;
 3893 
 3894 /* Compute the minimum number of offsets that we need to reset each time. Doing
 3895 this makes a huge difference to execution time when there aren't many brackets
 3896 in the pattern. */
 3897 
 3898 resetcount = 2 + re->top_bracket * 2;
 3899 if (resetcount > offsetcount) resetcount = ocount;
 3900 
 3901 /* Reset the working variable associated with each extraction. These should
 3902 never be used unless previously set, but they get saved and restored, and so we
 3903 initialize them to avoid reading uninitialized locations. */
 3904 
 3905 if (md->offset_vector != NULL)
 3906   {
      /* Fill downwards from the last element of the vector. */
 3907   register int *iptr = md->offset_vector + ocount;
 3908   register int *iend = iptr - resetcount/2 + 1;
 3909   while (--iptr >= iend) *iptr = -1;
 3910   }
 3911 
 3912 /* Set up the first character to match, if available. The first_byte value is
 3913 never set for an anchored regular expression, but the anchoring may be forced
 3914 at run time, so we have to test for anchoring. The first char may be unset for
 3915 an unanchored pattern, of course. If there's no first char and the pattern was
 3916 studied, there may be a bitmap of possible first characters. */
 3917 
 3918 if (!anchored)
 3919   {
 3920   if ((re->options & PCRE_FIRSTSET) != 0)
 3921     {
      /* Low 8 bits hold the byte; REQ_CASELESS is a flag bit in the same field.
      For a caseless first byte the value is mapped through md->lcc so it can be
      compared with md->lcc[*start_match] in the loop below. */
 3922     first_byte = re->first_byte & 255;
 3923     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
 3924       first_byte = md->lcc[first_byte];
 3925     }
 3926   else
 3927     if (!startline && study != NULL &&
 3928       (study->options & PCRE_STUDY_MAPPED) != 0)
 3929         start_bits = study->start_bits;
 3930   }
 3931 
 3932 /* For anchored or unanchored matches, there may be a "last known required
 3933 character" set. */
 3934 
 3935 if ((re->options & PCRE_REQCHSET) != 0)
 3936   {
 3937   req_byte = re->req_byte & 255;
 3938   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
 3939   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
 3940   }
 3941 
 3942 
 3943 /* ==========================================================================*/
 3944 
 3945 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
 3946 the loop runs just once. */
 3947 
 3948 for(;;)
 3949   {
 3950   USPTR save_end_subject = end_subject;   /* restored after the start-point scan */
 3951 
 3952   /* Reset the maximum number of extractions we might see. */
 3953 
 3954   if (md->offset_vector != NULL)
 3955     {
 3956     register int *iptr = md->offset_vector;
 3957     register int *iend = iptr + resetcount;
 3958     while (iptr < iend) *iptr++ = -1;
 3959     }
 3960 
 3961   /* Advance to a unique first char if possible. If firstline is TRUE, the
 3962   start of the match is constrained to the first line of a multiline string.
 3963   That is, the match must be before or at the first newline. Implement this by
 3964   temporarily adjusting end_subject so that we stop scanning at a newline. If
 3965   the match fails at the newline, later code breaks this loop. */
 3966 
 3967   if (firstline)
 3968     {
 3969     USPTR t = start_match;
 3970     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
 3971     end_subject = t;
 3972     }
 3973 
 3974   /* Now test for a unique first byte */
 3975 
 3976   if (first_byte >= 0)
 3977     {
 3978     if (first_byte_caseless)
 3979       while (start_match < end_subject &&
 3980              md->lcc[*start_match] != first_byte)
 3981         start_match++;
 3982     else
 3983       while (start_match < end_subject && *start_match != first_byte)
 3984         start_match++;
 3985     }
 3986 
 3987   /* Or to just after a linebreak for a multiline match if possible */
 3988 
 3989   else if (startline)
 3990     {
      /* Only skip forward once start_match has been bumped past the initial
      position (subject + start_offset); the first attempt is made where the
      caller asked. */
 3991     if (start_match > md->start_subject + start_offset)
 3992       {
 3993       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
 3994         start_match++;
 3995       }
 3996     }
 3997 
 3998   /* Or to a non-unique first char after study */
 3999 
 4000   else if (start_bits != NULL)
 4001     {
 4002     while (start_match < end_subject)
 4003       {
      /* start_bits is a bitmap indexed by byte value: byte c/8, bit c&7. */
 4004       register unsigned int c = *start_match;
 4005       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
 4006       }
 4007     }
 4008 
 4009   /* Restore fudged end_subject */
 4010 
 4011   end_subject = save_end_subject;
 4012 
 4013 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
 4014   printf(">>>> Match against: ");
 4015   pchars(start_match, end_subject - start_match, TRUE, md);
 4016   printf("\n");
 4017 #endif
 4018 
 4019   /* If req_byte is set, we know that that character must appear in the subject
 4020   for the match to succeed. If the first character is set, req_byte must be
 4021   later in the subject; otherwise the test starts at the match point. This
 4022   optimization can save a huge amount of backtracking in patterns with nested
 4023   unlimited repeats that aren't going to match. Writing separate code for
 4024   cased/caseless versions makes it go faster, as does using an autoincrement
 4025   and backing off on a match.
 4026 
 4027   HOWEVER: when the subject string is very, very long, searching to its end can
 4028   take a long time, and give bad performance on quite ordinary patterns. This
 4029   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
 4030   string... so we don't do this when the string is sufficiently long.
 4031 
 4032   ALSO: this processing is disabled when partial matching is requested.
 4033   */
 4034 
 4035   if (req_byte >= 0 &&
 4036       end_subject - start_match < REQ_BYTE_MAX &&
 4037       !md->partial)
 4038     {
 4039     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
 4040 
 4041     /* We don't need to repeat the search if we haven't yet reached the
 4042     place we found it at last time. */
 4043 
 4044     if (p > req_byte_ptr)
 4045       {
 4046       if (req_byte_caseless)
 4047         {
 4048         while (p < end_subject)
 4049           {
 4050           register int pp = *p++;
      /* On a hit, undo the autoincrement so that p points at the byte found. */
 4051           if (pp == req_byte || pp == req_byte2) { p--; break; }
 4052           }
 4053         }
 4054       else
 4055         {
 4056         while (p < end_subject)
 4057           {
 4058           if (*p++ == req_byte) { p--; break; }
 4059           }
 4060         }
 4061 
 4062       /* If we can't find the required character, break the matching loop,
 4063       forcing a match failure. */
 4064 
 4065       if (p >= end_subject)
 4066         {
 4067         rc = MATCH_NOMATCH;
 4068         break;
 4069         }
 4070 
 4071       /* If we have found the required character, save the point where we
 4072       found it, so that we don't search again next time round the loop if
 4073       the start hasn't passed this character yet. */
 4074 
 4075       req_byte_ptr = p;
 4076       }
 4077     }
 4078 
 4079   /* OK, we can now run the match. */
 4080 
 4081   md->start_match = start_match;
 4082   md->match_call_count = 0;
 4083   md->eptrn = 0;                          /* Next free eptrchain slot */
 4084   rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
 4085 
 4086   /* Any return other than MATCH_NOMATCH breaks the loop. */
 4087 
 4088   if (rc != MATCH_NOMATCH) break;
 4089 
 4090   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
 4091   newline in the subject (though it may continue over the newline). Therefore,
 4092   if we have just failed to match, starting at a newline, do not continue. */
 4093 
 4094   if (firstline && IS_NEWLINE(start_match)) break;
 4095 
 4096   /* Advance the match position by one character. */
 4097 
 4098   start_match++;
 4099 #ifdef SUPPORT_UTF8
      /* In UTF-8 mode also step over any following 10xxxxxx bytes so that the
      next attempt does not start in the middle of a character. */
 4100   if (utf8)
 4101     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
 4102       start_match++;
 4103 #endif
 4104 
 4105   /* Break the loop if the pattern is anchored or if we have passed the end of
 4106   the subject. */
 4107 
      /* The test is ">", not ">=": one attempt is still made with start_match
      equal to end_subject. */
 4108   if (anchored || start_match > end_subject) break;
 4109 
 4110   /* If we have just passed a CR and the newline option is CRLF or ANY, and we
 4111   are now at a LF, advance the match position by one more character. */
 4112 
      /* md->nllen is examined only when nltype is not NLTYPE_ANY (short-circuit
      of ||), which is the only case in which it was assigned above. */
 4113   if (start_match[-1] == '\r' &&
 4114        (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
 4115        start_match < end_subject &&
 4116        *start_match == '\n')
 4117     start_match++;
 4118 
 4119   }   /* End of for(;;) "bumpalong" loop */
 4120 
 4121 /* ==========================================================================*/
 4122 
 4123 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
 4124 conditions is true:
 4125 
 4126 (1) The pattern is anchored;
 4127 
 4128 (2) We are past the end of the subject;
 4129 
 4130 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
 4131     this option requests that a match occur at or before the first newline in
 4132     the subject.
 4133 
 4134 When we have a match and the offset vector is big enough to deal with any
 4135 backreferences, captured substring offsets will already be set up. In the case
 4136 where we had to get some local store to hold offsets for backreference
 4137 processing, copy those that we can. In this case there need not be overflow if
 4138 certain parts of the pattern were not used, even though there are more
 4139 capturing parentheses than vector slots. */
 4140 
 4141 if (rc == MATCH_MATCH)
 4142   {
 4143   if (using_temporary_offsets)
 4144     {
      /* Elements 0 and 1 are written further down from start_match and
      md->end_match_ptr, so the copy starts at element 2. */
 4145     if (offsetcount >= 4)
 4146       {
 4147       memcpy(offsets + 2, md->offset_vector + 2,
 4148         (offsetcount - 2) * sizeof(int));
 4149       DPRINTF(("Copied offsets from temporary memory\n"));
 4150       }
 4151     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
 4152     DPRINTF(("Freeing temporary memory\n"));
 4153     (pcre_free)(md->offset_vector);
 4154     }
 4155 
 4156   /* Set the return code to the number of captured strings, or 0 if there are
 4157   too many to fit into the vector. */
 4158 
 4159   rc = md->offset_overflow? 0 : md->end_offset_top/2;
 4160 
 4161   /* If there is space, set up the whole thing as substring 0. */
 4162 
 4163   if (offsetcount < 2) rc = 0; else     /* no room even for substring 0 */
 4164     {
 4165     offsets[0] = start_match - md->start_subject;
 4166     offsets[1] = md->end_match_ptr - md->start_subject;
 4167     }
 4168 
 4169   DPRINTF((">>>> returning %d\n", rc));
 4170   return rc;
 4171   }
 4172 
 4173 /* Control gets here if there has been an error, or if the overall match
 4174 attempt has failed at all permitted starting positions. */
 4175 
 4176 if (using_temporary_offsets)
 4177   {
 4178   DPRINTF(("Freeing temporary memory\n"));
 4179   (pcre_free)(md->offset_vector);
 4180   }
 4181 
      /* Three outcomes remain: an error code from match() is passed back as is; a
      failed match that hit the end of the subject while PCRE_PARTIAL was set is
      reported as a partial match; anything else is a plain no-match. */
 4182 if (rc != MATCH_NOMATCH)
 4183   {
 4184   DPRINTF((">>>> error: returning %d\n", rc));
 4185   return rc;
 4186   }
 4187 else if (md->partial && md->hitend)
 4188   {
 4189   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
 4190   return PCRE_ERROR_PARTIAL;
 4191   }
 4192 else
 4193   {
 4194   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
 4195   return PCRE_ERROR_NOMATCH;
 4196   }
 4197 }
 4198 
 4199 /* End of pcre_exec.c */