"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.6.2/pcre/pcretest.c" (23 Aug 2021, 66834 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 /*************************************************
    2 *             PCRE testing program               *
    3 *************************************************/
    4 
    5 /* This program was hacked up as a tester for PCRE. I really should have
    6 written it more tidily in the first place. Will I ever learn? It has grown and
    7 been extended and consequently is now rather, er, *very* untidy in places.
    8 
    9 -----------------------------------------------------------------------------
   10 Redistribution and use in source and binary forms, with or without
   11 modification, are permitted provided that the following conditions are met:
   12 
   13     * Redistributions of source code must retain the above copyright notice,
   14       this list of conditions and the following disclaimer.
   15 
   16     * Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in the
   18       documentation and/or other materials provided with the distribution.
   19 
   20     * Neither the name of the University of Cambridge nor the names of its
   21       contributors may be used to endorse or promote products derived from
   22       this software without specific prior written permission.
   23 
   24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34 POSSIBILITY OF SUCH DAMAGE.
   35 -----------------------------------------------------------------------------
   36 */
   37 
   38 
   39 #include <ctype.h>
   40 #include <stdio.h>
   41 #include <string.h>
   42 #include <stdlib.h>
   43 #include <time.h>
   44 #include <locale.h>
   45 #include <errno.h>
   46 
   47 
   48 /* A number of things vary for Windows builds. Originally, pcretest opened its
   49 input and output without "b"; then I was told that "b" was needed in some
   50 environments, so it was added for release 5.0 to both the input and output. (It
   51 makes no difference on Unix-like systems.) Later I was told that it is wrong
   52 for the input on Windows. I've now abstracted the modes into two macros that
   53 are set here, to make it easier to fiddle with them, and removed "b" from the
   54 input mode under Windows. */
   55 
   56 #if defined(_WIN32) || defined(WIN32)
   57 #include <io.h>                /* For _setmode() */
   58 #include <fcntl.h>             /* For _O_BINARY */
   59 #define INPUT_MODE   "r"
   60 #define OUTPUT_MODE  "wb"
   61 
   62 #else
   63 #include <sys/time.h>          /* These two includes are needed */
   64 #include <sys/resource.h>      /* for setrlimit(). */
   65 #define INPUT_MODE   "rb"
   66 #define OUTPUT_MODE  "wb"
   67 #endif
   68 
   69 
   70 #define PCRE_SPY        /* For Win32 build, import data, not export */
   71 
   72 /* We include pcre_internal.h because we need the internal info for displaying
   73 the results of pcre_study() and we also need to know about the internal
   74 macros, structures, and other internal data values; pcretest has "inside
   75 information" compared to a program that strictly follows the PCRE API. */
   76 
   77 #include "pcre_internal.h"
   78 
   79 /* We need access to the data tables that PCRE uses. So as not to have to keep
   80 two copies, we include the source file here, changing the names of the external
   81 symbols to prevent clashes. */
   82 
   83 #define _pcre_utf8_table1      utf8_table1
   84 #define _pcre_utf8_table1_size utf8_table1_size
   85 #define _pcre_utf8_table2      utf8_table2
   86 #define _pcre_utf8_table3      utf8_table3
   87 #define _pcre_utf8_table4      utf8_table4
   88 #define _pcre_utt              utt
   89 #define _pcre_utt_size         utt_size
   90 #define _pcre_OP_lengths       OP_lengths
   91 
   92 #include "pcre_tables.c"
   93 
   94 /* We also need the pcre_printint() function for printing out compiled
   95 patterns. This function is in a separate file so that it can be included in
   96 pcre_compile.c when that module is compiled with debugging enabled.
   97 
   98 The definition of the macro PRINTABLE, which determines whether to print an
   99 output character as-is or as a hex value when showing compiled patterns, is
  100 contained in this file. We use it here also, in cases when the locale has not
  101 been explicitly changed, so as to get consistent output from systems that
  102 differ in their output from isprint() even in the "C" locale. */
  103 
  104 #include "pcre_printint.src"
  105 
  106 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
  107 
  108 
  109 /* It is possible to compile this test program without including support for
  110 testing the POSIX interface, though this is not available via the standard
  111 Makefile. */
  112 
  113 #if !defined NOPOSIX
  114 #include "pcreposix.h"
  115 #endif
  116 
  117 /* It is also possible, for the benefit of the version imported into Exim, to
  118 build pcretest without support for UTF8 (define NOUTF8), without the interface
  119 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
  120 function (define NOINFOCHECK). */
  121 
  122 
  123 /* Other parameters */
  124 
  125 #ifndef CLOCKS_PER_SEC
  126 #ifdef CLK_TCK
  127 #define CLOCKS_PER_SEC CLK_TCK
  128 #else
  129 #define CLOCKS_PER_SEC 100
  130 #endif
  131 #endif
  132 
  133 /* This is the default loop count for timing. */
  134 
  135 #define LOOPREPEAT 500000
  136 
  137 /* Static variables */
  138 
  139 static FILE *outfile;
  140 static int log_store = 0;
  141 static int callout_count;
  142 static int callout_extra;
  143 static int callout_fail_count;
  144 static int callout_fail_id;
  145 static int first_callout;
  146 static int locale_set = 0;
  147 static int show_malloc;
  148 static int use_utf8;
  149 static size_t gotten_store;
  150 
  151 /* The buffers grow automatically if very long input lines are encountered. */
  152 
  153 static int buffer_size = 50000;
  154 static uschar *buffer = NULL;
  155 static uschar *dbuffer = NULL;
  156 static uschar *pbuffer = NULL;
  157 
  158 
  159 
  160 /*************************************************
  161 *        Read or extend an input line            *
  162 *************************************************/
  163 
  164 /* Input lines are read into buffer, but both patterns and data lines can be
  165 continued over multiple input lines. In addition, if the buffer fills up, we
  166 want to automatically expand it so as to be able to handle extremely large
  167 lines that are needed for certain stress tests. When the input buffer is
  168 expanded, the other two buffers must also be expanded likewise, and the
  169 contents of pbuffer, which are a copy of the input for callouts, must be
  170 preserved (for when expansion happens for a data line). This is not the most
  171 optimal way of handling this, but hey, this is just a test program!
  172 
  173 Arguments:
  174   f            the file to read
  175   start        where in buffer to start (this *must* be within buffer)
  176 
  177 Returns:       pointer to the start of new data
  178                could be a copy of start, or could be moved
  179                NULL if no data read and EOF reached
  180 */
  181 
  182 static uschar *
  183 extend_inputline(FILE *f, uschar *start)
  184 {
  185 uschar *here = start;
  186 
  187 for (;;)
  188   {
  189   int rlen = buffer_size - (here - buffer);
  190 
  191   if (rlen > 1000)
  192     {
  193     int dlen;
  194     if (fgets((char *)here, rlen,  f) == NULL)
  195       return (here == start)? NULL : start;
  196     dlen = (int)strlen((char *)here);
  197     if (dlen > 0 && here[dlen - 1] == '\n') return start;
  198     here += dlen;
  199     }
  200 
  201   else
  202     {
  203     int new_buffer_size = 2*buffer_size;
  204     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
  205     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
  206     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
  207 
  208     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
  209       {
  210       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
  211       exit(1);
  212       }
  213 
  214     memcpy(new_buffer, buffer, buffer_size);
  215     memcpy(new_pbuffer, pbuffer, buffer_size);
  216 
  217     buffer_size = new_buffer_size;
  218 
  219     start = new_buffer + (start - buffer);
  220     here = new_buffer + (here - buffer);
  221 
  222     free(buffer);
  223     free(dbuffer);
  224     free(pbuffer);
  225 
  226     buffer = new_buffer;
  227     dbuffer = new_dbuffer;
  228     pbuffer = new_pbuffer;
  229     }
  230   }
  231 
  232 return NULL;  /* Control never gets here */
  233 }
  234 
  235 
  236 
  237 
  238 
  239 
  240 
  241 /*************************************************
  242 *          Read number from string               *
  243 *************************************************/
  244 
  245 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
  246 around with conditional compilation, just do the job by hand. It is only used
  247 for unpicking arguments, so just keep it simple.
  248 
  249 Arguments:
  250   str           string to be converted
  251   endptr        where to put the end pointer
  252 
  253 Returns:        the unsigned long
  254 */
  255 
  256 static int
  257 get_value(unsigned char *str, unsigned char **endptr)
  258 {
  259 int result = 0;
  260 while(*str != 0 && isspace(*str)) str++;
  261 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
  262 *endptr = str;
  263 return(result);
  264 }
  265 
  266 
  267 
  268 
  269 /*************************************************
  270 *            Convert UTF-8 string to value       *
  271 *************************************************/
  272 
  273 /* This function takes one or more bytes that represents a UTF-8 character,
  274 and returns the value of the character.
  275 
  276 Argument:
  277   utf8bytes   a pointer to the byte vector
  278   vptr        a pointer to an int to receive the value
  279 
  280 Returns:      >  0 => the number of bytes consumed
  281               -6 to 0 => malformed UTF-8 character at offset = (-return)
  282 */
  283 
  284 #if !defined NOUTF8
  285 
  286 static int
  287 utf82ord(unsigned char *utf8bytes, int *vptr)
  288 {
  289 int c = *utf8bytes++;
  290 int d = c;
  291 int i, j, s;
  292 
  293 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  294   {
  295   if ((d & 0x80) == 0) break;
  296   d <<= 1;
  297   }
  298 
  299 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
  300 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
  301 
  302 /* i now has a value in the range 1-5 */
  303 
  304 s = 6*i;
  305 d = (c & utf8_table3[i]) << s;
  306 
  307 for (j = 0; j < i; j++)
  308   {
  309   c = *utf8bytes++;
  310   if ((c & 0xc0) != 0x80) return -(j+1);
  311   s -= 6;
  312   d |= (c & 0x3f) << s;
  313   }
  314 
  315 /* Check that encoding was the correct unique one */
  316 
  317 for (j = 0; j < utf8_table1_size; j++)
  318   if (d <= utf8_table1[j]) break;
  319 if (j != i) return -(i+1);
  320 
  321 /* Valid value */
  322 
  323 *vptr = d;
  324 return i+1;
  325 }
  326 
  327 #endif
  328 
  329 
  330 
  331 /*************************************************
  332 *       Convert character value to UTF-8         *
  333 *************************************************/
  334 
  335 /* This function takes an integer value in the range 0 - 0x7fffffff
  336 and encodes it as a UTF-8 character in 0 to 6 bytes.
  337 
  338 Arguments:
  339   cvalue     the character value
  340   utf8bytes  pointer to buffer for result - at least 6 bytes long
  341 
  342 Returns:     number of characters placed in the buffer
  343 */
  344 
  345 #if !defined NOUTF8
  346 
  347 static int
  348 ord2utf8(int cvalue, uschar *utf8bytes)
  349 {
  350 register int i, j;
  351 for (i = 0; i < utf8_table1_size; i++)
  352   if (cvalue <= utf8_table1[i]) break;
  353 utf8bytes += i;
  354 for (j = i; j > 0; j--)
  355  {
  356  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
  357  cvalue >>= 6;
  358  }
  359 *utf8bytes = utf8_table2[i] | cvalue;
  360 return i + 1;
  361 }
  362 
  363 #endif
  364 
  365 
  366 
  367 /*************************************************
  368 *             Print character string             *
  369 *************************************************/
  370 
  371 /* Character string printing function. Must handle UTF-8 strings in utf8
  372 mode. Yields number of characters printed. If handed a NULL file, just counts
  373 chars without printing. */
  374 
  375 static int pchars(unsigned char *p, int length, FILE *f)
  376 {
  377 int c = 0;
  378 int yield = 0;
  379 
  380 while (length-- > 0)
  381   {
  382 #if !defined NOUTF8
  383   if (use_utf8)
  384     {
  385     int rc = utf82ord(p, &c);
  386 
  387     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
  388       {
  389       length -= rc - 1;
  390       p += rc;
  391       if (PRINTHEX(c))
  392         {
  393         if (f != NULL) fprintf(f, "%c", c);
  394         yield++;
  395         }
  396       else
  397         {
  398         int n = 4;
  399         if (f != NULL) fprintf(f, "\\x{%02x}", c);
  400         yield += (n <= 0x000000ff)? 2 :
  401                  (n <= 0x00000fff)? 3 :
  402                  (n <= 0x0000ffff)? 4 :
  403                  (n <= 0x000fffff)? 5 : 6;
  404         }
  405       continue;
  406       }
  407     }
  408 #endif
  409 
  410    /* Not UTF-8, or malformed UTF-8  */
  411 
  412   c = *p++;
  413   if (PRINTHEX(c))
  414     {
  415     if (f != NULL) fprintf(f, "%c", c);
  416     yield++;
  417     }
  418   else
  419     {
  420     if (f != NULL) fprintf(f, "\\x%02x", c);
  421     yield += 4;
  422     }
  423   }
  424 
  425 return yield;
  426 }
  427 
  428 
  429 
  430 /*************************************************
  431 *              Callout function                  *
  432 *************************************************/
  433 
  434 /* Called from PCRE as a result of the (?C) item. We print out where we are in
  435 the match. Yield zero unless more callouts than the fail count, or the callout
  436 data is not zero. */
  437 
  438 static int callout(pcre_callout_block *cb)
  439 {
  440 FILE *f = (first_callout | callout_extra)? outfile : NULL;
  441 int i, pre_start, post_start, subject_length;
  442 
  443 if (callout_extra)
  444   {
  445   fprintf(f, "Callout %d: last capture = %d\n",
  446     cb->callout_number, cb->capture_last);
  447 
  448   for (i = 0; i < cb->capture_top * 2; i += 2)
  449     {
  450     if (cb->offset_vector[i] < 0)
  451       fprintf(f, "%2d: <unset>\n", i/2);
  452     else
  453       {
  454       fprintf(f, "%2d: ", i/2);
  455       (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
  456         cb->offset_vector[i+1] - cb->offset_vector[i], f);
  457       fprintf(f, "\n");
  458       }
  459     }
  460   }
  461 
  462 /* Re-print the subject in canonical form, the first time or if giving full
  463 datails. On subsequent calls in the same match, we use pchars just to find the
  464 printed lengths of the substrings. */
  465 
  466 if (f != NULL) fprintf(f, "--->");
  467 
  468 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
  469 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
  470   cb->current_position - cb->start_match, f);
  471 
  472 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
  473 
  474 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
  475   cb->subject_length - cb->current_position, f);
  476 
  477 if (f != NULL) fprintf(f, "\n");
  478 
  479 /* Always print appropriate indicators, with callout number if not already
  480 shown. For automatic callouts, show the pattern offset. */
  481 
  482 if (cb->callout_number == 255)
  483   {
  484   fprintf(outfile, "%+3d ", cb->pattern_position);
  485   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
  486   }
  487 else
  488   {
  489   if (callout_extra) fprintf(outfile, "    ");
  490     else fprintf(outfile, "%3d ", cb->callout_number);
  491   }
  492 
  493 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
  494 fprintf(outfile, "^");
  495 
  496 if (post_start > 0)
  497   {
  498   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  499   fprintf(outfile, "^");
  500   }
  501 
  502 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  503   fprintf(outfile, " ");
  504 
  505 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  506   pbuffer + cb->pattern_position);
  507 
  508 fprintf(outfile, "\n");
  509 first_callout = 0;
  510 
  511 if (cb->callout_data != NULL)
  512   {
  513   int callout_data = *((int *)(cb->callout_data));
  514   if (callout_data != 0)
  515     {
  516     fprintf(outfile, "Callout data = %d\n", callout_data);
  517     return callout_data;
  518     }
  519   }
  520 
  521 return (cb->callout_number != callout_fail_id)? 0 :
  522        (++callout_count >= callout_fail_count)? 1 : 0;
  523 }
  524 
  525 
  526 /*************************************************
  527 *            Local malloc functions              *
  528 *************************************************/
  529 
  530 /* Alternative malloc function, to test functionality and show the size of the
  531 compiled re. */
  532 
  533 static void *new_malloc(size_t size)
  534 {
  535 void *block = malloc(size);
  536 gotten_store = size;
  537 if (show_malloc)
  538   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
  539 return block;
  540 }
  541 
  542 static void new_free(void *block)
  543 {
  544 if (show_malloc)
  545   fprintf(outfile, "free             %p\n", block);
  546 free(block);
  547 }
  548 
  549 
  550 /* For recursion malloc/free, to test stacking calls */
  551 
  552 static void *stack_malloc(size_t size)
  553 {
  554 void *block = malloc(size);
  555 if (show_malloc)
  556   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
  557 return block;
  558 }
  559 
  560 static void stack_free(void *block)
  561 {
  562 if (show_malloc)
  563   fprintf(outfile, "stack_free       %p\n", block);
  564 free(block);
  565 }
  566 
  567 
  568 /*************************************************
  569 *          Call pcre_fullinfo()                  *
  570 *************************************************/
  571 
  572 /* Get one piece of information from the pcre_fullinfo() function */
  573 
  574 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
  575 {
  576 int rc;
  577 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
  578   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
  579 }
  580 
  581 
  582 
  583 /*************************************************
  584 *         Byte flipping function                 *
  585 *************************************************/
  586 
  587 static unsigned long int
  588 byteflip(unsigned long int value, int n)
  589 {
  590 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
  591 return ((value & 0x000000ff) << 24) |
  592        ((value & 0x0000ff00) <<  8) |
  593        ((value & 0x00ff0000) >>  8) |
  594        ((value & 0xff000000) >> 24);
  595 }
  596 
  597 
  598 
  599 
  600 /*************************************************
  601 *        Check match or recursion limit          *
  602 *************************************************/
  603 
  604 static int
  605 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
  606   int start_offset, int options, int *use_offsets, int use_size_offsets,
  607   int flag, unsigned long int *limit, int errnumber, const char *msg)
  608 {
  609 int count;
  610 int min = 0;
  611 int mid = 64;
  612 int max = -1;
  613 
  614 extra->flags |= flag;
  615 
  616 for (;;)
  617   {
  618   *limit = mid;
  619 
  620   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
  621     use_offsets, use_size_offsets);
  622 
  623   if (count == errnumber)
  624     {
  625     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
  626     min = mid;
  627     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
  628     }
  629 
  630   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
  631                          count == PCRE_ERROR_PARTIAL)
  632     {
  633     if (mid == min + 1)
  634       {
  635       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
  636       break;
  637       }
  638     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
  639     max = mid;
  640     mid = (min + mid)/2;
  641     }
  642   else break;    /* Some other error */
  643   }
  644 
  645 extra->flags &= ~flag;
  646 return count;
  647 }
  648 
  649 
  650 
  651 /*************************************************
  652 *         Check newline indicator                *
  653 *************************************************/
  654 
  655 /* This is used both at compile and run-time to check for <xxx> escapes, where
  656 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
  657 
  658 Arguments:
  659   p           points after the leading '<'
  660   f           file for error message
  661 
  662 Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
  663 */
  664 
  665 static int
  666 check_newline(uschar *p, FILE *f)
  667 {
  668 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
  669 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
  670 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
  671 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
  672 fprintf(f, "Unknown newline type at: <%s\n", p);
  673 return 0;
  674 }
  675 
  676 
  677 
  678 /*************************************************
  679 *             Usage function                     *
  680 *************************************************/
  681 
  682 static void
  683 usage(void)
  684 {
  685 printf("Usage:     pcretest [options] [<input> [<output>]]\n");
  686 printf("  -b       show compiled code (bytecode)\n");
  687 printf("  -C       show PCRE compile-time options and exit\n");
  688 printf("  -d       debug: show compiled code and information (-b and -i)\n");
  689 #if !defined NODFA
  690 printf("  -dfa     force DFA matching for all subjects\n");
  691 #endif
  692 printf("  -help    show usage information\n");
  693 printf("  -i       show information about compiled patterns\n"
  694        "  -m       output memory used information\n"
  695        "  -o <n>   set size of offsets vector to <n>\n");
  696 #if !defined NOPOSIX
  697 printf("  -p       use POSIX interface\n");
  698 #endif
  699 printf("  -q       quiet: do not output PCRE version number at start\n");
  700 printf("  -S <n>   set stack size to <n> megabytes\n");
  701 printf("  -s       output store (memory) used information\n"
  702        "  -t       time compilation and execution\n");
  703 printf("  -t <n>   time compilation and execution, repeating <n> times\n");
  704 printf("  -tm      time execution (matching) only\n");
  705 printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
  706 }
  707 
  708 
  709 
  710 /*************************************************
  711 *                Main Program                    *
  712 *************************************************/
  713 
  714 /* Read lines from named file or stdin and write to named file or stdout; lines
  715 consist of a regular expression, in delimiters and optionally followed by
  716 options, followed by a set of test data, terminated by an empty line. */
  717 
  718 int main(int argc, char **argv)
  719 {
  720 FILE *infile = stdin;
  721 int options = 0;
  722 int study_options = 0;
  723 int op = 1;
  724 int timeit = 0;
  725 int timeitm = 0;
  726 int showinfo = 0;
  727 int showstore = 0;
  728 int quiet = 0;
  729 int size_offsets = 45;
  730 int size_offsets_max;
  731 int *offsets = NULL;
  732 #if !defined NOPOSIX
  733 int posix = 0;
  734 #endif
  735 int debug = 0;
  736 int done = 0;
  737 int all_use_dfa = 0;
  738 int yield = 0;
  739 int stack_size;
  740 
  741 /* These vectors store, end-to-end, a list of captured substring names. Assume
  742 that 1024 is plenty long enough for the few names we'll be testing. */
  743 
  744 uschar copynames[1024];
  745 uschar getnames[1024];
  746 
  747 uschar *copynamesptr;
  748 uschar *getnamesptr;
  749 
  750 /* Get buffers from malloc() so that Electric Fence will check their misuse
  751 when I am debugging. They grow automatically when very long lines are read. */
  752 
  753 buffer = (unsigned char *)malloc(buffer_size);
  754 dbuffer = (unsigned char *)malloc(buffer_size);
  755 pbuffer = (unsigned char *)malloc(buffer_size);
  756 
  757 /* The outfile variable is static so that new_malloc can use it. */
  758 
  759 outfile = stdout;
  760 
  761 /* The following  _setmode() stuff is some Windows magic that tells its runtime
  762 library to translate CRLF into a single LF character. At least, that's what
  763 I've been told: never having used Windows I take this all on trust. Originally
  764 it set 0x8000, but then I was advised that _O_BINARY was better. */
  765 
  766 #if defined(_WIN32) || defined(WIN32)
  767 _setmode( _fileno( stdout ), _O_BINARY );
  768 #endif
  769 
  770 /* Scan options */
  771 
  772 while (argc > 1 && argv[op][0] == '-')
  773   {
  774   unsigned char *endptr;
  775 
  776   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
  777     showstore = 1;
  778   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
  779   else if (strcmp(argv[op], "-b") == 0) debug = 1;
  780   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
  781   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
  782 #if !defined NODFA
  783   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
  784 #endif
  785   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
  786       ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
  787         *endptr == 0))
  788     {
  789     op++;
  790     argc--;
  791     }
  792   else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
  793     {
  794     int both = argv[op][2] == 0;
  795     int temp;
  796     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
  797                      *endptr == 0))
  798       {
  799       timeitm = temp;
  800       op++;
  801       argc--;
  802       }
  803     else timeitm = LOOPREPEAT;
  804     if (both) timeit = timeitm;
  805     }
  806   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
  807       ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
  808         *endptr == 0))
  809     {
  810 #if defined(_WIN32) || defined(WIN32)
  811     printf("PCRE: -S not supported on this OS\n");
  812     exit(1);
  813 #else
  814     int rc;
  815     struct rlimit rlim;
  816     getrlimit(RLIMIT_STACK, &rlim);
  817     rlim.rlim_cur = stack_size * 1024 * 1024;
  818     rc = setrlimit(RLIMIT_STACK, &rlim);
  819     if (rc != 0)
  820       {
  821     printf("PCRE: setrlimit() failed with error %d\n", rc);
  822     exit(1);
  823       }
  824     op++;
  825     argc--;
  826 #endif
  827     }
  828 #if !defined NOPOSIX
  829   else if (strcmp(argv[op], "-p") == 0) posix = 1;
  830 #endif
  831   else if (strcmp(argv[op], "-C") == 0)
  832     {
  833     int rc;
  834     printf("PCRE version %s\n", pcre_version());
  835     printf("Compiled with\n");
  836     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
  837     printf("  %sUTF-8 support\n", rc? "" : "No ");
  838     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
  839     printf("  %sUnicode properties support\n", rc? "" : "No ");
  840     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
  841     printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
  842       (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
  843       (rc == -1)? "ANY" : "???");
  844     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
  845     printf("  Internal link size = %d\n", rc);
  846     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
  847     printf("  POSIX malloc threshold = %d\n", rc);
  848     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
  849     printf("  Default match limit = %d\n", rc);
  850     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
  851     printf("  Default recursion depth limit = %d\n", rc);
  852     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
  853     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
  854     exit(0);
  855     }
  856   else if (strcmp(argv[op], "-help") == 0 ||
  857            strcmp(argv[op], "--help") == 0)
  858     {
  859     usage();
  860     goto EXIT;
  861     }
  862   else
  863     {
  864     printf("** Unknown or malformed option %s\n", argv[op]);
  865     usage();
  866     yield = 1;
  867     goto EXIT;
  868     }
  869   op++;
  870   argc--;
  871   }
  872 
  873 /* Get the store for the offsets vector, and remember what it was */
  874 
  875 size_offsets_max = size_offsets;
  876 offsets = (int *)malloc(size_offsets_max * sizeof(int));
  877 if (offsets == NULL)
  878   {
  879   printf("** Failed to get %d bytes of memory for offsets vector\n",
  880     size_offsets_max * sizeof(int));
  881   yield = 1;
  882   goto EXIT;
  883   }
  884 
  885 /* Sort out the input and output files */
  886 
  887 if (argc > 1)
  888   {
  889   infile = fopen(argv[op], INPUT_MODE);
  890   if (infile == NULL)
  891     {
  892     printf("** Failed to open %s\n", argv[op]);
  893     yield = 1;
  894     goto EXIT;
  895     }
  896   }
  897 
  898 if (argc > 2)
  899   {
  900   outfile = fopen(argv[op+1], OUTPUT_MODE);
  901   if (outfile == NULL)
  902     {
  903     printf("** Failed to open %s\n", argv[op+1]);
  904     yield = 1;
  905     goto EXIT;
  906     }
  907   }
  908 
  909 /* Set alternative malloc function */
  910 
  911 pcre_malloc = new_malloc;
  912 pcre_free = new_free;
  913 pcre_stack_malloc = stack_malloc;
  914 pcre_stack_free = stack_free;
  915 
  916 /* Heading line unless quiet, then prompt for first regex if stdin */
  917 
  918 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
  919 
  920 /* Main loop */
  921 
  922 while (!done)
  923   {
  924   pcre *re = NULL;
  925   pcre_extra *extra = NULL;
  926 
  927 #if !defined NOPOSIX  /* There are still compilers that require no indent */
  928   regex_t preg;
  929   int do_posix = 0;
  930 #endif
  931 
  932   const char *error;
  933   unsigned char *p, *pp, *ppp;
  934   unsigned char *to_file = NULL;
  935   const unsigned char *tables = NULL;
  936   unsigned long int true_size, true_study_size = 0;
  937   size_t size, regex_gotten_store;
  938   int do_study = 0;
  939   int do_debug = debug;
  940   int do_G = 0;
  941   int do_g = 0;
  942   int do_showinfo = showinfo;
  943   int do_showrest = 0;
  944   int do_flip = 0;
  945   int erroroffset, len, delimiter, poffset;
  946 
  947   use_utf8 = 0;
  948 
  949   if (infile == stdin) printf("  re> ");
  950   if (extend_inputline(infile, buffer) == NULL) break;
  951   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
  952   fflush(outfile);
  953 
  954   p = buffer;
  955   while (isspace(*p)) p++;
  956   if (*p == 0) continue;
  957 
  958   /* See if the pattern is to be loaded pre-compiled from a file. */
  959 
  960   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
  961     {
  962     unsigned long int magic, get_options;
  963     uschar sbuf[8];
  964     FILE *f;
  965 
  966     p++;
  967     pp = p + (int)strlen((char *)p);
  968     while (isspace(pp[-1])) pp--;
  969     *pp = 0;
  970 
  971     f = fopen((char *)p, "rb");
  972     if (f == NULL)
  973       {
  974       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
  975       continue;
  976       }
  977 
  978     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
  979 
  980     true_size =
  981       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
  982     true_study_size =
  983       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
  984 
  985     re = (real_pcre *)new_malloc(true_size);
  986     regex_gotten_store = gotten_store;
  987 
  988     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
  989 
  990     magic = ((real_pcre *)re)->magic_number;
  991     if (magic != MAGIC_NUMBER)
  992       {
  993       if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
  994         {
  995         do_flip = 1;
  996         }
  997       else
  998         {
  999         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
 1000         fclose(f);
 1001         continue;
 1002         }
 1003       }
 1004 
 1005     fprintf(outfile, "Compiled regex%s loaded from %s\n",
 1006       do_flip? " (byte-inverted)" : "", p);
 1007 
 1008     /* Need to know if UTF-8 for printing data strings */
 1009 
 1010     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
 1011     use_utf8 = (get_options & PCRE_UTF8) != 0;
 1012 
 1013     /* Now see if there is any following study data */
 1014 
 1015     if (true_study_size != 0)
 1016       {
 1017       pcre_study_data *psd;
 1018 
 1019       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
 1020       extra->flags = PCRE_EXTRA_STUDY_DATA;
 1021 
 1022       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
 1023       extra->study_data = psd;
 1024 
 1025       if (fread(psd, 1, true_study_size, f) != true_study_size)
 1026         {
 1027         FAIL_READ:
 1028         fprintf(outfile, "Failed to read data from %s\n", p);
 1029         if (extra != NULL) new_free(extra);
 1030         if (re != NULL) new_free(re);
 1031         fclose(f);
 1032         continue;
 1033         }
 1034       fprintf(outfile, "Study data loaded from %s\n", p);
 1035       do_study = 1;     /* To get the data output if requested */
 1036       }
 1037     else fprintf(outfile, "No study data\n");
 1038 
 1039     fclose(f);
 1040     goto SHOW_INFO;
 1041     }
 1042 
 1043   /* In-line pattern (the usual case). Get the delimiter and seek the end of
 1044   the pattern; if it isn't complete, read more. */
 1045 
 1046   delimiter = *p++;
 1047 
 1048   if (isalnum(delimiter) || delimiter == '\\')
 1049     {
 1050     fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
 1051     goto SKIP_DATA;
 1052     }
 1053 
 1054   pp = p;
 1055   poffset = p - buffer;
 1056 
 1057   for(;;)
 1058     {
 1059     while (*pp != 0)
 1060       {
 1061       if (*pp == '\\' && pp[1] != 0) pp++;
 1062         else if (*pp == delimiter) break;
 1063       pp++;
 1064       }
 1065     if (*pp != 0) break;
 1066     if (infile == stdin) printf("    > ");
 1067     if ((pp = extend_inputline(infile, pp)) == NULL)
 1068       {
 1069       fprintf(outfile, "** Unexpected EOF\n");
 1070       done = 1;
 1071       goto CONTINUE;
 1072       }
 1073     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
 1074     }
 1075 
 1076   /* The buffer may have moved while being extended; reset the start of data
 1077   pointer to the correct relative point in the buffer. */
 1078 
 1079   p = buffer + poffset;
 1080 
 1081   /* If the first character after the delimiter is backslash, make
 1082   the pattern end with backslash. This is purely to provide a way
 1083   of testing for the error message when a pattern ends with backslash. */
 1084 
 1085   if (pp[1] == '\\') *pp++ = '\\';
 1086 
 1087   /* Terminate the pattern at the delimiter, and save a copy of the pattern
 1088   for callouts. */
 1089 
 1090   *pp++ = 0;
 1091   strcpy((char *)pbuffer, (char *)p);
 1092 
 1093   /* Look for options after final delimiter */
 1094 
 1095   options = 0;
 1096   study_options = 0;
 1097   log_store = showstore;  /* default from command line */
 1098 
 1099   while (*pp != 0)
 1100     {
 1101     switch (*pp++)
 1102       {
 1103       case 'f': options |= PCRE_FIRSTLINE; break;
 1104       case 'g': do_g = 1; break;
 1105       case 'i': options |= PCRE_CASELESS; break;
 1106       case 'm': options |= PCRE_MULTILINE; break;
 1107       case 's': options |= PCRE_DOTALL; break;
 1108       case 'x': options |= PCRE_EXTENDED; break;
 1109 
 1110       case '+': do_showrest = 1; break;
 1111       case 'A': options |= PCRE_ANCHORED; break;
 1112       case 'B': do_debug = 1; break;
 1113       case 'C': options |= PCRE_AUTO_CALLOUT; break;
 1114       case 'D': do_debug = do_showinfo = 1; break;
 1115       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
 1116       case 'F': do_flip = 1; break;
 1117       case 'G': do_G = 1; break;
 1118       case 'I': do_showinfo = 1; break;
 1119       case 'J': options |= PCRE_DUPNAMES; break;
 1120       case 'M': log_store = 1; break;
 1121       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
 1122 
 1123 #if !defined NOPOSIX
 1124       case 'P': do_posix = 1; break;
 1125 #endif
 1126 
 1127       case 'S': do_study = 1; break;
 1128       case 'U': options |= PCRE_UNGREEDY; break;
 1129       case 'X': options |= PCRE_EXTRA; break;
 1130       case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
 1131       case '?': options |= PCRE_NO_UTF8_CHECK; break;
 1132 
 1133       case 'L':
 1134       ppp = pp;
 1135       /* The '\r' test here is so that it works on Windows. */
 1136       /* The '0' test is just in case this is an unterminated line. */
 1137       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
 1138       *ppp = 0;
 1139       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
 1140         {
 1141         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
 1142         goto SKIP_DATA;
 1143         }
 1144       locale_set = 1;
 1145       tables = pcre_maketables();
 1146       pp = ppp;
 1147       break;
 1148 
 1149       case '>':
 1150       to_file = pp;
 1151       while (*pp != 0) pp++;
 1152       while (isspace(pp[-1])) pp--;
 1153       *pp = 0;
 1154       break;
 1155 
 1156       case '<':
 1157         {
 1158         int x = check_newline(pp, outfile);
 1159         if (x == 0) goto SKIP_DATA;
 1160         options |= x;
 1161         while (*pp++ != '>');
 1162         }
 1163       break;
 1164 
 1165       case '\r':                      /* So that it works in Windows */
 1166       case '\n':
 1167       case ' ':
 1168       break;
 1169 
 1170       default:
 1171       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
 1172       goto SKIP_DATA;
 1173       }
 1174     }
 1175 
 1176   /* Handle compiling via the POSIX interface, which doesn't support the
 1177   timing, showing, or debugging options, nor the ability to pass over
 1178   local character tables. */
 1179 
 1180 #if !defined NOPOSIX
 1181   if (posix || do_posix)
 1182     {
 1183     int rc;
 1184     int cflags = 0;
 1185 
 1186     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
 1187     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
 1188     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
 1189     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
 1190     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
 1191 
 1192     rc = regcomp(&preg, (char *)p, cflags);
 1193 
 1194     /* Compilation failed; go back for another re, skipping to blank line
 1195     if non-interactive. */
 1196 
 1197     if (rc != 0)
 1198       {
 1199       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
 1200       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
 1201       goto SKIP_DATA;
 1202       }
 1203     }
 1204 
 1205   /* Handle compiling via the native interface */
 1206 
 1207   else
 1208 #endif  /* !defined NOPOSIX */
 1209 
 1210     {
 1211     if (timeit > 0)
 1212       {
 1213       register int i;
 1214       clock_t time_taken;
 1215       clock_t start_time = clock();
 1216       for (i = 0; i < timeit; i++)
 1217         {
 1218         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
 1219         if (re != NULL) free(re);
 1220         }
 1221       time_taken = clock() - start_time;
 1222       fprintf(outfile, "Compile time %.4f milliseconds\n",
 1223         (((double)time_taken * 1000.0) / (double)timeit) /
 1224           (double)CLOCKS_PER_SEC);
 1225       }
 1226 
 1227     re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
 1228 
 1229     /* Compilation failed; go back for another re, skipping to blank line
 1230     if non-interactive. */
 1231 
 1232     if (re == NULL)
 1233       {
 1234       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
 1235       SKIP_DATA:
 1236       if (infile != stdin)
 1237         {
 1238         for (;;)
 1239           {
 1240           if (extend_inputline(infile, buffer) == NULL)
 1241             {
 1242             done = 1;
 1243             goto CONTINUE;
 1244             }
 1245           len = (int)strlen((char *)buffer);
 1246           while (len > 0 && isspace(buffer[len-1])) len--;
 1247           if (len == 0) break;
 1248           }
 1249         fprintf(outfile, "\n");
 1250         }
 1251       goto CONTINUE;
 1252       }
 1253 
 1254     /* Compilation succeeded; print data if required. There are now two
 1255     info-returning functions. The old one has a limited interface and
 1256     returns only limited data. Check that it agrees with the newer one. */
 1257 
 1258     if (log_store)
 1259       fprintf(outfile, "Memory allocation (code space): %d\n",
 1260         (int)(gotten_store -
 1261               sizeof(real_pcre) -
 1262               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
 1263 
 1264     /* Extract the size for possible writing before possibly flipping it,
 1265     and remember the store that was got. */
 1266 
 1267     true_size = ((real_pcre *)re)->size;
 1268     regex_gotten_store = gotten_store;
 1269 
 1270     /* If /S was present, study the regexp to generate additional info to
 1271     help with the matching. */
 1272 
 1273     if (do_study)
 1274       {
 1275       if (timeit > 0)
 1276         {
 1277         register int i;
 1278         clock_t time_taken;
 1279         clock_t start_time = clock();
 1280         for (i = 0; i < timeit; i++)
 1281           extra = pcre_study(re, study_options, &error);
 1282         time_taken = clock() - start_time;
 1283         if (extra != NULL) free(extra);
 1284         fprintf(outfile, "  Study time %.4f milliseconds\n",
 1285           (((double)time_taken * 1000.0) / (double)timeit) /
 1286             (double)CLOCKS_PER_SEC);
 1287         }
 1288       extra = pcre_study(re, study_options, &error);
 1289       if (error != NULL)
 1290         fprintf(outfile, "Failed to study: %s\n", error);
 1291       else if (extra != NULL)
 1292         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
 1293       }
 1294 
 1295     /* If the 'F' option was present, we flip the bytes of all the integer
 1296     fields in the regex data block and the study block. This is to make it
 1297     possible to test PCRE's handling of byte-flipped patterns, e.g. those
 1298     compiled on a different architecture. */
 1299 
 1300     if (do_flip)
 1301       {
 1302       real_pcre *rre = (real_pcre *)re;
 1303       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
 1304       rre->size = byteflip(rre->size, sizeof(rre->size));
 1305       rre->options = byteflip(rre->options, sizeof(rre->options));
 1306       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
 1307       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
 1308       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
 1309       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
 1310       rre->name_table_offset = byteflip(rre->name_table_offset,
 1311         sizeof(rre->name_table_offset));
 1312       rre->name_entry_size = byteflip(rre->name_entry_size,
 1313         sizeof(rre->name_entry_size));
 1314       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
 1315 
 1316       if (extra != NULL)
 1317         {
 1318         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
 1319         rsd->size = byteflip(rsd->size, sizeof(rsd->size));
 1320         rsd->options = byteflip(rsd->options, sizeof(rsd->options));
 1321         }
 1322       }
 1323 
 1324     /* Extract information from the compiled data if required */
 1325 
 1326     SHOW_INFO:
 1327 
 1328     if (do_debug)
 1329       {
 1330       fprintf(outfile, "------------------------------------------------------------------\n");
 1331       pcre_printint(re, outfile);
 1332       }
 1333 
 1334     if (do_showinfo)
 1335       {
 1336       unsigned long int get_options, all_options;
 1337 #if !defined NOINFOCHECK
 1338       int old_first_char, old_options, old_count;
 1339 #endif
 1340       int count, backrefmax, first_char, need_char;
 1341       int nameentrysize, namecount;
 1342       const uschar *nametable;
 1343 
 1344       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
 1345       new_info(re, NULL, PCRE_INFO_SIZE, &size);
 1346       new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
 1347       new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
 1348       new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
 1349       new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
 1350       new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
 1351       new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
 1352       new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
 1353 
 1354 #if !defined NOINFOCHECK
 1355       old_count = pcre_info(re, &old_options, &old_first_char);
 1356       if (count < 0) fprintf(outfile,
 1357         "Error %d from pcre_info()\n", count);
 1358       else
 1359         {
 1360         if (old_count != count) fprintf(outfile,
 1361           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
 1362             old_count);
 1363 
 1364         if (old_first_char != first_char) fprintf(outfile,
 1365           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
 1366             first_char, old_first_char);
 1367 
 1368         if (old_options != (int)get_options) fprintf(outfile,
 1369           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
 1370             get_options, old_options);
 1371         }
 1372 #endif
 1373 
 1374       if (size != regex_gotten_store) fprintf(outfile,
 1375         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
 1376         (int)size, (int)regex_gotten_store);
 1377 
 1378       fprintf(outfile, "Capturing subpattern count = %d\n", count);
 1379       if (backrefmax > 0)
 1380         fprintf(outfile, "Max back reference = %d\n", backrefmax);
 1381 
 1382       if (namecount > 0)
 1383         {
 1384         fprintf(outfile, "Named capturing subpatterns:\n");
 1385         while (namecount-- > 0)
 1386           {
 1387           fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
 1388             nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
 1389             GET2(nametable, 0));
 1390           nametable += nameentrysize;
 1391           }
 1392         }
 1393 
 1394       /* The NOPARTIAL bit is a private bit in the options, so we have
 1395       to fish it out via a back door */
 1396 
 1397       all_options = ((real_pcre *)re)->options;
 1398       if (do_flip)
 1399         {
 1400         all_options = byteflip(all_options, sizeof(all_options));
 1401          }
 1402 
 1403       if ((all_options & PCRE_NOPARTIAL) != 0)
 1404         fprintf(outfile, "Partial matching not supported\n");
 1405 
 1406       if (get_options == 0) fprintf(outfile, "No options\n");
 1407         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 1408           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
 1409           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
 1410           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
 1411           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
 1412           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
 1413           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
 1414           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
 1415           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
 1416           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
 1417           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
 1418           ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
 1419           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
 1420           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
 1421 
 1422       switch (get_options & PCRE_NEWLINE_BITS)
 1423         {
 1424         case PCRE_NEWLINE_CR:
 1425         fprintf(outfile, "Forced newline sequence: CR\n");
 1426         break;
 1427 
 1428         case PCRE_NEWLINE_LF:
 1429         fprintf(outfile, "Forced newline sequence: LF\n");
 1430         break;
 1431 
 1432         case PCRE_NEWLINE_CRLF:
 1433         fprintf(outfile, "Forced newline sequence: CRLF\n");
 1434         break;
 1435 
 1436         case PCRE_NEWLINE_ANY:
 1437         fprintf(outfile, "Forced newline sequence: ANY\n");
 1438         break;
 1439 
 1440         default:
 1441         break;
 1442         }
 1443 
 1444       if (first_char == -1)
 1445         {
 1446         fprintf(outfile, "First char at start or follows newline\n");
 1447         }
 1448       else if (first_char < 0)
 1449         {
 1450         fprintf(outfile, "No first char\n");
 1451         }
 1452       else
 1453         {
 1454         int ch = first_char & 255;
 1455         const char *caseless = ((first_char & REQ_CASELESS) == 0)?
 1456           "" : " (caseless)";
 1457         if (PRINTHEX(ch))
 1458           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
 1459         else
 1460           fprintf(outfile, "First char = %d%s\n", ch, caseless);
 1461         }
 1462 
 1463       if (need_char < 0)
 1464         {
 1465         fprintf(outfile, "No need char\n");
 1466         }
 1467       else
 1468         {
 1469         int ch = need_char & 255;
 1470         const char *caseless = ((need_char & REQ_CASELESS) == 0)?
 1471           "" : " (caseless)";
 1472         if (PRINTHEX(ch))
 1473           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
 1474         else
 1475           fprintf(outfile, "Need char = %d%s\n", ch, caseless);
 1476         }
 1477 
 1478       /* Don't output study size; at present it is in any case a fixed
 1479       value, but it varies, depending on the computer architecture, and
 1480       so messes up the test suite. (And with the /F option, it might be
 1481       flipped.) */
 1482 
 1483       if (do_study)
 1484         {
 1485         if (extra == NULL)
 1486           fprintf(outfile, "Study returned NULL\n");
 1487         else
 1488           {
 1489           uschar *start_bits = NULL;
 1490           new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
 1491 
 1492           if (start_bits == NULL)
 1493             fprintf(outfile, "No starting byte set\n");
 1494           else
 1495             {
 1496             int i;
 1497             int c = 24;
 1498             fprintf(outfile, "Starting byte set: ");
 1499             for (i = 0; i < 256; i++)
 1500               {
 1501               if ((start_bits[i/8] & (1<<(i&7))) != 0)
 1502                 {
 1503                 if (c > 75)
 1504                   {
 1505                   fprintf(outfile, "\n  ");
 1506                   c = 2;
 1507                   }
 1508                 if (PRINTHEX(i) && i != ' ')
 1509                   {
 1510                   fprintf(outfile, "%c ", i);
 1511                   c += 2;
 1512                   }
 1513                 else
 1514                   {
 1515                   fprintf(outfile, "\\x%02x ", i);
 1516                   c += 5;
 1517                   }
 1518                 }
 1519               }
 1520             fprintf(outfile, "\n");
 1521             }
 1522           }
 1523         }
 1524       }
 1525 
 1526     /* If the '>' option was present, we write out the regex to a file, and
 1527     that is all. The first 8 bytes of the file are the regex length and then
 1528     the study length, in big-endian order. */
 1529 
 1530     if (to_file != NULL)
 1531       {
 1532       FILE *f = fopen((char *)to_file, "wb");
 1533       if (f == NULL)
 1534         {
 1535         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
 1536         }
 1537       else
 1538         {
 1539         uschar sbuf[8];
 1540         sbuf[0] = (true_size >> 24)  & 255;
 1541         sbuf[1] = (true_size >> 16)  & 255;
 1542         sbuf[2] = (true_size >>  8)  & 255;
 1543         sbuf[3] = (true_size)  & 255;
 1544 
 1545         sbuf[4] = (true_study_size >> 24)  & 255;
 1546         sbuf[5] = (true_study_size >> 16)  & 255;
 1547         sbuf[6] = (true_study_size >>  8)  & 255;
 1548         sbuf[7] = (true_study_size)  & 255;
 1549 
 1550         if (fwrite(sbuf, 1, 8, f) < 8 ||
 1551             fwrite(re, 1, true_size, f) < true_size)
 1552           {
 1553           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
 1554           }
 1555         else
 1556           {
 1557           fprintf(outfile, "Compiled regex written to %s\n", to_file);
 1558           if (extra != NULL)
 1559             {
 1560             if (fwrite(extra->study_data, 1, true_study_size, f) <
 1561                 true_study_size)
 1562               {
 1563               fprintf(outfile, "Write error on %s: %s\n", to_file,
 1564                 strerror(errno));
 1565               }
 1566             else fprintf(outfile, "Study data written to %s\n", to_file);
 1567 
 1568             }
 1569           }
 1570         fclose(f);
 1571         }
 1572 
 1573       new_free(re);
 1574       if (extra != NULL) new_free(extra);
 1575       if (tables != NULL) new_free((void *)tables);
 1576       continue;  /* With next regex */
 1577       }
 1578     }        /* End of non-POSIX compile */
 1579 
 1580   /* Read data lines and test them */
 1581 
 1582   for (;;)
 1583     {
 1584     uschar *q;
 1585     uschar *bptr = dbuffer;
 1586     int *use_offsets = offsets;
 1587     int use_size_offsets = size_offsets;
 1588     int callout_data = 0;
 1589     int callout_data_set = 0;
 1590     int count, c;
 1591     int copystrings = 0;
 1592     int find_match_limit = 0;
 1593     int getstrings = 0;
 1594     int getlist = 0;
 1595     int gmatched = 0;
 1596     int start_offset = 0;
 1597     int g_notempty = 0;
 1598     int use_dfa = 0;
 1599 
 1600     options = 0;
 1601 
 1602     *copynames = 0;
 1603     *getnames = 0;
 1604 
 1605     copynamesptr = copynames;
 1606     getnamesptr = getnames;
 1607 
 1608     pcre_callout = callout;
 1609     first_callout = 1;
 1610     callout_extra = 0;
 1611     callout_count = 0;
 1612     callout_fail_count = 999999;
 1613     callout_fail_id = -1;
 1614     show_malloc = 0;
 1615 
 1616     if (extra != NULL) extra->flags &=
 1617       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
 1618 
 1619     len = 0;
 1620     for (;;)
 1621       {
 1622       if (infile == stdin) printf("data> ");
 1623       if (extend_inputline(infile, buffer + len) == NULL)
 1624         {
 1625         if (len > 0) break;
 1626         done = 1;
 1627         goto CONTINUE;
 1628         }
 1629       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
 1630       len = (int)strlen((char *)buffer);
 1631       if (buffer[len-1] == '\n') break;
 1632       }
 1633 
 1634     while (len > 0 && isspace(buffer[len-1])) len--;
 1635     buffer[len] = 0;
 1636     if (len == 0) break;
 1637 
 1638     p = buffer;
 1639     while (isspace(*p)) p++;
 1640 
 1641     q = dbuffer;
 1642     while ((c = *p++) != 0)
 1643       {
 1644       int i = 0;
 1645       int n = 0;
 1646 
 1647       if (c == '\\') switch ((c = *p++))
 1648         {
 1649         case 'a': c =    7; break;
 1650         case 'b': c = '\b'; break;
 1651         case 'e': c =   27; break;
 1652         case 'f': c = '\f'; break;
 1653         case 'n': c = '\n'; break;
 1654         case 'r': c = '\r'; break;
 1655         case 't': c = '\t'; break;
 1656         case 'v': c = '\v'; break;
 1657 
 1658         case '0': case '1': case '2': case '3':
 1659         case '4': case '5': case '6': case '7':
 1660         c -= '0';
 1661         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
 1662           c = c * 8 + *p++ - '0';
 1663 
 1664 #if !defined NOUTF8
 1665         if (use_utf8 && c > 255)
 1666           {
 1667           unsigned char buff8[8];
 1668           int ii, utn;
 1669           utn = ord2utf8(c, buff8);
 1670           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
 1671           c = buff8[ii];   /* Last byte */
 1672           }
 1673 #endif
 1674         break;
 1675 
 1676         case 'x':
 1677 
 1678         /* Handle \x{..} specially - new Perl thing for utf8 */
 1679 
 1680 #if !defined NOUTF8
 1681         if (*p == '{')
 1682           {
 1683           unsigned char *pt = p;
 1684           c = 0;
 1685           while (isxdigit(*(++pt)))
 1686             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
 1687           if (*pt == '}')
 1688             {
 1689             unsigned char buff8[8];
 1690             int ii, utn;
 1691             utn = ord2utf8(c, buff8);
 1692             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
 1693             c = buff8[ii];   /* Last byte */
 1694             p = pt + 1;
 1695             break;
 1696             }
 1697           /* Not correct form; fall through */
 1698           }
 1699 #endif
 1700 
 1701         /* Ordinary \x */
 1702 
 1703         c = 0;
 1704         while (i++ < 2 && isxdigit(*p))
 1705           {
 1706           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
 1707           p++;
 1708           }
 1709         break;
 1710 
 1711         case 0:   /* \ followed by EOF allows for an empty line */
 1712         p--;
 1713         continue;
 1714 
 1715         case '>':
 1716         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
 1717         continue;
 1718 
 1719         case 'A':  /* Option setting */
 1720         options |= PCRE_ANCHORED;
 1721         continue;
 1722 
 1723         case 'B':
 1724         options |= PCRE_NOTBOL;
 1725         continue;
 1726 
 1727         case 'C':
 1728         if (isdigit(*p))    /* Set copy string */
 1729           {
 1730           while(isdigit(*p)) n = n * 10 + *p++ - '0';
 1731           copystrings |= 1 << n;
 1732           }
 1733         else if (isalnum(*p))
 1734           {
 1735           uschar *npp = copynamesptr;
 1736           while (isalnum(*p)) *npp++ = *p++;
 1737           *npp++ = 0;
 1738           *npp = 0;
 1739           n = pcre_get_stringnumber(re, (char *)copynamesptr);
 1740           if (n < 0)
 1741             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
 1742           copynamesptr = npp;
 1743           }
 1744         else if (*p == '+')
 1745           {
 1746           callout_extra = 1;
 1747           p++;
 1748           }
 1749         else if (*p == '-')
 1750           {
 1751           pcre_callout = NULL;
 1752           p++;
 1753           }
 1754         else if (*p == '!')
 1755           {
 1756           callout_fail_id = 0;
 1757           p++;
 1758           while(isdigit(*p))
 1759             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
 1760           callout_fail_count = 0;
 1761           if (*p == '!')
 1762             {
 1763             p++;
 1764             while(isdigit(*p))
 1765               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
 1766             }
 1767           }
 1768         else if (*p == '*')
 1769           {
 1770           int sign = 1;
 1771           callout_data = 0;
 1772           if (*(++p) == '-') { sign = -1; p++; }
 1773           while(isdigit(*p))
 1774             callout_data = callout_data * 10 + *p++ - '0';
 1775           callout_data *= sign;
 1776           callout_data_set = 1;
 1777           }
 1778         continue;
 1779 
 1780 #if !defined NODFA
 1781         case 'D':
 1782 #if !defined NOPOSIX
 1783         if (posix || do_posix)
 1784           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
 1785         else
 1786 #endif
 1787           use_dfa = 1;
 1788         continue;
 1789 
 1790         case 'F':
 1791         options |= PCRE_DFA_SHORTEST;
 1792         continue;
 1793 #endif
 1794 
 1795         case 'G':
 1796         if (isdigit(*p))
 1797           {
 1798           while(isdigit(*p)) n = n * 10 + *p++ - '0';
 1799           getstrings |= 1 << n;
 1800           }
 1801         else if (isalnum(*p))
 1802           {
 1803           uschar *npp = getnamesptr;
 1804           while (isalnum(*p)) *npp++ = *p++;
 1805           *npp++ = 0;
 1806           *npp = 0;
 1807           n = pcre_get_stringnumber(re, (char *)getnamesptr);
 1808           if (n < 0)
 1809             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
 1810           getnamesptr = npp;
 1811           }
 1812         continue;
 1813 
 1814         case 'L':
 1815         getlist = 1;
 1816         continue;
 1817 
 1818         case 'M':
 1819         find_match_limit = 1;
 1820         continue;
 1821 
 1822         case 'N':
 1823         options |= PCRE_NOTEMPTY;
 1824         continue;
 1825 
 1826         case 'O':
 1827         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 1828         if (n > size_offsets_max)
 1829           {
 1830           size_offsets_max = n;
 1831           free(offsets);
 1832           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
 1833           if (offsets == NULL)
 1834             {
 1835             printf("** Failed to get %d bytes of memory for offsets vector\n",
 1836               size_offsets_max * sizeof(int));
 1837             yield = 1;
 1838             goto EXIT;
 1839             }
 1840           }
 1841         use_size_offsets = n;
 1842         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
 1843         continue;
 1844 
 1845         case 'P':
 1846         options |= PCRE_PARTIAL;
 1847         continue;
 1848 
 1849         case 'Q':
 1850         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 1851         if (extra == NULL)
 1852           {
 1853           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 1854           extra->flags = 0;
 1855           }
 1856         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
 1857         extra->match_limit_recursion = n;
 1858         continue;
 1859 
 1860         case 'q':
 1861         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 1862         if (extra == NULL)
 1863           {
 1864           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 1865           extra->flags = 0;
 1866           }
 1867         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
 1868         extra->match_limit = n;
 1869         continue;
 1870 
 1871 #if !defined NODFA
 1872         case 'R':
 1873         options |= PCRE_DFA_RESTART;
 1874         continue;
 1875 #endif
 1876 
 1877         case 'S':
 1878         show_malloc = 1;
 1879         continue;
 1880 
 1881         case 'Z':
 1882         options |= PCRE_NOTEOL;
 1883         continue;
 1884 
 1885         case '?':
 1886         options |= PCRE_NO_UTF8_CHECK;
 1887         continue;
 1888 
 1889         case '<':
 1890           {
 1891           int x = check_newline(p, outfile);
 1892           if (x == 0) goto NEXT_DATA;
 1893           options |= x;
 1894           while (*p++ != '>');
 1895           }
 1896         continue;
 1897         }
 1898       *q++ = c;
 1899       }
 1900     *q = 0;
 1901     len = q - dbuffer;
 1902 
 1903     if ((all_use_dfa || use_dfa) && find_match_limit)
 1904       {
 1905       printf("**Match limit not relevant for DFA matching: ignored\n");
 1906       find_match_limit = 0;
 1907       }
 1908 
 1909     /* Handle matching via the POSIX interface, which does not
 1910     support timing or playing with the match limit or callout data. */
 1911 
 1912 #if !defined NOPOSIX
 1913     if (posix || do_posix)
 1914       {
 1915       int rc;
 1916       int eflags = 0;
 1917       regmatch_t *pmatch = NULL;
 1918       if (use_size_offsets > 0)
 1919         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
 1920       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
 1921       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
 1922 
 1923       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
 1924 
 1925       if (rc != 0)
 1926         {
 1927         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
 1928         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
 1929         }
 1930       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
 1931               != 0)
 1932         {
 1933         fprintf(outfile, "Matched with REG_NOSUB\n");
 1934         }
 1935       else
 1936         {
 1937         size_t i;
 1938         for (i = 0; i < (size_t)use_size_offsets; i++)
 1939           {
 1940           if (pmatch[i].rm_so >= 0)
 1941             {
 1942             fprintf(outfile, "%2d: ", (int)i);
 1943             (void)pchars(dbuffer + pmatch[i].rm_so,
 1944               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
 1945             fprintf(outfile, "\n");
 1946             if (i == 0 && do_showrest)
 1947               {
 1948               fprintf(outfile, " 0+ ");
 1949               (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
 1950                 outfile);
 1951               fprintf(outfile, "\n");
 1952               }
 1953             }
 1954           }
 1955         }
 1956       free(pmatch);
 1957       }
 1958 
 1959     /* Handle matching via the native interface - repeats for /g and /G */
 1960 
 1961     else
 1962 #endif  /* !defined NOPOSIX */
 1963 
 1964     for (;; gmatched++)    /* Loop for /g or /G */
 1965       {
 1966       if (timeitm > 0)
 1967         {
 1968         register int i;
 1969         clock_t time_taken;
 1970         clock_t start_time = clock();
 1971 
 1972 #if !defined NODFA
 1973         if (all_use_dfa || use_dfa)
 1974           {
 1975           int workspace[1000];
 1976           for (i = 0; i < timeitm; i++)
 1977             count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
 1978               options | g_notempty, use_offsets, use_size_offsets, workspace,
 1979               sizeof(workspace)/sizeof(int));
 1980           }
 1981         else
 1982 #endif
 1983 
 1984         for (i = 0; i < timeitm; i++)
 1985           count = pcre_exec(re, extra, (char *)bptr, len,
 1986             start_offset, options | g_notempty, use_offsets, use_size_offsets);
 1987 
 1988         time_taken = clock() - start_time;
 1989         fprintf(outfile, "Execute time %.4f milliseconds\n",
 1990           (((double)time_taken * 1000.0) / (double)timeitm) /
 1991             (double)CLOCKS_PER_SEC);
 1992         }
 1993 
 1994       /* If find_match_limit is set, we want to do repeated matches with
 1995       varying limits in order to find the minimum value for the match limit and
 1996       for the recursion limit. */
 1997 
 1998       if (find_match_limit)
 1999         {
 2000         if (extra == NULL)
 2001           {
 2002           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 2003           extra->flags = 0;
 2004           }
 2005 
 2006         (void)check_match_limit(re, extra, bptr, len, start_offset,
 2007           options|g_notempty, use_offsets, use_size_offsets,
 2008           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
 2009           PCRE_ERROR_MATCHLIMIT, "match()");
 2010 
 2011         count = check_match_limit(re, extra, bptr, len, start_offset,
 2012           options|g_notempty, use_offsets, use_size_offsets,
 2013           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
 2014           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
 2015         }
 2016 
 2017       /* If callout_data is set, use the interface with additional data */
 2018 
 2019       else if (callout_data_set)
 2020         {
 2021         if (extra == NULL)
 2022           {
 2023           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 2024           extra->flags = 0;
 2025           }
 2026         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
 2027         extra->callout_data = &callout_data;
 2028         count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
 2029           options | g_notempty, use_offsets, use_size_offsets);
 2030         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
 2031         }
 2032 
 2033       /* The normal case is just to do the match once, with the default
 2034       value of match_limit. */
 2035 
 2036 #if !defined NODFA
 2037       else if (all_use_dfa || use_dfa)
 2038         {
 2039         int workspace[1000];
 2040         count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
 2041           options | g_notempty, use_offsets, use_size_offsets, workspace,
 2042           sizeof(workspace)/sizeof(int));
 2043         if (count == 0)
 2044           {
 2045           fprintf(outfile, "Matched, but too many subsidiary matches\n");
 2046           count = use_size_offsets/2;
 2047           }
 2048         }
 2049 #endif
 2050 
 2051       else
 2052         {
 2053         count = pcre_exec(re, extra, (char *)bptr, len,
 2054           start_offset, options | g_notempty, use_offsets, use_size_offsets);
 2055         if (count == 0)
 2056           {
 2057           fprintf(outfile, "Matched, but too many substrings\n");
 2058           count = use_size_offsets/3;
 2059           }
 2060         }
 2061 
 2062       /* Matched */
 2063 
 2064       if (count >= 0)
 2065         {
 2066         int i, maxcount;
 2067 
 2068 #if !defined NODFA
 2069         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
 2070 #endif
 2071           maxcount = use_size_offsets/3;
 2072 
 2073         /* This is a check against a lunatic return value. */
 2074 
 2075         if (count > maxcount)
 2076           {
 2077           fprintf(outfile,
 2078             "** PCRE error: returned count %d is too big for offset size %d\n",
 2079             count, use_size_offsets);
 2080           count = use_size_offsets/3;
 2081           if (do_g || do_G)
 2082             {
 2083             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
 2084             do_g = do_G = FALSE;        /* Break g/G loop */
 2085             }
 2086           }
 2087 
 2088         for (i = 0; i < count * 2; i += 2)
 2089           {
 2090           if (use_offsets[i] < 0)
 2091             fprintf(outfile, "%2d: <unset>\n", i/2);
 2092           else
 2093             {
 2094             fprintf(outfile, "%2d: ", i/2);
 2095             (void)pchars(bptr + use_offsets[i],
 2096               use_offsets[i+1] - use_offsets[i], outfile);
 2097             fprintf(outfile, "\n");
 2098             if (i == 0)
 2099               {
 2100               if (do_showrest)
 2101                 {
 2102                 fprintf(outfile, " 0+ ");
 2103                 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
 2104                   outfile);
 2105                 fprintf(outfile, "\n");
 2106                 }
 2107               }
 2108             }
 2109           }
 2110 
 2111         for (i = 0; i < 32; i++)
 2112           {
 2113           if ((copystrings & (1 << i)) != 0)
 2114             {
 2115             char copybuffer[256];
 2116             int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
 2117               i, copybuffer, sizeof(copybuffer));
 2118             if (rc < 0)
 2119               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
 2120             else
 2121               fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
 2122             }
 2123           }
 2124 
 2125         for (copynamesptr = copynames;
 2126              *copynamesptr != 0;
 2127              copynamesptr += (int)strlen((char*)copynamesptr) + 1)
 2128           {
 2129           char copybuffer[256];
 2130           int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
 2131             count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
 2132           if (rc < 0)
 2133             fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
 2134           else
 2135             fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
 2136           }
 2137 
 2138         for (i = 0; i < 32; i++)
 2139           {
 2140           if ((getstrings & (1 << i)) != 0)
 2141             {
 2142             const char *substring;
 2143             int rc = pcre_get_substring((char *)bptr, use_offsets, count,
 2144               i, &substring);
 2145             if (rc < 0)
 2146               fprintf(outfile, "get substring %d failed %d\n", i, rc);
 2147             else
 2148               {
 2149               fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
 2150               pcre_free_substring(substring);
 2151               }
 2152             }
 2153           }
 2154 
 2155         for (getnamesptr = getnames;
 2156              *getnamesptr != 0;
 2157              getnamesptr += (int)strlen((char*)getnamesptr) + 1)
 2158           {
 2159           const char *substring;
 2160           int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
 2161             count, (char *)getnamesptr, &substring);
 2162           if (rc < 0)
 2163             fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
 2164           else
 2165             {
 2166             fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
 2167             pcre_free_substring(substring);
 2168             }
 2169           }
 2170 
 2171         if (getlist)
 2172           {
 2173           const char **stringlist;
 2174           int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
 2175             &stringlist);
 2176           if (rc < 0)
 2177             fprintf(outfile, "get substring list failed %d\n", rc);
 2178           else
 2179             {
 2180             for (i = 0; i < count; i++)
 2181               fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
 2182             if (stringlist[i] != NULL)
 2183               fprintf(outfile, "string list not terminated by NULL\n");
 2184             /* free((void *)stringlist); */
 2185             pcre_free_substring_list(stringlist);
 2186             }
 2187           }
 2188         }
 2189 
 2190       /* There was a partial match */
 2191 
 2192       else if (count == PCRE_ERROR_PARTIAL)
 2193         {
 2194         fprintf(outfile, "Partial match");
 2195 #if !defined NODFA
 2196         if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
 2197           fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
 2198             bptr + use_offsets[0]);
 2199 #endif
 2200         fprintf(outfile, "\n");
 2201         break;  /* Out of the /g loop */
 2202         }
 2203 
 2204       /* Failed to match. If this is a /g or /G loop and we previously set
 2205       g_notempty after a null match, this is not necessarily the end.
 2206       We want to advance the start offset, and continue. In the case of UTF-8
 2207       matching, the advance must be one character, not one byte. Fudge the
 2208       offset values to achieve this. We won't be at the end of the string -
 2209       that was checked before setting g_notempty. */
 2210 
 2211       else
 2212         {
 2213         if (g_notempty != 0)
 2214           {
 2215           int onechar = 1;
 2216           use_offsets[0] = start_offset;
 2217           if (use_utf8)
 2218             {
 2219             while (start_offset + onechar < len)
 2220               {
 2221               int tb = bptr[start_offset+onechar];
 2222               if (tb <= 127) break;
 2223               tb &= 0xc0;
 2224               if (tb != 0 && tb != 0xc0) onechar++;
 2225               }
 2226             }
 2227           use_offsets[1] = start_offset + onechar;
 2228           }
 2229         else
 2230           {
 2231           if (count == PCRE_ERROR_NOMATCH)
 2232             {
 2233             if (gmatched == 0) fprintf(outfile, "No match\n");
 2234             }
 2235           else fprintf(outfile, "Error %d\n", count);
 2236           break;  /* Out of the /g loop */
 2237           }
 2238         }
 2239 
 2240       /* If not /g or /G we are done */
 2241 
 2242       if (!do_g && !do_G) break;
 2243 
 2244       /* If we have matched an empty string, first check to see if we are at
 2245       the end of the subject. If so, the /g loop is over. Otherwise, mimic
 2246       what Perl's /g option does. This turns out to be rather cunning. First
 2247       we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
 2248       same point. If this fails (picked up above) we advance to the next
 2249       character. */
 2250 
 2251       g_notempty = 0;
 2252       if (use_offsets[0] == use_offsets[1])
 2253         {
 2254         if (use_offsets[0] == len) break;
 2255         g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
 2256         }
 2257 
 2258       /* For /g, update the start offset, leaving the rest alone */
 2259 
 2260       if (do_g) start_offset = use_offsets[1];
 2261 
 2262       /* For /G, update the pointer and length */
 2263 
 2264       else
 2265         {
 2266         bptr += use_offsets[1];
 2267         len -= use_offsets[1];
 2268         }
 2269       }  /* End of loop for /g and /G */
 2270 
 2271     NEXT_DATA: continue;
 2272     }    /* End of loop for data lines */
 2273 
 2274   CONTINUE:
 2275 
 2276 #if !defined NOPOSIX
 2277   if (posix || do_posix) regfree(&preg);
 2278 #endif
 2279 
 2280   if (re != NULL) new_free(re);
 2281   if (extra != NULL) new_free(extra);
 2282   if (tables != NULL)
 2283     {
 2284     new_free((void *)tables);
 2285     setlocale(LC_CTYPE, "C");
 2286     locale_set = 0;
 2287     }
 2288   }
 2289 
 2290 if (infile == stdin) fprintf(outfile, "\n");
 2291 
 2292 EXIT:
 2293 
 2294 if (infile != NULL && infile != stdin) fclose(infile);
 2295 if (outfile != NULL && outfile != stdout) fclose(outfile);
 2296 
 2297 free(buffer);
 2298 free(dbuffer);
 2299 free(pbuffer);
 2300 free(offsets);
 2301 
 2302 return yield;
 2303 }
 2304 
 2305 /* End of pcretest.c */