tin  2.4.5
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.4.5.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pcretest.c
Go to the documentation of this file.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4 
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8 
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12 
13  * Redistributions of source code must retain the above copyright notice,
14  this list of conditions and the following disclaimer.
15 
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19 
20  * Neither the name of the University of Cambridge nor the names of its
21  contributors may be used to endorse or promote products derived from
22  this software without specific prior written permission.
23 
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37 
38 
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46 
47 
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55 
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61 
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68 
69 
70 #define PCRE_SPY /* For Win32 build, import data, not export */
71 
72 /* We include pcre_internal.h because we need the internal info for displaying
73 the results of pcre_study() and we also need to know about the internal
74 macros, structures, and other internal data values; pcretest has "inside
75 information" compared to a program that strictly follows the PCRE API. */
76 
77 #include "pcre_internal.h"
78 
79 /* We need access to the data tables that PCRE uses. So as not to have to keep
80 two copies, we include the source file here, changing the names of the external
81 symbols to prevent clashes. */
82 
83 #define _pcre_utf8_table1 utf8_table1
84 #define _pcre_utf8_table1_size utf8_table1_size
85 #define _pcre_utf8_table2 utf8_table2
86 #define _pcre_utf8_table3 utf8_table3
87 #define _pcre_utf8_table4 utf8_table4
88 #define _pcre_utt utt
89 #define _pcre_utt_size utt_size
90 #define _pcre_OP_lengths OP_lengths
91 
92 #include "pcre_tables.c"
93 
94 /* We also need the pcre_printint() function for printing out compiled
95 patterns. This function is in a separate file so that it can be included in
96 pcre_compile.c when that module is compiled with debugging enabled.
97 
98 The definition of the macro PRINTABLE, which determines whether to print an
99 output character as-is or as a hex value when showing compiled patterns, is
100 contained in this file. We use it here also, in cases when the locale has not
101 been explicitly changed, so as to get consistent output from systems that
102 differ in their output from isprint() even in the "C" locale. */
103 
104 #include "pcre_printint.src"
105 
106 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107 
108 
109 /* It is possible to compile this test program without including support for
110 testing the POSIX interface, though this is not available via the standard
111 Makefile. */
112 
113 #if !defined NOPOSIX
114 #include "pcreposix.h"
115 #endif
116 
117 /* It is also possible, for the benefit of the version imported into Exim, to
118 build pcretest without support for UTF8 (define NOUTF8), without the interface
119 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
120 function (define NOINFOCHECK). */
121 
122 
123 /* Other parameters */
124 
125 #ifndef CLOCKS_PER_SEC
126 #ifdef CLK_TCK
127 #define CLOCKS_PER_SEC CLK_TCK
128 #else
129 #define CLOCKS_PER_SEC 100
130 #endif
131 #endif
132 
133 /* This is the default loop count for timing. */
134 
135 #define LOOPREPEAT 500000
136 
137 /* Static variables */
138 
139 static FILE *outfile; /* stream the test results are written to; file-scope so the helper functions can print to it */
140 static int log_store = 0; /* reset to the command-line -s/-m setting for each pattern; the /M pattern modifier sets it to 1 */
141 static int callout_count; /* incremented by the callout function each time the callout number equals callout_fail_id */
142 static int callout_extra; /* non-zero makes the callout function print the capture details on every call */
144 static int callout_fail_id; /* callout number that the callout function compares against before counting */ /* NOTE(review): callout_fail_count is used by the callout function but is not declared anywhere in this listing (inner line 143 appears to be missing) - confirm against the real file */
145 static int first_callout; /* non-zero until the callout function has run once; it clears this on exit */
146 static int locale_set = 0; /* when non-zero, PRINTHEX() uses isprint() instead of PRINTABLE() */
147 static int show_malloc; /* non-zero makes the malloc/free wrappers log each call to outfile */
148 static int use_utf8; /* non-zero makes pchars() decode its input as UTF-8 */
149 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
150 
151 /* The buffers grow automatically if very long input lines are encountered. */
152 
153 static int buffer_size = 50000; /* current size in bytes of each of the three buffers below; doubled by extend_inputline() */
154 static uschar *buffer = NULL; /* input line buffer filled by extend_inputline() */
155 static uschar *dbuffer = NULL; /* reallocated together with buffer, contents not preserved; its use is outside this listing */
156 static uschar *pbuffer = NULL; /* copy of the current pattern, kept for the callout function to print from */
157 
158 
159 
160 /*************************************************
161 * Read or extend an input line *
162 *************************************************/
163 
164 /* Input lines are read into buffer, but both patterns and data lines can be
165 continued over multiple input lines. In addition, if the buffer fills up, we
166 want to automatically expand it so as to be able to handle extremely large
167 lines that are needed for certain stress tests. When the input buffer is
168 expanded, the other two buffers must also be expanded likewise, and the
169 contents of pbuffer, which are a copy of the input for callouts, must be
170 preserved (for when expansion happens for a data line). This is not the most
171 optimal way of handling this, but hey, this is just a test program!
172 
173 Arguments:
174  f the file to read
175  start where in buffer to start (this *must* be within buffer)
176 
177 Returns: pointer to the start of new data
178  could be a copy of start, or could be moved
179  NULL if no data read and EOF reached
180 */
181 
182 static uschar *
183 extend_inputline(FILE *f, uschar *start)
184 {
185 uschar *here = start;
186 
187 for (;;)
188  {
189  int rlen = buffer_size - (here - buffer);
190 
191  if (rlen > 1000)
192  {
193  int dlen;
194  if (fgets((char *)here, rlen, f) == NULL)
195  return (here == start)? NULL : start;
196  dlen = (int)strlen((char *)here);
197  if (dlen > 0 && here[dlen - 1] == '\n') return start;
198  here += dlen;
199  }
200 
201  else
202  {
203  int new_buffer_size = 2*buffer_size;
204  uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
205  uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
206  uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
207 
208  if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
209  {
210  fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
211  exit(1);
212  }
213 
214  memcpy(new_buffer, buffer, buffer_size);
215  memcpy(new_pbuffer, pbuffer, buffer_size);
216 
217  buffer_size = new_buffer_size;
218 
219  start = new_buffer + (start - buffer);
220  here = new_buffer + (here - buffer);
221 
222  free(buffer);
223  free(dbuffer);
224  free(pbuffer);
225 
226  buffer = new_buffer;
227  dbuffer = new_dbuffer;
228  pbuffer = new_pbuffer;
229  }
230  }
231 
232 return NULL; /* Control never gets here */
233 }
234 
235 
236 
237 
238 
239 
240 
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted.
A digit string too large for an int yields INT_MAX instead of overflowing
(signed overflow is undefined behaviour); the remaining digits are still
consumed so that *endptr ends up just past the whole number.

Arguments:
  str      string to be converted
  endptr   where to put the end pointer (first character after the digits)

Returns:   the value as a non-negative int, saturated at INT_MAX;
           0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (isspace(*str)) str++;      /* isspace(0) is false, so this stops at NUL */
while (isdigit(*str))
  {
  int digit = *str++ - '0';
  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10 */
  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
  else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
265 
266 
267 
268 
269 /*************************************************
270 * Convert UTF-8 string to value *
271 *************************************************/
272 
273 /* This function takes one or more bytes that represents a UTF-8 character,
274 and returns the value of the character.
275 
276 Argument:
277  utf8bytes a pointer to the byte vector
278  vptr a pointer to an int to receive the value
279 
280 Returns: > 0 => the number of bytes consumed
281  -6 to 0 => malformed UTF-8 character at offset = (-return)
282 */
283 
284 #if !defined NOUTF8
285 
286 static int
287 utf82ord(unsigned char *utf8bytes, int *vptr)
288 {
289 int c = *utf8bytes++;
290 int d = c;
291 int i, j, s;
292 
293 for (i = -1; i < 6; i++) /* i is number of additional bytes */
294  {
295  if ((d & 0x80) == 0) break;
296  d <<= 1;
297  }
298 
299 if (i == -1) { *vptr = c; return 1; } /* ascii character */
300 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
301 
302 /* i now has a value in the range 1-5 */
303 
304 s = 6*i;
305 d = (c & utf8_table3[i]) << s;
306 
307 for (j = 0; j < i; j++)
308  {
309  c = *utf8bytes++;
310  if ((c & 0xc0) != 0x80) return -(j+1);
311  s -= 6;
312  d |= (c & 0x3f) << s;
313  }
314 
315 /* Check that encoding was the correct unique one */
316 
317 for (j = 0; j < utf8_table1_size; j++)
318  if (d <= utf8_table1[j]) break;
319 if (j != i) return -(i+1);
320 
321 /* Valid value */
322 
323 *vptr = d;
324 return i+1;
325 }
326 
327 #endif
328 
329 
330 
331 /*************************************************
332 * Convert character value to UTF-8 *
333 *************************************************/
334 
335 /* This function takes an integer value in the range 0 - 0x7fffffff
336 and encodes it as a UTF-8 character in 0 to 6 bytes.
337 
338 Arguments:
339  cvalue the character value
340  utf8bytes pointer to buffer for result - at least 6 bytes long
341 
342 Returns: number of characters placed in the buffer
343 */
344 
345 #if !defined NOUTF8
346 
347 static int
348 ord2utf8(int cvalue, uschar *utf8bytes)
349 {
350 register int i, j;
351 for (i = 0; i < utf8_table1_size; i++)
352  if (cvalue <= utf8_table1[i]) break;
353 utf8bytes += i;
354 for (j = i; j > 0; j--)
355  {
356  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
357  cvalue >>= 6;
358  }
359 *utf8bytes = utf8_table2[i] | cvalue;
360 return i + 1;
361 }
362 
363 #endif
364 
365 
366 
367 /*************************************************
368 * Print character string *
369 *************************************************/
370 
371 /* Character string printing function. Must handle UTF-8 strings in utf8
372 mode. Yields number of characters printed. If handed a NULL file, just counts
373 chars without printing. */
374 
375 static int pchars(unsigned char *p, int length, FILE *f)
376 {
377 int c = 0;
378 int yield = 0;
379 
380 while (length-- > 0)
381  {
382 #if !defined NOUTF8
383  if (use_utf8)
384  {
385  int rc = utf82ord(p, &c);
386 
387  if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
388  {
389  length -= rc - 1;
390  p += rc;
391  if (PRINTHEX(c))
392  {
393  if (f != NULL) fprintf(f, "%c", c);
394  yield++;
395  }
396  else
397  {
398  int n = 4;
399  if (f != NULL) fprintf(f, "\\x{%02x}", c);
400  yield += (n <= 0x000000ff)? 2 :
401  (n <= 0x00000fff)? 3 :
402  (n <= 0x0000ffff)? 4 :
403  (n <= 0x000fffff)? 5 : 6;
404  }
405  continue;
406  }
407  }
408 #endif
409 
410  /* Not UTF-8, or malformed UTF-8 */
411 
412  c = *p++;
413  if (PRINTHEX(c))
414  {
415  if (f != NULL) fprintf(f, "%c", c);
416  yield++;
417  }
418  else
419  {
420  if (f != NULL) fprintf(f, "\\x%02x", c);
421  yield += 4;
422  }
423  }
424 
425 return yield;
426 }
427 
428 
429 
430 /*************************************************
431 * Callout function *
432 *************************************************/
433 
434 /* Called from PCRE as a result of the (?C) item. We print out where we are in
435 the match. Yield zero unless more callouts than the fail count, or the callout
436 data is not zero. */
437 
439 {
440 FILE *f = (first_callout | callout_extra)? outfile : NULL;
441 int i, pre_start, post_start, subject_length;
442 
443 if (callout_extra)
444  {
445  fprintf(f, "Callout %d: last capture = %d\n",
446  cb->callout_number, cb->capture_last);
447 
448  for (i = 0; i < cb->capture_top * 2; i += 2)
449  {
450  if (cb->offset_vector[i] < 0)
451  fprintf(f, "%2d: <unset>\n", i/2);
452  else
453  {
454  fprintf(f, "%2d: ", i/2);
455  (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
456  cb->offset_vector[i+1] - cb->offset_vector[i], f);
457  fprintf(f, "\n");
458  }
459  }
460  }
461 
462 /* Re-print the subject in canonical form, the first time or if giving full
463 details. On subsequent calls in the same match, we use pchars just to find the
464 printed lengths of the substrings. */
465 
466 if (f != NULL) fprintf(f, "--->");
467 
468 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
469 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
470  cb->current_position - cb->start_match, f);
471 
472 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
473 
474 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
475  cb->subject_length - cb->current_position, f);
476 
477 if (f != NULL) fprintf(f, "\n");
478 
479 /* Always print appropriate indicators, with callout number if not already
480 shown. For automatic callouts, show the pattern offset. */
481 
482 if (cb->callout_number == 255)
483  {
484  fprintf(outfile, "%+3d ", cb->pattern_position);
485  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
486  }
487 else
488  {
489  if (callout_extra) fprintf(outfile, " ");
490  else fprintf(outfile, "%3d ", cb->callout_number);
491  }
492 
493 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
494 fprintf(outfile, "^");
495 
496 if (post_start > 0)
497  {
498  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
499  fprintf(outfile, "^");
500  }
501 
502 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
503  fprintf(outfile, " ");
504 
505 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
506  pbuffer + cb->pattern_position);
507 
508 fprintf(outfile, "\n");
509 first_callout = 0;
510 
511 if (cb->callout_data != NULL)
512  {
513  int callout_data = *((int *)(cb->callout_data));
514  if (callout_data != 0)
515  {
516  fprintf(outfile, "Callout data = %d\n", callout_data);
517  return callout_data;
518  }
519  }
520 
521 return (cb->callout_number != callout_fail_id)? 0 :
522  (++callout_count >= callout_fail_count)? 1 : 0;
523 }
524 
525 
526 /*************************************************
527 * Local malloc functions *
528 *************************************************/
529 
530 /* Alternative malloc function, to test functionality and show the size of the
531 compiled re. */
532 
533 static void *new_malloc(size_t size)
534 {
535 void *block = malloc(size);
536 gotten_store = size;
537 if (show_malloc)
538  fprintf(outfile, "malloc %3d %p\n", (int)size, block);
539 return block;
540 }
541 
542 static void new_free(void *block)
543 {
544 if (show_malloc)
545  fprintf(outfile, "free %p\n", block);
546 free(block);
547 }
548 
549 
550 /* For recursion malloc/free, to test stacking calls */
551 
552 static void *stack_malloc(size_t size)
553 {
554 void *block = malloc(size);
555 if (show_malloc)
556  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
557 return block;
558 }
559 
560 static void stack_free(void *block)
561 {
562 if (show_malloc)
563  fprintf(outfile, "stack_free %p\n", block);
564 free(block);
565 }
566 
567 
568 /*************************************************
569 * Call pcre_fullinfo() *
570 *************************************************/
571 
572 /* Get one piece of information from the pcre_fullinfo() function */
573 
574 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
575 {
576 int rc;
577 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
578  fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
579 }
580 
581 
582 
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverses the byte order of a 2-byte or a 4-byte quantity held in the low
bits of an unsigned long. Any bits above the flipped bytes are discarded.

Arguments:
  value    the value to flip
  n        2 to swap the two low bytes; any other value reverses the four
             low bytes

Returns:   the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
596 
597 
598 
599 
600 /*************************************************
601 * Check match or recursion limit *
602 *************************************************/
603 
604 static int
605 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
606  int start_offset, int options, int *use_offsets, int use_size_offsets,
607  int flag, unsigned long int *limit, int errnumber, const char *msg)
608 {
609 int count;
610 int min = 0;
611 int mid = 64;
612 int max = -1;
613 
614 extra->flags |= flag;
615 
616 for (;;)
617  {
618  *limit = mid;
619 
620  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
621  use_offsets, use_size_offsets);
622 
623  if (count == errnumber)
624  {
625  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
626  min = mid;
627  mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
628  }
629 
630  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
631  count == PCRE_ERROR_PARTIAL)
632  {
633  if (mid == min + 1)
634  {
635  fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
636  break;
637  }
638  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
639  max = mid;
640  mid = (min + mid)/2;
641  }
642  else break; /* Some other error */
643  }
644 
645 extra->flags &= ~flag;
646 return count;
647 }
648 
649 
650 
651 /*************************************************
652 * Check newline indicator *
653 *************************************************/
654 
655 /* This is used both at compile and run-time to check for <xxx> escapes, where
656 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
657 
658 Arguments:
659  p points after the leading '<'
660  f file for error message
661 
662 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
663 */
664 
665 static int
666 check_newline(uschar *p, FILE *f)
667 {
668 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
669 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
670 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
671 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
672 fprintf(f, "Unknown newline type at: <%s\n", p);
673 return 0;
674 }
675 
676 
677 
/*************************************************
*               Usage function                   *
*************************************************/

/* Writes the command-line summary to stdout. The -dfa line is left out when
NODFA is defined, and the -p line when NOPOSIX is defined. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n",
      stdout);
}
707 
708 
709 
710 /*************************************************
711 * Main Program *
712 *************************************************/
713 
714 /* Read lines from named file or stdin and write to named file or stdout; lines
715 consist of a regular expression, in delimiters and optionally followed by
716 options, followed by a set of test data, terminated by an empty line. */
717 
718 int main(int argc, char **argv)
719 {
720 FILE *infile = stdin;
721 int options = 0;
722 int study_options = 0;
723 int op = 1;
724 int timeit = 0;
725 int timeitm = 0;
726 int showinfo = 0;
727 int showstore = 0;
728 int quiet = 0;
729 int size_offsets = 45;
730 int size_offsets_max;
731 int *offsets = NULL;
732 #if !defined NOPOSIX
733 int posix = 0;
734 #endif
735 int debug = 0;
736 int done = 0;
737 int all_use_dfa = 0;
738 int yield = 0;
739 int stack_size;
740 
741 /* These vectors store, end-to-end, a list of captured substring names. Assume
742 that 1024 is plenty long enough for the few names we'll be testing. */
743 
744 uschar copynames[1024];
745 uschar getnames[1024];
746 
747 uschar *copynamesptr;
748 uschar *getnamesptr;
749 
750 /* Get buffers from malloc() so that Electric Fence will check their misuse
751 when I am debugging. They grow automatically when very long lines are read. */
752 
753 buffer = (unsigned char *)malloc(buffer_size);
754 dbuffer = (unsigned char *)malloc(buffer_size);
755 pbuffer = (unsigned char *)malloc(buffer_size);
756 
757 /* The outfile variable is static so that new_malloc can use it. */
758 
759 outfile = stdout;
760 
761 /* The following _setmode() stuff is some Windows magic that tells its runtime
762 library to translate CRLF into a single LF character. At least, that's what
763 I've been told: never having used Windows I take this all on trust. Originally
764 it set 0x8000, but then I was advised that _O_BINARY was better. */
765 
766 #if defined(_WIN32) || defined(WIN32)
767 _setmode( _fileno( stdout ), _O_BINARY );
768 #endif
769 
770 /* Scan options */
771 
772 while (argc > 1 && argv[op][0] == '-')
773  {
774  unsigned char *endptr;
775 
776  if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
777  showstore = 1;
778  else if (strcmp(argv[op], "-q") == 0) quiet = 1;
779  else if (strcmp(argv[op], "-b") == 0) debug = 1;
780  else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
781  else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
782 #if !defined NODFA
783  else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
784 #endif
785  else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
786  ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
787  *endptr == 0))
788  {
789  op++;
790  argc--;
791  }
792  else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
793  {
794  int both = argv[op][2] == 0;
795  int temp;
796  if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
797  *endptr == 0))
798  {
799  timeitm = temp;
800  op++;
801  argc--;
802  }
803  else timeitm = LOOPREPEAT;
804  if (both) timeit = timeitm;
805  }
806  else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
807  ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
808  *endptr == 0))
809  {
810 #if defined(_WIN32) || defined(WIN32)
811  printf("PCRE: -S not supported on this OS\n");
812  exit(1);
813 #else
814  int rc;
815  struct rlimit rlim;
816  getrlimit(RLIMIT_STACK, &rlim);
817  rlim.rlim_cur = stack_size * 1024 * 1024;
818  rc = setrlimit(RLIMIT_STACK, &rlim);
819  if (rc != 0)
820  {
821  printf("PCRE: setrlimit() failed with error %d\n", rc);
822  exit(1);
823  }
824  op++;
825  argc--;
826 #endif
827  }
828 #if !defined NOPOSIX
829  else if (strcmp(argv[op], "-p") == 0) posix = 1;
830 #endif
831  else if (strcmp(argv[op], "-C") == 0)
832  {
833  int rc;
834  printf("PCRE version %s\n", pcre_version());
835  printf("Compiled with\n");
836  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
837  printf(" %sUTF-8 support\n", rc? "" : "No ");
839  printf(" %sUnicode properties support\n", rc? "" : "No ");
840  (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
841  printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
842  (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
843  (rc == -1)? "ANY" : "???");
845  printf(" Internal link size = %d\n", rc);
847  printf(" POSIX malloc threshold = %d\n", rc);
849  printf(" Default match limit = %d\n", rc);
851  printf(" Default recursion depth limit = %d\n", rc);
853  printf(" Match recursion uses %s\n", rc? "stack" : "heap");
854  exit(0);
855  }
856  else if (strcmp(argv[op], "-help") == 0 ||
857  strcmp(argv[op], "--help") == 0)
858  {
859  usage();
860  goto EXIT;
861  }
862  else
863  {
864  printf("** Unknown or malformed option %s\n", argv[op]);
865  usage();
866  yield = 1;
867  goto EXIT;
868  }
869  op++;
870  argc--;
871  }
872 
873 /* Get the store for the offsets vector, and remember what it was */
874 
875 size_offsets_max = size_offsets;
876 offsets = (int *)malloc(size_offsets_max * sizeof(int));
877 if (offsets == NULL)
878  {
879  printf("** Failed to get %d bytes of memory for offsets vector\n",
880  size_offsets_max * sizeof(int));
881  yield = 1;
882  goto EXIT;
883  }
884 
885 /* Sort out the input and output files */
886 
887 if (argc > 1)
888  {
889  infile = fopen(argv[op], INPUT_MODE);
890  if (infile == NULL)
891  {
892  printf("** Failed to open %s\n", argv[op]);
893  yield = 1;
894  goto EXIT;
895  }
896  }
897 
898 if (argc > 2)
899  {
900  outfile = fopen(argv[op+1], OUTPUT_MODE);
901  if (outfile == NULL)
902  {
903  printf("** Failed to open %s\n", argv[op+1]);
904  yield = 1;
905  goto EXIT;
906  }
907  }
908 
909 /* Set alternative malloc function */
910 
915 
916 /* Heading line unless quiet, then prompt for first regex if stdin */
917 
918 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
919 
920 /* Main loop */
921 
922 while (!done)
923  {
924  pcre *re = NULL;
925  pcre_extra *extra = NULL;
926 
927 #if !defined NOPOSIX /* There are still compilers that require no indent */
928  regex_t preg;
929  int do_posix = 0;
930 #endif
931 
932  const char *error;
933  unsigned char *p, *pp, *ppp;
934  unsigned char *to_file = NULL;
935  const unsigned char *tables = NULL;
936  unsigned long int true_size, true_study_size = 0;
937  size_t size, regex_gotten_store;
938  int do_study = 0;
939  int do_debug = debug;
940  int do_G = 0;
941  int do_g = 0;
942  int do_showinfo = showinfo;
943  int do_showrest = 0;
944  int do_flip = 0;
945  int erroroffset, len, delimiter, poffset;
946 
947  use_utf8 = 0;
948 
949  if (infile == stdin) printf(" re> ");
950  if (extend_inputline(infile, buffer) == NULL) break;
951  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
952  fflush(outfile);
953 
954  p = buffer;
955  while (isspace(*p)) p++;
956  if (*p == 0) continue;
957 
958  /* See if the pattern is to be loaded pre-compiled from a file. */
959 
960  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
961  {
962  unsigned long int magic, get_options;
963  uschar sbuf[8];
964  FILE *f;
965 
966  p++;
967  pp = p + (int)strlen((char *)p);
968  while (isspace(pp[-1])) pp--;
969  *pp = 0;
970 
971  f = fopen((char *)p, "rb");
972  if (f == NULL)
973  {
974  fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
975  continue;
976  }
977 
978  if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
979 
980  true_size =
981  (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
982  true_study_size =
983  (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
984 
985  re = (real_pcre *)new_malloc(true_size);
986  regex_gotten_store = gotten_store;
987 
988  if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
989 
990  magic = ((real_pcre *)re)->magic_number;
991  if (magic != MAGIC_NUMBER)
992  {
993  if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
994  {
995  do_flip = 1;
996  }
997  else
998  {
999  fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1000  fclose(f);
1001  continue;
1002  }
1003  }
1004 
1005  fprintf(outfile, "Compiled regex%s loaded from %s\n",
1006  do_flip? " (byte-inverted)" : "", p);
1007 
1008  /* Need to know if UTF-8 for printing data strings */
1009 
1010  new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1011  use_utf8 = (get_options & PCRE_UTF8) != 0;
1012 
1013  /* Now see if there is any following study data */
1014 
1015  if (true_study_size != 0)
1016  {
1017  pcre_study_data *psd;
1018 
1019  extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1020  extra->flags = PCRE_EXTRA_STUDY_DATA;
1021 
1022  psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1023  extra->study_data = psd;
1024 
1025  if (fread(psd, 1, true_study_size, f) != true_study_size)
1026  {
1027  FAIL_READ:
1028  fprintf(outfile, "Failed to read data from %s\n", p);
1029  if (extra != NULL) new_free(extra);
1030  if (re != NULL) new_free(re);
1031  fclose(f);
1032  continue;
1033  }
1034  fprintf(outfile, "Study data loaded from %s\n", p);
1035  do_study = 1; /* To get the data output if requested */
1036  }
1037  else fprintf(outfile, "No study data\n");
1038 
1039  fclose(f);
1040  goto SHOW_INFO;
1041  }
1042 
1043  /* In-line pattern (the usual case). Get the delimiter and seek the end of
1044 the pattern; if it isn't complete, read more. */
1045 
1046  delimiter = *p++;
1047 
1048  if (isalnum(delimiter) || delimiter == '\\')
1049  {
1050  fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1051  goto SKIP_DATA;
1052  }
1053 
1054  pp = p;
1055  poffset = p - buffer;
1056 
1057  for(;;)
1058  {
1059  while (*pp != 0)
1060  {
1061  if (*pp == '\\' && pp[1] != 0) pp++;
1062  else if (*pp == delimiter) break;
1063  pp++;
1064  }
1065  if (*pp != 0) break;
1066  if (infile == stdin) printf(" > ");
1067  if ((pp = extend_inputline(infile, pp)) == NULL)
1068  {
1069  fprintf(outfile, "** Unexpected EOF\n");
1070  done = 1;
1071  goto CONTINUE;
1072  }
1073  if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1074  }
1075 
1076  /* The buffer may have moved while being extended; reset the start of data
1077  pointer to the correct relative point in the buffer. */
1078 
1079  p = buffer + poffset;
1080 
1081  /* If the first character after the delimiter is backslash, make
1082  the pattern end with backslash. This is purely to provide a way
1083  of testing for the error message when a pattern ends with backslash. */
1084 
1085  if (pp[1] == '\\') *pp++ = '\\';
1086 
1087  /* Terminate the pattern at the delimiter, and save a copy of the pattern
1088  for callouts. */
1089 
1090  *pp++ = 0;
1091  strcpy((char *)pbuffer, (char *)p);
1092 
1093  /* Look for options after final delimiter */
1094 
1095  options = 0;
1096  study_options = 0;
1097  log_store = showstore; /* default from command line */
1098 
1099  while (*pp != 0)
1100  {
1101  switch (*pp++)
1102  {
1103  case 'f': options |= PCRE_FIRSTLINE; break;
1104  case 'g': do_g = 1; break;
1105  case 'i': options |= PCRE_CASELESS; break;
1106  case 'm': options |= PCRE_MULTILINE; break;
1107  case 's': options |= PCRE_DOTALL; break;
1108  case 'x': options |= PCRE_EXTENDED; break;
1109 
1110  case '+': do_showrest = 1; break;
1111  case 'A': options |= PCRE_ANCHORED; break;
1112  case 'B': do_debug = 1; break;
1113  case 'C': options |= PCRE_AUTO_CALLOUT; break;
1114  case 'D': do_debug = do_showinfo = 1; break;
1115  case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1116  case 'F': do_flip = 1; break;
1117  case 'G': do_G = 1; break;
1118  case 'I': do_showinfo = 1; break;
1119  case 'J': options |= PCRE_DUPNAMES; break;
1120  case 'M': log_store = 1; break;
1121  case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1122 
1123 #if !defined NOPOSIX
1124  case 'P': do_posix = 1; break;
1125 #endif
1126 
1127  case 'S': do_study = 1; break;
1128  case 'U': options |= PCRE_UNGREEDY; break;
1129  case 'X': options |= PCRE_EXTRA; break;
1130  case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1131  case '?': options |= PCRE_NO_UTF8_CHECK; break;
1132 
1133  case 'L':
1134  ppp = pp;
1135  /* The '\r' test here is so that it works on Windows. */
1136  /* The '0' test is just in case this is an unterminated line. */
1137  while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1138  *ppp = 0;
1139  if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1140  {
1141  fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1142  goto SKIP_DATA;
1143  }
1144  locale_set = 1;
1145  tables = pcre_maketables();
1146  pp = ppp;
1147  break;
1148 
1149  case '>':
1150  to_file = pp;
1151  while (*pp != 0) pp++;
1152  while (isspace(pp[-1])) pp--;
1153  *pp = 0;
1154  break;
1155 
1156  case '<':
1157  {
1158  int x = check_newline(pp, outfile);
1159  if (x == 0) goto SKIP_DATA;
1160  options |= x;
1161  while (*pp++ != '>');
1162  }
1163  break;
1164 
1165  case '\r': /* So that it works in Windows */
1166  case '\n':
1167  case ' ':
1168  break;
1169 
1170  default:
1171  fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1172  goto SKIP_DATA;
1173  }
1174  }
1175 
1176  /* Handle compiling via the POSIX interface, which doesn't support the
1177  timing, showing, or debugging options, nor the ability to pass over
1178  local character tables. */
1179 
1180 #if !defined NOPOSIX
1181  if (posix || do_posix)
1182  {
1183  int rc;
1184  int cflags = 0;
1185 
1186  if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1187  if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1188  if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1189  if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1190  if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1191 
1192  rc = regcomp(&preg, (char *)p, cflags);
1193 
1194  /* Compilation failed; go back for another re, skipping to blank line
1195  if non-interactive. */
1196 
1197  if (rc != 0)
1198  {
1199  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1200  fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1201  goto SKIP_DATA;
1202  }
1203  }
1204 
1205  /* Handle compiling via the native interface */
1206 
1207  else
1208 #endif /* !defined NOPOSIX */
1209 
1210  {
1211  if (timeit > 0)
1212  {
1213  register int i;
1214  clock_t time_taken;
1215  clock_t start_time = clock();
1216  for (i = 0; i < timeit; i++)
1217  {
1218  re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1219  if (re != NULL) free(re);
1220  }
1221  time_taken = clock() - start_time;
1222  fprintf(outfile, "Compile time %.4f milliseconds\n",
1223  (((double)time_taken * 1000.0) / (double)timeit) /
1224  (double)CLOCKS_PER_SEC);
1225  }
1226 
1227  re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228 
1229  /* Compilation failed; go back for another re, skipping to blank line
1230  if non-interactive. */
1231 
1232  if (re == NULL)
1233  {
1234  fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1235  SKIP_DATA:
1236  if (infile != stdin)
1237  {
1238  for (;;)
1239  {
1240  if (extend_inputline(infile, buffer) == NULL)
1241  {
1242  done = 1;
1243  goto CONTINUE;
1244  }
1245  len = (int)strlen((char *)buffer);
1246  while (len > 0 && isspace(buffer[len-1])) len--;
1247  if (len == 0) break;
1248  }
1249  fprintf(outfile, "\n");
1250  }
1251  goto CONTINUE;
1252  }
1253 
1254  /* Compilation succeeded; print data if required. There are now two
1255  info-returning functions. The old one has a limited interface and
1256  returns only limited data. Check that it agrees with the newer one. */
1257 
1258  if (log_store)
1259  fprintf(outfile, "Memory allocation (code space): %d\n",
1260  (int)(gotten_store -
1261  sizeof(real_pcre) -
1262  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1263 
1264  /* Extract the size for possible writing before possibly flipping it,
1265  and remember the store that was got. */
1266 
1267  true_size = ((real_pcre *)re)->size;
1268  regex_gotten_store = gotten_store;
1269 
1270  /* If /S was present, study the regexp to generate additional info to
1271  help with the matching. */
1272 
1273  if (do_study)
1274  {
1275  if (timeit > 0)
1276  {
1277  register int i;
1278  clock_t time_taken;
1279  clock_t start_time = clock();
1280  for (i = 0; i < timeit; i++)
1281  extra = pcre_study(re, study_options, &error);
1282  time_taken = clock() - start_time;
1283  if (extra != NULL) free(extra);
1284  fprintf(outfile, " Study time %.4f milliseconds\n",
1285  (((double)time_taken * 1000.0) / (double)timeit) /
1286  (double)CLOCKS_PER_SEC);
1287  }
1288  extra = pcre_study(re, study_options, &error);
1289  if (error != NULL)
1290  fprintf(outfile, "Failed to study: %s\n", error);
1291  else if (extra != NULL)
1292  true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1293  }
1294 
1295  /* If the 'F' option was present, we flip the bytes of all the integer
1296  fields in the regex data block and the study block. This is to make it
1297  possible to test PCRE's handling of byte-flipped patterns, e.g. those
1298  compiled on a different architecture. */
1299 
1300  if (do_flip)
1301  {
1302  real_pcre *rre = (real_pcre *)re;
1303  rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1304  rre->size = byteflip(rre->size, sizeof(rre->size));
1305  rre->options = byteflip(rre->options, sizeof(rre->options));
1306  rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1307  rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1308  rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1309  rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1311  sizeof(rre->name_table_offset));
1313  sizeof(rre->name_entry_size));
1314  rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1315 
1316  if (extra != NULL)
1317  {
1318  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1319  rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1320  rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1321  }
1322  }
1323 
1324  /* Extract information from the compiled data if required */
1325 
1326  SHOW_INFO:
1327 
1328  if (do_debug)
1329  {
1330  fprintf(outfile, "------------------------------------------------------------------\n");
1331  pcre_printint(re, outfile);
1332  }
1333 
1334  if (do_showinfo)
1335  {
1336  unsigned long int get_options, all_options;
1337 #if !defined NOINFOCHECK
1338  int old_first_char, old_options, old_count;
1339 #endif
1340  int count, backrefmax, first_char, need_char;
1341  int nameentrysize, namecount;
1342  const uschar *nametable;
1343 
1344  new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1345  new_info(re, NULL, PCRE_INFO_SIZE, &size);
1346  new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1347  new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1348  new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1349  new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1350  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1351  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1352  new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1353 
1354 #if !defined NOINFOCHECK
1355  old_count = pcre_info(re, &old_options, &old_first_char);
1356  if (count < 0) fprintf(outfile,
1357  "Error %d from pcre_info()\n", count);
1358  else
1359  {
1360  if (old_count != count) fprintf(outfile,
1361  "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1362  old_count);
1363 
1364  if (old_first_char != first_char) fprintf(outfile,
1365  "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1366  first_char, old_first_char);
1367 
1368  if (old_options != (int)get_options) fprintf(outfile,
1369  "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1370  get_options, old_options);
1371  }
1372 #endif
1373 
1374  if (size != regex_gotten_store) fprintf(outfile,
1375  "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1376  (int)size, (int)regex_gotten_store);
1377 
1378  fprintf(outfile, "Capturing subpattern count = %d\n", count);
1379  if (backrefmax > 0)
1380  fprintf(outfile, "Max back reference = %d\n", backrefmax);
1381 
1382  if (namecount > 0)
1383  {
1384  fprintf(outfile, "Named capturing subpatterns:\n");
1385  while (namecount-- > 0)
1386  {
1387  fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1388  nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1389  GET2(nametable, 0));
1390  nametable += nameentrysize;
1391  }
1392  }
1393 
1394  /* The NOPARTIAL bit is a private bit in the options, so we have
1395  to fish it out via a back door */
1396 
1397  all_options = ((real_pcre *)re)->options;
1398  if (do_flip)
1399  {
1400  all_options = byteflip(all_options, sizeof(all_options));
1401  }
1402 
1403  if ((all_options & PCRE_NOPARTIAL) != 0)
1404  fprintf(outfile, "Partial matching not supported\n");
1405 
1406  if (get_options == 0) fprintf(outfile, "No options\n");
1407  else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1408  ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1409  ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1410  ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1411  ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1412  ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1413  ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1414  ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1415  ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1416  ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1417  ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1418  ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1419  ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1420  ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1421 
1422  switch (get_options & PCRE_NEWLINE_BITS)
1423  {
1424  case PCRE_NEWLINE_CR:
1425  fprintf(outfile, "Forced newline sequence: CR\n");
1426  break;
1427 
1428  case PCRE_NEWLINE_LF:
1429  fprintf(outfile, "Forced newline sequence: LF\n");
1430  break;
1431 
1432  case PCRE_NEWLINE_CRLF:
1433  fprintf(outfile, "Forced newline sequence: CRLF\n");
1434  break;
1435 
1436  case PCRE_NEWLINE_ANY:
1437  fprintf(outfile, "Forced newline sequence: ANY\n");
1438  break;
1439 
1440  default:
1441  break;
1442  }
1443 
1444  if (first_char == -1)
1445  {
1446  fprintf(outfile, "First char at start or follows newline\n");
1447  }
1448  else if (first_char < 0)
1449  {
1450  fprintf(outfile, "No first char\n");
1451  }
1452  else
1453  {
1454  int ch = first_char & 255;
1455  const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1456  "" : " (caseless)";
1457  if (PRINTHEX(ch))
1458  fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1459  else
1460  fprintf(outfile, "First char = %d%s\n", ch, caseless);
1461  }
1462 
1463  if (need_char < 0)
1464  {
1465  fprintf(outfile, "No need char\n");
1466  }
1467  else
1468  {
1469  int ch = need_char & 255;
1470  const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1471  "" : " (caseless)";
1472  if (PRINTHEX(ch))
1473  fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1474  else
1475  fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1476  }
1477 
1478  /* Don't output study size; at present it is in any case a fixed
1479  value, but it varies, depending on the computer architecture, and
1480  so messes up the test suite. (And with the /F option, it might be
1481  flipped.) */
1482 
1483  if (do_study)
1484  {
1485  if (extra == NULL)
1486  fprintf(outfile, "Study returned NULL\n");
1487  else
1488  {
1489  uschar *start_bits = NULL;
1490  new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1491 
1492  if (start_bits == NULL)
1493  fprintf(outfile, "No starting byte set\n");
1494  else
1495  {
1496  int i;
1497  int c = 24;
1498  fprintf(outfile, "Starting byte set: ");
1499  for (i = 0; i < 256; i++)
1500  {
1501  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1502  {
1503  if (c > 75)
1504  {
1505  fprintf(outfile, "\n ");
1506  c = 2;
1507  }
1508  if (PRINTHEX(i) && i != ' ')
1509  {
1510  fprintf(outfile, "%c ", i);
1511  c += 2;
1512  }
1513  else
1514  {
1515  fprintf(outfile, "\\x%02x ", i);
1516  c += 5;
1517  }
1518  }
1519  }
1520  fprintf(outfile, "\n");
1521  }
1522  }
1523  }
1524  }
1525 
1526  /* If the '>' option was present, we write out the regex to a file, and
1527  that is all. The first 8 bytes of the file are the regex length and then
1528  the study length, in big-endian order. */
1529 
1530  if (to_file != NULL)
1531  {
1532  FILE *f = fopen((char *)to_file, "wb");
1533  if (f == NULL)
1534  {
1535  fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1536  }
1537  else
1538  {
1539  uschar sbuf[8];
1540  sbuf[0] = (true_size >> 24) & 255;
1541  sbuf[1] = (true_size >> 16) & 255;
1542  sbuf[2] = (true_size >> 8) & 255;
1543  sbuf[3] = (true_size) & 255;
1544 
1545  sbuf[4] = (true_study_size >> 24) & 255;
1546  sbuf[5] = (true_study_size >> 16) & 255;
1547  sbuf[6] = (true_study_size >> 8) & 255;
1548  sbuf[7] = (true_study_size) & 255;
1549 
1550  if (fwrite(sbuf, 1, 8, f) < 8 ||
1551  fwrite(re, 1, true_size, f) < true_size)
1552  {
1553  fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1554  }
1555  else
1556  {
1557  fprintf(outfile, "Compiled regex written to %s\n", to_file);
1558  if (extra != NULL)
1559  {
1560  if (fwrite(extra->study_data, 1, true_study_size, f) <
1561  true_study_size)
1562  {
1563  fprintf(outfile, "Write error on %s: %s\n", to_file,
1564  strerror(errno));
1565  }
1566  else fprintf(outfile, "Study data written to %s\n", to_file);
1567 
1568  }
1569  }
1570  fclose(f);
1571  }
1572 
1573  new_free(re);
1574  if (extra != NULL) new_free(extra);
1575  if (tables != NULL) new_free((void *)tables);
1576  continue; /* With next regex */
1577  }
1578  } /* End of non-POSIX compile */
1579 
1580  /* Read data lines and test them */
1581 
1582  for (;;)
1583  {
1584  uschar *q;
1585  uschar *bptr = dbuffer;
1586  int *use_offsets = offsets;
1587  int use_size_offsets = size_offsets;
1588  int callout_data = 0;
1589  int callout_data_set = 0;
1590  int count, c;
1591  int copystrings = 0;
1592  int find_match_limit = 0;
1593  int getstrings = 0;
1594  int getlist = 0;
1595  int gmatched = 0;
1596  int start_offset = 0;
1597  int g_notempty = 0;
1598  int use_dfa = 0;
1599 
1600  options = 0;
1601 
1602  *copynames = 0;
1603  *getnames = 0;
1604 
1605  copynamesptr = copynames;
1606  getnamesptr = getnames;
1607 
1609  first_callout = 1;
1610  callout_extra = 0;
1611  callout_count = 0;
1612  callout_fail_count = 999999;
1613  callout_fail_id = -1;
1614  show_malloc = 0;
1615 
1616  if (extra != NULL) extra->flags &=
1618 
1619  len = 0;
1620  for (;;)
1621  {
1622  if (infile == stdin) printf("data> ");
1623  if (extend_inputline(infile, buffer + len) == NULL)
1624  {
1625  if (len > 0) break;
1626  done = 1;
1627  goto CONTINUE;
1628  }
1629  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1630  len = (int)strlen((char *)buffer);
1631  if (buffer[len-1] == '\n') break;
1632  }
1633 
1634  while (len > 0 && isspace(buffer[len-1])) len--;
1635  buffer[len] = 0;
1636  if (len == 0) break;
1637 
1638  p = buffer;
1639  while (isspace(*p)) p++;
1640 
1641  q = dbuffer;
1642  while ((c = *p++) != 0)
1643  {
1644  int i = 0;
1645  int n = 0;
1646 
1647  if (c == '\\') switch ((c = *p++))
1648  {
1649  case 'a': c = 7; break;
1650  case 'b': c = '\b'; break;
1651  case 'e': c = 27; break;
1652  case 'f': c = '\f'; break;
1653  case 'n': c = '\n'; break;
1654  case 'r': c = '\r'; break;
1655  case 't': c = '\t'; break;
1656  case 'v': c = '\v'; break;
1657 
1658  case '0': case '1': case '2': case '3':
1659  case '4': case '5': case '6': case '7':
1660  c -= '0';
1661  while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1662  c = c * 8 + *p++ - '0';
1663 
1664 #if !defined NOUTF8
1665  if (use_utf8 && c > 255)
1666  {
1667  unsigned char buff8[8];
1668  int ii, utn;
1669  utn = ord2utf8(c, buff8);
1670  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1671  c = buff8[ii]; /* Last byte */
1672  }
1673 #endif
1674  break;
1675 
1676  case 'x':
1677 
1678  /* Handle \x{..} specially - new Perl thing for utf8 */
1679 
1680 #if !defined NOUTF8
1681  if (*p == '{')
1682  {
1683  unsigned char *pt = p;
1684  c = 0;
1685  while (isxdigit(*(++pt)))
1686  c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1687  if (*pt == '}')
1688  {
1689  unsigned char buff8[8];
1690  int ii, utn;
1691  utn = ord2utf8(c, buff8);
1692  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1693  c = buff8[ii]; /* Last byte */
1694  p = pt + 1;
1695  break;
1696  }
1697  /* Not correct form; fall through */
1698  }
1699 #endif
1700 
1701  /* Ordinary \x */
1702 
1703  c = 0;
1704  while (i++ < 2 && isxdigit(*p))
1705  {
1706  c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1707  p++;
1708  }
1709  break;
1710 
1711  case 0: /* \ followed by EOF allows for an empty line */
1712  p--;
1713  continue;
1714 
1715  case '>':
1716  while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1717  continue;
1718 
1719  case 'A': /* Option setting */
1720  options |= PCRE_ANCHORED;
1721  continue;
1722 
1723  case 'B':
1724  options |= PCRE_NOTBOL;
1725  continue;
1726 
1727  case 'C':
1728  if (isdigit(*p)) /* Set copy string */
1729  {
1730  while(isdigit(*p)) n = n * 10 + *p++ - '0';
1731  copystrings |= 1 << n;
1732  }
1733  else if (isalnum(*p))
1734  {
1735  uschar *npp = copynamesptr;
1736  while (isalnum(*p)) *npp++ = *p++;
1737  *npp++ = 0;
1738  *npp = 0;
1739  n = pcre_get_stringnumber(re, (char *)copynamesptr);
1740  if (n < 0)
1741  fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1742  copynamesptr = npp;
1743  }
1744  else if (*p == '+')
1745  {
1746  callout_extra = 1;
1747  p++;
1748  }
1749  else if (*p == '-')
1750  {
1751  pcre_callout = NULL;
1752  p++;
1753  }
1754  else if (*p == '!')
1755  {
1756  callout_fail_id = 0;
1757  p++;
1758  while(isdigit(*p))
1759  callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1760  callout_fail_count = 0;
1761  if (*p == '!')
1762  {
1763  p++;
1764  while(isdigit(*p))
1765  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1766  }
1767  }
1768  else if (*p == '*')
1769  {
1770  int sign = 1;
1771  callout_data = 0;
1772  if (*(++p) == '-') { sign = -1; p++; }
1773  while(isdigit(*p))
1774  callout_data = callout_data * 10 + *p++ - '0';
1775  callout_data *= sign;
1776  callout_data_set = 1;
1777  }
1778  continue;
1779 
1780 #if !defined NODFA
1781  case 'D':
1782 #if !defined NOPOSIX
1783  if (posix || do_posix)
1784  printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1785  else
1786 #endif
1787  use_dfa = 1;
1788  continue;
1789 
1790  case 'F':
1791  options |= PCRE_DFA_SHORTEST;
1792  continue;
1793 #endif
1794 
1795  case 'G':
1796  if (isdigit(*p))
1797  {
1798  while(isdigit(*p)) n = n * 10 + *p++ - '0';
1799  getstrings |= 1 << n;
1800  }
1801  else if (isalnum(*p))
1802  {
1803  uschar *npp = getnamesptr;
1804  while (isalnum(*p)) *npp++ = *p++;
1805  *npp++ = 0;
1806  *npp = 0;
1807  n = pcre_get_stringnumber(re, (char *)getnamesptr);
1808  if (n < 0)
1809  fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1810  getnamesptr = npp;
1811  }
1812  continue;
1813 
1814  case 'L':
1815  getlist = 1;
1816  continue;
1817 
1818  case 'M':
1819  find_match_limit = 1;
1820  continue;
1821 
1822  case 'N':
1823  options |= PCRE_NOTEMPTY;
1824  continue;
1825 
1826  case 'O':
1827  while(isdigit(*p)) n = n * 10 + *p++ - '0';
1828  if (n > size_offsets_max)
1829  {
1830  size_offsets_max = n;
1831  free(offsets);
1832  use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1833  if (offsets == NULL)
1834  {
1835  printf("** Failed to get %d bytes of memory for offsets vector\n",
1836  size_offsets_max * sizeof(int));
1837  yield = 1;
1838  goto EXIT;
1839  }
1840  }
1841  use_size_offsets = n;
1842  if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1843  continue;
1844 
1845  case 'P':
1846  options |= PCRE_PARTIAL;
1847  continue;
1848 
1849  case 'Q':
1850  while(isdigit(*p)) n = n * 10 + *p++ - '0';
1851  if (extra == NULL)
1852  {
1853  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1854  extra->flags = 0;
1855  }
1857  extra->match_limit_recursion = n;
1858  continue;
1859 
1860  case 'q':
1861  while(isdigit(*p)) n = n * 10 + *p++ - '0';
1862  if (extra == NULL)
1863  {
1864  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1865  extra->flags = 0;
1866  }
1867  extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1868  extra->match_limit = n;
1869  continue;
1870 
1871 #if !defined NODFA
1872  case 'R':
1873  options |= PCRE_DFA_RESTART;
1874  continue;
1875 #endif
1876 
1877  case 'S':
1878  show_malloc = 1;
1879  continue;
1880 
1881  case 'Z':
1882  options |= PCRE_NOTEOL;
1883  continue;
1884 
1885  case '?':
1886  options |= PCRE_NO_UTF8_CHECK;
1887  continue;
1888 
1889  case '<':
1890  {
1891  int x = check_newline(p, outfile);
1892  if (x == 0) goto NEXT_DATA;
1893  options |= x;
1894  while (*p++ != '>');
1895  }
1896  continue;
1897  }
1898  *q++ = c;
1899  }
1900  *q = 0;
1901  len = q - dbuffer;
1902 
1903  if ((all_use_dfa || use_dfa) && find_match_limit)
1904  {
1905  printf("**Match limit not relevant for DFA matching: ignored\n");
1906  find_match_limit = 0;
1907  }
1908 
1909  /* Handle matching via the POSIX interface, which does not
1910  support timing or playing with the match limit or callout data. */
1911 
1912 #if !defined NOPOSIX
1913  if (posix || do_posix)
1914  {
1915  int rc;
1916  int eflags = 0;
1917  regmatch_t *pmatch = NULL;
1918  if (use_size_offsets > 0)
1919  pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1920  if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1921  if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1922 
1923  rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1924 
1925  if (rc != 0)
1926  {
1927  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1928  fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1929  }
1930  else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1931  != 0)
1932  {
1933  fprintf(outfile, "Matched with REG_NOSUB\n");
1934  }
1935  else
1936  {
1937  size_t i;
1938  for (i = 0; i < (size_t)use_size_offsets; i++)
1939  {
1940  if (pmatch[i].rm_so >= 0)
1941  {
1942  fprintf(outfile, "%2d: ", (int)i);
1943  (void)pchars(dbuffer + pmatch[i].rm_so,
1944  pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1945  fprintf(outfile, "\n");
1946  if (i == 0 && do_showrest)
1947  {
1948  fprintf(outfile, " 0+ ");
1949  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1950  outfile);
1951  fprintf(outfile, "\n");
1952  }
1953  }
1954  }
1955  }
1956  free(pmatch);
1957  }
1958 
1959  /* Handle matching via the native interface - repeats for /g and /G */
1960 
1961  else
1962 #endif /* !defined NOPOSIX */
1963 
1964  for (;; gmatched++) /* Loop for /g or /G */
1965  {
1966  if (timeitm > 0)
1967  {
1968  register int i;
1969  clock_t time_taken;
1970  clock_t start_time = clock();
1971 
1972 #if !defined NODFA
1973  if (all_use_dfa || use_dfa)
1974  {
1975  int workspace[1000];
1976  for (i = 0; i < timeitm; i++)
1977  count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1978  options | g_notempty, use_offsets, use_size_offsets, workspace,
1979  sizeof(workspace)/sizeof(int));
1980  }
1981  else
1982 #endif
1983 
1984  for (i = 0; i < timeitm; i++)
1985  count = pcre_exec(re, extra, (char *)bptr, len,
1986  start_offset, options | g_notempty, use_offsets, use_size_offsets);
1987 
1988  time_taken = clock() - start_time;
1989  fprintf(outfile, "Execute time %.4f milliseconds\n",
1990  (((double)time_taken * 1000.0) / (double)timeitm) /
1991  (double)CLOCKS_PER_SEC);
1992  }
1993 
1994  /* If find_match_limit is set, we want to do repeated matches with
1995  varying limits in order to find the minimum value for the match limit and
1996  for the recursion limit. */
1997 
1998  if (find_match_limit)
1999  {
2000  if (extra == NULL)
2001  {
2002  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2003  extra->flags = 0;
2004  }
2005 
2006  (void)check_match_limit(re, extra, bptr, len, start_offset,
2007  options|g_notempty, use_offsets, use_size_offsets,
2009  PCRE_ERROR_MATCHLIMIT, "match()");
2010 
2011  count = check_match_limit(re, extra, bptr, len, start_offset,
2012  options|g_notempty, use_offsets, use_size_offsets,
2014  PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2015  }
2016 
2017  /* If callout_data is set, use the interface with additional data */
2018 
2019  else if (callout_data_set)
2020  {
2021  if (extra == NULL)
2022  {
2023  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024  extra->flags = 0;
2025  }
2026  extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2027  extra->callout_data = &callout_data;
2028  count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2029  options | g_notempty, use_offsets, use_size_offsets);
2030  extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2031  }
2032 
2033  /* The normal case is just to do the match once, with the default
2034  value of match_limit. */
2035 
2036 #if !defined NODFA
2037  else if (all_use_dfa || use_dfa)
2038  {
2039  int workspace[1000];
2040  count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2041  options | g_notempty, use_offsets, use_size_offsets, workspace,
2042  sizeof(workspace)/sizeof(int));
2043  if (count == 0)
2044  {
2045  fprintf(outfile, "Matched, but too many subsidiary matches\n");
2046  count = use_size_offsets/2;
2047  }
2048  }
2049 #endif
2050 
2051  else
2052  {
2053  count = pcre_exec(re, extra, (char *)bptr, len,
2054  start_offset, options | g_notempty, use_offsets, use_size_offsets);
2055  if (count == 0)
2056  {
2057  fprintf(outfile, "Matched, but too many substrings\n");
2058  count = use_size_offsets/3;
2059  }
2060  }
2061 
2062  /* Matched */
2063 
2064  if (count >= 0)
2065  {
2066  int i, maxcount;
2067 
2068 #if !defined NODFA
2069  if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2070 #endif
2071  maxcount = use_size_offsets/3;
2072 
2073  /* This is a check against a lunatic return value. */
2074 
2075  if (count > maxcount)
2076  {
2077  fprintf(outfile,
2078  "** PCRE error: returned count %d is too big for offset size %d\n",
2079  count, use_size_offsets);
2080  count = use_size_offsets/3;
2081  if (do_g || do_G)
2082  {
2083  fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2084  do_g = do_G = FALSE; /* Break g/G loop */
2085  }
2086  }
2087 
2088  for (i = 0; i < count * 2; i += 2)
2089  {
2090  if (use_offsets[i] < 0)
2091  fprintf(outfile, "%2d: <unset>\n", i/2);
2092  else
2093  {
2094  fprintf(outfile, "%2d: ", i/2);
2095  (void)pchars(bptr + use_offsets[i],
2096  use_offsets[i+1] - use_offsets[i], outfile);
2097  fprintf(outfile, "\n");
2098  if (i == 0)
2099  {
2100  if (do_showrest)
2101  {
2102  fprintf(outfile, " 0+ ");
2103  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2104  outfile);
2105  fprintf(outfile, "\n");
2106  }
2107  }
2108  }
2109  }
2110 
2111  for (i = 0; i < 32; i++)
2112  {
2113  if ((copystrings & (1 << i)) != 0)
2114  {
2115  char copybuffer[256];
2116  int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2117  i, copybuffer, sizeof(copybuffer));
2118  if (rc < 0)
2119  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2120  else
2121  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2122  }
2123  }
2124 
2125  for (copynamesptr = copynames;
2126  *copynamesptr != 0;
2127  copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2128  {
2129  char copybuffer[256];
2130  int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2131  count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2132  if (rc < 0)
2133  fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2134  else
2135  fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2136  }
2137 
2138  for (i = 0; i < 32; i++)
2139  {
2140  if ((getstrings & (1 << i)) != 0)
2141  {
2142  const char *substring;
2143  int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2144  i, &substring);
2145  if (rc < 0)
2146  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2147  else
2148  {
2149  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2150  pcre_free_substring(substring);
2151  }
2152  }
2153  }
2154 
2155  for (getnamesptr = getnames;
2156  *getnamesptr != 0;
2157  getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2158  {
2159  const char *substring;
2160  int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2161  count, (char *)getnamesptr, &substring);
2162  if (rc < 0)
2163  fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2164  else
2165  {
2166  fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2167  pcre_free_substring(substring);
2168  }
2169  }
2170 
2171  if (getlist)
2172  {
2173  const char **stringlist;
2174  int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2175  &stringlist);
2176  if (rc < 0)
2177  fprintf(outfile, "get substring list failed %d\n", rc);
2178  else
2179  {
2180  for (i = 0; i < count; i++)
2181  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2182  if (stringlist[i] != NULL)
2183  fprintf(outfile, "string list not terminated by NULL\n");
2184  /* free((void *)stringlist); */
2185  pcre_free_substring_list(stringlist);
2186  }
2187  }
2188  }
2189 
2190  /* There was a partial match */
2191 
2192  else if (count == PCRE_ERROR_PARTIAL)
2193  {
2194  fprintf(outfile, "Partial match");
2195 #if !defined NODFA
2196  if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2197  fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2198  bptr + use_offsets[0]);
2199 #endif
2200  fprintf(outfile, "\n");
2201  break; /* Out of the /g loop */
2202  }
2203 
2204  /* Failed to match. If this is a /g or /G loop and we previously set
2205  g_notempty after a null match, this is not necessarily the end.
2206  We want to advance the start offset, and continue. In the case of UTF-8
2207  matching, the advance must be one character, not one byte. Fudge the
2208  offset values to achieve this. We won't be at the end of the string -
2209  that was checked before setting g_notempty. */
2210 
2211  else
2212  {
2213  if (g_notempty != 0)
2214  {
2215  int onechar = 1;
2216  use_offsets[0] = start_offset;
2217  if (use_utf8)
2218  {
2219  while (start_offset + onechar < len)
2220  {
2221  int tb = bptr[start_offset+onechar];
2222  if (tb <= 127) break;
2223  tb &= 0xc0;
2224  if (tb != 0 && tb != 0xc0) onechar++;
2225  }
2226  }
2227  use_offsets[1] = start_offset + onechar;
2228  }
2229  else
2230  {
2231  if (count == PCRE_ERROR_NOMATCH)
2232  {
2233  if (gmatched == 0) fprintf(outfile, "No match\n");
2234  }
2235  else fprintf(outfile, "Error %d\n", count);
2236  break; /* Out of the /g loop */
2237  }
2238  }
2239 
2240  /* If not /g or /G we are done */
2241 
2242  if (!do_g && !do_G) break;
2243 
2244  /* If we have matched an empty string, first check to see if we are at
2245  the end of the subject. If so, the /g loop is over. Otherwise, mimic
2246  what Perl's /g option does. This turns out to be rather cunning. First
2247  we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2248  same point. If this fails (picked up above) we advance to the next
2249  character. */
2250 
2251  g_notempty = 0;
2252  if (use_offsets[0] == use_offsets[1])
2253  {
2254  if (use_offsets[0] == len) break;
2255  g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2256  }
2257 
2258  /* For /g, update the start offset, leaving the rest alone */
2259 
2260  if (do_g) start_offset = use_offsets[1];
2261 
2262  /* For /G, update the pointer and length */
2263 
2264  else
2265  {
2266  bptr += use_offsets[1];
2267  len -= use_offsets[1];
2268  }
2269  } /* End of loop for /g and /G */
2270 
2271  NEXT_DATA: continue;
2272  } /* End of loop for data lines */
2273 
2274  CONTINUE:
2275 
2276 #if !defined NOPOSIX
2277  if (posix || do_posix) regfree(&preg);
2278 #endif
2279 
2280  if (re != NULL) new_free(re);
2281  if (extra != NULL) new_free(extra);
2282  if (tables != NULL)
2283  {
2284  new_free((void *)tables);
2285  setlocale(LC_CTYPE, "C");
2286  locale_set = 0;
2287  }
2288  }
2289 
2290 if (infile == stdin) fprintf(outfile, "\n");
2291 
2292 EXIT:
2293 
2294 if (infile != NULL && infile != stdin) fclose(infile);
2295 if (outfile != NULL && outfile != stdout) fclose(outfile);
2296 
2297 free(buffer);
2298 free(dbuffer);
2299 free(pbuffer);
2300 free(offsets);
2301 
2302 return yield;
2303 }
2304 
2305 /* End of pcretest.c */
#define FALSE
Definition: bool.h:70
unsigned short debug
Definition: debug.c:51
#define PCRE_CONFIG_LINK_SIZE
Definition: pcre.h:171
#define PCRE_INFO_NAMETABLE
Definition: pcre.h:162
#define PCRE_CONFIG_UNICODE_PROPERTIES
Definition: pcre.h:175
#define PCRE_INFO_NAMEENTRYSIZE
Definition: pcre.h:160
int pcre_dfa_exec(const pcre *, const pcre_extra *, const char *, int, int, int, int *, int, int *, int)
#define PCRE_NO_UTF8_CHECK
Definition: pcre.h:111
#define PCRE_INFO_OPTIONS
Definition: pcre.h:152
int pcre_get_substring_list(const char *, int *, int, const char ***)
Definition: pcre_get.c:308
#define PCRE_FIRSTLINE
Definition: pcre.h:116
#define PCRE_INFO_SIZE
Definition: pcre.h:153
int pcre_copy_named_substring(const pcre *, const char *, int *, int, const char *, char *, int)
Definition: pcre_get.c:276
#define PCRE_ERROR_PARTIAL
Definition: pcre.h:137
#define PCRE_NOTBOL
Definition: pcre.h:105
#define PCRE_INFO_LASTLITERAL
Definition: pcre.h:159
#define PCRE_UTF8
Definition: pcre.h:109
#define PCRE_EXTRA_CALLOUT_DATA
Definition: pcre.h:183
#define PCRE_NEWLINE_ANY
Definition: pcre.h:121
#define PCRE_INFO_CAPTURECOUNT
Definition: pcre.h:154
#define PCRE_EXTRA_STUDY_DATA
Definition: pcre.h:181
#define PCRE_EXTENDED
Definition: pcre.h:101
void *(* pcre_malloc)(size_t)
Definition: pcre_globals.c:75
#define PCRE_CASELESS
Definition: pcre.h:98
#define PCRE_AUTO_CALLOUT
Definition: pcre.h:112
#define PCRE_INFO_BACKREFMAX
Definition: pcre.h:155
#define PCRE_EXTRA_MATCH_LIMIT
Definition: pcre.h:182
void *(* pcre_stack_malloc)(size_t)
Definition: pcre_globals.c:77
#define PCRE_PARTIAL
Definition: pcre.h:113
int pcre_copy_substring(const char *, int *, int, int, char *, int)
Definition: pcre_get.c:231
#define PCRE_CONFIG_MATCH_LIMIT
Definition: pcre.h:173
#define PCRE_DFA_SHORTEST
Definition: pcre.h:114
int pcre_get_stringnumber(const pcre *, const char *)
Definition: pcre_get.c:65
int pcre_info(const pcre *, int *, int *)
Definition: pcre_info.c:72
#define PCRE_NOTEOL
Definition: pcre.h:106
#define PCRE_MULTILINE
Definition: pcre.h:99
void(* pcre_free)(void *)
Definition: pcre_globals.c:76
#define PCRE_INFO_FIRSTBYTE
Definition: pcre.h:156
#define PCRE_INFO_NAMECOUNT
Definition: pcre.h:161
#define PCRE_INFO_FIRSTTABLE
Definition: pcre.h:158
#define PCRE_NEWLINE_CRLF
Definition: pcre.h:120
const char * pcre_version(void)
Definition: pcre_version.c:79
#define PCRE_NO_AUTO_CAPTURE
Definition: pcre.h:110
const unsigned char * pcre_maketables(void)
#define PCRE_ERROR_RECURSIONLIMIT
Definition: pcre.h:146
int pcre_get_named_substring(const pcre *, const char *, int *, int, const char *, const char **)
Definition: pcre_get.c:433
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION
Definition: pcre.h:176
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
Definition: pcre.h:172
void pcre_free_substring(const char *)
Definition: pcre_get.c:456
pcre_extra * pcre_study(const pcre *, int, const char **)
Definition: pcre_study.c:491
#define PCRE_NOTEMPTY
Definition: pcre.h:108
int pcre_config(int, void *)
Definition: pcre_config.c:62
#define PCRE_NEWLINE_LF
Definition: pcre.h:119
#define PCRE_DOTALL
Definition: pcre.h:100
#define PCRE_ANCHORED
Definition: pcre.h:102
void pcre_free_substring_list(const char **)
Definition: pcre_get.c:353
#define PCRE_DFA_RESTART
Definition: pcre.h:115
#define PCRE_NEWLINE_CR
Definition: pcre.h:118
#define PCRE_ERROR_MATCHLIMIT
Definition: pcre.h:133
#define PCRE_DUPNAMES
Definition: pcre.h:117
#define PCRE_CONFIG_NEWLINE
Definition: pcre.h:170
#define PCRE_CONFIG_STACKRECURSE
Definition: pcre.h:174
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION
Definition: pcre.h:185
#define PCRE_CONFIG_UTF8
Definition: pcre.h:169
void(* pcre_stack_free)(void *)
Definition: pcre_globals.c:78
#define PCRE_ERROR_NOMATCH
Definition: pcre.h:125
#define PCRE_DOLLAR_ENDONLY
Definition: pcre.h:103
int pcre_get_substring(const char *, int *, int, int, const char **)
Definition: pcre_get.c:386
#define PCRE_EXTRA
Definition: pcre.h:104
pcre * pcre_compile(const char *, int, const char **, int *, const unsigned char *)
int pcre_exec(const pcre *, const pcre_extra *, const char *, int, int, int, int *, int)
Definition: pcre_exec.c:3690
int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *)
Definition: pcre_fullinfo.c:65
int(* pcre_callout)(pcre_callout_block *)
Definition: pcre_globals.c:79
#define PCRE_UNGREEDY
Definition: pcre.h:107
#define PCRE_NEWLINE_BITS
#define MAGIC_NUMBER
#define PCRE_NOPARTIAL
unsigned char uschar
#define REQ_CASELESS
#define GET2(a, n)
const int utf8_table3[]
Definition: pcregrep.c:227
static BOOL quiet
Definition: pcregrep.c:145
char * strerror(int n)
Definition: pcregrep.c:477
size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: pcreposix.c:145
int regcomp(regex_t *preg, const char *pattern, int cflags)
Definition: pcreposix.c:203
int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
Definition: pcreposix.c:245
void regfree(regex_t *preg)
Definition: pcreposix.c:180
#define REG_ICASE
Definition: pcreposix.h:55
#define REG_UTF8
Definition: pcreposix.h:61
#define REG_NOTEOL
Definition: pcreposix.h:58
#define REG_NOTBOL
Definition: pcreposix.h:57
#define REG_NEWLINE
Definition: pcreposix.h:56
#define REG_NOSUB
Definition: pcreposix.h:60
#define REG_DOTALL
Definition: pcreposix.h:59
static int use_utf8
Definition: pcretest.c:148
#define LOOPREPEAT
Definition: pcretest.c:135
static uschar * buffer
Definition: pcretest.c:154
static unsigned long int byteflip(unsigned long int value, int n)
Definition: pcretest.c:588
static int get_value(unsigned char *str, unsigned char **endptr)
Definition: pcretest.c:257
static int show_malloc
Definition: pcretest.c:147
int main(int argc, char **argv)
Definition: pcretest.c:718
#define CLOCKS_PER_SEC
Definition: pcretest.c:129
static void new_free(void *block)
Definition: pcretest.c:542
static uschar * dbuffer
Definition: pcretest.c:155
static int check_newline(uschar *p, FILE *f)
Definition: pcretest.c:666
static int callout_fail_count
Definition: pcretest.c:143
static int log_store
Definition: pcretest.c:140
#define OUTPUT_MODE
Definition: pcretest.c:66
#define PRINTHEX(c)
Definition: pcretest.c:106
static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
Definition: pcretest.c:574
static int callout_count
Definition: pcretest.c:141
static int utf82ord(unsigned char *utf8bytes, int *vptr)
Definition: pcretest.c:287
static void * new_malloc(size_t size)
Definition: pcretest.c:533
static uschar * pbuffer
Definition: pcretest.c:156
#define INPUT_MODE
Definition: pcretest.c:65
static int callout_extra
Definition: pcretest.c:142
static int locale_set
Definition: pcretest.c:146
static int check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len, int start_offset, int options, int *use_offsets, int use_size_offsets, int flag, unsigned long int *limit, int errnumber, const char *msg)
Definition: pcretest.c:605
static int ord2utf8(int cvalue, uschar *utf8bytes)
Definition: pcretest.c:348
static int callout_fail_id
Definition: pcretest.c:144
static int first_callout
Definition: pcretest.c:145
static void usage(void)
Definition: pcretest.c:683
static void stack_free(void *block)
Definition: pcretest.c:560
static size_t gotten_store
Definition: pcretest.c:149
static FILE * outfile
Definition: pcretest.c:139
static int pchars(unsigned char *p, int length, FILE *f)
Definition: pcretest.c:375
static int buffer_size
Definition: pcretest.c:153
static int callout(pcre_callout_block *cb)
Definition: pcretest.c:438
static uschar * extend_inputline(FILE *f, uschar *start)
Definition: pcretest.c:183
static void * stack_malloc(size_t size)
Definition: pcretest.c:552
int errno
int current_position
Definition: pcre.h:226
int * offset_vector
Definition: pcre.h:222
void * callout_data
Definition: pcre.h:229
int next_item_length
Definition: pcre.h:232
int capture_last
Definition: pcre.h:228
int subject_length
Definition: pcre.h:224
int pattern_position
Definition: pcre.h:231
const char * subject
Definition: pcre.h:223
int callout_number
Definition: pcre.h:221
void * study_data
Definition: pcre.h:206
unsigned long int flags
Definition: pcre.h:205
unsigned long int match_limit
Definition: pcre.h:207
unsigned long int match_limit_recursion
Definition: pcre.h:210
void * callout_data
Definition: pcre.h:208
pcre_uint32 options
pcre_uint32 size
pcre_uint32 magic_number
pcre_uint32 options
pcre_uint32 size
pcre_uint16 name_entry_size
pcre_uint16 name_count
pcre_uint16 name_table_offset
pcre_uint16 first_byte
pcre_uint16 req_byte
pcre_uint16 top_backref
pcre_uint16 top_bracket
void * re_pcre
Definition: pcreposix.h:94
#define setlocale(Category, Locale)
Definition: tin.h:78