pcre  8.37
About: The PCRE library implements Perl compatible regular expression pattern matching.
  Fossies Dox: pcre-8.37.tar.gz  ("inofficial" and yet experimental doxygen-generated source code documentation)  

pcretest.c
Go to the documentation of this file.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4 
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9 
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13 
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16 
17  * Redistributions in binary form must reproduce the above copyright
18  notice, this list of conditions and the following disclaimer in the
19  documentation and/or other materials provided with the distribution.
20 
21  * Neither the name of the University of Cambridge nor the names of its
22  contributors may be used to endorse or promote products derived from
23  this software without specific prior written permission.
24 
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38 
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48 
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52 
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60 
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65 
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81 
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89 
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95 
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99  /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103 
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105 
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109 
110 /* Not Windows */
111 
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123 
124 #ifdef __VMS
125 #include <ssdef.h>
126 void vms_setsymbol( char *, char *, int );
127 #endif
128 
129 
130 #define PRIV(name) name
131 
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136 
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140 
141 #include "pcre.h"
142 #include "pcre_internal.h"
143 
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148 
149 #ifdef SUPPORT_PCRE8
150 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152 #ifdef SUPPORT_PCRE16
153 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154 #endif
155 #ifdef SUPPORT_PCRE32
156 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157 #endif
158 
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162 
163 #define PCRE_INCLUDED
164 
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167 
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
173 
/* PRINTABLE(c) is a fixed range test that does not consult the locale.
PRINTOK(c) uses isprint() when locale_set is non-zero and falls back to
PRINTABLE(c) otherwise. */

#if !defined(EBCDIC)
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif

#define PRINTOK(c) ((locale_set) ? isprint(c) : PRINTABLE(c))
181 
182 /* Posix support is disabled in 16 or 32 bit only mode. */
183 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 #define NOPOSIX
185 #endif
186 
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
190 
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
194 
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
199 
/* Define NOUTF ourselves when the library was built without SUPPORT_UTF and
the builder has not already defined it. */

#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
205 
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8, SUPPORT_PCRE16,
211 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212 use these in the definitions of generic macros.
213 
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
219 
220 #ifdef SUPPORT_PCRE8
221 
222 #define PCHARS8(lv, p, offset, len, f) \
223  lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224 
225 #define PCHARSV8(p, offset, len, f) \
226  (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227 
228 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229  p = read_capture_name8(p, cn8, re)
230 
231 #define STRLEN8(p) ((int)strlen((char *)p))
232 
233 #define SET_PCRE_CALLOUT8(callout) \
234  pcre_callout = callout
235 
236 #define SET_PCRE_STACK_GUARD8(stack_guard) \
237  pcre_stack_guard = stack_guard
238 
239 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
240  pcre_assign_jit_stack(extra, callback, userdata)
241 
242 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
243  re = pcre_compile((char *)pat, options, error, erroffset, tables)
244 
245 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
246  namesptr, cbuffer, size) \
247  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
248  (char *)namesptr, cbuffer, size)
249 
250 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
251  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
252 
253 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
254  offsets, size_offsets, workspace, size_workspace) \
255  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
256  offsets, size_offsets, workspace, size_workspace)
257 
258 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259  offsets, size_offsets) \
260  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
261  offsets, size_offsets)
262 
263 #define PCRE_FREE_STUDY8(extra) \
264  pcre_free_study(extra)
265 
266 #define PCRE_FREE_SUBSTRING8(substring) \
267  pcre_free_substring(substring)
268 
269 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
270  pcre_free_substring_list(listptr)
271 
272 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
273  getnamesptr, subsptr) \
274  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
275  (char *)getnamesptr, subsptr)
276 
277 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
278  n = pcre_get_stringnumber(re, (char *)ptr)
279 
280 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
281  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
282 
283 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
284  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
285 
286 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
287  rc = pcre_pattern_to_host_byte_order(re, extra, tables)
288 
289 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
290  pcre_printint(re, outfile, debug_lengths)
291 
292 #define PCRE_STUDY8(extra, re, options, error) \
293  extra = pcre_study(re, options, error)
294 
295 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
296  pcre_jit_stack_alloc(startsize, maxsize)
297 
298 #define PCRE_JIT_STACK_FREE8(stack) \
299  pcre_jit_stack_free(stack)
300 
301 #define pcre8_maketables pcre_maketables
302 
303 #endif /* SUPPORT_PCRE8 */
304 
305 /* -----------------------------------------------------------*/
306 
307 #ifdef SUPPORT_PCRE16
308 
309 #define PCHARS16(lv, p, offset, len, f) \
310  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
311 
312 #define PCHARSV16(p, offset, len, f) \
313  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
314 
315 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
316  p = read_capture_name16(p, cn16, re)
317 
318 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
319 
320 #define SET_PCRE_CALLOUT16(callout) \
321  pcre16_callout = (int (*)(pcre16_callout_block *))callout
322 
323 #define SET_PCRE_STACK_GUARD16(stack_guard) \
324  pcre16_stack_guard = (int (*)(void))stack_guard
325 
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327  pcre16_assign_jit_stack((pcre16_extra *)extra, \
328  (pcre16_jit_callback)callback, userdata)
329 
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332  tables)
333 
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335  namesptr, cbuffer, size) \
336  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337  count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338 
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341  (PCRE_UCHAR16 *)cbuffer, size/2)
342 
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344  offsets, size_offsets, workspace, size_workspace) \
345  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346  (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347  workspace, size_workspace)
348 
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350  offsets, size_offsets) \
351  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352  len, start_offset, options, offsets, size_offsets)
353 
354 #define PCRE_FREE_STUDY16(extra) \
355  pcre16_free_study((pcre16_extra *)extra)
356 
357 #define PCRE_FREE_SUBSTRING16(substring) \
358  pcre16_free_substring((PCRE_SPTR16)substring)
359 
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362 
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364  getnamesptr, subsptr) \
365  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366  count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 
/* Look up the number of a named group in 16-bit mode and store it in n.
NOTE(review): the second macro parameter is not used by the expansion, which
instead refers to a variable called "re" that must be in scope where the macro
is used. That behaviour is retained because the callers are not visible here;
confirm before changing it. The pointer is cast to (pcre16 *) in the same way
as in the other 16-bit macros, so that a "pcre *" variable can be passed. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
370 
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373  (PCRE_SPTR16 *)(void*)subsptr)
374 
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377  (PCRE_SPTR16 **)(void*)listptr)
378 
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381  tables)
382 
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384  pcre16_printint(re, outfile, debug_lengths)
385 
386 #define PCRE_STUDY16(extra, re, options, error) \
387  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388 
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391 
392 #define PCRE_JIT_STACK_FREE16(stack) \
393  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394 
395 #endif /* SUPPORT_PCRE16 */
396 
397 /* -----------------------------------------------------------*/
398 
399 #ifdef SUPPORT_PCRE32
400 
401 #define PCHARS32(lv, p, offset, len, f) \
402  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403 
404 #define PCHARSV32(p, offset, len, f) \
405  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406 
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408  p = read_capture_name32(p, cn32, re)
409 
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411 
412 #define SET_PCRE_CALLOUT32(callout) \
413  pcre32_callout = (int (*)(pcre32_callout_block *))callout
414 
415 #define SET_PCRE_STACK_GUARD32(stack_guard) \
416  pcre32_stack_guard = (int (*)(void))stack_guard
417 
418 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
419  pcre32_assign_jit_stack((pcre32_extra *)extra, \
420  (pcre32_jit_callback)callback, userdata)
421 
422 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
423  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
424  tables)
425 
/* Copy a named captured substring in 32-bit mode, storing the library's
return value in rc. The size argument is a byte count (the 8-bit variant
passes it through unchanged and the 16-bit variant halves it), so it is
divided by 4 here to give the buffer length in 32-bit code units. Dividing by
2, as the 16-bit variant does, would overstate the buffer length. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
430 
/* Copy captured substring number i in 32-bit mode, storing the library's
return value in rc. The size argument is a byte count (the 8-bit variant
passes it through unchanged and the 16-bit variant halves it), so it is
divided by 4 here to give the buffer length in 32-bit code units. Dividing by
2, as the 16-bit variant does, would overstate the buffer length. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
434 
435 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
436  offsets, size_offsets, workspace, size_workspace) \
437  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
438  (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
439  workspace, size_workspace)
440 
441 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
442  offsets, size_offsets) \
443  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
444  len, start_offset, options, offsets, size_offsets)
445 
446 #define PCRE_FREE_STUDY32(extra) \
447  pcre32_free_study((pcre32_extra *)extra)
448 
449 #define PCRE_FREE_SUBSTRING32(substring) \
450  pcre32_free_substring((PCRE_SPTR32)substring)
451 
452 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
453  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
454 
455 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
456  getnamesptr, subsptr) \
457  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
458  count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
459 
/* Look up the number of a named group in 32-bit mode and store it in n.
NOTE(review): the second macro parameter is not used by the expansion, which
instead refers to a variable called "re" that must be in scope where the macro
is used. That behaviour is retained because the callers are not visible here;
confirm before changing it. The pointer is cast to (pcre32 *) in the same way
as in the other 32-bit macros, so that a "pcre *" variable can be passed. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
462 
463 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
464  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
465  (PCRE_SPTR32 *)(void*)subsptr)
466 
467 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
468  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
469  (PCRE_SPTR32 **)(void*)listptr)
470 
471 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
472  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
473  tables)
474 
475 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
476  pcre32_printint(re, outfile, debug_lengths)
477 
478 #define PCRE_STUDY32(extra, re, options, error) \
479  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
480 
481 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
482  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
483 
484 #define PCRE_JIT_STACK_FREE32(stack) \
485  pcre32_jit_stack_free((pcre32_jit_stack *)stack)
486 
487 #endif /* SUPPORT_PCRE32 */
488 
489 
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
493 
/* Values compared against pcre_mode to select the 8-, 16- or 32-bit library
at run time. The enumerators must keep the values 0, 1 and 2 in this order,
because CHAR_SIZE is computed as (1 << pcre_mode). */

enum {
  PCRE8_MODE,
  PCRE16_MODE,
  PCRE32_MODE
};
499 
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501  defined (SUPPORT_PCRE32)) >= 2
502 
503 #define CHAR_SIZE (1 << pcre_mode)
504 
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507 cases separately. */
508 
509 /* ----- All three modes supported ----- */
510 
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512 
513 #define PCHARS(lv, p, offset, len, f) \
514  if (pcre_mode == PCRE32_MODE) \
515  PCHARS32(lv, p, offset, len, f); \
516  else if (pcre_mode == PCRE16_MODE) \
517  PCHARS16(lv, p, offset, len, f); \
518  else \
519  PCHARS8(lv, p, offset, len, f)
520 
521 #define PCHARSV(p, offset, len, f) \
522  if (pcre_mode == PCRE32_MODE) \
523  PCHARSV32(p, offset, len, f); \
524  else if (pcre_mode == PCRE16_MODE) \
525  PCHARSV16(p, offset, len, f); \
526  else \
527  PCHARSV8(p, offset, len, f)
528 
529 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
530  if (pcre_mode == PCRE32_MODE) \
531  READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
532  else if (pcre_mode == PCRE16_MODE) \
533  READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
534  else \
535  READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
536 
537 #define SET_PCRE_CALLOUT(callout) \
538  if (pcre_mode == PCRE32_MODE) \
539  SET_PCRE_CALLOUT32(callout); \
540  else if (pcre_mode == PCRE16_MODE) \
541  SET_PCRE_CALLOUT16(callout); \
542  else \
543  SET_PCRE_CALLOUT8(callout)
544 
545 #define SET_PCRE_STACK_GUARD(stack_guard) \
546  if (pcre_mode == PCRE32_MODE) \
547  SET_PCRE_STACK_GUARD32(stack_guard); \
548  else if (pcre_mode == PCRE16_MODE) \
549  SET_PCRE_STACK_GUARD16(stack_guard); \
550  else \
551  SET_PCRE_STACK_GUARD8(stack_guard)
552 
553 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
554 
555 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
556  if (pcre_mode == PCRE32_MODE) \
557  PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
558  else if (pcre_mode == PCRE16_MODE) \
559  PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
560  else \
561  PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
562 
563 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
564  if (pcre_mode == PCRE32_MODE) \
565  PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
566  else if (pcre_mode == PCRE16_MODE) \
567  PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
568  else \
569  PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
570 
571 #define PCRE_CONFIG pcre_config
572 
573 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
574  namesptr, cbuffer, size) \
575  if (pcre_mode == PCRE32_MODE) \
576  PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
577  namesptr, cbuffer, size); \
578  else if (pcre_mode == PCRE16_MODE) \
579  PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
580  namesptr, cbuffer, size); \
581  else \
582  PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
583  namesptr, cbuffer, size)
584 
585 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
586  if (pcre_mode == PCRE32_MODE) \
587  PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
588  else if (pcre_mode == PCRE16_MODE) \
589  PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
590  else \
591  PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
592 
593 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
594  offsets, size_offsets, workspace, size_workspace) \
595  if (pcre_mode == PCRE32_MODE) \
596  PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
597  offsets, size_offsets, workspace, size_workspace); \
598  else if (pcre_mode == PCRE16_MODE) \
599  PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
600  offsets, size_offsets, workspace, size_workspace); \
601  else \
602  PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
603  offsets, size_offsets, workspace, size_workspace)
604 
605 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
606  offsets, size_offsets) \
607  if (pcre_mode == PCRE32_MODE) \
608  PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
609  offsets, size_offsets); \
610  else if (pcre_mode == PCRE16_MODE) \
611  PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
612  offsets, size_offsets); \
613  else \
614  PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
615  offsets, size_offsets)
616 
617 #define PCRE_FREE_STUDY(extra) \
618  if (pcre_mode == PCRE32_MODE) \
619  PCRE_FREE_STUDY32(extra); \
620  else if (pcre_mode == PCRE16_MODE) \
621  PCRE_FREE_STUDY16(extra); \
622  else \
623  PCRE_FREE_STUDY8(extra)
624 
625 #define PCRE_FREE_SUBSTRING(substring) \
626  if (pcre_mode == PCRE32_MODE) \
627  PCRE_FREE_SUBSTRING32(substring); \
628  else if (pcre_mode == PCRE16_MODE) \
629  PCRE_FREE_SUBSTRING16(substring); \
630  else \
631  PCRE_FREE_SUBSTRING8(substring)
632 
633 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
634  if (pcre_mode == PCRE32_MODE) \
635  PCRE_FREE_SUBSTRING_LIST32(listptr); \
636  else if (pcre_mode == PCRE16_MODE) \
637  PCRE_FREE_SUBSTRING_LIST16(listptr); \
638  else \
639  PCRE_FREE_SUBSTRING_LIST8(listptr)
640 
641 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
642  getnamesptr, subsptr) \
643  if (pcre_mode == PCRE32_MODE) \
644  PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
645  getnamesptr, subsptr); \
646  else if (pcre_mode == PCRE16_MODE) \
647  PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
648  getnamesptr, subsptr); \
649  else \
650  PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
651  getnamesptr, subsptr)
652 
653 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
654  if (pcre_mode == PCRE32_MODE) \
655  PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
656  else if (pcre_mode == PCRE16_MODE) \
657  PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
658  else \
659  PCRE_GET_STRINGNUMBER8(n, rc, ptr)
660 
661 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
662  if (pcre_mode == PCRE32_MODE) \
663  PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
664  else if (pcre_mode == PCRE16_MODE) \
665  PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
666  else \
667  PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
668 
669 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
670  if (pcre_mode == PCRE32_MODE) \
671  PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
672  else if (pcre_mode == PCRE16_MODE) \
673  PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
674  else \
675  PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
676 
677 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
678  (pcre_mode == PCRE32_MODE ? \
679  PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
680  : pcre_mode == PCRE16_MODE ? \
681  PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
682  : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
683 
684 #define PCRE_JIT_STACK_FREE(stack) \
685  if (pcre_mode == PCRE32_MODE) \
686  PCRE_JIT_STACK_FREE32(stack); \
687  else if (pcre_mode == PCRE16_MODE) \
688  PCRE_JIT_STACK_FREE16(stack); \
689  else \
690  PCRE_JIT_STACK_FREE8(stack)
691 
692 #define PCRE_MAKETABLES \
693  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
694 
695 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
696  if (pcre_mode == PCRE32_MODE) \
697  PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
698  else if (pcre_mode == PCRE16_MODE) \
699  PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
700  else \
701  PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
702 
703 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
704  if (pcre_mode == PCRE32_MODE) \
705  PCRE_PRINTINT32(re, outfile, debug_lengths); \
706  else if (pcre_mode == PCRE16_MODE) \
707  PCRE_PRINTINT16(re, outfile, debug_lengths); \
708  else \
709  PCRE_PRINTINT8(re, outfile, debug_lengths)
710 
711 #define PCRE_STUDY(extra, re, options, error) \
712  if (pcre_mode == PCRE32_MODE) \
713  PCRE_STUDY32(extra, re, options, error); \
714  else if (pcre_mode == PCRE16_MODE) \
715  PCRE_STUDY16(extra, re, options, error); \
716  else \
717  PCRE_STUDY8(extra, re, options, error)
718 
719 
720 /* ----- Two out of three modes are supported ----- */
721 
722 #else
723 
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
726 
727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728 
729 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
730 #define BITONE 32
731 #define BITTWO 16
732 
733 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
734 
735 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
736 #define BITONE 32
737 #define BITTWO 8
738 
739 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
740 
741 #else
742 #define BITONE 16
743 #define BITTWO 8
744 #endif
745 
746 #define glue(a,b) a##b
747 #define G(a,b) glue(a,b)
748 
749 
750 /* ----- Common macros for two-mode cases ----- */
751 
752 #define PCHARS(lv, p, offset, len, f) \
753  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
754  G(PCHARS,BITONE)(lv, p, offset, len, f); \
755  else \
756  G(PCHARS,BITTWO)(lv, p, offset, len, f)
757 
758 #define PCHARSV(p, offset, len, f) \
759  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
760  G(PCHARSV,BITONE)(p, offset, len, f); \
761  else \
762  G(PCHARSV,BITTWO)(p, offset, len, f)
763 
764 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
765  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
766  G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
767  else \
768  G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
769 
770 #define SET_PCRE_CALLOUT(callout) \
771  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
772  G(SET_PCRE_CALLOUT,BITONE)(callout); \
773  else \
774  G(SET_PCRE_CALLOUT,BITTWO)(callout)
775 
776 #define SET_PCRE_STACK_GUARD(stack_guard) \
777  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
778  G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
779  else \
780  G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
781 
782 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
783  G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
784 
785 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
786  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787  G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
788  else \
789  G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
790 
791 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
792  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
793  G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
794  else \
795  G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
796 
797 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
798 
/* Statement-style two-mode dispatch for the substring-copy and matching
macros: the BITONE variant is used when pcre_mode equals the BITONE mode
constant, otherwise the BITTWO variant. All arguments are passed through
unchanged. Each expands to an if/else without a final semicolon. */

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)
831 
/* Statement-style two-mode dispatch for the "free" and "get" macros: the
BITONE variant is used when pcre_mode equals the BITONE mode constant,
otherwise the BITTWO variant. All arguments are passed through unchanged. Each
expands to an if/else without a final semicolon. */

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_STUDY,BITONE)(extra); \
  else \
    G(PCRE_FREE_STUDY,BITTWO)(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
  else \
    G(PCRE_FREE_SUBSTRING,BITTWO)(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
  else \
    G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
  else \
    G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
  else \
    G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
876 
/* Two-mode dispatch for the JIT stack allocation macros. This is an expression
macro: it yields the result of the BITONE variant when pcre_mode equals the
BITONE mode constant, otherwise the result of the BITTWO variant. The whole
conditional is parenthesized (as STRLEN is) so that the expansion is a single
operand; without the outer parentheses a cast or operator written next to the
macro would apply to the condition only. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881 
/* Statement-style two-mode dispatch: the BITONE variant is used when pcre_mode
equals the BITONE mode constant, otherwise the BITTWO variant. Expands to an
if/else without a final semicolon. */

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
  else \
    G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
887 
/* Two-mode dispatch for the maketables functions. This is an object-like
expression macro: it calls the BITONE function when pcre_mode equals the
BITONE mode constant, otherwise the BITTWO function, and yields the result.
The whole conditional is parenthesized (as STRLEN is) so that the expansion is
a single operand wherever the macro is written. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
891 
/* Statement-style two-mode dispatch: the BITONE variant is used when pcre_mode
equals the BITONE mode constant, otherwise the BITTWO variant. All arguments
are passed through unchanged. Each expands to an if/else without a final
semicolon. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
  else \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
  else \
    G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_STUDY,BITONE)(extra, re, options, error); \
  else \
    G(PCRE_STUDY,BITTWO)(extra, re, options, error)
909 
910 #endif /* Two out of three modes */
911 
912 /* ----- End of cases where more than one mode is supported ----- */
913 
914 
915 /* ----- Only 8-bit mode is supported ----- */
916 
917 #elif defined SUPPORT_PCRE8
/* With a single mode there is nothing to select at run time, so each generic
name is a plain alias for its 8-bit counterpart and pcre_mode is not tested.
CHAR_SIZE is 1 for this mode. */

#define CHAR_SIZE                 1
#define PCHARS                    PCHARS8
#define PCHARSV                   PCHARSV8
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
#define SET_PCRE_STACK_GUARD      SET_PCRE_STACK_GUARD8
#define STRLEN                    STRLEN8
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE              PCRE_COMPILE8
#define PCRE_CONFIG               pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
#define PCRE_EXEC                 PCRE_EXEC8
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES           pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT             PCRE_PRINTINT8
#define PCRE_STUDY                PCRE_STUDY8
945 
946 /* ----- Only 16-bit mode is supported ----- */
947 
948 #elif defined SUPPORT_PCRE16
/* With a single mode there is nothing to select at run time, so each generic
name is a plain alias for its 16-bit counterpart and pcre_mode is not tested.
CHAR_SIZE is 2 for this mode. */

#define CHAR_SIZE                 2
#define PCHARS                    PCHARS16
#define PCHARSV                   PCHARSV16
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
#define SET_PCRE_STACK_GUARD      SET_PCRE_STACK_GUARD16
#define STRLEN                    STRLEN16
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE              PCRE_COMPILE16
#define PCRE_CONFIG               pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
#define PCRE_EXEC                 PCRE_EXEC16
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES           pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT             PCRE_PRINTINT16
#define PCRE_STUDY                PCRE_STUDY16
976 
977 /* ----- Only 32-bit mode is supported ----- */
978 
979 #elif defined SUPPORT_PCRE32
/* With a single mode there is nothing to select at run time, so each generic
name is a plain alias for its 32-bit counterpart and pcre_mode is not tested.
CHAR_SIZE is 4 for this mode. */

#define CHAR_SIZE                 4
#define PCHARS                    PCHARS32
#define PCHARSV                   PCHARSV32
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
#define SET_PCRE_STACK_GUARD      SET_PCRE_STACK_GUARD32
#define STRLEN                    STRLEN32
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
#define PCRE_COMPILE              PCRE_COMPILE32
#define PCRE_CONFIG               pcre32_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
#define PCRE_EXEC                 PCRE_EXEC32
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
#define PCRE_MAKETABLES           pcre32_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
#define PCRE_PRINTINT             PCRE_PRINTINT32
#define PCRE_STUDY                PCRE_STUDY32
1007 
1008 #endif
1009 
1010 /* ----- End of mode-specific function call macros ----- */
1011 
1012 
1013 /* Other parameters */
1014 
/* Supply CLOCKS_PER_SEC when nothing has defined it yet: take the value of
CLK_TCK if that exists, and fall back to 100 otherwise. */

#if !defined CLOCKS_PER_SEC
#  if defined CLK_TCK
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* DFA_WS_DIMENSION (presumably the dimension of the DFA matching workspace -
confirm at its point of use) exists only when NODFA is not defined. */

#ifndef NODFA
#  define DFA_WS_DIMENSION 1000
#endif

/* Default number of repetitions of the loop used for timing. */

#define LOOPREPEAT 500000
1030 
1031 /* Static variables */
1032 
/* File-scope state. The variables declared here without an initializer have
static storage duration, so they start out as zero (or NULL for outfile). */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;          /* set to TRUE by jit_callback() */
static int locale_set = 0;
static int show_malloc;
static int stack_guard_return;
static int use_utf;
static const unsigned char *last_callout_mark = NULL;

/* The buffers grow automatically if very long input lines are encountered.
buffer_size starts at 50000; both buffer pointers start as NULL. */

static int buffer_size = 50000;
static pcre_uint8 *buffer = NULL;
static pcre_uint8 *pbuffer = NULL;
1053 
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055 
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058 #endif
1059 
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062 #endif
1063 
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1070 
#ifdef SUPPORT_PCRE16
/* Replace IMM2_SIZE with the value used for the 16-bit operator length table. */
#undef IMM2_SIZE
#define IMM2_SIZE 1

/* Map the configured LINK_SIZE onto the value used for the 16-bit table: 2
becomes 1, and 3 or 4 becomes 2. Any other configured value stops the build.
NOTE(review): presumably this re-expresses a size in bytes as a count of
16-bit units - confirm against the library's own 16-bit definitions. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

/* buffer16 starts as NULL with a recorded size of zero and is allocated on
demand by to16(). OP_lengths16 is initialized from OP_LENGTHS at this point, so
it is expanded with the IMM2_SIZE and LINK_SIZE values set just above. */

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
1089 
#ifdef SUPPORT_PCRE32
/* For the 32-bit operator length table both IMM2_SIZE and LINK_SIZE are
forced to 1, whatever value was in force before (including a value set for
16-bit support earlier in the file). */
#undef IMM2_SIZE
#define IMM2_SIZE 1
#undef LINK_SIZE
#define LINK_SIZE 1

/* buffer32 starts as NULL with a recorded size of zero. OP_lengths32 is
initialized from OP_LENGTHS at this point, so it is expanded with the
IMM2_SIZE and LINK_SIZE values set just above. */

static int buffer32_size = 0;
static pcre_uint32 *buffer32 = NULL;
static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE32 */
1100 
/* pcre_mode holds the currently selected mode. If we have 8-bit support,
default to it; if there is also 16- or 32-bit support, the mode can be changed
by an option. If there is no 8-bit support, there must be 16- or 32-bit
support, so the default is the 16-bit mode when that is available and the
32-bit mode otherwise. */

#if defined SUPPORT_PCRE8
static int pcre_mode = PCRE8_MODE;
#elif defined SUPPORT_PCRE16
static int pcre_mode = PCRE16_MODE;
#elif defined SUPPORT_PCRE32
static int pcre_mode = PCRE32_MODE;
#endif
1112 
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The table has
seven entries, one for each digit; presumably entry n-1 is the option set for
digit n (confirm where the table is indexed). The entries run through every
non-empty combination of the three JIT compile options. */

static int jit_study_bits[] =
  {
  PCRE_STUDY_JIT_COMPILE,
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
};

/* All three JIT compile options combined into one mask. */

#define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1129 
/* Textual explanations for runtime error codes. The table has 33 entries
(indices 0 to 32). A NULL entry marks a code that has no text here because it
is reported in some other way, as noted beside each one.
NOTE(review): the index is presumably the negated PCRE error code - confirm
where the table is read. */

static const char *errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error",
  "pattern compiled with other endianness",
  "invalid data in workspace for DFA restart",
  "bad JIT option",
  "bad length"
};
1167 
1168 
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1172 
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1177 
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. The array is four tables laid end to end: 256 bytes of
lower-casing data, 256 bytes of case-flipping data, ten 32-byte class bit maps
(320 bytes), and 256 bytes of character-type flags - 1088 bytes in all. */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349 
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc.

The data below is 1088 bytes, the same total as tables0. The section markers
assume that it follows the layout documented in tables0 (lower-casing,
case-flipping, ten 32-byte class bit maps, character-type flags); they were
added on that assumption and should be confirmed against the table's origin. */

static const pcre_uint8 tables1[] = {

/* Bytes 0-255: lower-casing table (assumed section). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Bytes 256-511: case-flipping table (assumed section). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Bytes 512-831: ten 32-byte class bit maps (assumed section; in tables0 the
order is space, xdigit, digit, upper, lower, word, graph, print, punct,
cntrl). */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Bytes 832-1087: character-type flags (assumed section; values are written
in decimal here, so 18 is 0x12, 26 is 0x1a, 28 is 0x1c and 128 is 0x80). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1492 
1493 
1494 
1495 
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror() but does export the sys_nerr/sys_errlist pair. It is compiled only
when HAVE_STRERROR is not defined.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise the fixed text
            "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1515 
1516 
1517 
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1521 
1522 /*
1523 Arguments:
1524  rc the return code from PCRE_CONFIG_NEWLINE
1525  isc TRUE if called from "-C newline"
1526 Returns: nothing
1527 */
1528 
1529 static void
1530 print_newline_config(int rc, BOOL isc)
1531 {
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1534 switch(rc)
1535  {
1536  case CHAR_CR: s = "CR"; break;
1537  case CHAR_LF: s = "LF"; break;
1538  case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539  case -1: s = "ANY"; break;
1540  case -2: s = "ANYCRLF"; break;
1541 
1542  default:
1543  printf("a non-standard value: 0x%04x\n", rc);
1544  return;
1545  }
1546 
1547 printf("%s\n", s);
1548 }
1549 
1550 
1551 
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1555 
1556 static pcre_jit_stack* jit_callback(void *arg)
1557 {
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1560 }
1561 
1562 
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1567 
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1570 
1571 Argument:
1572  utf8bytes a pointer to the byte vector
1573  vptr a pointer to an int to receive the value
1574 
1575 Returns: > 0 => the number of bytes consumed
1576  -6 to 0 => malformed UTF-8 character at offset = (-return)
1577 */
1578 
1579 static int
1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 {
1582 pcre_uint32 c = *utf8bytes++;
1583 pcre_uint32 d = c;
1584 int i, j, s;
1585 
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587  {
1588  if ((d & 0x80) == 0) break;
1589  d <<= 1;
1590  }
1591 
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594 
1595 /* i now has a value in the range 1-5 */
1596 
1597 s = 6*i;
1598 d = (c & utf8_table3[i]) << s;
1599 
1600 for (j = 0; j < i; j++)
1601  {
1602  c = *utf8bytes++;
1603  if ((c & 0xc0) != 0x80) return -(j+1);
1604  s -= 6;
1605  d |= (c & 0x3f) << s;
1606  }
1607 
1608 /* Check that encoding was the correct unique one */
1609 
1610 for (j = 0; j < utf8_table1_size; j++)
1611  if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1613 
1614 /* Valid value */
1615 
1616 *vptr = d;
1617 return i+1;
1618 }
1619 #endif /* NOUTF || SUPPORT_PCRE16 */
1620 
1621 
1622 
#if defined SUPPORT_PCRE8 && !defined NOUTF
/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* Encodes a character value in the range 0 - 0x7fffffff as a UTF-8 sequence
of 1 to 6 bytes. No terminating zero is written.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer, or
             -1 if cvalue is greater than 0x7fffffff (nothing is written)
*/

static int
ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
{
int extra = 0;                 /* number of continuation bytes needed */
pcre_uint8 *tail;

if (cvalue > 0x7fffffffu) return -1;

while (extra < utf8_table1_size && cvalue > (pcre_uint32)utf8_table1[extra])
  extra++;

/* Fill in the continuation bytes from the last one back to the second byte,
taking six bits of the value each time. */

for (tail = utf8bytes + extra; tail > utf8bytes; tail--)
  {
  *tail = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }

/* What is left of the value goes into the first byte, beneath its marker. */

*utf8bytes = utf8_table2[extra] | cvalue;
return extra + 1;
}
#endif
1656 
1657 
#ifdef SUPPORT_PCRE16
/*************************************************
*             Convert a string to 16-bit         *
*************************************************/

/* Converts a byte string into 16-bit units, leaving the zero-terminated result
in buffer16. The buffer is replaced by one of 2*len + 2 bytes whenever the
current one is smaller than that: room for len 16-bit units plus a terminating
zero. No conversion produces more units than there are input bytes: a regex in
non-UTF mode is copied byte for byte, and otherwise a value below 0x10000 uses
at least one byte and yields one unit, while a larger value uses four bytes
and yields two.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled. Only a regex in
non-UTF mode is copied without UTF-8 decoding.

If memory cannot be obtained, a message is written to stderr and the program
exits with status 1.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* Decode one UTF-8 character at a time. */
  pcre_uint32 c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is allowed only in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }
else
  {
  /* A regex in non-UTF mode: widen each byte as it stands. */
  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1736 
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1741 
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because every character occupies at least one byte in UTF-8 and exactly
1745 4 bytes in UTF-32, whatever its value. The
1746 result is always left in buffer32.
1747 
1748 Note that this function does not object to surrogate values. This is
1749 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1750 for the purpose of testing that they are correctly faulted.
1751 
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1754 
1755 Arguments:
1756  data TRUE if converting a data line; FALSE for a regex
1757  p points to a byte string
1758  utf true if UTF-8 (to be converted to UTF-32)
1759  len number of bytes in the string (excluding trailing zero)
1760 
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762  OR -1 if a UTF-8 string is malformed
1763  OR -2 if a value > 0x10ffff is encountered
1764  OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765 */
1766 
1767 static int
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1769 {
1770 pcre_uint32 *pp;
1771 
1772 if (buffer32_size < 4*len + 4)
1773  {
1774  if (buffer32 != NULL) free(buffer32);
1775  buffer32_size = 4*len + 4;
1776  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777  if (buffer32 == NULL)
1778  {
1779  fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780  exit(1);
1781  }
1782  }
1783 
1784 pp = buffer32;
1785 
1786 if (!utf && !data)
1787  {
1788  while (len-- > 0) *pp++ = *p++;
1789  }
1790 
1791 else
1792  {
1793  pcre_uint32 c = 0;
1794  while (len > 0)
1795  {
1796  int chlen = utf82ord(p, &c);
1797  if (chlen <= 0) return -1;
1798  if (utf)
1799  {
1800  if (c > 0x10ffff) return -2;
1801  if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802  }
1803 
1804  p += chlen;
1805  len -= chlen;
1806  *pp++ = c;
1807  }
1808  }
1809 
1810 *pp = 0;
1811 return pp - buffer32;
1812 }
1813 
1814 /* Check that a 32-bit character string is valid UTF-32.
1815 
1816 Arguments:
1817  string points to the string
1818  length length of string, or -1 if the string is zero-terminated
1819 
1820 Returns: TRUE if the string is a valid UTF-32 string
1821  FALSE otherwise
1822 */
1823 
1824 #ifdef NEVER /* Not used */
1825 #ifdef SUPPORT_UTF
1826 static BOOL
1827 valid_utf32(pcre_uint32 *string, int length)
1828 {
1829 register pcre_uint32 *p;
1830 register pcre_uint32 c;
1831 
1832 for (p = string; length-- > 0; p++)
1833  {
1834  c = *p;
1835  if (c > 0x10ffffu) return FALSE; /* Too big */
1836  if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837  }
1838 
1839 return TRUE;
1840 }
1841 #endif /* SUPPORT_UTF */
1842 #endif /* NEVER */
1843 #endif /* SUPPORT_PCRE32 */
1844 
1845 
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1849 
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, the other two buffers must also be expanded likewise, and the
1855 contents of pbuffer, which are a copy of the input for callouts, must be
1856 preserved (for when expansion happens for a data line). This is not the most
1857 optimal way of handling this, but hey, this is just a test program!
1858 
1859 Arguments:
1860  f the file to read
1861  start where in buffer to start (this *must* be within buffer)
1862  prompt for stdin or readline()
1863 
1864 Returns: pointer to the start of new data
1865  could be a copy of start, or could be moved
1866  NULL if no data read and EOF reached
1867 */
1868 
1869 static pcre_uint8 *
1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 {
1872 pcre_uint8 *here = start;
1873 
1874 for (;;)
1875  {
1876  size_t rlen = (size_t)(buffer_size - (here - buffer));
1877 
1878  if (rlen > 1000)
1879  {
1880  int dlen;
1881 
1882  /* If libreadline or libedit support is required, use readline() to read a
1883  line if the input is a terminal. Note that readline() removes the trailing
1884  newline, so we must put it back again, to be compatible with fgets(). */
1885 
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887  if (isatty(fileno(f)))
1888  {
1889  size_t len;
1890  char *s = readline(prompt);
1891  if (s == NULL) return (here == start)? NULL : start;
1892  len = strlen(s);
1893  if (len > 0) add_history(s);
1894  if (len > rlen - 1) len = rlen - 1;
1895  memcpy(here, s, len);
1896  here[len] = '\n';
1897  here[len+1] = 0;
1898  free(s);
1899  }
1900  else
1901 #endif
1902 
1903  /* Read the next line by normal means, prompting if the file is stdin. */
1904 
1905  {
1906  if (f == stdin) printf("%s", prompt);
1907  if (fgets((char *)here, rlen, f) == NULL)
1908  return (here == start)? NULL : start;
1909  }
1910 
1911  dlen = (int)strlen((char *)here);
1912  if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913  here += dlen;
1914  }
1915 
1916  else
1917  {
1918  int new_buffer_size = 2*buffer_size;
1919  pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920  pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921 
1922  if (new_buffer == NULL || new_pbuffer == NULL)
1923  {
1924  fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925  exit(1);
1926  }
1927 
1928  memcpy(new_buffer, buffer, buffer_size);
1929  memcpy(new_pbuffer, pbuffer, buffer_size);
1930 
1931  buffer_size = new_buffer_size;
1932 
1933  start = new_buffer + (start - buffer);
1934  here = new_buffer + (here - buffer);
1935 
1936  free(buffer);
1937  free(pbuffer);
1938 
1939  buffer = new_buffer;
1940  pbuffer = new_pbuffer;
1941  }
1942  }
1943 
1944 /* Control never gets here */
1945 }
1946 
1947 
1948 
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1952 
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1956 
1957 Arguments:
1958  str string to be converted
1959  endptr where to put the end pointer
1960 
1961 Returns: the value, as an int
1962 */
1963 
1964 static int
1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1966 {
1967 int result = 0;
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970 *endptr = str;
1971 return(result);
1972 }
1973 
1974 
1975 
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1979 
1980 /* Print a single character either literally, or as a hex escape. */
1981 
1982 static int pchar(pcre_uint32 c, FILE *f)
1983 {
1984 int n = 0;
1985 if (PRINTOK(c))
1986  {
1987  if (f != NULL) fprintf(f, "%c", c);
1988  return 1;
1989  }
1990 
1991 if (c < 0x100)
1992  {
1993  if (use_utf)
1994  {
1995  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1996  return 6;
1997  }
1998  else
1999  {
2000  if (f != NULL) fprintf(f, "\\x%02x", c);
2001  return 4;
2002  }
2003  }
2004 
2005 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2006 return n >= 0 ? n : 0;
2007 }
2008 
2009 
2010 
2011 #ifdef SUPPORT_PCRE8
2012 /*************************************************
2013 * Print 8-bit character string *
2014 *************************************************/
2015 
2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2017 If handed a NULL file, just counts chars without printing. */
2018 
2019 static int pchars(pcre_uint8 *p, int length, FILE *f)
2020 {
2021 pcre_uint32 c = 0;
2022 int yield = 0;
2023 
2024 if (length < 0)
2025  length = strlen((char *)p);
2026 
2027 while (length-- > 0)
2028  {
2029 #if !defined NOUTF
2030  if (use_utf)
2031  {
2032  int rc = utf82ord(p, &c);
2033  if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2034  {
2035  length -= rc - 1;
2036  p += rc;
2037  yield += pchar(c, f);
2038  continue;
2039  }
2040  }
2041 #endif
2042  c = *p++;
2043  yield += pchar(c, f);
2044  }
2045 
2046 return yield;
2047 }
2048 #endif
2049 
2050 
2051 
2052 #ifdef SUPPORT_PCRE16
2053 /*************************************************
2054 * Find length of 0-terminated 16-bit string *
2055 *************************************************/
2056 
2057 static int strlen16(PCRE_SPTR16 p)
2058 {
2059 PCRE_SPTR16 pp = p;
2060 while (*pp != 0) pp++;
2061 return (int)(pp - p);
2062 }
2063 #endif /* SUPPORT_PCRE16 */
2064 
2065 
2066 
2067 #ifdef SUPPORT_PCRE32
2068 /*************************************************
2069 * Find length of 0-terminated 32-bit string *
2070 *************************************************/
2071 
2072 static int strlen32(PCRE_SPTR32 p)
2073 {
2074 PCRE_SPTR32 pp = p;
2075 while (*pp != 0) pp++;
2076 return (int)(pp - p);
2077 }
2078 #endif /* SUPPORT_PCRE32 */
2079 
2080 
2081 
2082 #ifdef SUPPORT_PCRE16
2083 /*************************************************
2084 * Print 16-bit character string *
2085 *************************************************/
2086 
2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2088 If handed a NULL file, just counts chars without printing. */
2089 
2090 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2091 {
2092 int yield = 0;
2093 
2094 if (length < 0)
2095  length = strlen16(p);
2096 
2097 while (length-- > 0)
2098  {
2099  pcre_uint32 c = *p++ & 0xffff;
2100 #if !defined NOUTF
2101  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2102  {
2103  int d = *p & 0xffff;
2104  if (d >= 0xDC00 && d <= 0xDFFF)
2105  {
2106  c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2107  length--;
2108  p++;
2109  }
2110  }
2111 #endif
2112  yield += pchar(c, f);
2113  }
2114 
2115 return yield;
2116 }
2117 #endif /* SUPPORT_PCRE16 */
2118 
2119 
2120 
2121 #ifdef SUPPORT_PCRE32
2122 /*************************************************
2123 * Print 32-bit character string *
2124 *************************************************/
2125 
2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2127 If handed a NULL file, just counts chars without printing. */
2128 
2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2130 {
2131 int yield = 0;
2132 
2133 (void)(utf); /* Avoid compiler warning */
2134 
2135 if (length < 0)
2136  length = strlen32(p);
2137 
2138 while (length-- > 0)
2139  {
2140  pcre_uint32 c = *p++;
2141  yield += pchar(c, f);
2142  }
2143 
2144 return yield;
2145 }
2146 #endif /* SUPPORT_PCRE32 */
2147 
2148 
2149 
2150 #ifdef SUPPORT_PCRE8
2151 /*************************************************
2152 * Read a capture name (8-bit) and check it *
2153 *************************************************/
2154 
2155 static pcre_uint8 *
2156 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2157 {
2158 pcre_uint8 *npp = *pp;
2159 while (isalnum(*p)) *npp++ = *p++;
2160 *npp++ = 0;
2161 *npp = 0;
2162 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2163  {
2164  fprintf(outfile, "no parentheses with name \"");
2165  PCHARSV(*pp, 0, -1, outfile);
2166  fprintf(outfile, "\"\n");
2167  }
2168 
2169 *pp = npp;
2170 return p;
2171 }
2172 #endif /* SUPPORT_PCRE8 */
2173 
2174 
2175 
2176 #ifdef SUPPORT_PCRE16
2177 /*************************************************
2178 * Read a capture name (16-bit) and check it *
2179 *************************************************/
2180 
2181 /* Note that the text being read is 8-bit. */
2182 
2183 static pcre_uint8 *
2184 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2185 {
2186 pcre_uint16 *npp = *pp;
2187 while (isalnum(*p)) *npp++ = *p++;
2188 *npp++ = 0;
2189 *npp = 0;
2190 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2191  {
2192  fprintf(outfile, "no parentheses with name \"");
2193  PCHARSV(*pp, 0, -1, outfile);
2194  fprintf(outfile, "\"\n");
2195  }
2196 *pp = npp;
2197 return p;
2198 }
2199 #endif /* SUPPORT_PCRE16 */
2200 
2201 
2202 
2203 #ifdef SUPPORT_PCRE32
2204 /*************************************************
2205 * Read a capture name (32-bit) and check it *
2206 *************************************************/
2207 
2208 /* Note that the text being read is 8-bit. */
2209 
2210 static pcre_uint8 *
2211 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2212 {
2213 pcre_uint32 *npp = *pp;
2214 while (isalnum(*p)) *npp++ = *p++;
2215 *npp++ = 0;
2216 *npp = 0;
2217 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2218  {
2219  fprintf(outfile, "no parentheses with name \"");
2220  PCHARSV(*pp, 0, -1, outfile);
2221  fprintf(outfile, "\"\n");
2222  }
2223 *pp = npp;
2224 return p;
2225 }
2226 #endif /* SUPPORT_PCRE32 */
2227 
2228 
2229 
2230 /*************************************************
2231 * Stack guard function *
2232 *************************************************/
2233 
2234 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2235 return when a count overflows. */
2236 
2237 static int stack_guard(void)
2238 {
2239 return stack_guard_return;
2240 }
2241 
2242 /*************************************************
2243 * Callout function *
2244 *************************************************/
2245 
2246 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2247 the match. Yield zero unless more callouts than the fail count, or the callout
2248 data is not zero. */
2249 
2250 static int callout(pcre_callout_block *cb)
2251 {
2252 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2253 int i, pre_start, post_start, subject_length;
2254 
2255 if (callout_extra)
2256  {
2257  fprintf(f, "Callout %d: last capture = %d\n",
2258  cb->callout_number, cb->capture_last);
2259 
2260  if (cb->offset_vector != NULL)
2261  {
2262  for (i = 0; i < cb->capture_top * 2; i += 2)
2263  {
2264  if (cb->offset_vector[i] < 0)
2265  fprintf(f, "%2d: <unset>\n", i/2);
2266  else
2267  {
2268  fprintf(f, "%2d: ", i/2);
2269  PCHARSV(cb->subject, cb->offset_vector[i],
2270  cb->offset_vector[i+1] - cb->offset_vector[i], f);
2271  fprintf(f, "\n");
2272  }
2273  }
2274  }
2275  }
2276 
2277 /* Re-print the subject in canonical form, the first time or if giving full
2278 datails. On subsequent calls in the same match, we use pchars just to find the
2279 printed lengths of the substrings. */
2280 
2281 if (f != NULL) fprintf(f, "--->");
2282 
2283 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2284 PCHARS(post_start, cb->subject, cb->start_match,
2285  cb->current_position - cb->start_match, f);
2286 
2287 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2288 
2289 PCHARSV(cb->subject, cb->current_position,
2290  cb->subject_length - cb->current_position, f);
2291 
2292 if (f != NULL) fprintf(f, "\n");
2293 
2294 /* Always print appropriate indicators, with callout number if not already
2295 shown. For automatic callouts, show the pattern offset. */
2296 
2297 if (cb->callout_number == 255)
2298  {
2299  fprintf(outfile, "%+3d ", cb->pattern_position);
2300  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2301  }
2302 else
2303  {
2304  if (callout_extra) fprintf(outfile, " ");
2305  else fprintf(outfile, "%3d ", cb->callout_number);
2306  }
2307 
2308 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2309 fprintf(outfile, "^");
2310 
2311 if (post_start > 0)
2312  {
2313  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2314  fprintf(outfile, "^");
2315  }
2316 
2317 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2318  fprintf(outfile, " ");
2319 
2320 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2321  pbuffer + cb->pattern_position);
2322 
2323 fprintf(outfile, "\n");
2324 first_callout = 0;
2325 
2326 if (cb->mark != last_callout_mark)
2327  {
2328  if (cb->mark == NULL)
2329  fprintf(outfile, "Latest Mark: <unset>\n");
2330  else
2331  {
2332  fprintf(outfile, "Latest Mark: ");
2333  PCHARSV(cb->mark, 0, -1, outfile);
2334  putc('\n', outfile);
2335  }
2336  last_callout_mark = cb->mark;
2337  }
2338 
2339 if (cb->callout_data != NULL)
2340  {
2341  int callout_data = *((int *)(cb->callout_data));
2342  if (callout_data != 0)
2343  {
2344  fprintf(outfile, "Callout data = %d\n", callout_data);
2345  return callout_data;
2346  }
2347  }
2348 
2349 return (cb->callout_number != callout_fail_id)? 0 :
2350  (++callout_count >= callout_fail_count)? 1 : 0;
2351 }
2352 
2353 
2354 /*************************************************
2355 * Local malloc functions *
2356 *************************************************/
2357 
2358 /* Alternative malloc function, to test functionality and save the size of a
2359 compiled re, which is the first store request that pcre_compile() makes. The
2360 show_malloc variable is set only during matching. */
2361 
2362 static void *new_malloc(size_t size)
2363 {
2364 void *block = malloc(size);
2365 if (show_malloc)
2366  fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2367 return block;
2368 }
2369 
2370 static void new_free(void *block)
2371 {
2372 if (show_malloc)
2373  fprintf(outfile, "free %p\n", block);
2374 free(block);
2375 }
2376 
2377 /* For recursion malloc/free, to test stacking calls */
2378 
2379 static void *stack_malloc(size_t size)
2380 {
2381 void *block = malloc(size);
2382 if (show_malloc)
2383  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2384 return block;
2385 }
2386 
2387 static void stack_free(void *block)
2388 {
2389 if (show_malloc)
2390  fprintf(outfile, "stack_free %p\n", block);
2391 free(block);
2392 }
2393 
2394 
2395 /*************************************************
2396 * Call pcre_fullinfo() *
2397 *************************************************/
2398 
2399 /* Get one piece of information from the pcre_fullinfo() function. When only
2400 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2401 value, but the code is defensive.
2402 
2403 Arguments:
2404  re compiled regex
2405  study study data
2406  option PCRE_INFO_xxx option
2407  ptr where to put the data
2408 
2409 Returns: 0 when OK, < 0 on error
2410 */
2411 
2412 static int
2413 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2414 {
2415 int rc;
2416 
2417 if (pcre_mode == PCRE32_MODE)
2418 #ifdef SUPPORT_PCRE32
2419  rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2420 #else
2421  rc = PCRE_ERROR_BADMODE;
2422 #endif
2423 else if (pcre_mode == PCRE16_MODE)
2424 #ifdef SUPPORT_PCRE16
2425  rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2426 #else
2427  rc = PCRE_ERROR_BADMODE;
2428 #endif
2429 else
2430 #ifdef SUPPORT_PCRE8
2431  rc = pcre_fullinfo(re, study, option, ptr);
2432 #else
2433  rc = PCRE_ERROR_BADMODE;
2434 #endif
2435 
2436 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2437  {
2438  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2439  pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2440  if (rc == PCRE_ERROR_BADMODE)
2441  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2442  "%d-bit mode\n", 8 * CHAR_SIZE,
2443  8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2444  }
2445 
2446 return rc;
2447 }
2448 
2449 
2450 
2451 /*************************************************
2452 * Swap byte functions *
2453 *************************************************/
2454 
2455 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2456 value, respectively.
2457 
2458 Arguments:
2459  value any number
2460 
2461 Returns: the byte swapped value
2462 */
2463 
2464 static pcre_uint32
2465 swap_uint32(pcre_uint32 value)
2466 {
2467 return ((value & 0x000000ff) << 24) |
2468  ((value & 0x0000ff00) << 8) |
2469  ((value & 0x00ff0000) >> 8) |
2470  (value >> 24);
2471 }
2472 
2473 static pcre_uint16
2474 swap_uint16(pcre_uint16 value)
2475 {
2476 return (value >> 8) | (value << 8);
2477 }
2478 
2479 
2480 
2481 /*************************************************
2482 * Flip bytes in a compiled pattern *
2483 *************************************************/
2484 
2485 /* This function is called if the 'F' option was present on a pattern that is
2486 to be written to a file. We flip the bytes of all the integer fields in the
2487 regex data block and the study block. In 16-bit mode this also flips relevant
2488 bytes in the pattern itself. This is to make it possible to test PCRE's
2489 ability to reload byte-flipped patterns, e.g. those compiled on a different
2490 architecture. */
2491 
2492 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2493 static void
2494 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2495 {
2496 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2497 #ifdef SUPPORT_PCRE16
2498 int op;
2499 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2500 int length = re->name_count * re->name_entry_size;
2501 #ifdef SUPPORT_UTF
2502 BOOL utf = (re->options & PCRE_UTF16) != 0;
2503 BOOL utf16_char = FALSE;
2504 #endif /* SUPPORT_UTF */
2505 #endif /* SUPPORT_PCRE16 */
2506 
2507 /* Always flip the bytes in the main data block and study blocks. */
2508 
2510 re->size = swap_uint32(re->size);
2511 re->options = swap_uint32(re->options);
2512 re->flags = swap_uint32(re->flags);
2513 re->limit_match = swap_uint32(re->limit_match);
2514 re->limit_recursion = swap_uint32(re->limit_recursion);
2515 re->first_char = swap_uint16(re->first_char);
2516 re->req_char = swap_uint16(re->req_char);
2517 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2518 re->top_bracket = swap_uint16(re->top_bracket);
2519 re->top_backref = swap_uint16(re->top_backref);
2520 re->name_table_offset = swap_uint16(re->name_table_offset);
2521 re->name_entry_size = swap_uint16(re->name_entry_size);
2522 re->name_count = swap_uint16(re->name_count);
2523 re->ref_count = swap_uint16(re->ref_count);
2524 
2525 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2526  {
2527  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2528  rsd->size = swap_uint32(rsd->size);
2529  rsd->flags = swap_uint32(rsd->flags);
2530  rsd->minlength = swap_uint32(rsd->minlength);
2531  }
2532 
2533 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2534 in the name table, if present, and then in the pattern itself. */
2535 
2536 #ifdef SUPPORT_PCRE16
2537 if (pcre_mode != PCRE16_MODE) return;
2538 
2539 while(TRUE)
2540  {
2541  /* Swap previous characters. */
2542  while (length-- > 0)
2543  {
2544  *ptr = swap_uint16(*ptr);
2545  ptr++;
2546  }
2547 #ifdef SUPPORT_UTF
2548  if (utf16_char)
2549  {
2550  if ((ptr[-1] & 0xfc00) == 0xd800)
2551  {
2552  /* We know that there is only one extra character in UTF-16. */
2553  *ptr = swap_uint16(*ptr);
2554  ptr++;
2555  }
2556  }
2557  utf16_char = FALSE;
2558 #endif /* SUPPORT_UTF */
2559 
2560  /* Get next opcode. */
2561 
2562  length = 0;
2563  op = *ptr;
2564  *ptr++ = swap_uint16(op);
2565 
2566  switch (op)
2567  {
2568  case OP_END:
2569  return;
2570 
2571 #ifdef SUPPORT_UTF
2572  case OP_CHAR:
2573  case OP_CHARI:
2574  case OP_NOT:
2575  case OP_NOTI:
2576  case OP_STAR:
2577  case OP_MINSTAR:
2578  case OP_PLUS:
2579  case OP_MINPLUS:
2580  case OP_QUERY:
2581  case OP_MINQUERY:
2582  case OP_UPTO:
2583  case OP_MINUPTO:
2584  case OP_EXACT:
2585  case OP_POSSTAR:
2586  case OP_POSPLUS:
2587  case OP_POSQUERY:
2588  case OP_POSUPTO:
2589  case OP_STARI:
2590  case OP_MINSTARI:
2591  case OP_PLUSI:
2592  case OP_MINPLUSI:
2593  case OP_QUERYI:
2594  case OP_MINQUERYI:
2595  case OP_UPTOI:
2596  case OP_MINUPTOI:
2597  case OP_EXACTI:
2598  case OP_POSSTARI:
2599  case OP_POSPLUSI:
2600  case OP_POSQUERYI:
2601  case OP_POSUPTOI:
2602  case OP_NOTSTAR:
2603  case OP_NOTMINSTAR:
2604  case OP_NOTPLUS:
2605  case OP_NOTMINPLUS:
2606  case OP_NOTQUERY:
2607  case OP_NOTMINQUERY:
2608  case OP_NOTUPTO:
2609  case OP_NOTMINUPTO:
2610  case OP_NOTEXACT:
2611  case OP_NOTPOSSTAR:
2612  case OP_NOTPOSPLUS:
2613  case OP_NOTPOSQUERY:
2614  case OP_NOTPOSUPTO:
2615  case OP_NOTSTARI:
2616  case OP_NOTMINSTARI:
2617  case OP_NOTPLUSI:
2618  case OP_NOTMINPLUSI:
2619  case OP_NOTQUERYI:
2620  case OP_NOTMINQUERYI:
2621  case OP_NOTUPTOI:
2622  case OP_NOTMINUPTOI:
2623  case OP_NOTEXACTI:
2624  case OP_NOTPOSSTARI:
2625  case OP_NOTPOSPLUSI:
2626  case OP_NOTPOSQUERYI:
2627  case OP_NOTPOSUPTOI:
2628  if (utf) utf16_char = TRUE;
2629 #endif
2630  /* Fall through. */
2631 
2632  default:
2633  length = OP_lengths16[op] - 1;
2634  break;
2635 
2636  case OP_CLASS:
2637  case OP_NCLASS:
2638  /* Skip the character bit map. */
2639  ptr += 32/sizeof(pcre_uint16);
2640  length = 0;
2641  break;
2642 
2643  case OP_XCLASS:
2644  /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2645  if (LINK_SIZE > 1)
2646  length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2647  - (1 + LINK_SIZE + 1));
2648  else
2649  length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2650 
2651  /* Reverse the size of the XCLASS instance. */
2652  *ptr = swap_uint16(*ptr);
2653  ptr++;
2654  if (LINK_SIZE > 1)
2655  {
2656  *ptr = swap_uint16(*ptr);
2657  ptr++;
2658  }
2659 
2660  op = *ptr;
2661  *ptr = swap_uint16(op);
2662  ptr++;
2663  if ((op & XCL_MAP) != 0)
2664  {
2665  /* Skip the character bit map. */
2666  ptr += 32/sizeof(pcre_uint16);
2667  length -= 32/sizeof(pcre_uint16);
2668  }
2669  break;
2670  }
2671  }
2672 /* Control should never reach here in 16 bit mode. */
2673 #endif /* SUPPORT_PCRE16 */
2674 }
2675 #endif /* SUPPORT_PCRE[8|16] */
2676 
2677 
2678 
2679 #if defined SUPPORT_PCRE32
2680 static void
2681 regexflip_32(pcre *ere, pcre_extra *extra)
2682 {
2683 real_pcre32 *re = (real_pcre32 *)ere;
2684 int op;
2685 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2686 int length = re->name_count * re->name_entry_size;
2687 
2688 /* Always flip the bytes in the main data block and study blocks. */
2689 
2691 re->size = swap_uint32(re->size);
2692 re->options = swap_uint32(re->options);
2693 re->flags = swap_uint32(re->flags);
2694 re->limit_match = swap_uint32(re->limit_match);
2695 re->limit_recursion = swap_uint32(re->limit_recursion);
2696 re->first_char = swap_uint32(re->first_char);
2697 re->req_char = swap_uint32(re->req_char);
2698 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2699 re->top_bracket = swap_uint16(re->top_bracket);
2700 re->top_backref = swap_uint16(re->top_backref);
2701 re->name_table_offset = swap_uint16(re->name_table_offset);
2702 re->name_entry_size = swap_uint16(re->name_entry_size);
2703 re->name_count = swap_uint16(re->name_count);
2704 re->ref_count = swap_uint16(re->ref_count);
2705 
2706 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2707  {
2708  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2709  rsd->size = swap_uint32(rsd->size);
2710  rsd->flags = swap_uint32(rsd->flags);
2711  rsd->minlength = swap_uint32(rsd->minlength);
2712  }
2713 
2714 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2715 the pattern itself. */
2716 
2717 while(TRUE)
2718  {
2719  /* Swap previous characters. */
2720  while (length-- > 0)
2721  {
2722  *ptr = swap_uint32(*ptr);
2723  ptr++;
2724  }
2725 
2726  /* Get next opcode. */
2727 
2728  length = 0;
2729  op = *ptr;
2730  *ptr++ = swap_uint32(op);
2731 
2732  switch (op)
2733  {
2734  case OP_END:
2735  return;
2736 
2737  default:
2738  length = OP_lengths32[op] - 1;
2739  break;
2740 
2741  case OP_CLASS:
2742  case OP_NCLASS:
2743  /* Skip the character bit map. */
2744  ptr += 32/sizeof(pcre_uint32);
2745  length = 0;
2746  break;
2747 
2748  case OP_XCLASS:
2749  /* LINK_SIZE can only be 1 in 32-bit mode. */
2750  length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2751 
2752  /* Reverse the size of the XCLASS instance. */
2753  *ptr = swap_uint32(*ptr);
2754  ptr++;
2755 
2756  op = *ptr;
2757  *ptr = swap_uint32(op);
2758  ptr++;
2759  if ((op & XCL_MAP) != 0)
2760  {
2761  /* Skip the character bit map. */
2762  ptr += 32/sizeof(pcre_uint32);
2763  length -= 32/sizeof(pcre_uint32);
2764  }
2765  break;
2766  }
2767  }
2768 /* Control should never reach here in 32 bit mode. */
2769 }
2770 
2771 #endif /* SUPPORT_PCRE32 */
2772 
2773 
2774 
2775 static void
2776 regexflip(pcre *ere, pcre_extra *extra)
2777 {
2778 #if defined SUPPORT_PCRE32
2779  if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2780  regexflip_32(ere, extra);
2781 #endif
2782 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2783  if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2784  regexflip8_or_16(ere, extra);
2785 #endif
2786 }
2787 
2788 
2789 
2790 /*************************************************
2791 * Check match or recursion limit *
2792 *************************************************/
2793 
2794 static int
2795 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2796  int start_offset, int options, int *use_offsets, int use_size_offsets,
2797  int flag, unsigned long int *limit, int errnumber, const char *msg)
2798 {
2799 int count;
2800 int min = 0;
2801 int mid = 64;
2802 int max = -1;
2803 
2804 extra->flags |= flag;
2805 
2806 for (;;)
2807  {
2808  *limit = mid;
2809 
2810  PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2811  use_offsets, use_size_offsets);
2812 
2813  if (count == errnumber)
2814  {
2815  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2816  min = mid;
2817  mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2818  }
2819 
2820  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2821  count == PCRE_ERROR_PARTIAL)
2822  {
2823  if (mid == min + 1)
2824  {
2825  fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2826  break;
2827  }
2828  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2829  max = mid;
2830  mid = (min + mid)/2;
2831  }
2832  else break; /* Some other error */
2833  }
2834 
2835 extra->flags &= ~flag;
2836 return count;
2837 }
2838 
2839 
2840 
2841 /*************************************************
2842 * Case-independent strncmp() function *
2843 *************************************************/
2844 
2845 /*
2846 Arguments:
2847  s first string
2848  t second string
2849  n number of characters to compare
2850 
2851 Returns: < 0, = 0, or > 0, according to the comparison
2852 */
2853 
2854 static int
2855 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2856 {
2857 while (n--)
2858  {
2859  int c = tolower(*s++) - tolower(*t++);
2860  if (c) return c;
2861  }
2862 return 0;
2863 }
2864 
2865 
2866 
2867 /*************************************************
2868 * Check multicharacter option *
2869 *************************************************/
2870 
2871 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2872 a message and return 0 if there is no match.
2873 
2874 Arguments:
2875  p points after the leading '<'
2876  f file for error message
2877  nl TRUE to check only for newline settings
2878  stype "modifier" or "escape sequence"
2879 
2880 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2881 */
2882 
2883 static int
2884 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2885 {
2886 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2887 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2888 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2889 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2890 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2891 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2892 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2893 
2894 if (!nl)
2895  {
2896  if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2897  }
2898 
2899 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2900 return 0;
2901 }
2902 
2903 
2904 
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the command line summary to stdout. Lines for options that depend on
how the program was built (8-, 16- and 32-bit libraries, readline, DFA, POSIX)
are included only when the corresponding support is compiled in. The list is
kept in step with the options that main() accepts, including -8 and the
"ebcdic" and "ebcdic-nl" arguments of -C.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit\n");
printf(" with its value if numeric (else 0). The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" bsr \\R type [ANYCRLF, ANY]\n");
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl the code for LF (= NL) in an EBCDIC environment\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2967 
2968 
2969 
2970 /*************************************************
2971 * Main Program *
2972 *************************************************/
2973 
2974 /* Read lines from named file or stdin and write to named file or stdout; lines
2975 consist of a regular expression, in delimiters and optionally followed by
2976 options, followed by a set of test data, terminated by an empty line. */
2977 
2978 int main(int argc, char **argv)
2979 {
2980 FILE *infile = stdin;
2981 const char *version;
2982 int options = 0;
2983 int study_options = 0;
2984 int default_find_match_limit = FALSE;
2985 pcre_uint32 default_options = 0;
2986 int op = 1;
2987 int timeit = 0;
2988 int timeitm = 0;
2989 int showtotaltimes = 0;
2990 int showinfo = 0;
2991 int showstore = 0;
2992 int force_study = -1;
2993 int force_study_options = 0;
2994 int quiet = 0;
2995 int size_offsets = 45;
2996 int size_offsets_max;
2997 int *offsets = NULL;
2998 int debug = 0;
2999 int done = 0;
3000 int all_use_dfa = 0;
3001 int verify_jit = 0;
3002 int yield = 0;
3003 int stack_size;
3004 pcre_uint8 *dbuffer = NULL;
3005 pcre_uint8 lockout[24] = { 0 };
3006 size_t dbuffer_size = 1u << 14;
3007 clock_t total_compile_time = 0;
3008 clock_t total_study_time = 0;
3009 clock_t total_match_time = 0;
3010 
3011 #if !defined NOPOSIX
3012 int posix = 0;
3013 #endif
3014 #if !defined NODFA
3015 int *dfa_workspace = NULL;
3016 #endif
3017 
3018 pcre_jit_stack *jit_stack = NULL;
3019 
3020 /* These vectors store, end-to-end, a list of zero-terminated captured
3021 substring names, each list itself being terminated by an empty name. Assume
3022 that 1024 is plenty long enough for the few names we'll be testing. It is
3023 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3024 for the actual memory, to ensure alignment. */
3025 
3026 pcre_uint32 copynames[1024];
3027 pcre_uint32 getnames[1024];
3028 
3029 #ifdef SUPPORT_PCRE32
3030 pcre_uint32 *cn32ptr;
3031 pcre_uint32 *gn32ptr;
3032 #endif
3033 
3034 #ifdef SUPPORT_PCRE16
3035 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3036 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3037 pcre_uint16 *cn16ptr;
3038 pcre_uint16 *gn16ptr;
3039 #endif
3040 
3041 #ifdef SUPPORT_PCRE8
3042 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3043 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3044 pcre_uint8 *cn8ptr;
3045 pcre_uint8 *gn8ptr;
3046 #endif
3047 
3048 /* Get buffers from malloc() so that valgrind will check their misuse when
3049 debugging. They grow automatically when very long lines are read. The 16-
3050 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3051 
3052 buffer = (pcre_uint8 *)malloc(buffer_size);
3053 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3054 
3055 /* The outfile variable is static so that new_malloc can use it. */
3056 
3057 outfile = stdout;
3058 
3059 /* The following _setmode() stuff is some Windows magic that tells its runtime
3060 library to translate CRLF into a single LF character. At least, that's what
3061 I've been told: never having used Windows I take this all on trust. Originally
3062 it set 0x8000, but then I was advised that _O_BINARY was better. */
3063 
3064 #if defined(_WIN32) || defined(WIN32)
3065 _setmode( _fileno( stdout ), _O_BINARY );
3066 #endif
3067 
3068 /* Get the version number: both pcre_version() and pcre16_version() give the
3069 same answer. We just need to ensure that we call one that is available. */
3070 
3071 #if defined SUPPORT_PCRE8
3072 version = pcre_version();
3073 #elif defined SUPPORT_PCRE16
3074 version = pcre16_version();
3075 #elif defined SUPPORT_PCRE32
3076 version = pcre32_version();
3077 #endif
3078 
3079 /* Scan options */
3080 
3081 while (argc > 1 && argv[op][0] == '-')
3082  {
3083  pcre_uint8 *endptr;
3084  char *arg = argv[op];
3085 
3086  if (strcmp(arg, "-m") == 0) showstore = 1;
3087  else if (strcmp(arg, "-s") == 0) force_study = 0;
3088 
3089  else if (strncmp(arg, "-s+", 3) == 0)
3090  {
3091  arg += 3;
3092  if (*arg == '+') { arg++; verify_jit = TRUE; }
3093  force_study = 1;
3094  if (*arg == 0)
3095  force_study_options = jit_study_bits[6];
3096  else if (*arg >= '1' && *arg <= '7')
3097  force_study_options = jit_study_bits[*arg - '1'];
3098  else goto BAD_ARG;
3099  }
3100  else if (strcmp(arg, "-8") == 0)
3101  {
3102 #ifdef SUPPORT_PCRE8
3103  pcre_mode = PCRE8_MODE;
3104 #else
3105  printf("** This version of PCRE was built without 8-bit support\n");
3106  exit(1);
3107 #endif
3108  }
3109  else if (strcmp(arg, "-16") == 0)
3110  {
3111 #ifdef SUPPORT_PCRE16
3112  pcre_mode = PCRE16_MODE;
3113 #else
3114  printf("** This version of PCRE was built without 16-bit support\n");
3115  exit(1);
3116 #endif
3117  }
3118  else if (strcmp(arg, "-32") == 0)
3119  {
3120 #ifdef SUPPORT_PCRE32
3121  pcre_mode = PCRE32_MODE;
3122 #else
3123  printf("** This version of PCRE was built without 32-bit support\n");
3124  exit(1);
3125 #endif
3126  }
3127  else if (strcmp(arg, "-q") == 0) quiet = 1;
3128  else if (strcmp(arg, "-b") == 0) debug = 1;
3129  else if (strcmp(arg, "-i") == 0) showinfo = 1;
3130  else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3131  else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3132  else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3133 #if !defined NODFA
3134  else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3135 #endif
3136  else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3137  ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3138  *endptr == 0))
3139  {
3140  op++;
3141  argc--;
3142  }
3143  else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3144  strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3145  {
3146  int temp;
3147  int both = arg[2] == 0;
3148  showtotaltimes = arg[1] == 'T';
3149  if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3150  *endptr == 0))
3151  {
3152  timeitm = temp;
3153  op++;
3154  argc--;
3155  }
3156  else timeitm = LOOPREPEAT;
3157  if (both) timeit = timeitm;
3158  }
3159  else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3160  ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3161  *endptr == 0))
3162  {
3163 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3164  printf("PCRE: -S not supported on this OS\n");
3165  exit(1);
3166 #else
3167  int rc;
3168  struct rlimit rlim;
3169  getrlimit(RLIMIT_STACK, &rlim);
3170  rlim.rlim_cur = stack_size * 1024 * 1024;
3171  rc = setrlimit(RLIMIT_STACK, &rlim);
3172  if (rc != 0)
3173  {
3174  printf("PCRE: setrlimit() failed with error %d\n", rc);
3175  exit(1);
3176  }
3177  op++;
3178  argc--;
3179 #endif
3180  }
3181 #if !defined NOPOSIX
3182  else if (strcmp(arg, "-p") == 0) posix = 1;
3183 #endif
3184  else if (strcmp(arg, "-C") == 0)
3185  {
3186  int rc;
3187  unsigned long int lrc;
3188 
3189  if (argc > 2)
3190  {
3191  if (strcmp(argv[op + 1], "linksize") == 0)
3192  {
3193  (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3194  printf("%d\n", rc);
3195  yield = rc;
3196 
3197 #ifdef __VMS
3198  vms_setsymbol("LINKSIZE",0,yield );
3199 #endif
3200  }
3201  else if (strcmp(argv[op + 1], "pcre8") == 0)
3202  {
3203 #ifdef SUPPORT_PCRE8
3204  printf("1\n");
3205  yield = 1;
3206 #else
3207  printf("0\n");
3208  yield = 0;
3209 #endif
3210 #ifdef __VMS
3211  vms_setsymbol("PCRE8",0,yield );
3212 #endif
3213  }
3214  else if (strcmp(argv[op + 1], "pcre16") == 0)
3215  {
3216 #ifdef SUPPORT_PCRE16
3217  printf("1\n");
3218  yield = 1;
3219 #else
3220  printf("0\n");
3221  yield = 0;
3222 #endif
3223 #ifdef __VMS
3224  vms_setsymbol("PCRE16",0,yield );
3225 #endif
3226  }
3227  else if (strcmp(argv[op + 1], "pcre32") == 0)
3228  {
3229 #ifdef SUPPORT_PCRE32
3230  printf("1\n");
3231  yield = 1;
3232 #else
3233  printf("0\n");
3234  yield = 0;
3235 #endif
3236 #ifdef __VMS
3237  vms_setsymbol("PCRE32",0,yield );
3238 #endif
3239  }
3240  else if (strcmp(argv[op + 1], "utf") == 0)
3241  {
3242 #ifdef SUPPORT_PCRE8
3243  if (pcre_mode == PCRE8_MODE)
3244  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3245 #endif
3246 #ifdef SUPPORT_PCRE16
3247  if (pcre_mode == PCRE16_MODE)
3248  (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3249 #endif
3250 #ifdef SUPPORT_PCRE32
3251  if (pcre_mode == PCRE32_MODE)
3252  (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3253 #endif
3254  printf("%d\n", rc);
3255  yield = rc;
3256 #ifdef __VMS
3257  vms_setsymbol("UTF",0,yield );
3258 #endif
3259  }
3260  else if (strcmp(argv[op + 1], "ucp") == 0)
3261  {
3262  (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3263  printf("%d\n", rc);
3264  yield = rc;
3265  }
3266  else if (strcmp(argv[op + 1], "jit") == 0)
3267  {
3268  (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3269  printf("%d\n", rc);
3270  yield = rc;
3271  }
3272  else if (strcmp(argv[op + 1], "newline") == 0)
3273  {
3274  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3275  print_newline_config(rc, TRUE);
3276  }
3277  else if (strcmp(argv[op + 1], "bsr") == 0)
3278  {
3279  (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3280  printf("%s\n", rc? "ANYCRLF" : "ANY");
3281  }
3282  else if (strcmp(argv[op + 1], "ebcdic") == 0)
3283  {
3284 #ifdef EBCDIC
3285  printf("1\n");
3286  yield = 1;
3287 #else
3288  printf("0\n");
3289 #endif
3290  }
3291  else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3292  {
3293 #ifdef EBCDIC
3294  printf("0x%02x\n", CHAR_LF);
3295 #else
3296  printf("0\n");
3297 #endif
3298  }
3299  else
3300  {
3301  printf("Unknown -C option: %s\n", argv[op + 1]);
3302  }
3303  goto EXIT;
3304  }
3305 
3306  /* No argument for -C: output all configuration information. */
3307 
3308  printf("PCRE version %s\n", version);
3309  printf("Compiled with\n");
3310 
3311 #ifdef EBCDIC
3312  printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3313 #endif
3314 
3315 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3316 are set, either both UTFs are supported or both are not supported. */
3317 
3318 #ifdef SUPPORT_PCRE8
3319  printf(" 8-bit support\n");
3320  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3321  printf (" %sUTF-8 support\n", rc ? "" : "No ");
3322 #endif
3323 #ifdef SUPPORT_PCRE16
3324  printf(" 16-bit support\n");
3325  (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3326  printf (" %sUTF-16 support\n", rc ? "" : "No ");
3327 #endif
3328 #ifdef SUPPORT_PCRE32
3329  printf(" 32-bit support\n");
3330  (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3331  printf (" %sUTF-32 support\n", rc ? "" : "No ");
3332 #endif
3333 
3334  (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3335  printf(" %sUnicode properties support\n", rc? "" : "No ");
3336  (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3337  if (rc)
3338  {
3339  const char *arch;
3340  (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3341  printf(" Just-in-time compiler support: %s\n", arch);
3342  }
3343  else
3344  printf(" No just-in-time compiler support\n");
3345  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3346  print_newline_config(rc, FALSE);
3347  (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3348  printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3349  "all Unicode newlines");
3350  (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3351  printf(" Internal link size = %d\n", rc);
3352  (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3353  printf(" POSIX malloc threshold = %d\n", rc);
3354  (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3355  printf(" Parentheses nest limit = %ld\n", lrc);
3356  (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3357  printf(" Default match limit = %ld\n", lrc);
3358  (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3359  printf(" Default recursion depth limit = %ld\n", lrc);
3360  (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3361  printf(" Match recursion uses %s", rc? "stack" : "heap");
3362  if (showstore)
3363  {
3364  PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3365  printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3366  }
3367  printf("\n");
3368  goto EXIT;
3369  }
3370  else if (strcmp(arg, "-help") == 0 ||
3371  strcmp(arg, "--help") == 0)
3372  {
3373  usage();
3374  goto EXIT;
3375  }
3376  else
3377  {
3378  BAD_ARG:
3379  printf("** Unknown or malformed option %s\n", arg);
3380  usage();
3381  yield = 1;
3382  goto EXIT;
3383  }
3384  op++;
3385  argc--;
3386  }
3387 
3388 /* Get the store for the offsets vector, and remember what it was */
3389 
3390 size_offsets_max = size_offsets;
3391 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3392 if (offsets == NULL)
3393  {
3394  printf("** Failed to get %d bytes of memory for offsets vector\n",
3395  (int)(size_offsets_max * sizeof(int)));
3396  yield = 1;
3397  goto EXIT;
3398  }
3399 
3400 /* Sort out the input and output files */
3401 
3402 if (argc > 1)
3403  {
3404  infile = fopen(argv[op], INPUT_MODE);
3405  if (infile == NULL)
3406  {
3407  printf("** Failed to open %s\n", argv[op]);
3408  yield = 1;
3409  goto EXIT;
3410  }
3411  }
3412 
3413 if (argc > 2)
3414  {
3415  outfile = fopen(argv[op+1], OUTPUT_MODE);
3416  if (outfile == NULL)
3417  {
3418  printf("** Failed to open %s\n", argv[op+1]);
3419  yield = 1;
3420  goto EXIT;
3421  }
3422  }
3423 
3424 /* Set alternative malloc function */
3425 
3426 #ifdef SUPPORT_PCRE8
3427 pcre_malloc = new_malloc;
3428 pcre_free = new_free;
3429 pcre_stack_malloc = stack_malloc;
3430 pcre_stack_free = stack_free;
3431 #endif
3432 
3433 #ifdef SUPPORT_PCRE16
3434 pcre16_malloc = new_malloc;
3435 pcre16_free = new_free;
3436 pcre16_stack_malloc = stack_malloc;
3437 pcre16_stack_free = stack_free;
3438 #endif
3439 
3440 #ifdef SUPPORT_PCRE32
3441 pcre32_malloc = new_malloc;
3442 pcre32_free = new_free;
3443 pcre32_stack_malloc = stack_malloc;
3444 pcre32_stack_free = stack_free;
3445 #endif
3446 
3447 /* Heading line unless quiet */
3448 
3449 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3450 
3451 /* Main loop */
3452 
3453 while (!done)
3454  {
3455  pcre *re = NULL;
3456  pcre_extra *extra = NULL;
3457 
3458 #if !defined NOPOSIX /* There are still compilers that require no indent */
3459  regex_t preg = { NULL, 0, 0} ;
3460  int do_posix = 0;
3461 #endif
3462 
3463  const char *error;
3464  pcre_uint8 *markptr;
3465  pcre_uint8 *p, *pp, *ppp;
3466  pcre_uint8 *to_file = NULL;
3467  const pcre_uint8 *tables = NULL;
3468  unsigned long int get_options;
3469  unsigned long int true_size, true_study_size = 0;
3470  size_t size;
3471  int do_allcaps = 0;
3472  int do_mark = 0;
3473  int do_study = 0;
3474  int no_force_study = 0;
3475  int do_debug = debug;
3476  int do_G = 0;
3477  int do_g = 0;
3478  int do_showinfo = showinfo;
3479  int do_showrest = 0;
3480  int do_showcaprest = 0;
3481  int do_flip = 0;
3482  int erroroffset, len, delimiter, poffset;
3483 
3484 #if !defined NODFA
3485  int dfa_matched = 0;
3486 #endif
3487 
3488  use_utf = 0;
3489  debug_lengths = 1;
3490  SET_PCRE_STACK_GUARD(NULL);
3491 
3492  if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3493  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3494  fflush(outfile);
3495 
3496  p = buffer;
3497  while (isspace(*p)) p++;
3498  if (*p == 0) continue;
3499 
3500  /* Handle option lock-out setting */
3501 
3502  if (*p == '<' && p[1] == ' ')
3503  {
3504  p += 2;
3505  while (isspace(*p)) p++;
3506  if (strncmp((char *)p, "forbid ", 7) == 0)
3507  {
3508  p += 7;
3509  while (isspace(*p)) p++;
3510  pp = lockout;
3511  while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3512  *pp++ = *p++;
3513  *pp = 0;
3514  }
3515  else
3516  {
3517  printf("** Unrecognized special command '%s'\n", p);
3518  yield = 1;
3519  goto EXIT;
3520  }
3521  continue;
3522  }
3523 
3524  /* See if the pattern is to be loaded pre-compiled from a file. */
3525 
3526  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3527  {
3528  pcre_uint32 magic;
3529  pcre_uint8 sbuf[8];
3530  FILE *f;
3531 
3532  p++;
3533  if (*p == '!')
3534  {
3535  do_debug = TRUE;
3536  do_showinfo = TRUE;
3537  p++;
3538  }
3539 
3540  pp = p + (int)strlen((char *)p);
3541  while (isspace(pp[-1])) pp--;
3542  *pp = 0;
3543 
3544  f = fopen((char *)p, "rb");
3545  if (f == NULL)
3546  {
3547  fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3548  continue;
3549  }
3550  if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3551 
3552  true_size =
3553  (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3554  true_study_size =
3555  (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3556 
3557  re = (pcre *)new_malloc(true_size);
3558  if (re == NULL)
3559  {
3560  printf("** Failed to get %d bytes of memory for pcre object\n",
3561  (int)true_size);
3562  yield = 1;
3563  goto EXIT;
3564  }
3565  if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3566 
3567  magic = REAL_PCRE_MAGIC(re);
3568  if (magic != MAGIC_NUMBER)
3569  {
3570  if (swap_uint32(magic) == MAGIC_NUMBER)
3571  {
3572  do_flip = 1;
3573  }
3574  else
3575  {
3576  fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3577  new_free(re);
3578  fclose(f);
3579  continue;
3580  }
3581  }
3582 
3583  /* We hide the byte-invert info for little and big endian tests. */
3584  fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3585  do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3586 
3587  /* Now see if there is any following study data. */
3588 
3589  if (true_study_size != 0)
3590  {
3591  pcre_study_data *psd;
3592 
3593  extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3594  extra->flags = PCRE_EXTRA_STUDY_DATA;
3595 
3596  psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3597  extra->study_data = psd;
3598 
3599  if (fread(psd, 1, true_study_size, f) != true_study_size)
3600  {
3601  FAIL_READ:
3602  fprintf(outfile, "Failed to read data from %s\n", p);
3603  if (extra != NULL)
3604  {
3605  PCRE_FREE_STUDY(extra);
3606  }
3607  new_free(re);
3608  fclose(f);
3609  continue;
3610  }
3611  fprintf(outfile, "Study data loaded from %s\n", p);
3612  do_study = 1; /* To get the data output if requested */
3613  }
3614  else fprintf(outfile, "No study data\n");
3615 
3616  /* Flip the necessary bytes. */
3617  if (do_flip)
3618  {
3619  int rc;
3620  PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3621  if (rc == PCRE_ERROR_BADMODE)
3622  {
3623  pcre_uint32 flags_in_host_byte_order;
3624  if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3625  flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3626  else
3627  flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3628  /* Simulate the result of the function call below. */
3629  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3630  pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3631  PCRE_INFO_OPTIONS);
3632  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3633  "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3634  new_free(re);
3635  fclose(f);
3636  continue;
3637  }
3638  }
3639 
3640  /* Need to know if UTF-8 for printing data strings. */
3641 
3642  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3643  {
3644  new_free(re);
3645  fclose(f);
3646  continue;
3647  }
3648  use_utf = (get_options & PCRE_UTF8) != 0;
3649 
3650  fclose(f);
3651  goto SHOW_INFO;
3652  }
3653 
3654  /* In-line pattern (the usual case). Get the delimiter and seek the end of
3655  the pattern; if it isn't complete, read more. */
3656 
3657  delimiter = *p++;
3658 
3659  if (isalnum(delimiter) || delimiter == '\\')
3660  {
3661  fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3662  goto SKIP_DATA;
3663  }
3664 
3665  pp = p;
3666  poffset = (int)(p - buffer);
3667 
3668  for(;;)
3669  {
3670  while (*pp != 0)
3671  {
3672  if (*pp == '\\' && pp[1] != 0) pp++;
3673  else if (*pp == delimiter) break;
3674  pp++;
3675  }
3676  if (*pp != 0) break;
3677  if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3678  {
3679  fprintf(outfile, "** Unexpected EOF\n");
3680  done = 1;
3681  goto CONTINUE;
3682  }
3683  if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3684  }
3685 
3686  /* The buffer may have moved while being extended; reset the start of data
3687  pointer to the correct relative point in the buffer. */
3688 
3689  p = buffer + poffset;
3690 
3691  /* If the first character after the delimiter is backslash, make
3692  the pattern end with backslash. This is purely to provide a way
3693  of testing for the error message when a pattern ends with backslash. */
3694 
3695  if (pp[1] == '\\') *pp++ = '\\';
3696 
3697  /* Terminate the pattern at the delimiter, and save a copy of the pattern
3698  for callouts. */
3699 
3700  *pp++ = 0;
3701  strcpy((char *)pbuffer, (char *)p);
3702 
3703  /* Look for modifiers and options after the final delimiter. */
3704 
3705  options = default_options;
3706  study_options = force_study_options;
3707  log_store = showstore; /* default from command line */
3708 
3709  while (*pp != 0)
3710  {
3711  /* Check to see whether this modifier has been locked out for this file.
3712  This is complicated for the multi-character options that begin with '<'.
3713  If there is no '>' in the lockout string, all multi-character modifiers are
3714  locked out. */
3715 
3716  if (strchr((char *)lockout, *pp) != NULL)
3717  {
3718  if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3719  {
3720  int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3721  if (x == 0) goto SKIP_DATA;
3722 
3723  for (ppp = lockout; *ppp != 0; ppp++)
3724  {
3725  if (*ppp == '<')
3726  {
3727  int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3728  if (y == 0)
3729  {
3730  printf("** Error in modifier forbid data - giving up.\n");
3731  yield = 1;
3732  goto EXIT;
3733  }
3734  if (x == y)
3735  {
3736  ppp = pp;
3737  while (*ppp != '>') ppp++;
3738  printf("** The %.*s modifier is locked out - giving up.\n",
3739  (int)(ppp - pp + 1), pp);
3740  yield = 1;
3741  goto EXIT;
3742  }
3743  }
3744  }
3745  }
3746 
3747  /* The single-character modifiers are straightforward. */
3748 
3749  else
3750  {
3751  printf("** The /%c modifier is locked out - giving up.\n", *pp);
3752  yield = 1;
3753  goto EXIT;
3754  }
3755  }
3756 
3757  /* The modifier is not locked out; handle it. */
3758 
3759  switch (*pp++)
3760  {
3761  case 'f': options |= PCRE_FIRSTLINE; break;
3762  case 'g': do_g = 1; break;
3763  case 'i': options |= PCRE_CASELESS; break;
3764  case 'm': options |= PCRE_MULTILINE; break;
3765  case 's': options |= PCRE_DOTALL; break;
3766  case 'x': options |= PCRE_EXTENDED; break;
3767 
3768  case '+':
3769  if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3770  break;
3771 
3772  case '=': do_allcaps = 1; break;
3773  case 'A': options |= PCRE_ANCHORED; break;
3774  case 'B': do_debug = 1; break;
3775  case 'C': options |= PCRE_AUTO_CALLOUT; break;
3776  case 'D': do_debug = do_showinfo = 1; break;
3777  case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3778  case 'F': do_flip = 1; break;
3779  case 'G': do_G = 1; break;
3780  case 'I': do_showinfo = 1; break;
3781  case 'J': options |= PCRE_DUPNAMES; break;
3782  case 'K': do_mark = 1; break;
3783  case 'M': log_store = 1; break;
3784  case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3785  case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3786 
3787 #if !defined NOPOSIX
3788  case 'P': do_posix = 1; break;
3789 #endif
3790 
3791  case 'Q':
3792  switch (*pp)
3793  {
3794  case '0':
3795  case '1':
3796  stack_guard_return = *pp++ - '0';
3797  break;
3798 
3799  default:
3800  fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3801  goto SKIP_DATA;
3802  }
3803  SET_PCRE_STACK_GUARD(stack_guard);
3804  break;
3805 
3806  case 'S':
3807  do_study = 1;
3808  for (;;)
3809  {
3810  switch (*pp++)
3811  {
3812  case 'S':
3813  do_study = 0;
3814  no_force_study = 1;
3815  break;
3816 
3817  case '!':
3818  study_options |= PCRE_STUDY_EXTRA_NEEDED;
3819  break;
3820 
3821  case '+':
3822  if (*pp == '+')
3823  {
3824  verify_jit = TRUE;
3825  pp++;
3826  }
3827  if (*pp >= '1' && *pp <= '7')
3828  study_options |= jit_study_bits[*pp++ - '1'];
3829  else
3830  study_options |= jit_study_bits[6];
3831  break;
3832 
3833  case '-':
3834  study_options &= ~PCRE_STUDY_ALLJIT;
3835  break;
3836 
3837  default:
3838  pp--;
3839  goto ENDLOOP;
3840  }
3841  }
3842  ENDLOOP:
3843  break;
3844 
3845  case 'U': options |= PCRE_UNGREEDY; break;
3846  case 'W': options |= PCRE_UCP; break;
3847  case 'X': options |= PCRE_EXTRA; break;
3848  case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3849  case 'Z': debug_lengths = 0; break;
3850  case '8': options |= PCRE_UTF8; use_utf = 1; break;
3851  case '9': options |= PCRE_NEVER_UTF; break;
3852  case '?': options |= PCRE_NO_UTF8_CHECK; break;
3853 
3854  case 'T':
3855  switch (*pp++)
3856  {
3857  case '0': tables = tables0; break;
3858  case '1': tables = tables1; break;
3859 
3860  case '\r':
3861  case '\n':
3862  case ' ':
3863  case 0:
3864  fprintf(outfile, "** Missing table number after /T\n");
3865  goto SKIP_DATA;
3866 
3867  default:
3868  fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3869  goto SKIP_DATA;
3870  }
3871  break;
3872 
3873  case 'L':
3874  ppp = pp;
3875  /* The '\r' test here is so that it works on Windows. */
3876  /* The '0' test is just in case this is an unterminated line. */
3877  while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3878  *ppp = 0;
3879  if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3880  {
3881  fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3882  goto SKIP_DATA;
3883  }
3884  locale_set = 1;
3885  tables = PCRE_MAKETABLES;
3886  pp = ppp;
3887  break;
3888 
3889  case '>':
3890  to_file = pp;
3891  while (*pp != 0) pp++;
3892  while (isspace(pp[-1])) pp--;
3893  *pp = 0;
3894  break;
3895 
3896  case '<':
3897  {
3898  int x = check_mc_option(pp, outfile, FALSE, "modifier");
3899  if (x == 0) goto SKIP_DATA;
3900  options |= x;
3901  while (*pp++ != '>');
3902  }
3903  break;
3904 
3905  case '\r': /* So that it works in Windows */
3906  case '\n':
3907  case ' ':
3908  break;
3909 
3910  default:
3911  fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3912  goto SKIP_DATA;
3913  }
3914  }
3915 
3916  /* Handle compiling via the POSIX interface, which doesn't support the
3917  timing, showing, or debugging options, nor the ability to pass over
3918  local character tables. Neither does it have 16-bit support. */
3919 
3920 #if !defined NOPOSIX
3921  if (posix || do_posix)
3922  {
3923  int rc;
3924  int cflags = 0;
3925 
3926  if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3927  if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3928  if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3929  if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3930  if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3931  if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3932  if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3933 
3934  rc = regcomp(&preg, (char *)p, cflags);
3935 
3936  /* Compilation failed; go back for another re, skipping to blank line
3937  if non-interactive. */
3938 
3939  if (rc != 0)
3940  {
3941  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3942  fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3943  goto SKIP_DATA;
3944  }
3945  }
3946 
3947  /* Handle compiling via the native interface */
3948 
3949  else
3950 #endif /* !defined NOPOSIX */
3951 
3952  {
3953  /* In 16- or 32-bit mode, convert the input. */
3954 
3955 #ifdef SUPPORT_PCRE16
3956  if (pcre_mode == PCRE16_MODE)
3957  {
3958  switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3959  {
3960  case -1:
3961  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3962  "converted to UTF-16\n");
3963  goto SKIP_DATA;
3964 
3965  case -2:
3966  fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3967  "cannot be converted to UTF-16\n");
3968  goto SKIP_DATA;
3969 
3970  case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3971  fprintf(outfile, "**Failed: character value greater than 0xffff "
3972  "cannot be converted to 16-bit in non-UTF mode\n");
3973  goto SKIP_DATA;
3974 
3975  default:
3976  break;
3977  }
3978  p = (pcre_uint8 *)buffer16;
3979  }
3980 #endif
3981 
3982 #ifdef SUPPORT_PCRE32
3983  if (pcre_mode == PCRE32_MODE)
3984  {
3985  switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3986  {
3987  case -1:
3988  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3989  "converted to UTF-32\n");
3990  goto SKIP_DATA;
3991 
3992  case -2:
3993  fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3994  "cannot be converted to UTF-32\n");
3995  goto SKIP_DATA;
3996 
3997  case -3:
3998  fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3999  goto SKIP_DATA;
4000 
4001  default:
4002  break;
4003  }
4004  p = (pcre_uint8 *)buffer32;
4005  }
4006 #endif
4007 
4008  /* Compile many times when timing */
4009 
4010  if (timeit > 0)
4011  {
4012  register int i;
4013  clock_t time_taken;
4014  clock_t start_time = clock();
4015  for (i = 0; i < timeit; i++)
4016  {
4017  PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4018  if (re != NULL) free(re);
4019  }
4020  total_compile_time += (time_taken = clock() - start_time);
4021  fprintf(outfile, "Compile time %.4f milliseconds\n",
4022  (((double)time_taken * 1000.0) / (double)timeit) /
4023  (double)CLOCKS_PER_SEC);
4024  }
4025 
4026  PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4027 
4028  /* Compilation failed; go back for another re, skipping to blank line
4029  if non-interactive. */
4030 
4031  if (re == NULL)
4032  {
4033  fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4034  SKIP_DATA:
4035  if (infile != stdin)
4036  {
4037  for (;;)
4038  {
4039  if (extend_inputline(infile, buffer, NULL) == NULL)
4040  {
4041  done = 1;
4042  goto CONTINUE;
4043  }
4044  len = (int)strlen((char *)buffer);
4045  while (len > 0 && isspace(buffer[len-1])) len--;
4046  if (len == 0) break;
4047  }
4048  fprintf(outfile, "\n");
4049  }
4050  goto CONTINUE;
4051  }
4052 
4053  /* Compilation succeeded. It is now possible to set the UTF-8 option from
4054  within the regex; check for this so that we know how to process the data
4055  lines. */
4056 
4057  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4058  goto SKIP_DATA;
4059  if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4060 
4061  /* Extract the size for possible writing before possibly flipping it,
4062  and remember the store that was got. */
4063 
4064  true_size = REAL_PCRE_SIZE(re);
4065 
4066  /* Output code size information if requested */
4067 
4068  if (log_store)
4069  {
4070  int name_count, name_entry_size, real_pcre_size;
4071 
4072  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4073  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4074  real_pcre_size = 0;
4075 #ifdef SUPPORT_PCRE8
4076  if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4077  real_pcre_size = sizeof(real_pcre);
4078 #endif
4079 #ifdef SUPPORT_PCRE16
4080  if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4081  real_pcre_size = sizeof(real_pcre16);
4082 #endif
4083 #ifdef SUPPORT_PCRE32
4084  if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4085  real_pcre_size = sizeof(real_pcre32);
4086 #endif
4087  new_info(re, NULL, PCRE_INFO_SIZE, &size);
4088  fprintf(outfile, "Memory allocation (code space): %d\n",
4089  (int)(size - real_pcre_size - name_count * name_entry_size));
4090  }
4091 
4092  /* If -s or /S was present, study the regex to generate additional info to
4093  help with the matching, unless the pattern has the SS option, which
4094  suppresses the effect of /S (used for a few test patterns where studying is
4095  never sensible). */
4096 
4097  if (do_study || (force_study >= 0 && !no_force_study))
4098  {
4099  if (timeit > 0)
4100  {
4101  register int i;
4102  clock_t time_taken;
4103  clock_t start_time = clock();
4104  for (i = 0; i < timeit; i++)
4105  {
4106  PCRE_STUDY(extra, re, study_options, &error);
4107  }
4108  total_study_time = (time_taken = clock() - start_time);
4109  if (extra != NULL)
4110  {
4111  PCRE_FREE_STUDY(extra);
4112  }
4113  fprintf(outfile, " Study time %.4f milliseconds\n",
4114  (((double)time_taken * 1000.0) / (double)timeit) /
4115  (double)CLOCKS_PER_SEC);
4116  }
4117  PCRE_STUDY(extra, re, study_options, &error);
4118  if (error != NULL)
4119  fprintf(outfile, "Failed to study: %s\n", error);
4120  else if (extra != NULL)
4121  {
4122  true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4123  if (log_store)
4124  {
4125  size_t jitsize;
4126  if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4127  jitsize != 0)
4128  fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4129  }
4130  }
4131  }
4132 
4133  /* If /K was present, we set up for handling MARK data. */
4134 
4135  if (do_mark)
4136  {
4137  if (extra == NULL)
4138  {
4139  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4140  extra->flags = 0;
4141  }
4142  extra->mark = &markptr;
4143  extra->flags |= PCRE_EXTRA_MARK;
4144  }
4145 
4146  /* Extract and display information from the compiled data if required. */
4147 
4148  SHOW_INFO:
4149 
4150  if (do_debug)
4151  {
4152  fprintf(outfile, "------------------------------------------------------------------\n");
4153  PCRE_PRINTINT(re, outfile, debug_lengths);
4154  }
4155 
4156  /* We already have the options in get_options (see above) */
4157 
4158  if (do_showinfo)
4159  {
4160  unsigned long int all_options;
4161  pcre_uint32 first_char, need_char;
4162  pcre_uint32 match_limit, recursion_limit;
4163  int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4164  hascrorlf, maxlookbehind, match_empty;
4165  int nameentrysize, namecount;
4166  const pcre_uint8 *nametable;
4167 
4168  if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4169  new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4170  new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4171  new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4172  new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4173  new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4174  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4175  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4176  new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4177  new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4178  new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4179  new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4180  new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4181  new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4182  != 0)
4183  goto SKIP_DATA;
4184 
4185  fprintf(outfile, "Capturing subpattern count = %d\n", count);
4186 
4187  if (backrefmax > 0)
4188  fprintf(outfile, "Max back reference = %d\n", backrefmax);
4189 
4190  if (maxlookbehind > 0)
4191  fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4192 
4193  if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4194  fprintf(outfile, "Match limit = %u\n", match_limit);
4195 
4196  if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4197  fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4198 
4199  if (namecount > 0)
4200  {
4201  fprintf(outfile, "Named capturing subpatterns:\n");
4202  while (namecount-- > 0)
4203  {
4204  int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4205  int length = (int)STRLEN(nametable + imm2_size);
4206  fprintf(outfile, " ");
4207  PCHARSV(nametable, imm2_size, length, outfile);
4208  while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4209 #ifdef SUPPORT_PCRE32
4210  if (pcre_mode == PCRE32_MODE)
4211  fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4212 #endif
4213 #ifdef SUPPORT_PCRE16
4214  if (pcre_mode == PCRE16_MODE)
4215  fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4216 #endif
4217 #ifdef SUPPORT_PCRE8
4218  if (pcre_mode == PCRE8_MODE)
4219  fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4220 #endif
4221  nametable += nameentrysize * CHAR_SIZE;
4222  }
4223  }
4224 
4225  if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4226  if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4227  if (match_empty) fprintf(outfile, "May match empty string\n");
4228 
4229  all_options = REAL_PCRE_OPTIONS(re);
4230  if (do_flip) all_options = swap_uint32(all_options);
4231 
4232  if (get_options == 0) fprintf(outfile, "No options\n");
4233  else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4234  ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4235  ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4236  ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4237  ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4238  ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4239  ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4240  ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4241  ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4242  ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4243  ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4244  ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4245  ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4246  ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4247  ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4248  ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4249  ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4250  ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4251  ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4252  ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4253 
4254  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4255 
4256  switch (get_options & PCRE_NEWLINE_BITS)
4257  {
4258  case PCRE_NEWLINE_CR:
4259  fprintf(outfile, "Forced newline sequence: CR\n");
4260  break;
4261 
4262  case PCRE_NEWLINE_LF:
4263  fprintf(outfile, "Forced newline sequence: LF\n");
4264  break;
4265 
4266  case PCRE_NEWLINE_CRLF:
4267  fprintf(outfile, "Forced newline sequence: CRLF\n");
4268  break;
4269 
4270  case PCRE_NEWLINE_ANYCRLF:
4271  fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4272  break;
4273 
4274  case PCRE_NEWLINE_ANY:
4275  fprintf(outfile, "Forced newline sequence: ANY\n");
4276  break;
4277 
4278  default:
4279  break;
4280  }
4281 
4282  if (first_char_set == 2)
4283  {
4284  fprintf(outfile, "First char at start or follows newline\n");
4285  }
4286  else if (first_char_set == 1)
4287  {
4288  const char *caseless =
4289  ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4290  "" : " (caseless)";
4291 
4292  if (PRINTOK(first_char))
4293  fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4294  else
4295  {
4296  fprintf(outfile, "First char = ");
4297  pchar(first_char, outfile);
4298  fprintf(outfile, "%s\n", caseless);
4299  }
4300  }
4301  else
4302  {
4303  fprintf(outfile, "No first char\n");
4304  }
4305 
4306  if (need_char_set == 0)
4307  {
4308  fprintf(outfile, "No need char\n");
4309  }
4310  else
4311  {
4312  const char *caseless =
4313  ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4314  "" : " (caseless)";
4315 
4316  if (PRINTOK(need_char))
4317  fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4318  else
4319  {
4320  fprintf(outfile, "Need char = ");
4321  pchar(need_char, outfile);
4322  fprintf(outfile, "%s\n", caseless);
4323  }
4324  }
4325 
4326  /* Don't output study size; at present it is in any case a fixed
4327  value, but it varies, depending on the computer architecture, and
4328  so messes up the test suite. (And with the /F option, it might be
4329  flipped.) If study was forced by an external -s, don't show this
4330  information unless -i or -d was also present. This means that, except
4331  when auto-callouts are involved, the output from runs with and without
4332  -s should be identical. */
4333 
4334  if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4335  {
4336  if (extra == NULL)
4337  fprintf(outfile, "Study returned NULL\n");
4338  else
4339  {
4340  pcre_uint8 *start_bits = NULL;
4341  int minlength;
4342 
4343  if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4344  fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4345 
4346  if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4347  {
4348  if (start_bits == NULL)
4349  fprintf(outfile, "No starting char list\n");
4350  else
4351  {
4352  int i;
4353  int c = 24;
4354  fprintf(outfile, "Starting chars: ");
4355  for (i = 0; i < 256; i++)
4356  {
4357  if ((start_bits[i/8] & (1<<(i&7))) != 0)
4358  {
4359  if (c > 75)
4360  {
4361  fprintf(outfile, "\n ");
4362  c = 2;
4363  }
4364  if (PRINTOK(i) && i != ' ')
4365  {
4366  fprintf(outfile, "%c ", i);
4367  c += 2;
4368  }
4369  else
4370  {
4371  fprintf(outfile, "\\x%02x ", i);
4372  c += 5;
4373  }
4374  }
4375  }
4376  fprintf(outfile, "\n");
4377  }
4378  }
4379  }
4380 
4381  /* Show this only if the JIT was set by /S, not by -s. */
4382 
4383  if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4384  (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4385  {
4386  int jit;
4387  if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4388  {
4389  if (jit)
4390  fprintf(outfile, "JIT study was successful\n");
4391  else
4392 #ifdef SUPPORT_JIT
4393  fprintf(outfile, "JIT study was not successful\n");
4394 #else
4395  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4396 #endif
4397  }
4398  }
4399  }
4400  }
4401 
4402  /* If the '>' option was present, we write out the regex to a file, and
4403  that is all. The first 8 bytes of the file are the regex length and then
4404  the study length, in big-endian order. */
4405 
4406  if (to_file != NULL)
4407  {
4408  FILE *f = fopen((char *)to_file, "wb");
4409  if (f == NULL)
4410  {
4411  fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4412  }
4413  else
4414  {
4415  pcre_uint8 sbuf[8];
4416 
4417  if (do_flip) regexflip(re, extra);
4418  sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4419  sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4420  sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4421  sbuf[3] = (pcre_uint8)((true_size) & 255);
4422  sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4423  sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4424  sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4425  sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4426 
4427  if (fwrite(sbuf, 1, 8, f) < 8 ||
4428  fwrite(re, 1, true_size, f) < true_size)
4429  {
4430  fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4431  }
4432  else
4433  {
4434  fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4435 
4436  /* If there is study data, write it. */
4437 
4438  if (extra != NULL)
4439  {
4440  if (fwrite(extra->study_data, 1, true_study_size, f) <
4441  true_study_size)
4442  {
4443  fprintf(outfile, "Write error on %s: %s\n", to_file,
4444  strerror(errno));
4445  }
4446  else fprintf(outfile, "Study data written to %s\n", to_file);
4447  }
4448  }
4449  fclose(f);
4450  }
4451 
4452  new_free(re);
4453  if (extra != NULL)
4454  {
4455  PCRE_FREE_STUDY(extra);
4456  }
4457  if (locale_set)
4458  {
4459  new_free((void *)tables);
4460  setlocale(LC_CTYPE, "C");
4461  locale_set = 0;
4462  }
4463  continue; /* With next regex */
4464  }
4465  } /* End of non-POSIX compile */
4466 
4467  /* Read data lines and test them */
4468 
4469  for (;;)
4470  {
4471 #ifdef SUPPORT_PCRE8
4472  pcre_uint8 *q8;
4473 #endif
4474 #ifdef SUPPORT_PCRE16
4475  pcre_uint16 *q16;
4476 #endif
4477 #ifdef SUPPORT_PCRE32
4478  pcre_uint32 *q32;
4479 #endif
4480  pcre_uint8 *bptr;
4481  int *use_offsets = offsets;
4482  int use_size_offsets = size_offsets;
4483  int callout_data = 0;
4484  int callout_data_set = 0;
4485  int count;
4486  pcre_uint32 c;
4487  int copystrings = 0;
4488  int find_match_limit = default_find_match_limit;
4489  int getstrings = 0;
4490  int getlist = 0;
4491  int gmatched = 0;
4492  int start_offset = 0;
4493  int start_offset_sign = 1;
4494  int g_notempty = 0;
4495  int use_dfa = 0;
4496 
4497  *copynames = 0;
4498  *getnames = 0;
4499 
4500 #ifdef SUPPORT_PCRE32
4501  cn32ptr = copynames;
4502  gn32ptr = getnames;
4503 #endif
4504 #ifdef SUPPORT_PCRE16
4505  cn16ptr = copynames16;
4506  gn16ptr = getnames16;
4507 #endif
4508 #ifdef SUPPORT_PCRE8
4509  cn8ptr = copynames8;
4510  gn8ptr = getnames8;
4511 #endif
4512 
4513  SET_PCRE_CALLOUT(callout);
4514  first_callout = 1;
4515  last_callout_mark = NULL;
4516  callout_extra = 0;
4517  callout_count = 0;
4518  callout_fail_count = 999999;
4519  callout_fail_id = -1;
4520  show_malloc = 0;
4521  options = 0;
4522 
4523  if (extra != NULL) extra->flags &=
4524  ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4525 
4526  len = 0;
4527  for (;;)
4528  {
4529  if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4530  {
4531  if (len > 0) /* Reached EOF without hitting a newline */
4532  {
4533  fprintf(outfile, "\n");
4534  break;
4535  }
4536  done = 1;
4537  goto CONTINUE;
4538  }
4539  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4540  len = (int)strlen((char *)buffer);
4541  if (buffer[len-1] == '\n') break;
4542  }
4543 
4544  while (len > 0 && isspace(buffer[len-1])) len--;
4545  buffer[len] = 0;
4546  if (len == 0) break;
4547 
4548  p = buffer;
4549  while (isspace(*p)) p++;
4550 
4551 #ifndef NOUTF
4552  /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4553  invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4554 
4555  if (use_utf)
4556  {
4557  pcre_uint8 *q;
4558  pcre_uint32 cc;
4559  int n = 1;
4560 
4561  for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4562  if (n <= 0)
4563  {
4564  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4565  goto NEXT_DATA;
4566  }
4567  }
4568 #endif
4569 
4570 #ifdef SUPPORT_VALGRIND
4571  /* Mark the dbuffer as addressable but undefined again. */
4572 
4573  if (dbuffer != NULL)
4574  {
4575  VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4576  }
4577 #endif
4578 
4579  /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4580  the number of pcre_uchar units that will be needed. */
4581 
4582  while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4583  {
4584  dbuffer_size *= 2;
4585  dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4586  if (dbuffer == NULL)
4587  {
4588  fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4589  exit(1);
4590  }
4591  }
4592 
4593 #ifdef SUPPORT_PCRE8
4594  q8 = (pcre_uint8 *) dbuffer;
4595 #endif
4596 #ifdef SUPPORT_PCRE16
4597  q16 = (pcre_uint16 *) dbuffer;
4598 #endif
4599 #ifdef SUPPORT_PCRE32
4600  q32 = (pcre_uint32 *) dbuffer;
4601 #endif
4602 
4603  while ((c = *p++) != 0)
4604  {
4605  int i = 0;
4606  int n = 0;
4607 
4608  /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4609  In non-UTF mode, allow the value of the byte to fall through to later,
4610  where values greater than 127 are turned into UTF-8 when running in
4611  16-bit or 32-bit mode. */
4612 
4613  if (c != '\\')
4614  {
4615 #ifndef NOUTF
4616  if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4617 #endif
4618  }
4619 
4620  /* Handle backslash escapes */
4621 
4622  else switch ((c = *p++))
4623  {
4624  case 'a': c = 7; break;
4625  case 'b': c = '\b'; break;
4626  case 'e': c = 27; break;
4627  case 'f': c = '\f'; break;
4628  case 'n': c = '\n'; break;
4629  case 'r': c = '\r'; break;
4630  case 't': c = '\t'; break;
4631  case 'v': c = '\v'; break;
4632 
4633  case '0': case '1': case '2': case '3':
4634  case '4': case '5': case '6': case '7':
4635  c -= '0';
4636  while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4637  c = c * 8 + *p++ - '0';
4638  break;
4639 
4640  case 'o':
4641  if (*p == '{')
4642  {
4643  pcre_uint8 *pt = p;
4644  c = 0;
4645  for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4646  {
4647  if (++i == 12)
4648  fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4649  "using only the first twelve.\n");
4650  else c = c * 8 + *pt - '0';
4651  }
4652  if (*pt == '}') p = pt + 1;
4653  else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4654  }
4655  break;
4656 
4657  case 'x':
4658  if (*p == '{')
4659  {
4660  pcre_uint8 *pt = p;
4661  c = 0;
4662 
4663  /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4664  when isxdigit() is a macro that refers to its argument more than
4665  once. This is banned by the C Standard, but apparently happens in at
4666  least one MacOS environment. */
4667 
4668  for (pt++; isxdigit(*pt); pt++)
4669  {
4670  if (++i == 9)
4671  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4672  "using only the first eight.\n");
4673  else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4674  }
4675  if (*pt == '}')
4676  {
4677  p = pt + 1;
4678  break;
4679  }
4680  /* Not correct form for \x{...}; fall through */
4681  }
4682 
4683  /* \x without {} always defines just one byte in 8-bit mode. This
4684  allows UTF-8 characters to be constructed byte by byte, and also allows
4685  invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4686  Otherwise, pass it down to later code so that it can be turned into
4687  UTF-8 when running in 16/32-bit mode. */
4688 
4689  c = 0;
4690  while (i++ < 2 && isxdigit(*p))
4691  {
4692  c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4693  p++;
4694  }
4695 #if !defined NOUTF && defined SUPPORT_PCRE8
4696  if (use_utf && (pcre_mode == PCRE8_MODE))
4697  {
4698  *q8++ = c;
4699  continue;
4700  }
4701 #endif
4702  break;
4703 
4704  case 0: /* \ followed by EOF allows for an empty line */
4705  p--;
4706  continue;
4707 
4708  case '>':
4709  if (*p == '-')
4710  {
4711  start_offset_sign = -1;
4712  p++;
4713  }
4714  while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4715  start_offset *= start_offset_sign;
4716  continue;
4717 
4718  case 'A': /* Option setting */
4719  options |= PCRE_ANCHORED;
4720  continue;
4721 
4722  case 'B':
4723  options |= PCRE_NOTBOL;
4724  continue;
4725 
4726  case 'C':
4727  if (isdigit(*p)) /* Set copy string */
4728  {
4729  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4730  copystrings |= 1 << n;
4731  }
4732  else if (isalnum(*p))
4733  {
4734  READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4735  }
4736  else if (*p == '+')
4737  {
4738  callout_extra = 1;
4739  p++;
4740  }
4741  else if (*p == '-')
4742  {
4743  SET_PCRE_CALLOUT(NULL);
4744  p++;
4745  }
4746  else if (*p == '!')
4747  {
4748  callout_fail_id = 0;
4749  p++;
4750  while(isdigit(*p))
4751  callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4752  callout_fail_count = 0;
4753  if (*p == '!')
4754  {
4755  p++;
4756  while(isdigit(*p))
4757  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4758  }
4759  }
4760  else if (*p == '*')
4761  {
4762  int sign = 1;
4763  callout_data = 0;
4764  if (*(++p) == '-') { sign = -1; p++; }
4765  while(isdigit(*p))
4766  callout_data = callout_data * 10 + *p++ - '0';
4767  callout_data *= sign;
4768  callout_data_set = 1;
4769  }
4770  continue;
4771 
4772 #if !defined NODFA
4773  case 'D':
4774 #if !defined NOPOSIX
4775  if (posix || do_posix)
4776  printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4777  else
4778 #endif
4779  use_dfa = 1;
4780  continue;
4781 #endif
4782 
4783 #if !defined NODFA
4784  case 'F':
4785  options |= PCRE_DFA_SHORTEST;
4786  continue;
4787 #endif
4788 
4789  case 'G':
4790  if (isdigit(*p))
4791  {
4792  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4793  getstrings |= 1 << n;
4794  }
4795  else if (isalnum(*p))
4796  {
4797  READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4798  }
4799  continue;
4800 
4801  case 'J':
4802  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4803  if (extra != NULL
4804  && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4805  && extra->executable_jit != NULL)
4806  {
4807  if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4808  jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4809  PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4810  }
4811  continue;
4812 
4813  case 'L':
4814  getlist = 1;
4815  continue;
4816 
4817  case 'M':
4818  find_match_limit = 1;
4819  continue;
4820 
4821  case 'N':
4822  if ((options & PCRE_NOTEMPTY) != 0)
4823  options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4824  else
4825  options |= PCRE_NOTEMPTY;
4826  continue;
4827 
4828  case 'O':
4829  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4830  if (n > size_offsets_max)
4831  {
4832  size_offsets_max = n;
4833  free(offsets);
4834  use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4835  if (offsets == NULL)
4836  {
4837  printf("** Failed to get %d bytes of memory for offsets vector\n",
4838  (int)(size_offsets_max * sizeof(int)));
4839  yield = 1;
4840  goto EXIT;
4841  }
4842  }
4843  use_size_offsets = n;
4844  if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4845  else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4846  continue;
4847 
4848  case 'P':
4849  options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4850  PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4851  continue;
4852 
4853  case 'Q':
4854  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4855  if (extra == NULL)
4856  {
4857  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4858  extra->flags = 0;
4859  }
4860  extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4861  extra->match_limit_recursion = n;
4862  continue;
4863 
4864  case 'q':
4865  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4866  if (extra == NULL)
4867  {
4868  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4869  extra->flags = 0;
4870  }
4871  extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4872  extra->match_limit = n;
4873  continue;
4874 
4875 #if !defined NODFA
4876  case 'R':
4877  options |= PCRE_DFA_RESTART;
4878  continue;
4879 #endif
4880 
4881  case 'S':
4882  show_malloc = 1;
4883  continue;
4884 
4885  case 'Y':
4886  options |= PCRE_NO_START_OPTIMIZE;
4887  continue;
4888 
4889  case 'Z':
4890  options |= PCRE_NOTEOL;
4891  continue;
4892 
4893  case '?':
4894  options |= PCRE_NO_UTF8_CHECK;
4895  continue;
4896 
4897  case '<':
4898  {
4899  int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4900  if (x == 0) goto NEXT_DATA;
4901  options |= x;
4902  while (*p++ != '>');
4903  }
4904  continue;
4905  }
4906 
4907  /* We now have a character value in c that may be greater than 255.
4908  In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4909  than 127 in UTF mode must have come from \x{...} or octal constructs
4910  because values from \x.. get this far only in non-UTF mode. */
4911 
4912 #ifdef SUPPORT_PCRE8
4913  if (pcre_mode == PCRE8_MODE)
4914  {
4915 #ifndef NOUTF
4916  if (use_utf)
4917  {
4918  if (c > 0x7fffffff)
4919  {
4920  fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4921  "and so cannot be converted to UTF-8\n", c);
4922  goto NEXT_DATA;
4923  }
4924  q8 += ord2utf8(c, q8);
4925  }
4926  else
4927 #endif
4928  {
4929  if (c > 0xffu)
4930  {
4931  fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4932  "and UTF-8 mode is not enabled.\n", c);
4933  fprintf(outfile, "** Truncation will probably give the wrong "
4934  "result.\n");
4935  }
4936  *q8++ = c;
4937  }
4938  }
4939 #endif
4940 #ifdef SUPPORT_PCRE16
4941  if (pcre_mode == PCRE16_MODE)
4942  {
4943 #ifndef NOUTF
4944  if (use_utf)
4945  {
4946  if (c > 0x10ffffu)
4947  {
4948  fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4949  "0x10ffff and so cannot be converted to UTF-16\n", c);
4950  goto NEXT_DATA;
4951  }
4952  else if (c >= 0x10000u)
4953  {
4954  c-= 0x10000u;
4955  *q16++ = 0xD800 | (c >> 10);
4956  *q16++ = 0xDC00 | (c & 0x3ff);
4957  }
4958  else
4959  *q16++ = c;
4960  }
4961  else
4962 #endif
4963  {
4964  if (c > 0xffffu)
4965  {
4966  fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4967  "and UTF-16 mode is not enabled.\n", c);
4968  fprintf(outfile, "** Truncation will probably give the wrong "
4969  "result.\n");
4970  }
4971 
4972  *q16++ = c;
4973  }
4974  }
4975 #endif
4976 #ifdef SUPPORT_PCRE32
4977  if (pcre_mode == PCRE32_MODE)
4978  {
4979  *q32++ = c;
4980  }
4981 #endif
4982 
4983  }
4984 
4985  /* Reached end of subject string */
4986 
4987 #ifdef SUPPORT_PCRE8
4988  if (pcre_mode == PCRE8_MODE)
4989  {
4990  *q8 = 0;
4991  len = (int)(q8 - (pcre_uint8 *)dbuffer);
4992  }
4993 #endif
4994 #ifdef SUPPORT_PCRE16
4995  if (pcre_mode == PCRE16_MODE)
4996  {
4997  *q16 = 0;
4998  len = (int)(q16 - (pcre_uint16 *)dbuffer);
4999  }
5000 #endif
5001 #ifdef SUPPORT_PCRE32
5002  if (pcre_mode == PCRE32_MODE)
5003  {
5004  *q32 = 0;
5005  len = (int)(q32 - (pcre_uint32 *)dbuffer);
5006  }
5007 #endif
5008 
5009  /* If we're compiling with explicit valgrind support, mark the data from after
5010  its end to the end of the buffer as unaddressable, so that a read over the end
5011  of the buffer will be seen by valgrind, even if it doesn't cause a crash.
5012  If we're not building with valgrind support, at least move the data to the end
5013  of the buffer so that a read over the end might at least cause a crash.
5014  If we are using the POSIX interface, we must include the terminating zero. */
5015 
5016  bptr = dbuffer;
5017 
5018 #if !defined NOPOSIX
5019  if (posix || do_posix)
5020  {
5021 #ifdef SUPPORT_VALGRIND
5022  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
5023 #else
5024  memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5025  bptr += dbuffer_size - len - 1;
5026 #endif
5027  }
5028  else
5029 #endif
5030  {
5031 #ifdef SUPPORT_VALGRIND
5032  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
5033 #else
5034  bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5035 #endif
5036  }
5037 
5038  if ((all_use_dfa || use_dfa) && find_match_limit)
5039  {
5040  printf("**Match limit not relevant for DFA matching: ignored\n");
5041  find_match_limit = 0;
5042  }
5043 
5044  /* Handle matching via the POSIX interface, which does not
5045  support timing or playing with the match limit or callout data. */
5046 
5047 #if !defined NOPOSIX
5048  if (posix || do_posix)
5049  {
5050  int rc;
5051  int eflags = 0;
5052  regmatch_t *pmatch = NULL;
5053  if (use_size_offsets > 0)
5054  pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5055  if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5056  if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5057  if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5058 
5059  rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5060 
5061  if (rc != 0)
5062  {
5063  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5064  fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5065  }
5066  else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5067  {
5068  fprintf(outfile, "Matched with REG_NOSUB\n");
5069  }
5070  else
5071  {
5072  size_t i;
5073  for (i = 0; i < (size_t)use_size_offsets; i++)
5074  {
5075  if (pmatch[i].rm_so >= 0)
5076  {
5077  fprintf(outfile, "%2d: ", (int)i);
5078  PCHARSV(dbuffer, pmatch[i].rm_so,
5079  pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5080  fprintf(outfile, "\n");
5081  if (do_showcaprest || (i == 0 && do_showrest))
5082  {
5083  fprintf(outfile, "%2d+ ", (int)i);
5084  PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5085  outfile);
5086  fprintf(outfile, "\n");
5087  }
5088  }
5089  }
5090  }
5091  free(pmatch);
5092  goto NEXT_DATA;
5093  }
5094 
5095 #endif /* !defined NOPOSIX */
5096 
5097  /* Handle matching via the native interface - repeats for /g and /G */
5098 
5099  /* Ensure that there is a JIT callback if we want to verify that JIT was
5100  actually used. If jit_stack == NULL, no stack has yet been assigned. */
5101 
5102  if (verify_jit && jit_stack == NULL && extra != NULL)
5103  { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5104 
5105  for (;; gmatched++) /* Loop for /g or /G */
5106  {
5107  markptr = NULL;
5108  jit_was_used = FALSE;
5109 
5110  if (timeitm > 0)
5111  {
5112  register int i;
5113  clock_t time_taken;
5114  clock_t start_time = clock();
5115 
5116 #if !defined NODFA
5117  if (all_use_dfa || use_dfa)
5118  {
5119  if ((options & PCRE_DFA_RESTART) != 0)
5120  {
5121  fprintf(outfile, "Timing DFA restarts is not supported\n");
5122  break;
5123  }
5124  if (dfa_workspace == NULL)
5125  dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5126  for (i = 0; i < timeitm; i++)
5127  {
5128  PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5129  (options | g_notempty), use_offsets, use_size_offsets,
5130  dfa_workspace, DFA_WS_DIMENSION);
5131  }
5132  }
5133  else
5134 #endif
5135 
5136  for (i = 0; i < timeitm; i++)
5137  {
5138  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5139  (options | g_notempty), use_offsets, use_size_offsets);
5140  }
5141  total_match_time += (time_taken = clock() - start_time);
5142  fprintf(outfile, "Execute time %.4f milliseconds\n",
5143  (((double)time_taken * 1000.0) / (double)timeitm) /
5144  (double)CLOCKS_PER_SEC);
5145  }
5146 
5147  /* If find_match_limit is set, we want to do repeated matches with
5148  varying limits in order to find the minimum value for the match limit and
5149  for the recursion limit. The match limits are relevant only to the normal
5150  running of pcre_exec(), so disable the JIT optimization. This makes it
5151  possible to run the same set of tests with and without JIT externally
5152  requested. */
5153 
5154  if (find_match_limit)
5155  {
5156  if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5157  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5158  extra->flags = 0;
5159 
5160  (void)check_match_limit(re, extra, bptr, len, start_offset,
5161  options|g_notempty, use_offsets, use_size_offsets,
5162  PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5163  PCRE_ERROR_MATCHLIMIT, "match()");
5164 
5165  count = check_match_limit(re, extra, bptr, len, start_offset,
5166  options|g_notempty, use_offsets, use_size_offsets,
5167  PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5168  PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5169  }
5170 
5171  /* If callout_data is set, use the interface with additional data */
5172 
5173  else if (callout_data_set)
5174  {
5175  if (extra == NULL)
5176  {
5177  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5178  extra->flags = 0;
5179  }
5180  extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5181  extra->callout_data = &callout_data;
5182  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5183  options | g_notempty, use_offsets, use_size_offsets);
5184  extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5185  }
5186 
5187  /* The normal case is just to do the match once, with the default
5188  value of match_limit. */
5189 
5190 #if !defined NODFA
5191  else if (all_use_dfa || use_dfa)
5192  {
5193  if (dfa_workspace == NULL)
5194  dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5195  if (dfa_matched++ == 0)
5196  dfa_workspace[0] = -1; /* To catch bad restart */
5197  PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5198  (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5200  if (count == 0)
5201  {
5202  fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5203  count = use_size_offsets/2;
5204  }
5205  }
5206 #endif
5207 
5208  else
5209  {
5210  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5211  options | g_notempty, use_offsets, use_size_offsets);
5212  if (count == 0)
5213  {
5214  fprintf(outfile, "Matched, but too many substrings\n");
5215  /* 2 is a special case; match can be returned */
5216  count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5217  }
5218  }
5219 
5220  /* Matched */
5221 
5222  if (count >= 0)
5223  {
5224  int i, maxcount;
5225  void *cnptr, *gnptr;
5226 
5227 #if !defined NODFA
5228  if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5229 #endif
5230  /* 2 is a special case; match can be returned */
5231  maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5232 
5233  /* This is a check against a lunatic return value. */
5234 
5235  if (count > maxcount)
5236  {
5237  fprintf(outfile,
5238  "** PCRE error: returned count %d is too big for offset size %d\n",
5239  count, use_size_offsets);
5240  count = use_size_offsets/3;
5241  if (do_g || do_G)
5242  {
5243  fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5244  do_g = do_G = FALSE; /* Break g/G loop */
5245  }
5246  }
5247 
5248  /* do_allcaps requests showing of all captures in the pattern, to check
5249  unset ones at the end. */
5250 
5251  if (do_allcaps)
5252  {
5253  if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5254  goto SKIP_DATA;
5255  count++; /* Allow for full match */
5256  if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5257  }
5258 
5259  /* Output the captured substrings. Note that, for the matched string,
5260  the use of \K in an assertion can make the start later than the end. */
5261 
5262  for (i = 0; i < count * 2; i += 2)
5263  {
5264  if (use_offsets[i] < 0)
5265  {
5266  if (use_offsets[i] != -1)
5267  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5268  use_offsets[i], i);
5269  if (use_offsets[i+1] != -1)
5270  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5271  use_offsets[i+1], i+1);
5272  fprintf(outfile, "%2d: <unset>\n", i/2);
5273  }
5274  else
5275  {
5276  int start = use_offsets[i];
5277  int end = use_offsets[i+1];
5278 
5279  if (start > end)
5280  {
5281  start = use_offsets[i+1];
5282  end = use_offsets[i];
5283  fprintf(outfile, "Start of matched string is beyond its end - "
5284  "displaying from end to start.\n");
5285  }
5286 
5287  fprintf(outfile, "%2d: ", i/2);
5288  PCHARSV(bptr, start, end - start, outfile);
5289  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5290  fprintf(outfile, "\n");
5291 
5292  /* Note: don't use the start/end variables here because we want to
5293  show the text from what is reported as the end. */
5294 
5295  if (do_showcaprest || (i == 0 && do_showrest))
5296  {
5297  fprintf(outfile, "%2d+ ", i/2);
5298  PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5299  outfile);
5300  fprintf(outfile, "\n");
5301  }
5302  }
5303  }
5304 
5305  if (markptr != NULL)
5306  {
5307  fprintf(outfile, "MK: ");
5308  PCHARSV(markptr, 0, -1, outfile);
5309  fprintf(outfile, "\n");
5310  }
5311 
5312  for (i = 0; i < 32; i++)
5313  {
5314  if ((copystrings & (1 << i)) != 0)
5315  {
5316  int rc;
5317  char copybuffer[256];
5318  PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5319  copybuffer, sizeof(copybuffer));
5320  if (rc < 0)
5321  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5322  else
5323  {
5324  fprintf(outfile, "%2dC ", i);
5325  PCHARSV(copybuffer, 0, rc, outfile);
5326  fprintf(outfile, " (%d)\n", rc);
5327  }
5328  }
5329  }
5330 
5331  cnptr = copynames;
5332  for (;;)
5333  {
5334  int rc;
5335  char copybuffer[256];
5336 
5337 #ifdef SUPPORT_PCRE32
5338  if (pcre_mode == PCRE32_MODE)
5339  {
5340  if (*(pcre_uint32 *)cnptr == 0) break;
5341  }
5342 #endif
5343 #ifdef SUPPORT_PCRE16
5344  if (pcre_mode == PCRE16_MODE)
5345  {
5346  if (*(pcre_uint16 *)cnptr == 0) break;
5347  }
5348 #endif
5349 #ifdef SUPPORT_PCRE8
5350  if (pcre_mode == PCRE8_MODE)
5351  {
5352  if (*(pcre_uint8 *)cnptr == 0) break;
5353  }
5354 #endif
5355 
5356  PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5357  cnptr, copybuffer, sizeof(copybuffer));
5358 
5359  if (rc < 0)
5360  {
5361  fprintf(outfile, "copy substring ");
5362  PCHARSV(cnptr, 0, -1, outfile);
5363  fprintf(outfile, " failed %d\n", rc);
5364  }
5365  else
5366  {
5367  fprintf(outfile, " C ");
5368  PCHARSV(copybuffer, 0, rc, outfile);
5369  fprintf(outfile, " (%d) ", rc);
5370  PCHARSV(cnptr, 0, -1, outfile);
5371  putc('\n', outfile);
5372  }
5373 
5374  cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5375  }
5376 
5377  for (i = 0; i < 32; i++)
5378  {
5379  if ((getstrings & (1 << i)) != 0)
5380  {
5381  int rc;
5382  const char *substring;
5383  PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5384  if (rc < 0)
5385  fprintf(outfile, "get substring %d failed %d\n", i, rc);
5386  else
5387  {
5388  fprintf(outfile, "%2dG ", i);
5389  PCHARSV(substring, 0, rc, outfile);
5390  fprintf(outfile, " (%d)\n", rc);
5391  PCRE_FREE_SUBSTRING(substring);
5392  }
5393  }
5394  }
5395 
5396  gnptr = getnames;
5397  for (;;)
5398  {
5399  int rc;
5400  const char *substring;
5401 
5402 #ifdef SUPPORT_PCRE32
5403  if (pcre_mode == PCRE32_MODE)
5404  {
5405  if (*(pcre_uint32 *)gnptr == 0) break;
5406  }
5407 #endif
5408 #ifdef SUPPORT_PCRE16
5409  if (pcre_mode == PCRE16_MODE)
5410  {
5411  if (*(pcre_uint16 *)gnptr == 0) break;
5412  }
5413 #endif
5414 #ifdef SUPPORT_PCRE8
5415  if (pcre_mode == PCRE8_MODE)
5416  {
5417  if (*(pcre_uint8 *)gnptr == 0) break;
5418  }
5419 #endif
5420 
5421  PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5422  gnptr, &substring);
5423  if (rc < 0)
5424  {
5425  fprintf(outfile, "get substring ");
5426  PCHARSV(gnptr, 0, -1, outfile);
5427  fprintf(outfile, " failed %d\n", rc);
5428  }
5429  else
5430  {
5431  fprintf(outfile, " G ");
5432  PCHARSV(substring, 0, rc, outfile);
5433  fprintf(outfile, " (%d) ", rc);
5434  PCHARSV(gnptr, 0, -1, outfile);
5435  PCRE_FREE_SUBSTRING(substring);
5436  putc('\n', outfile);
5437  }
5438 
5439  gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5440  }
5441 
5442  if (getlist)
5443  {
5444  int rc;
5445  const char **stringlist;
5446  PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5447  if (rc < 0)
5448  fprintf(outfile, "get substring list failed %d\n", rc);
5449  else
5450  {
5451  for (i = 0; i < count; i++)
5452  {
5453  fprintf(outfile, "%2dL ", i);
5454  PCHARSV(stringlist[i], 0, -1, outfile);
5455  putc('\n', outfile);
5456  }
5457  if (stringlist[i] != NULL)
5458  fprintf(outfile, "string list not terminated by NULL\n");
5459  PCRE_FREE_SUBSTRING_LIST(stringlist);
5460  }
5461  }
5462  }
5463 
5464  /* There was a partial match. If the bumpalong point is not the same as
5465  the first inspected character, show the offset explicitly. */
5466 
5467  else if (count == PCRE_ERROR_PARTIAL)
5468  {
5469  fprintf(outfile, "Partial match");
5470  if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5471  fprintf(outfile, " at offset %d", use_offsets[2]);
5472  if (markptr != NULL)
5473  {
5474  fprintf(outfile, ", mark=");
5475  PCHARSV(markptr, 0, -1, outfile);
5476  }
5477  if (use_size_offsets > 1)
5478  {
5479  fprintf(outfile, ": ");
5480  PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5481  outfile);
5482  }
5483  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5484  fprintf(outfile, "\n");
5485  break; /* Out of the /g loop */
5486  }
5487 
5488  /* Failed to match. If this is a /g or /G loop and we previously set
5489  g_notempty after a null match, this is not necessarily the end. We want
5490  to advance the start offset, and continue. We won't be at the end of the
5491  string - that was checked before setting g_notempty.
5492 
5493  Complication arises in the case when the newline convention is "any",
5494  "crlf", or "anycrlf". If the previous match was at the end of a line
5495  terminated by CRLF, an advance of one character just passes the \r,
5496  whereas we should prefer the longer newline sequence, as does the code in
5497  pcre_exec(). Fudge the offset value to achieve this. We check for a
5498  newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5499  find the default.
5500 
5501  Otherwise, in the case of UTF-8 matching, the advance must be one
5502  character, not one byte. */
5503 
5504  else
5505  {
5506  if (g_notempty != 0)
5507  {
5508  int onechar = 1;
5509  unsigned int obits = REAL_PCRE_OPTIONS(re);
5510  use_offsets[0] = start_offset;
5511  if ((obits & PCRE_NEWLINE_BITS) == 0)
5512  {
5513  int d;
5514  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5515  /* Note that these values are always the ASCII ones, even in
5516  EBCDIC environments. CR = 13, NL = 10. */
5517  obits = (d == 13)? PCRE_NEWLINE_CR :
5518  (d == 10)? PCRE_NEWLINE_LF :
5519  (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5520  (d == -2)? PCRE_NEWLINE_ANYCRLF :
5521  (d == -1)? PCRE_NEWLINE_ANY : 0;
5522  }
5523  if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5524  (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5525  (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5526  &&
5527  start_offset < len - 1 && (
5528 #ifdef SUPPORT_PCRE8
5529  (pcre_mode == PCRE8_MODE &&
5530  bptr[start_offset] == '\r' &&
5531  bptr[start_offset + 1] == '\n') ||
5532 #endif
5533 #ifdef SUPPORT_PCRE16
5534  (pcre_mode == PCRE16_MODE &&
5535  ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5536  ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5537 #endif
5538 #ifdef SUPPORT_PCRE32
5539  (pcre_mode == PCRE32_MODE &&
5540  ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5541  ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5542 #endif
5543  0))
5544  onechar++;
5545  else if (use_utf)
5546  {
5547  while (start_offset + onechar < len)
5548  {
5549  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5550  onechar++;
5551  }
5552  }
5553  use_offsets[1] = start_offset + onechar;
5554  }
5555  else
5556  {
5557  switch(count)
5558  {
5559  case PCRE_ERROR_NOMATCH:
5560  if (gmatched == 0)
5561  {
5562  if (markptr == NULL)
5563  {
5564  fprintf(outfile, "No match");
5565  }
5566  else
5567  {
5568  fprintf(outfile, "No match, mark = ");
5569  PCHARSV(markptr, 0, -1, outfile);
5570  }
5571  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5572  putc('\n', outfile);
5573  }
5574  break;
5575 
5576  case PCRE_ERROR_BADUTF8:
5577  case PCRE_ERROR_SHORTUTF8:
5578  fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5579  (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5580  8 * CHAR_SIZE);
5581  if (use_size_offsets >= 2)
5582  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5583  use_offsets[1]);
5584  fprintf(outfile, "\n");
5585  break;
5586 
5587  case PCRE_ERROR_BADUTF8_OFFSET:
5588  fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5589  8 * CHAR_SIZE);
5590  break;
5591 
5592  default:
5593  if (count < 0 &&
5594  (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5595  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5596  else
5597  fprintf(outfile, "Error %d (Unexpected value)\n", count);
5598  break;
5599  }
5600 
5601  break; /* Out of the /g loop */
5602  }
5603  }
5604 
5605  /* If not /g or /G we are done */
5606 
5607  if (!do_g && !do_G) break;
5608 
5609  if (use_offsets == NULL)
5610  {
5611  fprintf(outfile, "Cannot do global matching without an ovector\n");
5612  break;
5613  }
5614 
5615  /* If we have matched an empty string, first check to see if we are at
5616  the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5617  Perl's /g option does. This turns out to be rather cunning. First we set
5618  PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5619  same point. If this fails (picked up above) we advance to the next
5620  character. */
5621 
5622  g_notempty = 0;
5623 
5624  if (use_offsets[0] == use_offsets[1])
5625  {
5626  if (use_offsets[0] == len) break;
5627  g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5628  }
5629 
5630  /* For /g, update the start offset, leaving the rest alone. There is a
5631  tricky case when \K is used in a positive lookbehind assertion. This can
5632  cause the end of the match to be less than or equal to the start offset.
5633  In this case we restart at one past the start offset. This may return the
5634  same match if the original start offset was bumped along during the
5635  match, but eventually the new start offset will hit the actual start
5636  offset. (In PCRE2 the true start offset is available, and this can be
5637  done better. It is not worth doing more than making sure we do not loop
5638  at this stage in the life of PCRE1.) */
5639 
5640  if (do_g)
5641  {
5642  if (g_notempty == 0 && use_offsets[1] <= start_offset)
5643  {
5644  if (start_offset >= len) break; /* End of subject */
5645  start_offset++;
5646  if (use_utf)
5647  {
5648  while (start_offset < len)
5649  {
5650  if ((bptr[start_offset] & 0xc0) != 0x80) break;
5651  start_offset++;
5652  }
5653  }
5654  }
5655  else start_offset = use_offsets[1];
5656  }
5657 
5658  /* For /G, update the pointer and length */
5659 
5660  else
5661  {
5662  bptr += use_offsets[1] * CHAR_SIZE;
5663  len -= use_offsets[1];
5664  }
5665  } /* End of loop for /g and /G */
5666 
5667  NEXT_DATA: continue;
5668  } /* End of loop for data lines */
5669 
5670  CONTINUE:
5671 
5672 #if !defined NOPOSIX
5673  if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg);
5674 #endif
5675 
5676  if (re != NULL) new_free(re);
5677  if (extra != NULL)
5678  {
5679  PCRE_FREE_STUDY(extra);
5680  }
5681  if (locale_set)
5682  {
5683  new_free((void *)tables);
5684  setlocale(LC_CTYPE, "C");
5685  locale_set = 0;
5686  }
5687  if (jit_stack != NULL)
5688  {
5689  PCRE_JIT_STACK_FREE(jit_stack);
5690  jit_stack = NULL;
5691  }
5692  }
5693 
5694 if (infile == stdin) fprintf(outfile, "\n");
5695 
5696 if (showtotaltimes)
5697  {
5698  fprintf(outfile, "--------------------------------------\n");
5699  if (timeit > 0)
5700  {
5701  fprintf(outfile, "Total compile time %.4f milliseconds\n",
5702  (((double)total_compile_time * 1000.0) / (double)timeit) /
5703  (double)CLOCKS_PER_SEC);
5704  fprintf(outfile, "Total study time %.4f milliseconds\n",
5705  (((double)total_study_time * 1000.0) / (double)timeit) /
5706  (double)CLOCKS_PER_SEC);
5707  }
5708  fprintf(outfile, "Total execute time %.4f milliseconds\n",
5709  (((double)total_match_time * 1000.0) / (double)timeitm) /
5710  (double)CLOCKS_PER_SEC);
5711  }
5712 
5713 EXIT:
5714 
5715 if (infile != NULL && infile != stdin) fclose(infile);
5716 if (outfile != NULL && outfile != stdout) fclose(outfile);
5717 
5718 free(buffer);
5719 free(dbuffer);
5720 free(pbuffer);
5721 free(offsets);
5722 
5723 #ifdef SUPPORT_PCRE16
5724 if (buffer16 != NULL) free(buffer16);
5725 #endif
5726 #ifdef SUPPORT_PCRE32
5727 if (buffer32 != NULL) free(buffer32);
5728 #endif
5729 
5730 #if !defined NODFA
5731 if (dfa_workspace != NULL)
5732  free(dfa_workspace);
5733 #endif
5734 
5735 #if defined(__VMS)
5736  yield = SS$_NORMAL; /* Return values via DCL symbols */
5737 #endif
5738 
5739 return yield;
5740 }
5741 
5742 /* End of pcretest.c */
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what, void *where)
Definition: pcre_fullinfo.c:70
#define memmove(a, b, c)
pcre_uint16 name_table_offset
#define INPUT_MODE
Definition: pcretest.c:119
unsigned char pcre_uint8
pcre_uint16 req_char
pcre_uint32 limit_match
pcre_uint32 limit_match
pcre_uint16 name_entry_size
pcre_uint16 name_count
pcre_uint32 size
pcre_uint32 size
pcre_uint16 top_bracket
pcre_uint16 top_backref
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION regfree(regex_t *preg)
Definition: pcreposix.c:245
#define REG_UNGREEDY
Definition: pcreposix.h:64
#define REAL_PCRE_MAGIC(re)
pcre_uint16 max_lookbehind
#define OUTPUT_MODE
Definition: pcretest.c:120
int BOOL
#define LOOPREPEAT
Definition: pcretest.c:1029
pcre_uint16 ref_count
#define PCRE_FCH_CASELESS
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: pcreposix.c:210
pcre_uint32 flags
#define CHAR_CR
int sys_nerr
pcre_uint16 max_lookbehind
pcre_uint32 limit_recursion
#define CHAR_LF
#define REG_NOTEMPTY
Definition: pcreposix.h:63
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION regcomp(regex_t *preg, const char *pattern, int cflags)
Definition: pcreposix.c:268
#define PCRE_MODE_MASK
pcre_uint32 flags
#define REVERSED_MAGIC_NUMBER
#define REG_NOTBOL
Definition: pcreposix.h:57
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
Definition: pcreposix.c:322
#define PCRE_NEWLINE_BITS
const int utf8_table3[]
Definition: pcregrep.c:413
pcre_uint32 first_char
pcre_uint32 options
int main(int argc, char **argv)
Definition: pcretest.c:2978
#define CLOCKS_PER_SEC
Definition: pcretest.c:1019
#define REAL_PCRE_SIZE(re)
#define REG_ICASE
Definition: pcreposix.h:55
pcre_uint16 name_table_offset
char * strerror(int n)
Definition: pcretest.c:1509
pcre_uint16 name_count
#define XCL_MAP
pcre_uint32 size
#define OP_LENGTHS
#define REG_UCP
Definition: pcreposix.h:65
#define REG_NEWLINE
Definition: pcreposix.h:56
struct real_pcre8_or_16 real_pcre
pcre_uint32 minlength
#define PCRE_MODE32
pcre_uint32 flags
pcre_uint32 magic_number
#define PCRE_MODE8
char * sys_errlist[]
#define PCRE_RCH_CASELESS
#define TRUE
PCRE_EXP_DEFN const char *PCRE_CALL_CONVENTION pcre_version(void)
Definition: pcre_version.c:84
pcre_uint16 name_entry_size
#define PRINTOK(c)
Definition: pcretest.c:180
pcre_uint16 ref_count
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
#define PCRE_MODE16
#define REAL_PCRE_FLAGS(re)
void * re_pcre
Definition: pcreposix.h:98
#define REG_NOTEOL
Definition: pcreposix.h:58
#define MAGIC_NUMBER
pcre_uint16 top_bracket
pcre_uint16 top_backref
pcre_uint32 limit_recursion
#define REG_UTF8
Definition: pcreposix.h:61
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_config(int what, void *where)
Definition: pcre_config.c:70
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_stringnumber(const pcre *code, const char *stringname)
Definition: pcre_get.c:70
pcre_uint32 options
pcre_uint16 first_char
#define REAL_PCRE_OPTIONS(re)
#define DFA_WS_DIMENSION
Definition: pcretest.c:1024
#define PCRE_STUDY_ALLJIT
Definition: pcretest.c:1127
#define REG_DOTALL
Definition: pcreposix.h:59
#define FALSE
#define REG_NOSUB
Definition: pcreposix.h:60
pcre_uint32 req_char
pcre_uint32 magic_number