pcre  8.39
About: The PCRE library implements Perl compatible regular expression pattern matching.
  Fossies Dox: pcre-8.39.tar.gz  ("inofficial" and yet experimental doxygen-generated source code documentation)  

pcretest.c
Go to the documentation of this file.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4 
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9 
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13 
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16 
17  * Redistributions in binary form must reproduce the above copyright
18  notice, this list of conditions and the following disclaimer in the
19  documentation and/or other materials provided with the distribution.
20 
21  * Neither the name of the University of Cambridge nor the names of its
22  contributors may be used to endorse or promote products derived from
23  this software without specific prior written permission.
24 
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38 
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48 
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52 
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60 
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65 
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81 
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89 
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
95 
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99  /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
103 
104 /* A user sent this fix for Borland Builder 5 under Windows. */
105 
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
109 
110 /* Not Windows */
111 
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
123 
124 #ifdef __VMS
125 #include <ssdef.h>
126 void vms_setsymbol( char *, char *, int );
127 #endif
128 
129 
130 #define PRIV(name) name
131 
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136 
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140 
141 #include "pcre.h"
142 #include "pcre_internal.h"
143 
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148 
149 #ifdef SUPPORT_PCRE8
150 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
152 #ifdef SUPPORT_PCRE16
153 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154 #endif
155 #ifdef SUPPORT_PCRE32
156 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157 #endif
158 
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162 
163 #define PCRE_INCLUDED
164 
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167 
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
173 
/* PRINTABLE(c) is a locale-independent range test: 64..254 when EBCDIC is
defined, otherwise 32..126. The argument appears twice in the expansion, so it
must not have side effects. */
174 #ifdef EBCDIC
175 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
176 #else
177 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
178 #endif
179 
/* PRINTOK(c) uses isprint() when locale_set is non-zero and PRINTABLE()
otherwise. locale_set is taken from the scope of the call site.
NOTE(review): isprint() is only defined for values representable as unsigned
char (or EOF) - confirm that callers never pass anything else. */
180 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
181 
182 /* Posix support is disabled in 16 or 32 bit only mode. */
183 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 #define NOPOSIX
185 #endif
186 
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
190 
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
194 
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
199 
200 #ifndef SUPPORT_UTF
201 #ifndef NOUTF
202 #define NOUTF
203 #endif
204 #endif
205 
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8,
211 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212 individual mode; then use these in the definitions of generic macros.
213 
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
219 
220 #ifdef SUPPORT_PCRE8
221 
/* 8-bit helper macros. Each one expands to a single expression or assignment
without a trailing semicolon; the caller supplies it. */

/* Call pchars() on the address (pcre_uint8 *)(p) + offset and store its
return value in lv. */
222 #define PCHARS8(lv, p, offset, len, f) \
223  lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224 
/* As PCHARS8, but the return value of pchars() is discarded. */
225 #define PCHARSV8(p, offset, len, f) \
226  (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227 
/* Replace p by the result of read_capture_name8(p, cn8, re). The cn16 and
cn32 arguments are accepted but not used. */
228 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229  p = read_capture_name8(p, cn8, re)
230 
/* strlen() of p, converted to int. The (char *) cast is applied to p without
parentheses, so it binds only to the first operand of an expression argument. */
231 #define STRLEN8(p) ((int)strlen((char *)p))
232 
/* Assign callout to the global pcre_callout. */
233 #define SET_PCRE_CALLOUT8(callout) \
234  pcre_callout = callout
235 
/* Assign stack_guard to the global pcre_stack_guard. */
236 #define SET_PCRE_STACK_GUARD8(stack_guard) \
237  pcre_stack_guard = stack_guard
238 
/* Forward all three arguments unchanged to pcre_assign_jit_stack(). */
239 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
240  pcre_assign_jit_stack(extra, callback, userdata)
241 
/* 8-bit wrappers for the library API. Subject and name pointers are cast to
char * (const char * for the substring list); where the library function
returns a value it is assigned to the macro's first parameter. None of the
expansions ends in a semicolon. */

/* re = pcre_compile(...), with pat cast to char *. */
242 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
243  re = pcre_compile((char *)pat, options, error, erroffset, tables)
244 
/* rc = pcre_copy_named_substring(...); size is passed through unchanged. */
245 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
246  namesptr, cbuffer, size) \
247  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
248  (char *)namesptr, cbuffer, size)
249 
/* rc = pcre_copy_substring(...); size is passed through unchanged. */
250 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
251  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
252 
/* count = pcre_dfa_exec(...). */
253 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
254  offsets, size_offsets, workspace, size_workspace) \
255  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
256  offsets, size_offsets, workspace, size_workspace)
257 
/* count = pcre_exec(...). */
258 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259  offsets, size_offsets) \
260  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
261  offsets, size_offsets)
262 
/* The three release macros forward their argument unchanged. */
263 #define PCRE_FREE_STUDY8(extra) \
264  pcre_free_study(extra)
265 
266 #define PCRE_FREE_SUBSTRING8(substring) \
267  pcre_free_substring(substring)
268 
269 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
270  pcre_free_substring_list(listptr)
271 
/* rc = pcre_get_named_substring(...). */
272 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
273  getnamesptr, subsptr) \
274  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
275  (char *)getnamesptr, subsptr)
276 
/* n = pcre_get_stringnumber(re, ptr).
NOTE(review): "re" in the expansion is not a macro parameter; it resolves to
whatever is named re at the call site, and the rc parameter is never used.
Confirm against the callers that this is intended. */
277 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
278  n = pcre_get_stringnumber(re, (char *)ptr)
279 
/* rc = pcre_get_substring(...). */
280 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
281  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
282 
/* rc = pcre_get_substring_list(...). */
283 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
284  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
285 
/* rc = pcre_pattern_to_host_byte_order(...). */
286 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
287  rc = pcre_pattern_to_host_byte_order(re, extra, tables)
288 
/* Forward to pcre_printint(). */
289 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
290  pcre_printint(re, outfile, debug_lengths)
291 
/* extra = pcre_study(...). */
292 #define PCRE_STUDY8(extra, re, options, error) \
293  extra = pcre_study(re, options, error)
294 
/* Expression macro: yields the result of pcre_jit_stack_alloc(). */
295 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
296  pcre_jit_stack_alloc(startsize, maxsize)
297 
/* Forward to pcre_jit_stack_free(). */
298 #define PCRE_JIT_STACK_FREE8(stack) \
299  pcre_jit_stack_free(stack)
300 
/* Makes pcre_maketables reachable under the width-suffixed name
pcre8_maketables. */
301 #define pcre8_maketables pcre_maketables
302 
303 #endif /* SUPPORT_PCRE8 */
304 
305 /* -----------------------------------------------------------*/
306 
307 #ifdef SUPPORT_PCRE16
308 
/* 16-bit helper macros. Arguments arrive with the 8-bit types used by the
generic code and are cast here to the pcre16 types. No expansion ends in a
semicolon. */

/* Call pchars16() on (PCRE_SPTR16)(p) + offset, so offset counts 16-bit
units, and store the return value in lv. */
309 #define PCHARS16(lv, p, offset, len, f) \
310  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
311 
/* As PCHARS16, but the return value of pchars16() is discarded. */
312 #define PCHARSV16(p, offset, len, f) \
313  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
314 
/* Replace p by the result of read_capture_name16(p, cn16, re). The cn8 and
cn32 arguments are accepted but not used. */
315 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
316  p = read_capture_name16(p, cn16, re)
317 
/* strlen16() of p, converted to int. The (PCRE_SPTR16) cast is applied to p
without parentheses, so for an argument such as base + n the cast binds to
base alone and n is then counted in 16-bit units. */
318 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
319 
/* Assign callout, cast to the pcre16 callout function type, to
pcre16_callout. */
320 #define SET_PCRE_CALLOUT16(callout) \
321  pcre16_callout = (int (*)(pcre16_callout_block *))callout
322 
/* Assign stack_guard, cast to int (*)(void), to pcre16_stack_guard. */
323 #define SET_PCRE_STACK_GUARD16(stack_guard) \
324  pcre16_stack_guard = (int (*)(void))stack_guard
325 
/* Call pcre16_assign_jit_stack() with extra and callback cast to the pcre16
types; userdata is passed unchanged. */
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327  pcre16_assign_jit_stack((pcre16_extra *)extra, \
328  (pcre16_jit_callback)callback, userdata)
329 
/* re = pcre16_compile(...), with the result cast back to pcre *. */
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332  tables)
333 
/* Copy a named captured substring with pcre16_copy_named_substring() and store
the return code in rc. The pattern, subject, name and buffer arguments are cast
to the pcre16 types. size is halved before being passed on (the 8-bit variant
passes it through unchanged, so it is presumably a byte count and the result is
the capacity in 16-bit units). size is parenthesized so that an expression
argument such as a + b is divided as a whole rather than only its last term. */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)
338 
/* Copy captured substring number i with pcre16_copy_substring() and store the
return code in rc. The subject and buffer are cast to the pcre16 types. size is
halved before being passed on (the 8-bit variant passes it through unchanged,
so it is presumably a byte count and the result is the capacity in 16-bit
units). size is parenthesized so that an expression argument such as a + b is
divided as a whole rather than only its last term. */
#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)
342 
/* 16-bit wrappers for the library API. Pattern, extra-data and string
arguments are cast to the pcre16 types; results that the generic code keeps in
8-bit-typed variables are cast back. No expansion ends in a semicolon. */

/* count = pcre16_dfa_exec(...). */
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344  offsets, size_offsets, workspace, size_workspace) \
345  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346  (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347  workspace, size_workspace)
348 
/* count = pcre16_exec(...). */
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350  offsets, size_offsets) \
351  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352  len, start_offset, options, offsets, size_offsets)
353 
/* The three release macros cast their argument and forward it. */
354 #define PCRE_FREE_STUDY16(extra) \
355  pcre16_free_study((pcre16_extra *)extra)
356 
357 #define PCRE_FREE_SUBSTRING16(substring) \
358  pcre16_free_substring((PCRE_SPTR16)substring)
359 
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362 
/* rc = pcre16_get_named_substring(...); subsptr is converted through void *
to PCRE_SPTR16 *. */
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364  getnamesptr, subsptr) \
365  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366  count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 
/* n = pcre16_get_stringnumber(re, ptr).
NOTE(review): "re" in the expansion is not a macro parameter; it resolves to
whatever is named re at the call site, the rc parameter is never used, and
unlike the other 16-bit wrappers re is not cast to pcre16 *. Confirm against
the callers that this is intended. */
368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
369  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
370 
/* rc = pcre16_get_substring(...). */
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373  (PCRE_SPTR16 *)(void*)subsptr)
374 
/* rc = pcre16_get_substring_list(...). */
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377  (PCRE_SPTR16 **)(void*)listptr)
378 
/* rc = pcre16_pattern_to_host_byte_order(...). */
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381  tables)
382 
/* Forward to pcre16_printint(); re is passed without a cast. */
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384  pcre16_printint(re, outfile, debug_lengths)
385 
/* extra = pcre16_study(...), with the result cast to pcre_extra *. */
386 #define PCRE_STUDY16(extra, re, options, error) \
387  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388 
/* Expression macro: result of pcre16_jit_stack_alloc() cast to
pcre_jit_stack *. */
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391 
/* Forward to pcre16_jit_stack_free() with stack cast to pcre16_jit_stack *. */
392 #define PCRE_JIT_STACK_FREE16(stack) \
393  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394 
395 #endif /* SUPPORT_PCRE16 */
396 
397 /* -----------------------------------------------------------*/
398 
399 #ifdef SUPPORT_PCRE32
400 
/* 32-bit helper macros. Arguments arrive with the 8-bit types used by the
generic code and are cast here to the pcre32 types. No expansion ends in a
semicolon. */

/* Call pchars32() on (PCRE_SPTR32)(p) + offset, so offset counts 32-bit
units, and store the return value in lv. use_utf is not a parameter: it is
taken from the scope of the call site. */
401 #define PCHARS32(lv, p, offset, len, f) \
402  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403 
/* As PCHARS32, but the return value of pchars32() is discarded. */
404 #define PCHARSV32(p, offset, len, f) \
405  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406 
/* Replace p by the result of read_capture_name32(p, cn32, re). The cn8 and
cn16 arguments are accepted but not used. */
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408  p = read_capture_name32(p, cn32, re)
409 
/* strlen32() of p, converted to int. The (PCRE_SPTR32) cast is applied to p
without parentheses, so for an argument such as base + n the cast binds to
base alone and n is then counted in 32-bit units. */
410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411 
/* Assign callout, cast to the pcre32 callout function type, to
pcre32_callout. */
412 #define SET_PCRE_CALLOUT32(callout) \
413  pcre32_callout = (int (*)(pcre32_callout_block *))callout
414 
/* Assign stack_guard, cast to int (*)(void), to pcre32_stack_guard. */
415 #define SET_PCRE_STACK_GUARD32(stack_guard) \
416  pcre32_stack_guard = (int (*)(void))stack_guard
417 
/* Call pcre32_assign_jit_stack() with extra and callback cast to the pcre32
types; userdata is passed unchanged. */
418 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
419  pcre32_assign_jit_stack((pcre32_extra *)extra, \
420  (pcre32_jit_callback)callback, userdata)
421 
/* re = pcre32_compile(...), with the result cast back to pcre *. */
422 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
423  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
424  tables)
425 
/* Copy a named captured substring with pcre32_copy_named_substring() and store
the return code in rc. The pattern, subject, name and buffer arguments are cast
to the pcre32 types.

The buffer is handed over as PCRE_UCHAR32 units, which are four bytes each, so
size is divided by 4 to give the capacity in units. This assumes size is a byte
count, as the 8-bit variant (passes size unchanged) and the 16-bit variant
(halves it) imply. Dividing by 2 here would claim twice the space that cbuffer
really has. size is parenthesized so that an expression argument is divided as
a whole. */
#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
430 
/* Copy captured substring number i with pcre32_copy_substring() and store the
return code in rc. The subject and buffer are cast to the pcre32 types.

The buffer is handed over as PCRE_UCHAR32 units, which are four bytes each, so
size is divided by 4 to give the capacity in units. This assumes size is a byte
count, as the 8-bit variant (passes size unchanged) and the 16-bit variant
(halves it) imply. Dividing by 2 here would claim twice the space that cbuffer
really has. size is parenthesized so that an expression argument is divided as
a whole. */
#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
434 
/* 32-bit wrappers for the library API. Pattern, extra-data and string
arguments are cast to the pcre32 types; results that the generic code keeps in
8-bit-typed variables are cast back. No expansion ends in a semicolon. */

/* count = pcre32_dfa_exec(...). */
435 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
436  offsets, size_offsets, workspace, size_workspace) \
437  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
438  (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
439  workspace, size_workspace)
440 
/* count = pcre32_exec(...). */
441 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
442  offsets, size_offsets) \
443  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
444  len, start_offset, options, offsets, size_offsets)
445 
/* The three release macros cast their argument and forward it. */
446 #define PCRE_FREE_STUDY32(extra) \
447  pcre32_free_study((pcre32_extra *)extra)
448 
449 #define PCRE_FREE_SUBSTRING32(substring) \
450  pcre32_free_substring((PCRE_SPTR32)substring)
451 
452 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
453  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
454 
/* rc = pcre32_get_named_substring(...); subsptr is converted through void *
to PCRE_SPTR32 *. */
455 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
456  getnamesptr, subsptr) \
457  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
458  count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
459 
/* n = pcre32_get_stringnumber(re, ptr).
NOTE(review): "re" in the expansion is not a macro parameter; it resolves to
whatever is named re at the call site, the rc parameter is never used, and
unlike the other 32-bit wrappers re is not cast to pcre32 *. Confirm against
the callers that this is intended. */
460 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
461  n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
462 
/* rc = pcre32_get_substring(...). */
463 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
464  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
465  (PCRE_SPTR32 *)(void*)subsptr)
466 
/* rc = pcre32_get_substring_list(...). */
467 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
468  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
469  (PCRE_SPTR32 **)(void*)listptr)
470 
/* rc = pcre32_pattern_to_host_byte_order(...). */
471 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
472  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
473  tables)
474 
/* Forward to pcre32_printint(); re is passed without a cast. */
475 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
476  pcre32_printint(re, outfile, debug_lengths)
477 
/* extra = pcre32_study(...), with the result cast to pcre_extra *. */
478 #define PCRE_STUDY32(extra, re, options, error) \
479  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
480 
/* Expression macro: result of pcre32_jit_stack_alloc() cast to
pcre_jit_stack *. */
481 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
482  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
483 
/* Forward to pcre32_jit_stack_free() with stack cast to pcre32_jit_stack *. */
484 #define PCRE_JIT_STACK_FREE32(stack) \
485  pcre32_jit_stack_free((pcre32_jit_stack *)stack)
486 
487 #endif /* SUPPORT_PCRE32 */
488 
489 
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
493 
/* Identifies which library width (8, 16 or 32 bit) is in use. The values are
consecutive from zero, in this order, so that CHAR_SIZE, defined as
(1 << pcre_mode), yields the code unit size in bytes: 1, 2 and 4. */
enum {
  PCRE8_MODE,
  PCRE16_MODE,
  PCRE32_MODE
};
499 
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501  defined (SUPPORT_PCRE32)) >= 2
502 
/* Two raised to the power of pcre_mode; pcre_mode is read from the scope of
the use site. Only defined when at least two library widths are supported. */
503 #define CHAR_SIZE (1 << pcre_mode)
504 
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507 cases separately. */
508 
509 /* ----- All three modes supported ----- */
510 
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512 
/* Generic macros for a build that supports all three widths. Each tests
pcre_mode (read from the scope of the call site) against PCRE32_MODE and then
PCRE16_MODE, and falls back to the 8-bit variant otherwise. Unless noted, the
expansion is an if / else-if / else statement chain with no trailing
semicolon, so it can only appear where a statement is allowed and the caller
supplies the final semicolon. Arguments are forwarded unchanged to the
width-specific macro. */

/* Dispatch to PCHARS32 / PCHARS16 / PCHARS8. */
513 #define PCHARS(lv, p, offset, len, f) \
514  if (pcre_mode == PCRE32_MODE) \
515  PCHARS32(lv, p, offset, len, f); \
516  else if (pcre_mode == PCRE16_MODE) \
517  PCHARS16(lv, p, offset, len, f); \
518  else \
519  PCHARS8(lv, p, offset, len, f)
520 
/* Dispatch to PCHARSV32 / PCHARSV16 / PCHARSV8. */
521 #define PCHARSV(p, offset, len, f) \
522  if (pcre_mode == PCRE32_MODE) \
523  PCHARSV32(p, offset, len, f); \
524  else if (pcre_mode == PCRE16_MODE) \
525  PCHARSV16(p, offset, len, f); \
526  else \
527  PCHARSV8(p, offset, len, f)
528 
/* Dispatch to READ_CAPTURE_NAME32 / 16 / 8; all three cn arguments are
passed on and each variant uses the one for its width. */
529 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
530  if (pcre_mode == PCRE32_MODE) \
531  READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
532  else if (pcre_mode == PCRE16_MODE) \
533  READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
534  else \
535  READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
536 
/* Dispatch to SET_PCRE_CALLOUT32 / 16 / 8. */
537 #define SET_PCRE_CALLOUT(callout) \
538  if (pcre_mode == PCRE32_MODE) \
539  SET_PCRE_CALLOUT32(callout); \
540  else if (pcre_mode == PCRE16_MODE) \
541  SET_PCRE_CALLOUT16(callout); \
542  else \
543  SET_PCRE_CALLOUT8(callout)
544 
/* Dispatch to SET_PCRE_STACK_GUARD32 / 16 / 8. */
545 #define SET_PCRE_STACK_GUARD(stack_guard) \
546  if (pcre_mode == PCRE32_MODE) \
547  SET_PCRE_STACK_GUARD32(stack_guard); \
548  else if (pcre_mode == PCRE16_MODE) \
549  SET_PCRE_STACK_GUARD16(stack_guard); \
550  else \
551  SET_PCRE_STACK_GUARD8(stack_guard)
552 
/* Expression macro (nested conditional operators), unlike its neighbours:
yields STRLEN32(p), STRLEN16(p) or STRLEN8(p). */
553 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
554 
/* Dispatch to PCRE_ASSIGN_JIT_STACK32 / 16 / 8. */
555 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
556  if (pcre_mode == PCRE32_MODE) \
557  PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
558  else if (pcre_mode == PCRE16_MODE) \
559  PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
560  else \
561  PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
562 
/* Dispatch to PCRE_COMPILE32 / 16 / 8. */
563 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
564  if (pcre_mode == PCRE32_MODE) \
565  PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
566  else if (pcre_mode == PCRE16_MODE) \
567  PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
568  else \
569  PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
570 
/* No run-time dispatch here: PCRE_CONFIG is simply the 8-bit function
pcre_config. */
571 #define PCRE_CONFIG pcre_config
572 
/* The remaining macros in this group expand to an if / else-if / else
statement chain on pcre_mode (32-bit first, then 16-bit, 8-bit as the
fallback) with no trailing semicolon; arguments are forwarded unchanged. */

/* Dispatch to PCRE_COPY_NAMED_SUBSTRING32 / 16 / 8. */
573 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
574  namesptr, cbuffer, size) \
575  if (pcre_mode == PCRE32_MODE) \
576  PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
577  namesptr, cbuffer, size); \
578  else if (pcre_mode == PCRE16_MODE) \
579  PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
580  namesptr, cbuffer, size); \
581  else \
582  PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
583  namesptr, cbuffer, size)
584 
/* Dispatch to PCRE_COPY_SUBSTRING32 / 16 / 8. */
585 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
586  if (pcre_mode == PCRE32_MODE) \
587  PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
588  else if (pcre_mode == PCRE16_MODE) \
589  PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
590  else \
591  PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
592 
/* Dispatch to PCRE_DFA_EXEC32 / 16 / 8. */
593 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
594  offsets, size_offsets, workspace, size_workspace) \
595  if (pcre_mode == PCRE32_MODE) \
596  PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
597  offsets, size_offsets, workspace, size_workspace); \
598  else if (pcre_mode == PCRE16_MODE) \
599  PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
600  offsets, size_offsets, workspace, size_workspace); \
601  else \
602  PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
603  offsets, size_offsets, workspace, size_workspace)
604 
/* Dispatch to PCRE_EXEC32 / 16 / 8. */
605 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
606  offsets, size_offsets) \
607  if (pcre_mode == PCRE32_MODE) \
608  PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
609  offsets, size_offsets); \
610  else if (pcre_mode == PCRE16_MODE) \
611  PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
612  offsets, size_offsets); \
613  else \
614  PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
615  offsets, size_offsets)
616 
/* Three-width generic macros for releasing and extracting data. Each expands
to an if / else-if / else statement chain on pcre_mode (32-bit first, then
16-bit, 8-bit as the fallback) with no trailing semicolon; arguments are
forwarded unchanged to the width-specific macro. */

/* Dispatch to PCRE_FREE_STUDY32 / 16 / 8. */
617 #define PCRE_FREE_STUDY(extra) \
618  if (pcre_mode == PCRE32_MODE) \
619  PCRE_FREE_STUDY32(extra); \
620  else if (pcre_mode == PCRE16_MODE) \
621  PCRE_FREE_STUDY16(extra); \
622  else \
623  PCRE_FREE_STUDY8(extra)
624 
/* Dispatch to PCRE_FREE_SUBSTRING32 / 16 / 8. */
625 #define PCRE_FREE_SUBSTRING(substring) \
626  if (pcre_mode == PCRE32_MODE) \
627  PCRE_FREE_SUBSTRING32(substring); \
628  else if (pcre_mode == PCRE16_MODE) \
629  PCRE_FREE_SUBSTRING16(substring); \
630  else \
631  PCRE_FREE_SUBSTRING8(substring)
632 
/* Dispatch to PCRE_FREE_SUBSTRING_LIST32 / 16 / 8. */
633 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
634  if (pcre_mode == PCRE32_MODE) \
635  PCRE_FREE_SUBSTRING_LIST32(listptr); \
636  else if (pcre_mode == PCRE16_MODE) \
637  PCRE_FREE_SUBSTRING_LIST16(listptr); \
638  else \
639  PCRE_FREE_SUBSTRING_LIST8(listptr)
640 
/* Dispatch to PCRE_GET_NAMED_SUBSTRING32 / 16 / 8. */
641 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
642  getnamesptr, subsptr) \
643  if (pcre_mode == PCRE32_MODE) \
644  PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
645  getnamesptr, subsptr); \
646  else if (pcre_mode == PCRE16_MODE) \
647  PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
648  getnamesptr, subsptr); \
649  else \
650  PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
651  getnamesptr, subsptr)
652 
/* Dispatch to PCRE_GET_STRINGNUMBER32 / 16 / 8. The width-specific macros
ignore the rc argument and use a call-site variable named re instead. */
653 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
654  if (pcre_mode == PCRE32_MODE) \
655  PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
656  else if (pcre_mode == PCRE16_MODE) \
657  PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
658  else \
659  PCRE_GET_STRINGNUMBER8(n, rc, ptr)
660 
/* Dispatch to PCRE_GET_SUBSTRING32 / 16 / 8. */
661 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
662  if (pcre_mode == PCRE32_MODE) \
663  PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
664  else if (pcre_mode == PCRE16_MODE) \
665  PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
666  else \
667  PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
668 
/* Dispatch to PCRE_GET_SUBSTRING_LIST32 / 16 / 8. */
669 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
670  if (pcre_mode == PCRE32_MODE) \
671  PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
672  else if (pcre_mode == PCRE16_MODE) \
673  PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
674  else \
675  PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
676 
/* Expression macro (nested conditional operators): yields the result of
PCRE_JIT_STACK_ALLOC32, PCRE_JIT_STACK_ALLOC16 or PCRE_JIT_STACK_ALLOC8
according to pcre_mode. */
677 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
678  (pcre_mode == PCRE32_MODE ? \
679  PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
680  : pcre_mode == PCRE16_MODE ? \
681  PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
682  : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
683 
/* Statement-chain macro (if / else-if / else on pcre_mode, no trailing
semicolon): dispatch to PCRE_JIT_STACK_FREE32 / 16 / 8. */
684 #define PCRE_JIT_STACK_FREE(stack) \
685  if (pcre_mode == PCRE32_MODE) \
686  PCRE_JIT_STACK_FREE32(stack); \
687  else if (pcre_mode == PCRE16_MODE) \
688  PCRE_JIT_STACK_FREE16(stack); \
689  else \
690  PCRE_JIT_STACK_FREE8(stack)
691 
/* Expression macro: calls pcre32_maketables(), pcre16_maketables() or
pcre_maketables() according to pcre_mode and yields the result. */
692 #define PCRE_MAKETABLES \
693  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
694 
/* Statement-chain macro: dispatch to PCRE_PATTERN_TO_HOST_BYTE_ORDER32 /
16 / 8. */
695 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
696  if (pcre_mode == PCRE32_MODE) \
697  PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
698  else if (pcre_mode == PCRE16_MODE) \
699  PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
700  else \
701  PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
702 
/* Statement-chain macro: dispatch to PCRE_PRINTINT32 / 16 / 8. */
703 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
704  if (pcre_mode == PCRE32_MODE) \
705  PCRE_PRINTINT32(re, outfile, debug_lengths); \
706  else if (pcre_mode == PCRE16_MODE) \
707  PCRE_PRINTINT16(re, outfile, debug_lengths); \
708  else \
709  PCRE_PRINTINT8(re, outfile, debug_lengths)
710 
/* Statement-chain macro: dispatch to PCRE_STUDY32 / 16 / 8. */
711 #define PCRE_STUDY(extra, re, options, error) \
712  if (pcre_mode == PCRE32_MODE) \
713  PCRE_STUDY32(extra, re, options, error); \
714  else if (pcre_mode == PCRE16_MODE) \
715  PCRE_STUDY16(extra, re, options, error); \
716  else \
717  PCRE_STUDY8(extra, re, options, error)
718 
719 
720 /* ----- Two out of three modes are supported ----- */
721 
722 #else
723 
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
726 
727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728 
/* Select the two supported widths for the two-mode macros that follow.
BITONE is the wider one and is the mode tested explicitly at run time; BITTWO
is the narrower one and is used as the fallback. The final #else covers the
remaining pair, 16-bit and 8-bit. */
729 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
730 #define BITONE 32
731 #define BITTWO 16
732 
733 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
734 
735 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
736 #define BITONE 32
737 #define BITTWO 8
738 
739 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
740 
741 #else
742 #define BITONE 16
743 #define BITTWO 8
744 #endif
745 
/* Token pasting in two steps: G() lets its arguments be macro-expanded first
(so BITONE becomes 32 or 16) before glue() joins them with ##. For example
G(PCHARS,BITONE) becomes PCHARS32 or PCHARS16. */
746 #define glue(a,b) a##b
747 #define G(a,b) glue(a,b)
748 
749 
750 /* ----- Common macros for two-mode cases ----- */
751 
/* Generic macros for a build that supports exactly two widths. Each tests
pcre_mode (read from the scope of the call site) against
G(G(PCRE,BITONE),_MODE), i.e. PCRE32_MODE or PCRE16_MODE, and uses the BITONE
variant on a match and the BITTWO variant otherwise. Unless noted, the
expansion is an if / else statement with no trailing semicolon, so it can only
appear where a statement is allowed. Arguments are forwarded unchanged. */

/* Dispatch to the BITONE or BITTWO variant of PCHARSnn. */
752 #define PCHARS(lv, p, offset, len, f) \
753  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
754  G(PCHARS,BITONE)(lv, p, offset, len, f); \
755  else \
756  G(PCHARS,BITTWO)(lv, p, offset, len, f)
757 
/* Dispatch to the BITONE or BITTWO variant of PCHARSVnn. */
758 #define PCHARSV(p, offset, len, f) \
759  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
760  G(PCHARSV,BITONE)(p, offset, len, f); \
761  else \
762  G(PCHARSV,BITTWO)(p, offset, len, f)
763 
/* Dispatch to the BITONE or BITTWO variant of READ_CAPTURE_NAMEnn. */
764 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
765  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
766  G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
767  else \
768  G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
769 
/* Dispatch to the BITONE or BITTWO variant of SET_PCRE_CALLOUTnn. */
770 #define SET_PCRE_CALLOUT(callout) \
771  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
772  G(SET_PCRE_CALLOUT,BITONE)(callout); \
773  else \
774  G(SET_PCRE_CALLOUT,BITTWO)(callout)
775 
/* Dispatch to the BITONE or BITTWO variant of SET_PCRE_STACK_GUARDnn. */
776 #define SET_PCRE_STACK_GUARD(stack_guard) \
777  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
778  G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
779  else \
780  G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
781 
/* Expression macro (conditional operator), unlike its neighbours: yields the
BITONE or BITTWO variant of STRLENnn(p). */
782 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
783  G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
784 
/* Dispatch to the BITONE or BITTWO variant of PCRE_ASSIGN_JIT_STACKnn. */
785 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
786  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787  G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
788  else \
789  G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
790 
/* Dispatch to the BITONE or BITTWO variant of PCRE_COMPILEnn. */
791 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
792  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
793  G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
794  else \
795  G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
796 
/* No run-time dispatch: pastes to pcre32_config or pcre16_config, the config
function of the BITONE width. */
797 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
798 
/* Two-width generic macros, continued. Each expands to an if / else statement
with no trailing semicolon: the BITONE variant is used when pcre_mode equals
G(G(PCRE,BITONE),_MODE), the BITTWO variant otherwise. Arguments are forwarded
unchanged. */

/* Dispatch to the BITONE or BITTWO variant of PCRE_COPY_NAMED_SUBSTRINGnn. */
799 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
800  namesptr, cbuffer, size) \
801  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
802  G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
803  namesptr, cbuffer, size); \
804  else \
805  G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
806  namesptr, cbuffer, size)
807 
/* Dispatch to the BITONE or BITTWO variant of PCRE_COPY_SUBSTRINGnn. */
808 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
809  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
810  G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
811  else \
812  G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
813 
/* Dispatch to the BITONE or BITTWO variant of PCRE_DFA_EXECnn. */
814 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
815  offsets, size_offsets, workspace, size_workspace) \
816  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817  G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
818  offsets, size_offsets, workspace, size_workspace); \
819  else \
820  G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
821  offsets, size_offsets, workspace, size_workspace)
822 
/* Dispatch to the BITONE or BITTWO variant of PCRE_EXECnn. */
823 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
824  offsets, size_offsets) \
825  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
826  G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
827  offsets, size_offsets); \
828  else \
829  G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
830  offsets, size_offsets)
831 
/* Dispatch to the BITONE or BITTWO variant of PCRE_FREE_STUDYnn. */
832 #define PCRE_FREE_STUDY(extra) \
833  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
834  G(PCRE_FREE_STUDY,BITONE)(extra); \
835  else \
836  G(PCRE_FREE_STUDY,BITTWO)(extra)
837 
/* Dispatch to the BITONE or BITTWO variant of PCRE_FREE_SUBSTRINGnn. */
838 #define PCRE_FREE_SUBSTRING(substring) \
839  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
840  G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
841  else \
842  G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
843 
/* Dispatch to the BITONE or BITTWO variant of PCRE_FREE_SUBSTRING_LISTnn. */
844 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
845  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
846  G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
847  else \
848  G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
849 
850 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
851  getnamesptr, subsptr) \
852  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
853  G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
854  getnamesptr, subsptr); \
855  else \
856  G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
857  getnamesptr, subsptr)
858 
859 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
860  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861  G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
862  else \
863  G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
864 
865 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
866  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
867  G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
868  else \
869  G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
870 
871 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
872  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
873  G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
874  else \
875  G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
876 
/* Allocate a JIT stack using whichever of the two compiled-in libraries
matches the current pcre_mode. This macro yields a value, so the whole
conditional is enclosed in parentheses (as for STRLEN) to keep it intact when
it is used as part of a larger expression. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881 
882 #define PCRE_JIT_STACK_FREE(stack) \
883  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
884  G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
885  else \
886  G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
887 
/* Build a set of character tables using whichever of the two compiled-in
libraries matches the current pcre_mode. This macro yields a value, so the
whole conditional is enclosed in parentheses (as for STRLEN) to keep it intact
when it is used as part of a larger expression. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
891 
892 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
893  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
894  G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
895  else \
896  G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
897 
898 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
899  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
900  G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
901  else \
902  G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
903 
904 #define PCRE_STUDY(extra, re, options, error) \
905  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
906  G(PCRE_STUDY,BITONE)(extra, re, options, error); \
907  else \
908  G(PCRE_STUDY,BITTWO)(extra, re, options, error)
909 
910 #endif /* Two out of three modes */
911 
912 /* ----- End of cases where more than one mode is supported ----- */
913 
914 
915 /* ----- Only 8-bit mode is supported ----- */
916 
917 #elif defined SUPPORT_PCRE8
918 #define CHAR_SIZE 1
919 #define PCHARS PCHARS8
920 #define PCHARSV PCHARSV8
921 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
922 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
923 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
924 #define STRLEN STRLEN8
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
926 #define PCRE_COMPILE PCRE_COMPILE8
927 #define PCRE_CONFIG pcre_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
931 #define PCRE_EXEC PCRE_EXEC8
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
941 #define PCRE_MAKETABLES pcre_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
943 #define PCRE_PRINTINT PCRE_PRINTINT8
944 #define PCRE_STUDY PCRE_STUDY8
945 
946 /* ----- Only 16-bit mode is supported ----- */
947 
948 #elif defined SUPPORT_PCRE16
949 #define CHAR_SIZE 2
950 #define PCHARS PCHARS16
951 #define PCHARSV PCHARSV16
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
954 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
955 #define STRLEN STRLEN16
956 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
957 #define PCRE_COMPILE PCRE_COMPILE16
958 #define PCRE_CONFIG pcre16_config
959 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
960 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
961 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
962 #define PCRE_EXEC PCRE_EXEC16
963 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
964 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
965 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
966 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
967 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
968 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
969 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
970 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
971 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
972 #define PCRE_MAKETABLES pcre16_maketables()
973 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
974 #define PCRE_PRINTINT PCRE_PRINTINT16
975 #define PCRE_STUDY PCRE_STUDY16
976 
977 /* ----- Only 32-bit mode is supported ----- */
978 
979 #elif defined SUPPORT_PCRE32
980 #define CHAR_SIZE 4
981 #define PCHARS PCHARS32
982 #define PCHARSV PCHARSV32
983 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
984 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
985 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
986 #define STRLEN STRLEN32
987 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
988 #define PCRE_COMPILE PCRE_COMPILE32
989 #define PCRE_CONFIG pcre32_config
990 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
991 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
992 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
993 #define PCRE_EXEC PCRE_EXEC32
994 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
995 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
996 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
997 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
998 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
999 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1000 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1001 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1002 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
1003 #define PCRE_MAKETABLES pcre32_maketables()
1004 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1005 #define PCRE_PRINTINT PCRE_PRINTINT32
1006 #define PCRE_STUDY PCRE_STUDY32
1007 
1008 #endif
1009 
1010 /* ----- End of mode-specific function call macros ----- */
1011 
1012 
1013 /* Other parameters */
1014 
1015 #ifndef CLOCKS_PER_SEC
1016 #ifdef CLK_TCK
1017 #define CLOCKS_PER_SEC CLK_TCK
1018 #else
1019 #define CLOCKS_PER_SEC 100
1020 #endif
1021 #endif
1022 
1023 #if !defined NODFA
1024 #define DFA_WS_DIMENSION 1000
1025 #endif
1026 
1027 /* This is the default loop count for timing. */
1028 
1029 #define LOOPREPEAT 500000
1030 
1031 /* Static variables */
1032 
1033 static FILE *outfile;
1034 static int log_store = 0;
1035 static int callout_count;
1036 static int callout_extra;
1037 static int callout_fail_count;
1038 static int callout_fail_id;
1039 static int debug_lengths;
1040 static int first_callout;
1041 static int jit_was_used;
1042 static int locale_set = 0;
1043 static int show_malloc;
1044 static int stack_guard_return;
1045 static int use_utf;
1046 static const unsigned char *last_callout_mark = NULL;
1047 
1048 /* The buffers grow automatically if very long input lines are encountered. */
1049 
1050 static int buffer_size = 50000;
1051 static pcre_uint8 *buffer = NULL;
1052 static pcre_uint8 *pbuffer = NULL;
1053 
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055 
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058 #endif
1059 
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062 #endif
1063 
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1070 
1071 #ifdef SUPPORT_PCRE16
1072 #undef IMM2_SIZE
1073 #define IMM2_SIZE 1
1074 
1075 #if LINK_SIZE == 2
1076 #undef LINK_SIZE
1077 #define LINK_SIZE 1
1078 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1079 #undef LINK_SIZE
1080 #define LINK_SIZE 2
1081 #else
1082 #error LINK_SIZE must be either 2, 3, or 4
1083 #endif
1084 
1085 static int buffer16_size = 0;
1086 static pcre_uint16 *buffer16 = NULL;
1087 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1088 #endif /* SUPPORT_PCRE16 */
1089 
1090 #ifdef SUPPORT_PCRE32
1091 #undef IMM2_SIZE
1092 #define IMM2_SIZE 1
1093 #undef LINK_SIZE
1094 #define LINK_SIZE 1
1095 
1096 static int buffer32_size = 0;
1097 static pcre_uint32 *buffer32 = NULL;
1098 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1099 #endif /* SUPPORT_PCRE32 */
1100 
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be changed by an option. If there is no 8-bit support, there
must be 16- or 32-bit support, so default to 16-bit mode if it is available,
and to 32-bit mode otherwise. */
1104 
1105 #if defined SUPPORT_PCRE8
1106 static int pcre_mode = PCRE8_MODE;
1107 #elif defined SUPPORT_PCRE16
1108 static int pcre_mode = PCRE16_MODE;
1109 #elif defined SUPPORT_PCRE32
1110 static int pcre_mode = PCRE32_MODE;
1111 #endif
1112 
1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1114 
1115 static int jit_study_bits[] =
1116  {
1117  PCRE_STUDY_JIT_COMPILE,
1118  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1119  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1120  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1121  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1122  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1123  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1125 };
1126 
1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1129 
1130 /* Textual explanations for runtime error codes */
1131 
1132 static const char *errtexts[] = {
1133  NULL, /* 0 is no error */
1134  NULL, /* NOMATCH is handled specially */
1135  "NULL argument passed",
1136  "bad option value",
1137  "magic number missing",
1138  "unknown opcode - pattern overwritten?",
1139  "no more memory",
1140  NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1141  "match limit exceeded",
1142  "callout error code",
1143  NULL, /* BADUTF8/16 is handled specially */
1144  NULL, /* BADUTF8/16 offset is handled specially */
1145  NULL, /* PARTIAL is handled specially */
1146  "not used - internal error",
1147  "internal error - pattern overwritten?",
1148  "bad count value",
1149  "item unsupported for DFA matching",
1150  "backreference condition or recursion test not supported for DFA matching",
1151  "match limit not supported for DFA matching",
1152  "workspace size exceeded in DFA matching",
1153  "too much recursion for DFA matching",
1154  "recursion limit exceeded",
1155  "not used - internal error",
1156  "invalid combination of newline options",
1157  "bad offset value",
1158  NULL, /* SHORTUTF8/16 is handled specially */
1159  "nested recursion at the same subject position",
1160  "JIT stack limit reached",
1161  "pattern compiled in wrong mode: 8-bit/16-bit error",
1162  "pattern compiled with other endianness",
1163  "invalid data in workspace for DFA restart",
1164  "bad JIT option",
1165  "bad length"
1166 };
1167 
1168 
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1172 
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1177 
1178 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 only ASCII characters. */
1180 
1181 static const pcre_uint8 tables0[] = {
1182 
1183 /* This table is a lower casing table. */
1184 
1185  0, 1, 2, 3, 4, 5, 6, 7,
1186  8, 9, 10, 11, 12, 13, 14, 15,
1187  16, 17, 18, 19, 20, 21, 22, 23,
1188  24, 25, 26, 27, 28, 29, 30, 31,
1189  32, 33, 34, 35, 36, 37, 38, 39,
1190  40, 41, 42, 43, 44, 45, 46, 47,
1191  48, 49, 50, 51, 52, 53, 54, 55,
1192  56, 57, 58, 59, 60, 61, 62, 63,
1193  64, 97, 98, 99,100,101,102,103,
1194  104,105,106,107,108,109,110,111,
1195  112,113,114,115,116,117,118,119,
1196  120,121,122, 91, 92, 93, 94, 95,
1197  96, 97, 98, 99,100,101,102,103,
1198  104,105,106,107,108,109,110,111,
1199  112,113,114,115,116,117,118,119,
1200  120,121,122,123,124,125,126,127,
1201  128,129,130,131,132,133,134,135,
1202  136,137,138,139,140,141,142,143,
1203  144,145,146,147,148,149,150,151,
1204  152,153,154,155,156,157,158,159,
1205  160,161,162,163,164,165,166,167,
1206  168,169,170,171,172,173,174,175,
1207  176,177,178,179,180,181,182,183,
1208  184,185,186,187,188,189,190,191,
1209  192,193,194,195,196,197,198,199,
1210  200,201,202,203,204,205,206,207,
1211  208,209,210,211,212,213,214,215,
1212  216,217,218,219,220,221,222,223,
1213  224,225,226,227,228,229,230,231,
1214  232,233,234,235,236,237,238,239,
1215  240,241,242,243,244,245,246,247,
1216  248,249,250,251,252,253,254,255,
1217 
1218 /* This table is a case flipping table. */
1219 
1220  0, 1, 2, 3, 4, 5, 6, 7,
1221  8, 9, 10, 11, 12, 13, 14, 15,
1222  16, 17, 18, 19, 20, 21, 22, 23,
1223  24, 25, 26, 27, 28, 29, 30, 31,
1224  32, 33, 34, 35, 36, 37, 38, 39,
1225  40, 41, 42, 43, 44, 45, 46, 47,
1226  48, 49, 50, 51, 52, 53, 54, 55,
1227  56, 57, 58, 59, 60, 61, 62, 63,
1228  64, 97, 98, 99,100,101,102,103,
1229  104,105,106,107,108,109,110,111,
1230  112,113,114,115,116,117,118,119,
1231  120,121,122, 91, 92, 93, 94, 95,
1232  96, 65, 66, 67, 68, 69, 70, 71,
1233  72, 73, 74, 75, 76, 77, 78, 79,
1234  80, 81, 82, 83, 84, 85, 86, 87,
1235  88, 89, 90,123,124,125,126,127,
1236  128,129,130,131,132,133,134,135,
1237  136,137,138,139,140,141,142,143,
1238  144,145,146,147,148,149,150,151,
1239  152,153,154,155,156,157,158,159,
1240  160,161,162,163,164,165,166,167,
1241  168,169,170,171,172,173,174,175,
1242  176,177,178,179,180,181,182,183,
1243  184,185,186,187,188,189,190,191,
1244  192,193,194,195,196,197,198,199,
1245  200,201,202,203,204,205,206,207,
1246  208,209,210,211,212,213,214,215,
1247  216,217,218,219,220,221,222,223,
1248  224,225,226,227,228,229,230,231,
1249  232,233,234,235,236,237,238,239,
1250  240,241,242,243,244,245,246,247,
1251  248,249,250,251,252,253,254,255,
1252 
1253 /* This table contains bit maps for various character classes. Each map is 32
1254 bytes long and the bits run from the least significant end of each byte. The
1255 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256 graph, print, punct, and cntrl. Other classes are built from combinations. */
1257 
1258  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1259  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262 
1263  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1264  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267 
1268  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1269  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272 
1273  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277 
1278  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282 
1283  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1284  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1287 
1288  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1289  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1292 
1293  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1294  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1297 
1298  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1299  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1302 
1303  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1304  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1307 
1308 /* This table identifies various classes of character by individual bits:
1309  0x01 white space character
1310  0x02 letter
1311  0x04 decimal digit
1312  0x08 hexadecimal digit
1313  0x10 alphanumeric or '_'
1314  0x80 regular expression metacharacter or binary zero
1315 */
1316 
1317  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1322  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349 
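/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc. The layout
is the same as for tables0: a 256-byte lower casing table, a 256-byte case
flipping table, 320 bytes of character class bit maps, and a 256-byte character
type table. */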
1353 
1354 static const pcre_uint8 tables1[] = {
1355 0,1,2,3,4,5,6,7,
1356 8,9,10,11,12,13,14,15,
1357 16,17,18,19,20,21,22,23,
1358 24,25,26,27,28,29,30,31,
1359 32,33,34,35,36,37,38,39,
1360 40,41,42,43,44,45,46,47,
1361 48,49,50,51,52,53,54,55,
1362 56,57,58,59,60,61,62,63,
1363 64,97,98,99,100,101,102,103,
1364 104,105,106,107,108,109,110,111,
1365 112,113,114,115,116,117,118,119,
1366 120,121,122,91,92,93,94,95,
1367 96,97,98,99,100,101,102,103,
1368 104,105,106,107,108,109,110,111,
1369 112,113,114,115,116,117,118,119,
1370 120,121,122,123,124,125,126,127,
1371 128,129,130,131,132,133,134,135,
1372 136,137,138,139,140,141,142,143,
1373 144,145,146,147,148,149,150,151,
1374 152,153,154,155,156,157,158,159,
1375 160,161,162,163,164,165,166,167,
1376 168,169,170,171,172,173,174,175,
1377 176,177,178,179,180,181,182,183,
1378 184,185,186,187,188,189,190,191,
1379 224,225,226,227,228,229,230,231,
1380 232,233,234,235,236,237,238,239,
1381 240,241,242,243,244,245,246,215,
1382 248,249,250,251,252,253,254,223,
1383 224,225,226,227,228,229,230,231,
1384 232,233,234,235,236,237,238,239,
1385 240,241,242,243,244,245,246,247,
1386 248,249,250,251,252,253,254,255,
1387 0,1,2,3,4,5,6,7,
1388 8,9,10,11,12,13,14,15,
1389 16,17,18,19,20,21,22,23,
1390 24,25,26,27,28,29,30,31,
1391 32,33,34,35,36,37,38,39,
1392 40,41,42,43,44,45,46,47,
1393 48,49,50,51,52,53,54,55,
1394 56,57,58,59,60,61,62,63,
1395 64,97,98,99,100,101,102,103,
1396 104,105,106,107,108,109,110,111,
1397 112,113,114,115,116,117,118,119,
1398 120,121,122,91,92,93,94,95,
1399 96,65,66,67,68,69,70,71,
1400 72,73,74,75,76,77,78,79,
1401 80,81,82,83,84,85,86,87,
1402 88,89,90,123,124,125,126,127,
1403 128,129,130,131,132,133,134,135,
1404 136,137,138,139,140,141,142,143,
1405 144,145,146,147,148,149,150,151,
1406 152,153,154,155,156,157,158,159,
1407 160,161,162,163,164,165,166,167,
1408 168,169,170,171,172,173,174,175,
1409 176,177,178,179,180,181,182,183,
1410 184,185,186,187,188,189,190,191,
1411 224,225,226,227,228,229,230,231,
1412 232,233,234,235,236,237,238,239,
1413 240,241,242,243,244,245,246,215,
1414 248,249,250,251,252,253,254,223,
1415 192,193,194,195,196,197,198,199,
1416 200,201,202,203,204,205,206,207,
1417 208,209,210,211,212,213,214,247,
1418 216,217,218,219,220,221,222,255,
1419 0,62,0,0,1,0,0,0,
1420 0,0,0,0,0,0,0,0,
1421 32,0,0,0,1,0,0,0,
1422 0,0,0,0,0,0,0,0,
1423 0,0,0,0,0,0,255,3,
1424 126,0,0,0,126,0,0,0,
1425 0,0,0,0,0,0,0,0,
1426 0,0,0,0,0,0,0,0,
1427 0,0,0,0,0,0,255,3,
1428 0,0,0,0,0,0,0,0,
1429 0,0,0,0,0,0,12,2,
1430 0,0,0,0,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 254,255,255,7,0,0,0,0,
1433 0,0,0,0,0,0,0,0,
1434 255,255,127,127,0,0,0,0,
1435 0,0,0,0,0,0,0,0,
1436 0,0,0,0,254,255,255,7,
1437 0,0,0,0,0,4,32,4,
1438 0,0,0,128,255,255,127,255,
1439 0,0,0,0,0,0,255,3,
1440 254,255,255,135,254,255,255,7,
1441 0,0,0,0,0,4,44,6,
1442 255,255,127,255,255,255,127,255,
1443 0,0,0,0,254,255,255,255,
1444 255,255,255,255,255,255,255,127,
1445 0,0,0,0,254,255,255,255,
1446 255,255,255,255,255,255,255,255,
1447 0,2,0,0,255,255,255,255,
1448 255,255,255,255,255,255,255,127,
1449 0,0,0,0,255,255,255,255,
1450 255,255,255,255,255,255,255,255,
1451 0,0,0,0,254,255,0,252,
1452 1,0,0,248,1,0,0,120,
1453 0,0,0,0,254,255,255,255,
1454 0,0,128,0,0,0,128,0,
1455 255,255,255,255,0,0,0,0,
1456 0,0,0,0,0,0,0,128,
1457 255,255,255,255,0,0,0,0,
1458 0,0,0,0,0,0,0,0,
1459 128,0,0,0,0,0,0,0,
1460 0,1,1,0,1,1,0,0,
1461 0,0,0,0,0,0,0,0,
1462 0,0,0,0,0,0,0,0,
1463 1,0,0,0,128,0,0,0,
1464 128,128,128,128,0,0,128,0,
1465 28,28,28,28,28,28,28,28,
1466 28,28,0,0,0,0,0,128,
1467 0,26,26,26,26,26,26,18,
1468 18,18,18,18,18,18,18,18,
1469 18,18,18,18,18,18,18,18,
1470 18,18,18,128,128,0,128,16,
1471 0,26,26,26,26,26,26,18,
1472 18,18,18,18,18,18,18,18,
1473 18,18,18,18,18,18,18,18,
1474 18,18,18,128,128,0,0,0,
1475 0,0,0,0,0,1,0,0,
1476 0,0,0,0,0,0,0,0,
1477 0,0,0,0,0,0,0,0,
1478 0,0,0,0,0,0,0,0,
1479 1,0,0,0,0,0,0,0,
1480 0,0,18,0,0,0,0,0,
1481 0,0,20,20,0,18,0,0,
1482 0,20,18,0,0,0,0,0,
1483 18,18,18,18,18,18,18,18,
1484 18,18,18,18,18,18,18,18,
1485 18,18,18,18,18,18,18,0,
1486 18,18,18,18,18,18,18,18,
1487 18,18,18,18,18,18,18,18,
1488 18,18,18,18,18,18,18,18,
1489 18,18,18,18,18,18,18,0,
1490 18,18,18,18,18,18,18,18
1491 };
1492 
1493 
1494 
1495 
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4, for example) whose libraries have no
strerror(). The message is looked up in the system's sys_errlist[] table.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr; otherwise the fixed
              string "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1515 
1516 
1517 
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1521 
1522 /*
1523 Arguments:
1524  rc the return code from PCRE_CONFIG_NEWLINE
1525  isc TRUE if called from "-C newline"
1526 Returns: nothing
1527 */
1528 
1529 static void
1530 print_newline_config(int rc, BOOL isc)
1531 {
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1534 switch(rc)
1535  {
1536  case CHAR_CR: s = "CR"; break;
1537  case CHAR_LF: s = "LF"; break;
1538  case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539  case -1: s = "ANY"; break;
1540  case -2: s = "ANYCRLF"; break;
1541 
1542  default:
1543  printf("a non-standard value: 0x%04x\n", rc);
1544  return;
1545  }
1546 
1547 printf("%s\n", s);
1548 }
1549 
1550 
1551 
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1555 
1556 static pcre_jit_stack* jit_callback(void *arg)
1557 {
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1560 }
1561 
1562 
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1567 
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1570 
1571 Argument:
1572  utf8bytes a pointer to the byte vector
1573  vptr a pointer to an int to receive the value
1574 
1575 Returns: > 0 => the number of bytes consumed
1576  -6 to 0 => malformed UTF-8 character at offset = (-return)
1577 */
1578 
1579 static int
1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 {
1582 pcre_uint32 c = *utf8bytes++;
1583 pcre_uint32 d = c;
1584 int i, j, s;
1585 
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587  {
1588  if ((d & 0x80) == 0) break;
1589  d <<= 1;
1590  }
1591 
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594 
1595 /* i now has a value in the range 1-5 */
1596 
1597 s = 6*i;
1598 d = (c & utf8_table3[i]) << s;
1599 
1600 for (j = 0; j < i; j++)
1601  {
1602  c = *utf8bytes++;
1603  if ((c & 0xc0) != 0x80) return -(j+1);
1604  s -= 6;
1605  d |= (c & 0x3f) << s;
1606  }
1607 
1608 /* Check that encoding was the correct unique one */
1609 
1610 for (j = 0; j < utf8_table1_size; j++)
1611  if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1613 
1614 /* Valid value */
1615 
1616 *vptr = d;
1617 return i+1;
1618 }
1619 #endif /* NOUTF || SUPPORT_PCRE16 */
1620 
1621 
1622 
#if defined SUPPORT_PCRE8 && !defined NOUTF
/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* Encodes a character value as a UTF-8 sequence of 1 to 6 bytes.

Arguments:
  cvalue     the character value, in the range 0 - 0x7fffffff
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             OR -1 if cvalue is greater than 0x7fffffff (nothing is written)
*/

static int
ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
{
int extra = 0;               /* number of bytes after the first */
pcre_uint8 *out;

if (cvalue > 0x7fffffffu) return -1;

while (extra < utf8_table1_size && cvalue > (pcre_uint32)utf8_table1[extra])
  extra++;

/* Fill in the trailing bytes from last to first, six bits at a time. */

for (out = utf8bytes + extra; out > utf8bytes; out--)
  {
  *out = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }

/* What is left of the value is combined with the marker for the first byte. */

*utf8bytes = utf8_table2[extra] | cvalue;
return extra + 1;
}
#endif
1656 
1657 
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double: a value below 0x80 uses 1 byte in UTF-8 and 2 in UTF-16, any other
value up to 0xffff uses 2 or 3 bytes in UTF-8 and 2 in UTF-16, and a higher
value uses 4 bytes in both. The result is always left in buffer16, which is
enlarged when it is too small.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed, which includes a
                   character that extends beyond the end of the string
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode

The program exits if memory for buffer16 cannot be obtained.
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;

if (buffer16_size < 2*len + 2)
  {
  /* The NULL test is retained for old libraries whose free() cannot cope with
  a NULL argument. */
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

pp = buffer16;

if (!utf && !data)
  {
  while (len-- > 0) *pp++ = *p++;
  }

else
  {
  pcre_uint32 c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);

    /* A character that runs past the end of the string is treated as
    malformed. The buffer size was computed from len, so accepting such a
    character could write beyond the end of buffer16. */

    if (chlen <= 0 || chlen > len) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c < 0x10000) *pp++ = c; else
      {
      if (!utf) return -3;
      c -= 0x10000;                     /* encode as a surrogate pair */
      *pp++ = 0xD800 | (c >> 10);
      *pp++ = 0xDC00 | (c & 0x3ff);
      }
    }
  }

*pp = 0;
return (int)(pp - buffer16);
}
#endif
1736 
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1741 
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1746 result is always left in buffer32.
1747 
1748 Note that this function does not object to surrogate values. This is
1749 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1750 for the purpose of testing that they are correctly faulted.
1751 
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1754 
1755 Arguments:
1756  data TRUE if converting a data line; FALSE for a regex
1757  p points to a byte string
1758  utf true if UTF-8 (to be converted to UTF-32)
1759  len number of bytes in the string (excluding trailing zero)
1760 
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762  OR -1 if a UTF-8 string is malformed
1763  OR -2 if a value > 0x10ffff is encountered
1764  OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765 */
1766 
1767 static int
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1769 {
1770 pcre_uint32 *pp;
1771 
1772 if (buffer32_size < 4*len + 4)
1773  {
1774  if (buffer32 != NULL) free(buffer32);
1775  buffer32_size = 4*len + 4;
1776  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777  if (buffer32 == NULL)
1778  {
1779  fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780  exit(1);
1781  }
1782  }
1783 
1784 pp = buffer32;
1785 
1786 if (!utf && !data)
1787  {
1788  while (len-- > 0) *pp++ = *p++;
1789  }
1790 
1791 else
1792  {
1793  pcre_uint32 c = 0;
1794  while (len > 0)
1795  {
1796  int chlen = utf82ord(p, &c);
1797  if (chlen <= 0) return -1;
1798  if (utf)
1799  {
1800  if (c > 0x10ffff) return -2;
1801  if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802  }
1803 
1804  p += chlen;
1805  len -= chlen;
1806  *pp++ = c;
1807  }
1808  }
1809 
1810 *pp = 0;
1811 return pp - buffer32;
1812 }
1813 
1814 /* Check that a 32-bit character string is valid UTF-32.
1815 
1816 Arguments:
1817  string points to the string
1818  length length of string, or -1 if the string is zero-terminated
1819 
1820 Returns: TRUE if the string is a valid UTF-32 string
1821  FALSE otherwise
1822 */
1823 
1824 #ifdef NEVER /* Not used */
1825 #ifdef SUPPORT_UTF
1826 static BOOL
1827 valid_utf32(pcre_uint32 *string, int length)
1828 {
1829 register pcre_uint32 *p;
1830 register pcre_uint32 c;
1831 
1832 for (p = string; length-- > 0; p++)
1833  {
1834  c = *p;
1835  if (c > 0x10ffffu) return FALSE; /* Too big */
1836  if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837  }
1838 
1839 return TRUE;
1840 }
1841 #endif /* SUPPORT_UTF */
1842 #endif /* NEVER */
1843 #endif /* SUPPORT_PCRE32 */
1844 
1845 
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1849 
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, the other two buffers must also be expanded likewise, and the
1855 contents of pbuffer, which are a copy of the input for callouts, must be
1856 preserved (for when expansion happens for a data line). This is not the most
1857 optimal way of handling this, but hey, this is just a test program!
1858 
1859 Arguments:
1860  f the file to read
1861  start where in buffer to start (this *must* be within buffer)
1862  prompt for stdin or readline()
1863 
1864 Returns: pointer to the start of new data
1865  could be a copy of start, or could be moved
1866  NULL if no data read and EOF reached
1867 */
1868 
1869 static pcre_uint8 *
1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 {
1872 pcre_uint8 *here = start;
1873 
1874 for (;;)
1875  {
1876  size_t rlen = (size_t)(buffer_size - (here - buffer));
1877 
1878  if (rlen > 1000)
1879  {
1880  int dlen;
1881 
1882  /* If libreadline or libedit support is required, use readline() to read a
1883  line if the input is a terminal. Note that readline() removes the trailing
1884  newline, so we must put it back again, to be compatible with fgets(). */
1885 
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887  if (isatty(fileno(f)))
1888  {
1889  size_t len;
1890  char *s = readline(prompt);
1891  if (s == NULL) return (here == start)? NULL : start;
1892  len = strlen(s);
1893  if (len > 0) add_history(s);
1894  if (len > rlen - 1) len = rlen - 1;
1895  memcpy(here, s, len);
1896  here[len] = '\n';
1897  here[len+1] = 0;
1898  free(s);
1899  }
1900  else
1901 #endif
1902 
1903  /* Read the next line by normal means, prompting if the file is stdin. */
1904 
1905  {
1906  if (f == stdin) printf("%s", prompt);
1907  if (fgets((char *)here, rlen, f) == NULL)
1908  return (here == start)? NULL : start;
1909  }
1910 
1911  dlen = (int)strlen((char *)here);
1912  if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913  here += dlen;
1914  }
1915 
1916  else
1917  {
1918  int new_buffer_size = 2*buffer_size;
1919  pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920  pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921 
1922  if (new_buffer == NULL || new_pbuffer == NULL)
1923  {
1924  fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925  exit(1);
1926  }
1927 
1928  memcpy(new_buffer, buffer, buffer_size);
1929  memcpy(new_pbuffer, pbuffer, buffer_size);
1930 
1931  buffer_size = new_buffer_size;
1932 
1933  start = new_buffer + (start - buffer);
1934  here = new_buffer + (here - buffer);
1935 
1936  free(buffer);
1937  free(pbuffer);
1938 
1939  buffer = new_buffer;
1940  pbuffer = new_pbuffer;
1941  }
1942  }
1943 
1944 /* Control never gets here */
1945 }
1946 
1947 
1948 
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1952 
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1956 
1957 Arguments:
1958  str string to be converted
1959  endptr where to put the end pointer
1960 
1961 Returns: the converted value as an int (0 if no digits are present)
1962 */
1963 
1964 static int
1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1966 {
1967 int result = 0;
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970 *endptr = str;
1971 return(result);
1972 }
1973 
1974 
1975 
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1979 
1980 /* Print a single character either literally, or as a hex escape. */
1981 
1982 static int pchar(pcre_uint32 c, FILE *f)
1983 {
1984 int n = 0;
1985 if (PRINTOK(c))
1986  {
1987  if (f != NULL) fprintf(f, "%c", c);
1988  return 1;
1989  }
1990 
1991 if (c < 0x100)
1992  {
1993  if (use_utf)
1994  {
1995  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1996  return 6;
1997  }
1998  else
1999  {
2000  if (f != NULL) fprintf(f, "\\x%02x", c);
2001  return 4;
2002  }
2003  }
2004 
2005 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2006 return n >= 0 ? n : 0;
2007 }
2008 
2009 
2010 
2011 #ifdef SUPPORT_PCRE8
2012 /*************************************************
2013 * Print 8-bit character string *
2014 *************************************************/
2015 
2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2017 If handed a NULL file, just counts chars without printing. */
2018 
2019 static int pchars(pcre_uint8 *p, int length, FILE *f)
2020 {
2021 pcre_uint32 c = 0;
2022 int yield = 0;
2023 
2024 if (length < 0)
2025  length = strlen((char *)p);
2026 
2027 while (length-- > 0)
2028  {
2029 #if !defined NOUTF
2030  if (use_utf)
2031  {
2032  int rc = utf82ord(p, &c);
2033  if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2034  {
2035  length -= rc - 1;
2036  p += rc;
2037  yield += pchar(c, f);
2038  continue;
2039  }
2040  }
2041 #endif
2042  c = *p++;
2043  yield += pchar(c, f);
2044  }
2045 
2046 return yield;
2047 }
2048 #endif
2049 
2050 
2051 
2052 #ifdef SUPPORT_PCRE16
2053 /*************************************************
2054 * Find length of 0-terminated 16-bit string *
2055 *************************************************/
2056 
2057 static int strlen16(PCRE_SPTR16 p)
2058 {
2059 PCRE_SPTR16 pp = p;
2060 while (*pp != 0) pp++;
2061 return (int)(pp - p);
2062 }
2063 #endif /* SUPPORT_PCRE16 */
2064 
2065 
2066 
2067 #ifdef SUPPORT_PCRE32
2068 /*************************************************
2069 * Find length of 0-terminated 32-bit string *
2070 *************************************************/
2071 
2072 static int strlen32(PCRE_SPTR32 p)
2073 {
2074 PCRE_SPTR32 pp = p;
2075 while (*pp != 0) pp++;
2076 return (int)(pp - p);
2077 }
2078 #endif /* SUPPORT_PCRE32 */
2079 
2080 
2081 
2082 #ifdef SUPPORT_PCRE16
2083 /*************************************************
2084 * Print 16-bit character string *
2085 *************************************************/
2086 
2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2088 If handed a NULL file, just counts chars without printing. */
2089 
2090 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2091 {
2092 int yield = 0;
2093 
2094 if (length < 0)
2095  length = strlen16(p);
2096 
2097 while (length-- > 0)
2098  {
2099  pcre_uint32 c = *p++ & 0xffff;
2100 #if !defined NOUTF
2101  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2102  {
2103  int d = *p & 0xffff;
2104  if (d >= 0xDC00 && d <= 0xDFFF)
2105  {
2106  c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2107  length--;
2108  p++;
2109  }
2110  }
2111 #endif
2112  yield += pchar(c, f);
2113  }
2114 
2115 return yield;
2116 }
2117 #endif /* SUPPORT_PCRE16 */
2118 
2119 
2120 
2121 #ifdef SUPPORT_PCRE32
2122 /*************************************************
2123 * Print 32-bit character string *
2124 *************************************************/
2125 
2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2127 If handed a NULL file, just counts chars without printing. */
2128 
2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2130 {
2131 int yield = 0;
2132 
2133 (void)(utf); /* Avoid compiler warning */
2134 
2135 if (length < 0)
2136  length = strlen32(p);
2137 
2138 while (length-- > 0)
2139  {
2140  pcre_uint32 c = *p++;
2141  yield += pchar(c, f);
2142  }
2143 
2144 return yield;
2145 }
2146 #endif /* SUPPORT_PCRE32 */
2147 
2148 
2149 
2150 #ifdef SUPPORT_PCRE8
2151 /*************************************************
2152 * Read a capture name (8-bit) and check it *
2153 *************************************************/
2154 
2155 static pcre_uint8 *
2156 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2157 {
2158 pcre_uint8 *npp = *pp;
2159 while (isalnum(*p)) *npp++ = *p++;
2160 *npp++ = 0;
2161 *npp = 0;
2162 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2163  {
2164  fprintf(outfile, "no parentheses with name \"");
2165  PCHARSV(*pp, 0, -1, outfile);
2166  fprintf(outfile, "\"\n");
2167  }
2168 
2169 *pp = npp;
2170 return p;
2171 }
2172 #endif /* SUPPORT_PCRE8 */
2173 
2174 
2175 
2176 #ifdef SUPPORT_PCRE16
2177 /*************************************************
2178 * Read a capture name (16-bit) and check it *
2179 *************************************************/
2180 
2181 /* Note that the text being read is 8-bit. */
2182 
2183 static pcre_uint8 *
2184 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2185 {
2186 pcre_uint16 *npp = *pp;
2187 while (isalnum(*p)) *npp++ = *p++;
2188 *npp++ = 0;
2189 *npp = 0;
2190 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2191  {
2192  fprintf(outfile, "no parentheses with name \"");
2193  PCHARSV(*pp, 0, -1, outfile);
2194  fprintf(outfile, "\"\n");
2195  }
2196 *pp = npp;
2197 return p;
2198 }
2199 #endif /* SUPPORT_PCRE16 */
2200 
2201 
2202 
2203 #ifdef SUPPORT_PCRE32
2204 /*************************************************
2205 * Read a capture name (32-bit) and check it *
2206 *************************************************/
2207 
2208 /* Note that the text being read is 8-bit. */
2209 
2210 static pcre_uint8 *
2211 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2212 {
2213 pcre_uint32 *npp = *pp;
2214 while (isalnum(*p)) *npp++ = *p++;
2215 *npp++ = 0;
2216 *npp = 0;
2217 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2218  {
2219  fprintf(outfile, "no parentheses with name \"");
2220  PCHARSV(*pp, 0, -1, outfile);
2221  fprintf(outfile, "\"\n");
2222  }
2223 *pp = npp;
2224 return p;
2225 }
2226 #endif /* SUPPORT_PCRE32 */
2227 
2228 
2229 
2230 /*************************************************
2231 * Stack guard function *
2232 *************************************************/
2233 
2234 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2235 return when a count overflows. */
2236 
2237 static int stack_guard(void)
2238 {
2239 return stack_guard_return;
2240 }
2241 
2242 /*************************************************
2243 * Callout function *
2244 *************************************************/
2245 
2246 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2247 the match. Yield zero unless more callouts than the fail count, or the callout
2248 data is not zero. */
2249 
2250 static int callout(pcre_callout_block *cb)
2251 {
2252 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2253 int i, current_position, pre_start, post_start, subject_length;
2254 
2255 if (callout_extra)
2256  {
2257  fprintf(f, "Callout %d: last capture = %d\n",
2258  cb->callout_number, cb->capture_last);
2259 
2260  if (cb->offset_vector != NULL)
2261  {
2262  for (i = 0; i < cb->capture_top * 2; i += 2)
2263  {
2264  if (cb->offset_vector[i] < 0)
2265  fprintf(f, "%2d: <unset>\n", i/2);
2266  else
2267  {
2268  fprintf(f, "%2d: ", i/2);
2269  PCHARSV(cb->subject, cb->offset_vector[i],
2270  cb->offset_vector[i+1] - cb->offset_vector[i], f);
2271  fprintf(f, "\n");
2272  }
2273  }
2274  }
2275  }
2276 
2277 /* Re-print the subject in canonical form, the first time or if giving full
2278 datails. On subsequent calls in the same match, we use pchars just to find the
2279 printed lengths of the substrings. */
2280 
2281 if (f != NULL) fprintf(f, "--->");
2282 
2283 /* If a lookbehind is involved, the current position may be earlier than the
2284 match start. If so, use the match start instead. */
2285 
2286 current_position = (cb->current_position >= cb->start_match)?
2287  cb->current_position : cb->start_match;
2288 
2289 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2290 PCHARS(post_start, cb->subject, cb->start_match,
2291  current_position - cb->start_match, f);
2292 
2293 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2294 
2295 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
2296 
2297 if (f != NULL) fprintf(f, "\n");
2298 
2299 /* Always print appropriate indicators, with callout number if not already
2300 shown. For automatic callouts, show the pattern offset. */
2301 
2302 if (cb->callout_number == 255)
2303  {
2304  fprintf(outfile, "%+3d ", cb->pattern_position);
2305  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2306  }
2307 else
2308  {
2309  if (callout_extra) fprintf(outfile, " ");
2310  else fprintf(outfile, "%3d ", cb->callout_number);
2311  }
2312 
2313 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2314 fprintf(outfile, "^");
2315 
2316 if (post_start > 0)
2317  {
2318  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2319  fprintf(outfile, "^");
2320  }
2321 
2322 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2323  fprintf(outfile, " ");
2324 
2325 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2326  pbuffer + cb->pattern_position);
2327 
2328 fprintf(outfile, "\n");
2329 first_callout = 0;
2330 
2331 if (cb->mark != last_callout_mark)
2332  {
2333  if (cb->mark == NULL)
2334  fprintf(outfile, "Latest Mark: <unset>\n");
2335  else
2336  {
2337  fprintf(outfile, "Latest Mark: ");
2338  PCHARSV(cb->mark, 0, -1, outfile);
2339  putc('\n', outfile);
2340  }
2341  last_callout_mark = cb->mark;
2342  }
2343 
2344 if (cb->callout_data != NULL)
2345  {
2346  int callout_data = *((int *)(cb->callout_data));
2347  if (callout_data != 0)
2348  {
2349  fprintf(outfile, "Callout data = %d\n", callout_data);
2350  return callout_data;
2351  }
2352  }
2353 
2354 return (cb->callout_number != callout_fail_id)? 0 :
2355  (++callout_count >= callout_fail_count)? 1 : 0;
2356 }
2357 
2358 
2359 /*************************************************
2360 * Local malloc functions *
2361 *************************************************/
2362 
2363 /* Alternative malloc function, to test functionality and save the size of a
2364 compiled re, which is the first store request that pcre_compile() makes. The
2365 show_malloc variable is set only during matching. */
2366 
2367 static void *new_malloc(size_t size)
2368 {
2369 void *block = malloc(size);
2370 if (show_malloc)
2371  fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2372 return block;
2373 }
2374 
2375 static void new_free(void *block)
2376 {
2377 if (show_malloc)
2378  fprintf(outfile, "free %p\n", block);
2379 free(block);
2380 }
2381 
2382 /* For recursion malloc/free, to test stacking calls */
2383 
2384 static void *stack_malloc(size_t size)
2385 {
2386 void *block = malloc(size);
2387 if (show_malloc)
2388  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2389 return block;
2390 }
2391 
2392 static void stack_free(void *block)
2393 {
2394 if (show_malloc)
2395  fprintf(outfile, "stack_free %p\n", block);
2396 free(block);
2397 }
2398 
2399 
2400 /*************************************************
2401 * Call pcre_fullinfo() *
2402 *************************************************/
2403 
2404 /* Get one piece of information from the pcre_fullinfo() function. When only
2405 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2406 value, but the code is defensive.
2407 
2408 Arguments:
2409  re compiled regex
2410  study study data
2411  option PCRE_INFO_xxx option
2412  ptr where to put the data
2413 
2414 Returns: 0 when OK, < 0 on error
2415 */
2416 
2417 static int
2418 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2419 {
2420 int rc;
2421 
2422 if (pcre_mode == PCRE32_MODE)
2423 #ifdef SUPPORT_PCRE32
2424  rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2425 #else
2426  rc = PCRE_ERROR_BADMODE;
2427 #endif
2428 else if (pcre_mode == PCRE16_MODE)
2429 #ifdef SUPPORT_PCRE16
2430  rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2431 #else
2432  rc = PCRE_ERROR_BADMODE;
2433 #endif
2434 else
2435 #ifdef SUPPORT_PCRE8
2436  rc = pcre_fullinfo(re, study, option, ptr);
2437 #else
2438  rc = PCRE_ERROR_BADMODE;
2439 #endif
2440 
2441 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2442  {
2443  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2444  pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2445  if (rc == PCRE_ERROR_BADMODE)
2446  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2447  "%d-bit mode\n", 8 * CHAR_SIZE,
2448  8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2449  }
2450 
2451 return rc;
2452 }
2453 
2454 
2455 
2456 /*************************************************
2457 * Swap byte functions *
2458 *************************************************/
2459 
2460 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2461 value, respectively.
2462 
2463 Arguments:
2464  value any number
2465 
2466 Returns: the byte swapped value
2467 */
2468 
2469 static pcre_uint32
2470 swap_uint32(pcre_uint32 value)
2471 {
2472 return ((value & 0x000000ff) << 24) |
2473  ((value & 0x0000ff00) << 8) |
2474  ((value & 0x00ff0000) >> 8) |
2475  (value >> 24);
2476 }
2477 
2478 static pcre_uint16
2479 swap_uint16(pcre_uint16 value)
2480 {
2481 return (value >> 8) | (value << 8);
2482 }
2483 
2484 
2485 
2486 /*************************************************
2487 * Flip bytes in a compiled pattern *
2488 *************************************************/
2489 
2490 /* This function is called if the 'F' option was present on a pattern that is
2491 to be written to a file. We flip the bytes of all the integer fields in the
2492 regex data block and the study block. In 16-bit mode this also flips relevant
2493 bytes in the pattern itself. This is to make it possible to test PCRE's
2494 ability to reload byte-flipped patterns, e.g. those compiled on a different
2495 architecture. */
2496 
2497 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2498 static void
2499 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2500 {
2501 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2502 #ifdef SUPPORT_PCRE16
2503 int op;
2504 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2505 int length = re->name_count * re->name_entry_size;
2506 #ifdef SUPPORT_UTF
2507 BOOL utf = (re->options & PCRE_UTF16) != 0;
2508 BOOL utf16_char = FALSE;
2509 #endif /* SUPPORT_UTF */
2510 #endif /* SUPPORT_PCRE16 */
2511 
2512 /* Always flip the bytes in the main data block and study blocks. */
2513 
2515 re->size = swap_uint32(re->size);
2516 re->options = swap_uint32(re->options);
2517 re->flags = swap_uint32(re->flags);
2518 re->limit_match = swap_uint32(re->limit_match);
2519 re->limit_recursion = swap_uint32(re->limit_recursion);
2520 re->first_char = swap_uint16(re->first_char);
2521 re->req_char = swap_uint16(re->req_char);
2522 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2523 re->top_bracket = swap_uint16(re->top_bracket);
2524 re->top_backref = swap_uint16(re->top_backref);
2525 re->name_table_offset = swap_uint16(re->name_table_offset);
2526 re->name_entry_size = swap_uint16(re->name_entry_size);
2527 re->name_count = swap_uint16(re->name_count);
2528 re->ref_count = swap_uint16(re->ref_count);
2529 
2530 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2531  {
2532  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2533  rsd->size = swap_uint32(rsd->size);
2534  rsd->flags = swap_uint32(rsd->flags);
2535  rsd->minlength = swap_uint32(rsd->minlength);
2536  }
2537 
2538 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2539 in the name table, if present, and then in the pattern itself. */
2540 
2541 #ifdef SUPPORT_PCRE16
2542 if (pcre_mode != PCRE16_MODE) return;
2543 
2544 while(TRUE)
2545  {
2546  /* Swap previous characters. */
2547  while (length-- > 0)
2548  {
2549  *ptr = swap_uint16(*ptr);
2550  ptr++;
2551  }
2552 #ifdef SUPPORT_UTF
2553  if (utf16_char)
2554  {
2555  if ((ptr[-1] & 0xfc00) == 0xd800)
2556  {
2557  /* We know that there is only one extra character in UTF-16. */
2558  *ptr = swap_uint16(*ptr);
2559  ptr++;
2560  }
2561  }
2562  utf16_char = FALSE;
2563 #endif /* SUPPORT_UTF */
2564 
2565  /* Get next opcode. */
2566 
2567  length = 0;
2568  op = *ptr;
2569  *ptr++ = swap_uint16(op);
2570 
2571  switch (op)
2572  {
2573  case OP_END:
2574  return;
2575 
2576 #ifdef SUPPORT_UTF
2577  case OP_CHAR:
2578  case OP_CHARI:
2579  case OP_NOT:
2580  case OP_NOTI:
2581  case OP_STAR:
2582  case OP_MINSTAR:
2583  case OP_PLUS:
2584  case OP_MINPLUS:
2585  case OP_QUERY:
2586  case OP_MINQUERY:
2587  case OP_UPTO:
2588  case OP_MINUPTO:
2589  case OP_EXACT:
2590  case OP_POSSTAR:
2591  case OP_POSPLUS:
2592  case OP_POSQUERY:
2593  case OP_POSUPTO:
2594  case OP_STARI:
2595  case OP_MINSTARI:
2596  case OP_PLUSI:
2597  case OP_MINPLUSI:
2598  case OP_QUERYI:
2599  case OP_MINQUERYI:
2600  case OP_UPTOI:
2601  case OP_MINUPTOI:
2602  case OP_EXACTI:
2603  case OP_POSSTARI:
2604  case OP_POSPLUSI:
2605  case OP_POSQUERYI:
2606  case OP_POSUPTOI:
2607  case OP_NOTSTAR:
2608  case OP_NOTMINSTAR:
2609  case OP_NOTPLUS:
2610  case OP_NOTMINPLUS:
2611  case OP_NOTQUERY:
2612  case OP_NOTMINQUERY:
2613  case OP_NOTUPTO:
2614  case OP_NOTMINUPTO:
2615  case OP_NOTEXACT:
2616  case OP_NOTPOSSTAR:
2617  case OP_NOTPOSPLUS:
2618  case OP_NOTPOSQUERY:
2619  case OP_NOTPOSUPTO:
2620  case OP_NOTSTARI:
2621  case OP_NOTMINSTARI:
2622  case OP_NOTPLUSI:
2623  case OP_NOTMINPLUSI:
2624  case OP_NOTQUERYI:
2625  case OP_NOTMINQUERYI:
2626  case OP_NOTUPTOI:
2627  case OP_NOTMINUPTOI:
2628  case OP_NOTEXACTI:
2629  case OP_NOTPOSSTARI:
2630  case OP_NOTPOSPLUSI:
2631  case OP_NOTPOSQUERYI:
2632  case OP_NOTPOSUPTOI:
2633  if (utf) utf16_char = TRUE;
2634 #endif
2635  /* Fall through. */
2636 
2637  default:
2638  length = OP_lengths16[op] - 1;
2639  break;
2640 
2641  case OP_CLASS:
2642  case OP_NCLASS:
2643  /* Skip the character bit map. */
2644  ptr += 32/sizeof(pcre_uint16);
2645  length = 0;
2646  break;
2647 
2648  case OP_XCLASS:
2649  /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2650  if (LINK_SIZE > 1)
2651  length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2652  - (1 + LINK_SIZE + 1));
2653  else
2654  length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2655 
2656  /* Reverse the size of the XCLASS instance. */
2657  *ptr = swap_uint16(*ptr);
2658  ptr++;
2659  if (LINK_SIZE > 1)
2660  {
2661  *ptr = swap_uint16(*ptr);
2662  ptr++;
2663  }
2664 
2665  op = *ptr;
2666  *ptr = swap_uint16(op);
2667  ptr++;
2668  if ((op & XCL_MAP) != 0)
2669  {
2670  /* Skip the character bit map. */
2671  ptr += 32/sizeof(pcre_uint16);
2672  length -= 32/sizeof(pcre_uint16);
2673  }
2674  break;
2675  }
2676  }
2677 /* Control should never reach here in 16 bit mode. */
2678 #endif /* SUPPORT_PCRE16 */
2679 }
2680 #endif /* SUPPORT_PCRE[8|16] */
2681 
2682 
2683 
2684 #if defined SUPPORT_PCRE32
2685 static void
2686 regexflip_32(pcre *ere, pcre_extra *extra)
2687 {
2688 real_pcre32 *re = (real_pcre32 *)ere;
2689 int op;
2690 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2691 int length = re->name_count * re->name_entry_size;
2692 
2693 /* Always flip the bytes in the main data block and study blocks. */
2694 
2696 re->size = swap_uint32(re->size);
2697 re->options = swap_uint32(re->options);
2698 re->flags = swap_uint32(re->flags);
2699 re->limit_match = swap_uint32(re->limit_match);
2700 re->limit_recursion = swap_uint32(re->limit_recursion);
2701 re->first_char = swap_uint32(re->first_char);
2702 re->req_char = swap_uint32(re->req_char);
2703 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2704 re->top_bracket = swap_uint16(re->top_bracket);
2705 re->top_backref = swap_uint16(re->top_backref);
2706 re->name_table_offset = swap_uint16(re->name_table_offset);
2707 re->name_entry_size = swap_uint16(re->name_entry_size);
2708 re->name_count = swap_uint16(re->name_count);
2709 re->ref_count = swap_uint16(re->ref_count);
2710 
2711 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2712  {
2713  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2714  rsd->size = swap_uint32(rsd->size);
2715  rsd->flags = swap_uint32(rsd->flags);
2716  rsd->minlength = swap_uint32(rsd->minlength);
2717  }
2718 
2719 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2720 the pattern itself. */
2721 
2722 while(TRUE)
2723  {
2724  /* Swap previous characters. */
2725  while (length-- > 0)
2726  {
2727  *ptr = swap_uint32(*ptr);
2728  ptr++;
2729  }
2730 
2731  /* Get next opcode. */
2732 
2733  length = 0;
2734  op = *ptr;
2735  *ptr++ = swap_uint32(op);
2736 
2737  switch (op)
2738  {
2739  case OP_END:
2740  return;
2741 
2742  default:
2743  length = OP_lengths32[op] - 1;
2744  break;
2745 
2746  case OP_CLASS:
2747  case OP_NCLASS:
2748  /* Skip the character bit map. */
2749  ptr += 32/sizeof(pcre_uint32);
2750  length = 0;
2751  break;
2752 
2753  case OP_XCLASS:
2754  /* LINK_SIZE can only be 1 in 32-bit mode. */
2755  length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2756 
2757  /* Reverse the size of the XCLASS instance. */
2758  *ptr = swap_uint32(*ptr);
2759  ptr++;
2760 
2761  op = *ptr;
2762  *ptr = swap_uint32(op);
2763  ptr++;
2764  if ((op & XCL_MAP) != 0)
2765  {
2766  /* Skip the character bit map. */
2767  ptr += 32/sizeof(pcre_uint32);
2768  length -= 32/sizeof(pcre_uint32);
2769  }
2770  break;
2771  }
2772  }
2773 /* Control should never reach here in 32 bit mode. */
2774 }
2775 
2776 #endif /* SUPPORT_PCRE32 */
2777 
2778 
2779 
2780 static void
2781 regexflip(pcre *ere, pcre_extra *extra)
2782 {
2783 #if defined SUPPORT_PCRE32
2784  if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2785  regexflip_32(ere, extra);
2786 #endif
2787 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2788  if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2789  regexflip8_or_16(ere, extra);
2790 #endif
2791 }
2792 
2793 
2794 
2795 /*************************************************
2796 * Check match or recursion limit *
2797 *************************************************/
2798 
2799 static int
2800 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2801  int start_offset, int options, int *use_offsets, int use_size_offsets,
2802  int flag, unsigned long int *limit, int errnumber, const char *msg)
2803 {
2804 int count;
2805 int min = 0;
2806 int mid = 64;
2807 int max = -1;
2808 
2809 extra->flags |= flag;
2810 
2811 for (;;)
2812  {
2813  *limit = mid;
2814 
2815  PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2816  use_offsets, use_size_offsets);
2817 
2818  if (count == errnumber)
2819  {
2820  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2821  min = mid;
2822  mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2823  }
2824 
2825  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2826  count == PCRE_ERROR_PARTIAL)
2827  {
2828  if (mid == min + 1)
2829  {
2830  fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2831  break;
2832  }
2833  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2834  max = mid;
2835  mid = (min + mid)/2;
2836  }
2837  else break; /* Some other error */
2838  }
2839 
2840 extra->flags &= ~flag;
2841 return count;
2842 }
2843 
2844 
2845 
/*************************************************
*      Case-independent strncmp() function       *
*************************************************/

/* Compares at most n characters of two strings, folding case with tolower().
As with strncmp(), the comparison ends at the first difference or as soon as
both strings reach their terminating zero, so nothing beyond a shared
terminator is examined. A count of zero or less compares nothing.

Arguments:
  s         first string
  t         second string
  n         maximum number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;    /* Both strings ended together: they are equal */
  }
return 0;
}
2869 
2870 
2871 
2872 /*************************************************
2873 * Check multicharacter option *
2874 *************************************************/
2875 
2876 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2877 a message and return 0 if there is no match.
2878 
2879 Arguments:
2880  p points after the leading '<'
2881  f file for error message
2882  nl TRUE to check only for newline settings
2883  stype "modifier" or "escape sequence"
2884 
2885 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2886 */
2887 
2888 static int
2889 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2890 {
2891 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2892 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2893 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2894 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2895 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2896 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2897 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2898 
2899 if (!nl)
2900  {
2901  if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2902  }
2903 
2904 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2905 return 0;
2906 }
2907 
2908 
2909 
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the command-line help to stdout. Entries for optional features
(readline, the 8-, 16- and 32-bit libraries, DFA matching, the POSIX
interface) depend on the same build-time symbols that guard those features.
The list is kept in step with the option scanning in main(), including the -8
switch and the "ebcdic" and "ebcdic-nl" arguments of -C.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");

/* Library selection: each switch is shown only if that library was built. */

#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");

/* -C and the arguments it understands. */

printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit\n");
printf(" with its value if numeric (else 0). The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" bsr \\R type [ANYCRLF, ANY]\n");
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl hex code for LF in an EBCDIC environment, else 0\n");

printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2972 
2973 
2974 
2975 /*************************************************
2976 * Main Program *
2977 *************************************************/
2978 
2979 /* Read lines from named file or stdin and write to named file or stdout; lines
2980 consist of a regular expression, in delimiters and optionally followed by
2981 options, followed by a set of test data, terminated by an empty line. */
2982 
2983 int main(int argc, char **argv)
2984 {
2985 FILE *infile = stdin;
2986 const char *version;
2987 int options = 0;
2988 int study_options = 0;
2989 int default_find_match_limit = FALSE;
2990 pcre_uint32 default_options = 0;
2991 int op = 1;
2992 int timeit = 0;
2993 int timeitm = 0;
2994 int showtotaltimes = 0;
2995 int showinfo = 0;
2996 int showstore = 0;
2997 int force_study = -1;
2998 int force_study_options = 0;
2999 int quiet = 0;
3000 int size_offsets = 45;
3001 int size_offsets_max;
3002 int *offsets = NULL;
3003 int debug = 0;
3004 int done = 0;
3005 int all_use_dfa = 0;
3006 int verify_jit = 0;
3007 int yield = 0;
3008 int stack_size;
3009 pcre_uint8 *dbuffer = NULL;
3010 pcre_uint8 lockout[24] = { 0 };
3011 size_t dbuffer_size = 1u << 14;
3012 clock_t total_compile_time = 0;
3013 clock_t total_study_time = 0;
3014 clock_t total_match_time = 0;
3015 
3016 #if !defined NOPOSIX
3017 int posix = 0;
3018 #endif
3019 #if !defined NODFA
3020 int *dfa_workspace = NULL;
3021 #endif
3022 
3023 pcre_jit_stack *jit_stack = NULL;
3024 
3025 /* These vectors store, end-to-end, a list of zero-terminated captured
3026 substring names, each list itself being terminated by an empty name. Assume
3027 that 1024 is plenty long enough for the few names we'll be testing. It is
3028 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3029 for the actual memory, to ensure alignment. */
3030 
3031 pcre_uint32 copynames[1024];
3032 pcre_uint32 getnames[1024];
3033 
3034 #ifdef SUPPORT_PCRE32
3035 pcre_uint32 *cn32ptr;
3036 pcre_uint32 *gn32ptr;
3037 #endif
3038 
3039 #ifdef SUPPORT_PCRE16
3040 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3041 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3042 pcre_uint16 *cn16ptr;
3043 pcre_uint16 *gn16ptr;
3044 #endif
3045 
3046 #ifdef SUPPORT_PCRE8
3047 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3048 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3049 pcre_uint8 *cn8ptr;
3050 pcre_uint8 *gn8ptr;
3051 #endif
3052 
3053 /* Get buffers from malloc() so that valgrind will check their misuse when
3054 debugging. They grow automatically when very long lines are read. The 16-
3055 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3056 
3057 buffer = (pcre_uint8 *)malloc(buffer_size);
3058 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3059 
3060 /* The outfile variable is static so that new_malloc can use it. */
3061 
3062 outfile = stdout;
3063 
3064 /* The following _setmode() stuff is some Windows magic that tells its runtime
3065 library to translate CRLF into a single LF character. At least, that's what
3066 I've been told: never having used Windows I take this all on trust. Originally
3067 it set 0x8000, but then I was advised that _O_BINARY was better. */
3068 
3069 #if defined(_WIN32) || defined(WIN32)
3070 _setmode( _fileno( stdout ), _O_BINARY );
3071 #endif
3072 
3073 /* Get the version number: both pcre_version() and pcre16_version() give the
3074 same answer. We just need to ensure that we call one that is available. */
3075 
3076 #if defined SUPPORT_PCRE8
3077 version = pcre_version();
3078 #elif defined SUPPORT_PCRE16
3079 version = pcre16_version();
3080 #elif defined SUPPORT_PCRE32
3081 version = pcre32_version();
3082 #endif
3083 
3084 /* Scan options */
3085 
3086 while (argc > 1 && argv[op][0] == '-')
3087  {
3088  pcre_uint8 *endptr;
3089  char *arg = argv[op];
3090 
3091  if (strcmp(arg, "-m") == 0) showstore = 1;
3092  else if (strcmp(arg, "-s") == 0) force_study = 0;
3093 
3094  else if (strncmp(arg, "-s+", 3) == 0)
3095  {
3096  arg += 3;
3097  if (*arg == '+') { arg++; verify_jit = TRUE; }
3098  force_study = 1;
3099  if (*arg == 0)
3100  force_study_options = jit_study_bits[6];
3101  else if (*arg >= '1' && *arg <= '7')
3102  force_study_options = jit_study_bits[*arg - '1'];
3103  else goto BAD_ARG;
3104  }
3105  else if (strcmp(arg, "-8") == 0)
3106  {
3107 #ifdef SUPPORT_PCRE8
3108  pcre_mode = PCRE8_MODE;
3109 #else
3110  printf("** This version of PCRE was built without 8-bit support\n");
3111  exit(1);
3112 #endif
3113  }
3114  else if (strcmp(arg, "-16") == 0)
3115  {
3116 #ifdef SUPPORT_PCRE16
3117  pcre_mode = PCRE16_MODE;
3118 #else
3119  printf("** This version of PCRE was built without 16-bit support\n");
3120  exit(1);
3121 #endif
3122  }
3123  else if (strcmp(arg, "-32") == 0)
3124  {
3125 #ifdef SUPPORT_PCRE32
3126  pcre_mode = PCRE32_MODE;
3127 #else
3128  printf("** This version of PCRE was built without 32-bit support\n");
3129  exit(1);
3130 #endif
3131  }
3132  else if (strcmp(arg, "-q") == 0) quiet = 1;
3133  else if (strcmp(arg, "-b") == 0) debug = 1;
3134  else if (strcmp(arg, "-i") == 0) showinfo = 1;
3135  else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3136  else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3137  else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3138 #if !defined NODFA
3139  else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3140 #endif
3141  else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3142  ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3143  *endptr == 0))
3144  {
3145  op++;
3146  argc--;
3147  }
3148  else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3149  strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3150  {
3151  int temp;
3152  int both = arg[2] == 0;
3153  showtotaltimes = arg[1] == 'T';
3154  if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3155  *endptr == 0))
3156  {
3157  timeitm = temp;
3158  op++;
3159  argc--;
3160  }
3161  else timeitm = LOOPREPEAT;
3162  if (both) timeit = timeitm;
3163  }
3164  else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3165  ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3166  *endptr == 0))
3167  {
3168 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3169  printf("PCRE: -S not supported on this OS\n");
3170  exit(1);
3171 #else
3172  int rc;
3173  struct rlimit rlim;
3174  getrlimit(RLIMIT_STACK, &rlim);
3175  rlim.rlim_cur = stack_size * 1024 * 1024;
3176  rc = setrlimit(RLIMIT_STACK, &rlim);
3177  if (rc != 0)
3178  {
3179  printf("PCRE: setrlimit() failed with error %d\n", rc);
3180  exit(1);
3181  }
3182  op++;
3183  argc--;
3184 #endif
3185  }
3186 #if !defined NOPOSIX
3187  else if (strcmp(arg, "-p") == 0) posix = 1;
3188 #endif
3189  else if (strcmp(arg, "-C") == 0)
3190  {
3191  int rc;
3192  unsigned long int lrc;
3193 
3194  if (argc > 2)
3195  {
3196  if (strcmp(argv[op + 1], "linksize") == 0)
3197  {
3198  (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3199  printf("%d\n", rc);
3200  yield = rc;
3201 
3202 #ifdef __VMS
3203  vms_setsymbol("LINKSIZE",0,yield );
3204 #endif
3205  }
3206  else if (strcmp(argv[op + 1], "pcre8") == 0)
3207  {
3208 #ifdef SUPPORT_PCRE8
3209  printf("1\n");
3210  yield = 1;
3211 #else
3212  printf("0\n");
3213  yield = 0;
3214 #endif
3215 #ifdef __VMS
3216  vms_setsymbol("PCRE8",0,yield );
3217 #endif
3218  }
3219  else if (strcmp(argv[op + 1], "pcre16") == 0)
3220  {
3221 #ifdef SUPPORT_PCRE16
3222  printf("1\n");
3223  yield = 1;
3224 #else
3225  printf("0\n");
3226  yield = 0;
3227 #endif
3228 #ifdef __VMS
3229  vms_setsymbol("PCRE16",0,yield );
3230 #endif
3231  }
3232  else if (strcmp(argv[op + 1], "pcre32") == 0)
3233  {
3234 #ifdef SUPPORT_PCRE32
3235  printf("1\n");
3236  yield = 1;
3237 #else
3238  printf("0\n");
3239  yield = 0;
3240 #endif
3241 #ifdef __VMS
3242  vms_setsymbol("PCRE32",0,yield );
3243 #endif
3244  }
3245  else if (strcmp(argv[op + 1], "utf") == 0)
3246  {
3247 #ifdef SUPPORT_PCRE8
3248  if (pcre_mode == PCRE8_MODE)
3249  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3250 #endif
3251 #ifdef SUPPORT_PCRE16
3252  if (pcre_mode == PCRE16_MODE)
3253  (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3254 #endif
3255 #ifdef SUPPORT_PCRE32
3256  if (pcre_mode == PCRE32_MODE)
3257  (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3258 #endif
3259  printf("%d\n", rc);
3260  yield = rc;
3261 #ifdef __VMS
3262  vms_setsymbol("UTF",0,yield );
3263 #endif
3264  }
3265  else if (strcmp(argv[op + 1], "ucp") == 0)
3266  {
3267  (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3268  printf("%d\n", rc);
3269  yield = rc;
3270  }
3271  else if (strcmp(argv[op + 1], "jit") == 0)
3272  {
3273  (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3274  printf("%d\n", rc);
3275  yield = rc;
3276  }
3277  else if (strcmp(argv[op + 1], "newline") == 0)
3278  {
3279  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3280  print_newline_config(rc, TRUE);
3281  }
3282  else if (strcmp(argv[op + 1], "bsr") == 0)
3283  {
3284  (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3285  printf("%s\n", rc? "ANYCRLF" : "ANY");
3286  }
3287  else if (strcmp(argv[op + 1], "ebcdic") == 0)
3288  {
3289 #ifdef EBCDIC
3290  printf("1\n");
3291  yield = 1;
3292 #else
3293  printf("0\n");
3294 #endif
3295  }
3296  else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3297  {
3298 #ifdef EBCDIC
3299  printf("0x%02x\n", CHAR_LF);
3300 #else
3301  printf("0\n");
3302 #endif
3303  }
3304  else
3305  {
3306  printf("Unknown -C option: %s\n", argv[op + 1]);
3307  }
3308  goto EXIT;
3309  }
3310 
3311  /* No argument for -C: output all configuration information. */
3312 
3313  printf("PCRE version %s\n", version);
3314  printf("Compiled with\n");
3315 
3316 #ifdef EBCDIC
3317  printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3318 #endif
3319 
3320 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3321 are set, either both UTFs are supported or both are not supported. */
3322 
3323 #ifdef SUPPORT_PCRE8
3324  printf(" 8-bit support\n");
3325  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3326  printf (" %sUTF-8 support\n", rc ? "" : "No ");
3327 #endif
3328 #ifdef SUPPORT_PCRE16
3329  printf(" 16-bit support\n");
3330  (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3331  printf (" %sUTF-16 support\n", rc ? "" : "No ");
3332 #endif
3333 #ifdef SUPPORT_PCRE32
3334  printf(" 32-bit support\n");
3335  (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3336  printf (" %sUTF-32 support\n", rc ? "" : "No ");
3337 #endif
3338 
3339  (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3340  printf(" %sUnicode properties support\n", rc? "" : "No ");
3341  (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3342  if (rc)
3343  {
3344  const char *arch;
3345  (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3346  printf(" Just-in-time compiler support: %s\n", arch);
3347  }
3348  else
3349  printf(" No just-in-time compiler support\n");
3350  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3351  print_newline_config(rc, FALSE);
3352  (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3353  printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3354  "all Unicode newlines");
3355  (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3356  printf(" Internal link size = %d\n", rc);
3357  (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3358  printf(" POSIX malloc threshold = %d\n", rc);
3359  (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3360  printf(" Parentheses nest limit = %ld\n", lrc);
3361  (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3362  printf(" Default match limit = %ld\n", lrc);
3363  (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3364  printf(" Default recursion depth limit = %ld\n", lrc);
3365  (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3366  printf(" Match recursion uses %s", rc? "stack" : "heap");
3367  if (showstore)
3368  {
3369  PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3370  printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3371  }
3372  printf("\n");
3373  goto EXIT;
3374  }
3375  else if (strcmp(arg, "-help") == 0 ||
3376  strcmp(arg, "--help") == 0)
3377  {
3378  usage();
3379  goto EXIT;
3380  }
3381  else
3382  {
3383  BAD_ARG:
3384  printf("** Unknown or malformed option %s\n", arg);
3385  usage();
3386  yield = 1;
3387  goto EXIT;
3388  }
3389  op++;
3390  argc--;
3391  }
3392 
3393 /* Get the store for the offsets vector, and remember what it was */
3394 
3395 size_offsets_max = size_offsets;
3396 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3397 if (offsets == NULL)
3398  {
3399  printf("** Failed to get %d bytes of memory for offsets vector\n",
3400  (int)(size_offsets_max * sizeof(int)));
3401  yield = 1;
3402  goto EXIT;
3403  }
3404 
3405 /* Sort out the input and output files */
3406 
3407 if (argc > 1)
3408  {
3409  infile = fopen(argv[op], INPUT_MODE);
3410  if (infile == NULL)
3411  {
3412  printf("** Failed to open %s\n", argv[op]);
3413  yield = 1;
3414  goto EXIT;
3415  }
3416  }
3417 
3418 if (argc > 2)
3419  {
3420  outfile = fopen(argv[op+1], OUTPUT_MODE);
3421  if (outfile == NULL)
3422  {
3423  printf("** Failed to open %s\n", argv[op+1]);
3424  yield = 1;
3425  goto EXIT;
3426  }
3427  }
3428 
3429 /* Set alternative malloc function */
3430 
3431 #ifdef SUPPORT_PCRE8
3432 pcre_malloc = new_malloc;
3433 pcre_free = new_free;
3434 pcre_stack_malloc = stack_malloc;
3435 pcre_stack_free = stack_free;
3436 #endif
3437 
3438 #ifdef SUPPORT_PCRE16
3439 pcre16_malloc = new_malloc;
3440 pcre16_free = new_free;
3441 pcre16_stack_malloc = stack_malloc;
3442 pcre16_stack_free = stack_free;
3443 #endif
3444 
3445 #ifdef SUPPORT_PCRE32
3446 pcre32_malloc = new_malloc;
3447 pcre32_free = new_free;
3448 pcre32_stack_malloc = stack_malloc;
3449 pcre32_stack_free = stack_free;
3450 #endif
3451 
3452 /* Heading line unless quiet */
3453 
3454 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3455 
3456 /* Main loop */
3457 
3458 while (!done)
3459  {
3460  pcre *re = NULL;
3461  pcre_extra *extra = NULL;
3462 
3463 #if !defined NOPOSIX /* There are still compilers that require no indent */
3464  regex_t preg = { NULL, 0, 0} ;
3465  int do_posix = 0;
3466 #endif
3467 
3468  const char *error;
3469  pcre_uint8 *markptr;
3470  pcre_uint8 *p, *pp, *ppp;
3471  pcre_uint8 *to_file = NULL;
3472  const pcre_uint8 *tables = NULL;
3473  unsigned long int get_options;
3474  unsigned long int true_size, true_study_size = 0;
3475  size_t size;
3476  int do_allcaps = 0;
3477  int do_mark = 0;
3478  int do_study = 0;
3479  int no_force_study = 0;
3480  int do_debug = debug;
3481  int do_G = 0;
3482  int do_g = 0;
3483  int do_showinfo = showinfo;
3484  int do_showrest = 0;
3485  int do_showcaprest = 0;
3486  int do_flip = 0;
3487  int erroroffset, len, delimiter, poffset;
3488 
3489 #if !defined NODFA
3490  int dfa_matched = 0;
3491 #endif
3492 
3493  use_utf = 0;
3494  debug_lengths = 1;
3495  SET_PCRE_STACK_GUARD(NULL);
3496 
3497  if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3498  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3499  fflush(outfile);
3500 
3501  p = buffer;
3502  while (isspace(*p)) p++;
3503  if (*p == 0) continue;
3504 
3505  /* Handle option lock-out setting */
3506 
3507  if (*p == '<' && p[1] == ' ')
3508  {
3509  p += 2;
3510  while (isspace(*p)) p++;
3511  if (strncmp((char *)p, "forbid ", 7) == 0)
3512  {
3513  p += 7;
3514  while (isspace(*p)) p++;
3515  pp = lockout;
3516  while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3517  *pp++ = *p++;
3518  *pp = 0;
3519  }
3520  else
3521  {
3522  printf("** Unrecognized special command '%s'\n", p);
3523  yield = 1;
3524  goto EXIT;
3525  }
3526  continue;
3527  }
3528 
3529  /* See if the pattern is to be loaded pre-compiled from a file. */
3530 
3531  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3532  {
3533  pcre_uint32 magic;
3534  pcre_uint8 sbuf[8];
3535  FILE *f;
3536 
3537  p++;
3538  if (*p == '!')
3539  {
3540  do_debug = TRUE;
3541  do_showinfo = TRUE;
3542  p++;
3543  }
3544 
3545  pp = p + (int)strlen((char *)p);
3546  while (isspace(pp[-1])) pp--;
3547  *pp = 0;
3548 
3549  f = fopen((char *)p, "rb");
3550  if (f == NULL)
3551  {
3552  fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3553  continue;
3554  }
3555  if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3556 
3557  true_size =
3558  (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3559  true_study_size =
3560  (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3561 
3562  re = (pcre *)new_malloc(true_size);
3563  if (re == NULL)
3564  {
3565  printf("** Failed to get %d bytes of memory for pcre object\n",
3566  (int)true_size);
3567  yield = 1;
3568  goto EXIT;
3569  }
3570  if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3571 
3572  magic = REAL_PCRE_MAGIC(re);
3573  if (magic != MAGIC_NUMBER)
3574  {
3575  if (swap_uint32(magic) == MAGIC_NUMBER)
3576  {
3577  do_flip = 1;
3578  }
3579  else
3580  {
3581  fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3582  new_free(re);
3583  fclose(f);
3584  continue;
3585  }
3586  }
3587 
3588  /* We hide the byte-invert info for little and big endian tests. */
3589  fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3590  do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3591 
3592  /* Now see if there is any following study data. */
3593 
3594  if (true_study_size != 0)
3595  {
3596  pcre_study_data *psd;
3597 
3598  extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3599  extra->flags = PCRE_EXTRA_STUDY_DATA;
3600 
3601  psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3602  extra->study_data = psd;
3603 
3604  if (fread(psd, 1, true_study_size, f) != true_study_size)
3605  {
3606  FAIL_READ:
3607  fprintf(outfile, "Failed to read data from %s\n", p);
3608  if (extra != NULL)
3609  {
3610  PCRE_FREE_STUDY(extra);
3611  }
3612  new_free(re);
3613  fclose(f);
3614  continue;
3615  }
3616  fprintf(outfile, "Study data loaded from %s\n", p);
3617  do_study = 1; /* To get the data output if requested */
3618  }
3619  else fprintf(outfile, "No study data\n");
3620 
3621  /* Flip the necessary bytes. */
3622  if (do_flip)
3623  {
3624  int rc;
3625  PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3626  if (rc == PCRE_ERROR_BADMODE)
3627  {
3628  pcre_uint32 flags_in_host_byte_order;
3629  if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3630  flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3631  else
3632  flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3633  /* Simulate the result of the function call below. */
3634  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3635  pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3636  PCRE_INFO_OPTIONS);
3637  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3638  "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3639  new_free(re);
3640  fclose(f);
3641  continue;
3642  }
3643  }
3644 
3645  /* Need to know if UTF-8 for printing data strings. */
3646 
3647  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3648  {
3649  new_free(re);
3650  fclose(f);
3651  continue;
3652  }
3653  use_utf = (get_options & PCRE_UTF8) != 0;
3654 
3655  fclose(f);
3656  goto SHOW_INFO;
3657  }
3658 
3659  /* In-line pattern (the usual case). Get the delimiter and seek the end of
3660  the pattern; if it isn't complete, read more. */
3661 
3662  delimiter = *p++;
3663 
3664  if (isalnum(delimiter) || delimiter == '\\')
3665  {
3666  fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3667  goto SKIP_DATA;
3668  }
3669 
3670  pp = p;
3671  poffset = (int)(p - buffer);
3672 
3673  for(;;)
3674  {
3675  while (*pp != 0)
3676  {
3677  if (*pp == '\\' && pp[1] != 0) pp++;
3678  else if (*pp == delimiter) break;
3679  pp++;
3680  }
3681  if (*pp != 0) break;
3682  if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3683  {
3684  fprintf(outfile, "** Unexpected EOF\n");
3685  done = 1;
3686  goto CONTINUE;
3687  }
3688  if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3689  }
3690 
3691  /* The buffer may have moved while being extended; reset the start of data
3692  pointer to the correct relative point in the buffer. */
3693 
3694  p = buffer + poffset;
3695 
3696  /* If the first character after the delimiter is backslash, make
3697  the pattern end with backslash. This is purely to provide a way
3698  of testing for the error message when a pattern ends with backslash. */
3699 
3700  if (pp[1] == '\\') *pp++ = '\\';
3701 
3702  /* Terminate the pattern at the delimiter, and save a copy of the pattern
3703  for callouts. */
3704 
3705  *pp++ = 0;
3706  strcpy((char *)pbuffer, (char *)p);
3707 
3708  /* Look for modifiers and options after the final delimiter. */
3709 
3710  options = default_options;
3711  study_options = force_study_options;
3712  log_store = showstore; /* default from command line */
3713 
3714  while (*pp != 0)
3715  {
3716  /* Check to see whether this modifier has been locked out for this file.
3717  This is complicated for the multi-character options that begin with '<'.
3718  If there is no '>' in the lockout string, all multi-character modifiers are
3719  locked out. */
3720 
3721  if (strchr((char *)lockout, *pp) != NULL)
3722  {
3723  if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3724  {
3725  int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3726  if (x == 0) goto SKIP_DATA;
3727 
3728  for (ppp = lockout; *ppp != 0; ppp++)
3729  {
3730  if (*ppp == '<')
3731  {
3732  int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3733  if (y == 0)
3734  {
3735  printf("** Error in modifier forbid data - giving up.\n");
3736  yield = 1;
3737  goto EXIT;
3738  }
3739  if (x == y)
3740  {
3741  ppp = pp;
3742  while (*ppp != '>') ppp++;
3743  printf("** The %.*s modifier is locked out - giving up.\n",
3744  (int)(ppp - pp + 1), pp);
3745  yield = 1;
3746  goto EXIT;
3747  }
3748  }
3749  }
3750  }
3751 
3752  /* The single-character modifiers are straightforward. */
3753 
3754  else
3755  {
3756  printf("** The /%c modifier is locked out - giving up.\n", *pp);
3757  yield = 1;
3758  goto EXIT;
3759  }
3760  }
3761 
3762  /* The modifier is not locked out; handle it. */
3763 
3764  switch (*pp++)
3765  {
3766  case 'f': options |= PCRE_FIRSTLINE; break;
3767  case 'g': do_g = 1; break;
3768  case 'i': options |= PCRE_CASELESS; break;
3769  case 'm': options |= PCRE_MULTILINE; break;
3770  case 's': options |= PCRE_DOTALL; break;
3771  case 'x': options |= PCRE_EXTENDED; break;
3772 
3773  case '+':
3774  if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3775  break;
3776 
3777  case '=': do_allcaps = 1; break;
3778  case 'A': options |= PCRE_ANCHORED; break;
3779  case 'B': do_debug = 1; break;
3780  case 'C': options |= PCRE_AUTO_CALLOUT; break;
3781  case 'D': do_debug = do_showinfo = 1; break;
3782  case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3783  case 'F': do_flip = 1; break;
3784  case 'G': do_G = 1; break;
3785  case 'I': do_showinfo = 1; break;
3786  case 'J': options |= PCRE_DUPNAMES; break;
3787  case 'K': do_mark = 1; break;
3788  case 'M': log_store = 1; break;
3789  case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3790  case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3791 
3792 #if !defined NOPOSIX
3793  case 'P': do_posix = 1; break;
3794 #endif
3795 
3796  case 'Q':
3797  switch (*pp)
3798  {
3799  case '0':
3800  case '1':
3801  stack_guard_return = *pp++ - '0';
3802  break;
3803 
3804  default:
3805  fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3806  goto SKIP_DATA;
3807  }
3808  SET_PCRE_STACK_GUARD(stack_guard);
3809  break;
3810 
3811  case 'S':
3812  do_study = 1;
3813  for (;;)
3814  {
3815  switch (*pp++)
3816  {
3817  case 'S':
3818  do_study = 0;
3819  no_force_study = 1;
3820  break;
3821 
3822  case '!':
3823  study_options |= PCRE_STUDY_EXTRA_NEEDED;
3824  break;
3825 
3826  case '+':
3827  if (*pp == '+')
3828  {
3829  verify_jit = TRUE;
3830  pp++;
3831  }
3832  if (*pp >= '1' && *pp <= '7')
3833  study_options |= jit_study_bits[*pp++ - '1'];
3834  else
3835  study_options |= jit_study_bits[6];
3836  break;
3837 
3838  case '-':
3839  study_options &= ~PCRE_STUDY_ALLJIT;
3840  break;
3841 
3842  default:
3843  pp--;
3844  goto ENDLOOP;
3845  }
3846  }
3847  ENDLOOP:
3848  break;
3849 
3850  case 'U': options |= PCRE_UNGREEDY; break;
3851  case 'W': options |= PCRE_UCP; break;
3852  case 'X': options |= PCRE_EXTRA; break;
3853  case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3854  case 'Z': debug_lengths = 0; break;
3855  case '8': options |= PCRE_UTF8; use_utf = 1; break;
3856  case '9': options |= PCRE_NEVER_UTF; break;
3857  case '?': options |= PCRE_NO_UTF8_CHECK; break;
3858 
3859  case 'T':
3860  switch (*pp++)
3861  {
3862  case '0': tables = tables0; break;
3863  case '1': tables = tables1; break;
3864 
3865  case '\r':
3866  case '\n':
3867  case ' ':
3868  case 0:
3869  fprintf(outfile, "** Missing table number after /T\n");
3870  goto SKIP_DATA;
3871 
3872  default:
3873  fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3874  goto SKIP_DATA;
3875  }
3876  break;
3877 
3878  case 'L':
3879  ppp = pp;
3880  /* The '\r' test here is so that it works on Windows. */
3881  /* The '0' test is just in case this is an unterminated line. */
3882  while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3883  *ppp = 0;
3884  if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3885  {
3886  fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3887  goto SKIP_DATA;
3888  }
3889  locale_set = 1;
3890  tables = PCRE_MAKETABLES;
3891  pp = ppp;
3892  break;
3893 
3894  case '>':
3895  to_file = pp;
3896  while (*pp != 0) pp++;
3897  while (isspace(pp[-1])) pp--;
3898  *pp = 0;
3899  break;
3900 
3901  case '<':
3902  {
3903  int x = check_mc_option(pp, outfile, FALSE, "modifier");
3904  if (x == 0) goto SKIP_DATA;
3905  options |= x;
3906  while (*pp++ != '>');
3907  }
3908  break;
3909 
3910  case '\r': /* So that it works in Windows */
3911  case '\n':
3912  case ' ':
3913  break;
3914 
3915  default:
3916  fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3917  goto SKIP_DATA;
3918  }
3919  }
3920 
3921  /* Handle compiling via the POSIX interface, which doesn't support the
3922  timing, showing, or debugging options, nor the ability to pass over
3923  local character tables. Neither does it have 16-bit support. */
3924 
3925 #if !defined NOPOSIX
3926  if (posix || do_posix)
3927  {
3928  int rc;
3929  int cflags = 0;
3930 
3931  if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3932  if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3933  if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3934  if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3935  if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3936  if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3937  if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3938 
3939  rc = regcomp(&preg, (char *)p, cflags);
3940 
3941  /* Compilation failed; go back for another re, skipping to blank line
3942  if non-interactive. */
3943 
3944  if (rc != 0)
3945  {
3946  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3947  fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3948  goto SKIP_DATA;
3949  }
3950  }
3951 
3952  /* Handle compiling via the native interface */
3953 
3954  else
3955 #endif /* !defined NOPOSIX */
3956 
3957  {
3958  /* In 16- or 32-bit mode, convert the input. */
3959 
3960 #ifdef SUPPORT_PCRE16
3961  if (pcre_mode == PCRE16_MODE)
3962  {
3963  switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3964  {
3965  case -1:
3966  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3967  "converted to UTF-16\n");
3968  goto SKIP_DATA;
3969 
3970  case -2:
3971  fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3972  "cannot be converted to UTF-16\n");
3973  goto SKIP_DATA;
3974 
3975  case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3976  fprintf(outfile, "**Failed: character value greater than 0xffff "
3977  "cannot be converted to 16-bit in non-UTF mode\n");
3978  goto SKIP_DATA;
3979 
3980  default:
3981  break;
3982  }
3983  p = (pcre_uint8 *)buffer16;
3984  }
3985 #endif
3986 
3987 #ifdef SUPPORT_PCRE32
3988  if (pcre_mode == PCRE32_MODE)
3989  {
3990  switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3991  {
3992  case -1:
3993  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3994  "converted to UTF-32\n");
3995  goto SKIP_DATA;
3996 
3997  case -2:
3998  fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3999  "cannot be converted to UTF-32\n");
4000  goto SKIP_DATA;
4001 
4002  case -3:
4003  fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4004  goto SKIP_DATA;
4005 
4006  default:
4007  break;
4008  }
4009  p = (pcre_uint8 *)buffer32;
4010  }
4011 #endif
4012 
4013  /* Compile many times when timing */
4014 
4015  if (timeit > 0)
4016  {
4017  register int i;
4018  clock_t time_taken;
4019  clock_t start_time = clock();
4020  for (i = 0; i < timeit; i++)
4021  {
4022  PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4023  if (re != NULL) free(re);
4024  }
4025  total_compile_time += (time_taken = clock() - start_time);
4026  fprintf(outfile, "Compile time %.4f milliseconds\n",
4027  (((double)time_taken * 1000.0) / (double)timeit) /
4028  (double)CLOCKS_PER_SEC);
4029  }
4030 
4031  PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4032 
4033  /* Compilation failed; go back for another re, skipping to blank line
4034  if non-interactive. */
4035 
4036  if (re == NULL)
4037  {
4038  fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4039  SKIP_DATA:
4040  if (infile != stdin)
4041  {
4042  for (;;)
4043  {
4044  if (extend_inputline(infile, buffer, NULL) == NULL)
4045  {
4046  done = 1;
4047  goto CONTINUE;
4048  }
4049  len = (int)strlen((char *)buffer);
4050  while (len > 0 && isspace(buffer[len-1])) len--;
4051  if (len == 0) break;
4052  }
4053  fprintf(outfile, "\n");
4054  }
4055  goto CONTINUE;
4056  }
4057 
4058  /* Compilation succeeded. It is now possible to set the UTF-8 option from
4059  within the regex; check for this so that we know how to process the data
4060  lines. */
4061 
4062  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4063  goto SKIP_DATA;
4064  if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4065 
4066  /* Extract the size for possible writing before possibly flipping it,
4067  and remember the store that was got. */
4068 
4069  true_size = REAL_PCRE_SIZE(re);
4070 
4071  /* Output code size information if requested */
4072 
4073  if (log_store)
4074  {
4075  int name_count, name_entry_size, real_pcre_size;
4076 
4077  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4078  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4079  real_pcre_size = 0;
4080 #ifdef SUPPORT_PCRE8
4081  if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4082  real_pcre_size = sizeof(real_pcre);
4083 #endif
4084 #ifdef SUPPORT_PCRE16
4085  if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4086  real_pcre_size = sizeof(real_pcre16);
4087 #endif
4088 #ifdef SUPPORT_PCRE32
4089  if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4090  real_pcre_size = sizeof(real_pcre32);
4091 #endif
4092  new_info(re, NULL, PCRE_INFO_SIZE, &size);
4093  fprintf(outfile, "Memory allocation (code space): %d\n",
4094  (int)(size - real_pcre_size - name_count * name_entry_size));
4095  }
4096 
4097  /* If -s or /S was present, study the regex to generate additional info to
4098  help with the matching, unless the pattern has the SS option, which
4099  suppresses the effect of /S (used for a few test patterns where studying is
4100  never sensible). */
4101 
4102  if (do_study || (force_study >= 0 && !no_force_study))
4103  {
4104  if (timeit > 0)
4105  {
4106  register int i;
4107  clock_t time_taken;
4108  clock_t start_time = clock();
4109  for (i = 0; i < timeit; i++)
4110  {
4111  PCRE_STUDY(extra, re, study_options, &error);
4112  }
4113  total_study_time = (time_taken = clock() - start_time);
4114  if (extra != NULL)
4115  {
4116  PCRE_FREE_STUDY(extra);
4117  }
4118  fprintf(outfile, " Study time %.4f milliseconds\n",
4119  (((double)time_taken * 1000.0) / (double)timeit) /
4120  (double)CLOCKS_PER_SEC);
4121  }
4122  PCRE_STUDY(extra, re, study_options, &error);
4123  if (error != NULL)
4124  fprintf(outfile, "Failed to study: %s\n", error);
4125  else if (extra != NULL)
4126  {
4127  true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4128  if (log_store)
4129  {
4130  size_t jitsize;
4131  if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4132  jitsize != 0)
4133  fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4134  }
4135  }
4136  }
4137 
4138  /* If /K was present, we set up for handling MARK data. */
4139 
4140  if (do_mark)
4141  {
4142  if (extra == NULL)
4143  {
4144  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4145  extra->flags = 0;
4146  }
4147  extra->mark = &markptr;
4148  extra->flags |= PCRE_EXTRA_MARK;
4149  }
4150 
4151  /* Extract and display information from the compiled data if required. */
4152 
4153  SHOW_INFO:
4154 
4155  if (do_debug)
4156  {
4157  fprintf(outfile, "------------------------------------------------------------------\n");
4158  PCRE_PRINTINT(re, outfile, debug_lengths);
4159  }
4160 
4161  /* We already have the options in get_options (see above) */
4162 
4163  if (do_showinfo)
4164  {
4165  unsigned long int all_options;
4166  pcre_uint32 first_char, need_char;
4167  pcre_uint32 match_limit, recursion_limit;
4168  int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4169  hascrorlf, maxlookbehind, match_empty;
4170  int nameentrysize, namecount;
4171  const pcre_uint8 *nametable;
4172 
4173  if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4174  new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4175  new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4176  new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4177  new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4178  new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4179  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4180  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4181  new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4182  new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4183  new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4184  new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4185  new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4186  new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4187  != 0)
4188  goto SKIP_DATA;
4189 
4190  fprintf(outfile, "Capturing subpattern count = %d\n", count);
4191 
4192  if (backrefmax > 0)
4193  fprintf(outfile, "Max back reference = %d\n", backrefmax);
4194 
4195  if (maxlookbehind > 0)
4196  fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4197 
4198  if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4199  fprintf(outfile, "Match limit = %u\n", match_limit);
4200 
4201  if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4202  fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4203 
4204  if (namecount > 0)
4205  {
4206  fprintf(outfile, "Named capturing subpatterns:\n");
4207  while (namecount-- > 0)
4208  {
4209  int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4210  int length = (int)STRLEN(nametable + imm2_size);
4211  fprintf(outfile, " ");
4212  PCHARSV(nametable, imm2_size, length, outfile);
4213  while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4214 #ifdef SUPPORT_PCRE32
4215  if (pcre_mode == PCRE32_MODE)
4216  fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4217 #endif
4218 #ifdef SUPPORT_PCRE16
4219  if (pcre_mode == PCRE16_MODE)
4220  fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4221 #endif
4222 #ifdef SUPPORT_PCRE8
4223  if (pcre_mode == PCRE8_MODE)
4224  fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4225 #endif
4226  nametable += nameentrysize * CHAR_SIZE;
4227  }
4228  }
4229 
4230  if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4231  if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4232  if (match_empty) fprintf(outfile, "May match empty string\n");
4233 
4234  all_options = REAL_PCRE_OPTIONS(re);
4235  if (do_flip) all_options = swap_uint32(all_options);
4236 
4237  if (get_options == 0) fprintf(outfile, "No options\n");
4238  else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4239  ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4240  ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4241  ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4242  ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4243  ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4244  ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4245  ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4246  ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4247  ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4248  ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4249  ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4250  ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4251  ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4252  ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4253  ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4254  ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4255  ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4256  ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4257  ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4258 
4259  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4260 
4261  switch (get_options & PCRE_NEWLINE_BITS)
4262  {
4263  case PCRE_NEWLINE_CR:
4264  fprintf(outfile, "Forced newline sequence: CR\n");
4265  break;
4266 
4267  case PCRE_NEWLINE_LF:
4268  fprintf(outfile, "Forced newline sequence: LF\n");
4269  break;
4270 
4271  case PCRE_NEWLINE_CRLF:
4272  fprintf(outfile, "Forced newline sequence: CRLF\n");
4273  break;
4274 
4275  case PCRE_NEWLINE_ANYCRLF:
4276  fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4277  break;
4278 
4279  case PCRE_NEWLINE_ANY:
4280  fprintf(outfile, "Forced newline sequence: ANY\n");
4281  break;
4282 
4283  default:
4284  break;
4285  }
4286 
4287  if (first_char_set == 2)
4288  {
4289  fprintf(outfile, "First char at start or follows newline\n");
4290  }
4291  else if (first_char_set == 1)
4292  {
4293  const char *caseless =
4294  ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4295  "" : " (caseless)";
4296 
4297  if (PRINTOK(first_char))
4298  fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4299  else
4300  {
4301  fprintf(outfile, "First char = ");
4302  pchar(first_char, outfile);
4303  fprintf(outfile, "%s\n", caseless);
4304  }
4305  }
4306  else
4307  {
4308  fprintf(outfile, "No first char\n");
4309  }
4310 
4311  if (need_char_set == 0)
4312  {
4313  fprintf(outfile, "No need char\n");
4314  }
4315  else
4316  {
4317  const char *caseless =
4318  ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4319  "" : " (caseless)";
4320 
4321  if (PRINTOK(need_char))
4322  fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4323  else
4324  {
4325  fprintf(outfile, "Need char = ");
4326  pchar(need_char, outfile);
4327  fprintf(outfile, "%s\n", caseless);
4328  }
4329  }
4330 
4331  /* Don't output study size; at present it is in any case a fixed
4332  value, but it varies, depending on the computer architecture, and
4333  so messes up the test suite. (And with the /F option, it might be
4334  flipped.) If study was forced by an external -s, don't show this
4335  information unless -i or -d was also present. This means that, except
4336  when auto-callouts are involved, the output from runs with and without
4337  -s should be identical. */
4338 
4339  if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4340  {
4341  if (extra == NULL)
4342  fprintf(outfile, "Study returned NULL\n");
4343  else
4344  {
4345  pcre_uint8 *start_bits = NULL;
4346  int minlength;
4347 
4348  if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4349  fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4350 
4351  if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4352  {
4353  if (start_bits == NULL)
4354  fprintf(outfile, "No starting char list\n");
4355  else
4356  {
4357  int i;
4358  int c = 24;
4359  fprintf(outfile, "Starting chars: ");
4360  for (i = 0; i < 256; i++)
4361  {
4362  if ((start_bits[i/8] & (1<<(i&7))) != 0)
4363  {
4364  if (c > 75)
4365  {
4366  fprintf(outfile, "\n ");
4367  c = 2;
4368  }
4369  if (PRINTOK(i) && i != ' ')
4370  {
4371  fprintf(outfile, "%c ", i);
4372  c += 2;
4373  }
4374  else
4375  {
4376  fprintf(outfile, "\\x%02x ", i);
4377  c += 5;
4378  }
4379  }
4380  }
4381  fprintf(outfile, "\n");
4382  }
4383  }
4384  }
4385 
4386  /* Show this only if the JIT was set by /S, not by -s. */
4387 
4388  if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4389  (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4390  {
4391  int jit;
4392  if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4393  {
4394  if (jit)
4395  fprintf(outfile, "JIT study was successful\n");
4396  else
4397 #ifdef SUPPORT_JIT
4398  fprintf(outfile, "JIT study was not successful\n");
4399 #else
4400  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4401 #endif
4402  }
4403  }
4404  }
4405  }
4406 
4407  /* If the '>' option was present, we write out the regex to a file, and
4408  that is all. The first 8 bytes of the file are the regex length and then
4409  the study length, in big-endian order. */
4410 
4411  if (to_file != NULL)
4412  {
4413  FILE *f = fopen((char *)to_file, "wb");
4414  if (f == NULL)
4415  {
4416  fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4417  }
4418  else
4419  {
4420  pcre_uint8 sbuf[8];
4421 
4422  if (do_flip) regexflip(re, extra);
4423  sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4424  sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4425  sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4426  sbuf[3] = (pcre_uint8)((true_size) & 255);
4427  sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4428  sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4429  sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4430  sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4431 
4432  if (fwrite(sbuf, 1, 8, f) < 8 ||
4433  fwrite(re, 1, true_size, f) < true_size)
4434  {
4435  fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4436  }
4437  else
4438  {
4439  fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4440 
4441  /* If there is study data, write it. */
4442 
4443  if (extra != NULL)
4444  {
4445  if (fwrite(extra->study_data, 1, true_study_size, f) <
4446  true_study_size)
4447  {
4448  fprintf(outfile, "Write error on %s: %s\n", to_file,
4449  strerror(errno));
4450  }
4451  else fprintf(outfile, "Study data written to %s\n", to_file);
4452  }
4453  }
4454  fclose(f);
4455  }
4456 
4457  new_free(re);
4458  if (extra != NULL)
4459  {
4460  PCRE_FREE_STUDY(extra);
4461  }
4462  if (locale_set)
4463  {
4464  new_free((void *)tables);
4465  setlocale(LC_CTYPE, "C");
4466  locale_set = 0;
4467  }
4468  continue; /* With next regex */
4469  }
4470  } /* End of non-POSIX compile */
4471 
4472  /* Read data lines and test them */
4473 
4474  for (;;)
4475  {
4476 #ifdef SUPPORT_PCRE8
4477  pcre_uint8 *q8;
4478 #endif
4479 #ifdef SUPPORT_PCRE16
4480  pcre_uint16 *q16;
4481 #endif
4482 #ifdef SUPPORT_PCRE32
4483  pcre_uint32 *q32;
4484 #endif
4485  pcre_uint8 *bptr;
4486  int *use_offsets = offsets;
4487  int use_size_offsets = size_offsets;
4488  int callout_data = 0;
4489  int callout_data_set = 0;
4490  int count;
4491  pcre_uint32 c;
4492  int copystrings = 0;
4493  int find_match_limit = default_find_match_limit;
4494  int getstrings = 0;
4495  int getlist = 0;
4496  int gmatched = 0;
4497  int start_offset = 0;
4498  int start_offset_sign = 1;
4499  int g_notempty = 0;
4500  int use_dfa = 0;
4501 
4502  *copynames = 0;
4503  *getnames = 0;
4504 
4505 #ifdef SUPPORT_PCRE32
4506  cn32ptr = copynames;
4507  gn32ptr = getnames;
4508 #endif
4509 #ifdef SUPPORT_PCRE16
4510  cn16ptr = copynames16;
4511  gn16ptr = getnames16;
4512 #endif
4513 #ifdef SUPPORT_PCRE8
4514  cn8ptr = copynames8;
4515  gn8ptr = getnames8;
4516 #endif
4517 
4518  SET_PCRE_CALLOUT(callout);
4519  first_callout = 1;
4520  last_callout_mark = NULL;
4521  callout_extra = 0;
4522  callout_count = 0;
4523  callout_fail_count = 999999;
4524  callout_fail_id = -1;
4525  show_malloc = 0;
4526  options = 0;
4527 
4528  if (extra != NULL) extra->flags &=
4529  ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4530 
4531  len = 0;
4532  for (;;)
4533  {
4534  if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4535  {
4536  if (len > 0) /* Reached EOF without hitting a newline */
4537  {
4538  fprintf(outfile, "\n");
4539  break;
4540  }
4541  done = 1;
4542  goto CONTINUE;
4543  }
4544  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4545  len = (int)strlen((char *)buffer);
4546  if (buffer[len-1] == '\n') break;
4547  }
4548 
4549  while (len > 0 && isspace(buffer[len-1])) len--;
4550  buffer[len] = 0;
4551  if (len == 0) break;
4552 
4553  p = buffer;
4554  while (isspace(*p)) p++;
4555 
4556 #ifndef NOUTF
4557  /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4558  invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4559 
4560  if (use_utf)
4561  {
4562  pcre_uint8 *q;
4563  pcre_uint32 cc;
4564  int n = 1;
4565 
4566  for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4567  if (n <= 0)
4568  {
4569  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4570  goto NEXT_DATA;
4571  }
4572  }
4573 #endif
4574 
4575 #ifdef SUPPORT_VALGRIND
4576  /* Mark the dbuffer as addressable but undefined again. */
4577 
4578  if (dbuffer != NULL)
4579  {
4580  VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4581  }
4582 #endif
4583 
4584  /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4585  the number of pcre_uchar units that will be needed. */
4586 
4587  while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4588  {
4589  dbuffer_size *= 2;
4590  dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4591  if (dbuffer == NULL)
4592  {
4593  fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4594  exit(1);
4595  }
4596  }
4597 
4598 #ifdef SUPPORT_PCRE8
4599  q8 = (pcre_uint8 *) dbuffer;
4600 #endif
4601 #ifdef SUPPORT_PCRE16
4602  q16 = (pcre_uint16 *) dbuffer;
4603 #endif
4604 #ifdef SUPPORT_PCRE32
4605  q32 = (pcre_uint32 *) dbuffer;
4606 #endif
4607 
4608  while ((c = *p++) != 0)
4609  {
4610  int i = 0;
4611  int n = 0;
4612 
4613  /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4614  In non-UTF mode, allow the value of the byte to fall through to later,
4615  where values greater than 127 are turned into UTF-8 when running in
4616  16-bit or 32-bit mode. */
4617 
4618  if (c != '\\')
4619  {
4620 #ifndef NOUTF
4621  if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4622 #endif
4623  }
4624 
4625  /* Handle backslash escapes */
4626 
4627  else switch ((c = *p++))
4628  {
4629  case 'a': c = CHAR_BEL; break;
4630  case 'b': c = '\b'; break;
4631  case 'e': c = CHAR_ESC; break;
4632  case 'f': c = '\f'; break;
4633  case 'n': c = '\n'; break;
4634  case 'r': c = '\r'; break;
4635  case 't': c = '\t'; break;
4636  case 'v': c = '\v'; break;
4637 
4638  case '0': case '1': case '2': case '3':
4639  case '4': case '5': case '6': case '7':
4640  c -= '0';
4641  while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4642  c = c * 8 + *p++ - '0';
4643  break;
4644 
4645  case 'o':
4646  if (*p == '{')
4647  {
4648  pcre_uint8 *pt = p;
4649  c = 0;
4650  for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4651  {
4652  if (++i == 12)
4653  fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4654  "using only the first twelve.\n");
4655  else c = c * 8 + *pt - '0';
4656  }
4657  if (*pt == '}') p = pt + 1;
4658  else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4659  }
4660  break;
4661 
4662  case 'x':
4663  if (*p == '{')
4664  {
4665  pcre_uint8 *pt = p;
4666  c = 0;
4667 
4668  /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4669  when isxdigit() is a macro that refers to its argument more than
4670  once. This is banned by the C Standard, but apparently happens in at
4671  least one MacOS environment. */
4672 
4673  for (pt++; isxdigit(*pt); pt++)
4674  {
4675  if (++i == 9)
4676  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4677  "using only the first eight.\n");
4678  else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4679  }
4680  if (*pt == '}')
4681  {
4682  p = pt + 1;
4683  break;
4684  }
4685  /* Not correct form for \x{...}; fall through */
4686  }
4687 
4688  /* \x without {} always defines just one byte in 8-bit mode. This
4689  allows UTF-8 characters to be constructed byte by byte, and also allows
4690  invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4691  Otherwise, pass it down to later code so that it can be turned into
4692  UTF-8 when running in 16/32-bit mode. */
4693 
4694  c = 0;
4695  while (i++ < 2 && isxdigit(*p))
4696  {
4697  c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4698  p++;
4699  }
4700 #if !defined NOUTF && defined SUPPORT_PCRE8
4701  if (use_utf && (pcre_mode == PCRE8_MODE))
4702  {
4703  *q8++ = c;
4704  continue;
4705  }
4706 #endif
4707  break;
4708 
4709  case 0: /* \ followed by EOF allows for an empty line */
4710  p--;
4711  continue;
4712 
4713  case '>':
4714  if (*p == '-')
4715  {
4716  start_offset_sign = -1;
4717  p++;
4718  }
4719  while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4720  start_offset *= start_offset_sign;
4721  continue;
4722 
4723  case 'A': /* Option setting */
4724  options |= PCRE_ANCHORED;
4725  continue;
4726 
4727  case 'B':
4728  options |= PCRE_NOTBOL;
4729  continue;
4730 
4731  case 'C':
4732  if (isdigit(*p)) /* Set copy string */
4733  {
4734  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4735  copystrings |= 1 << n;
4736  }
4737  else if (isalnum(*p))
4738  {
4739  READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4740  }
4741  else if (*p == '+')
4742  {
4743  callout_extra = 1;
4744  p++;
4745  }
4746  else if (*p == '-')
4747  {
4748  SET_PCRE_CALLOUT(NULL);
4749  p++;
4750  }
4751  else if (*p == '!')
4752  {
4753  callout_fail_id = 0;
4754  p++;
4755  while(isdigit(*p))
4756  callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4757  callout_fail_count = 0;
4758  if (*p == '!')
4759  {
4760  p++;
4761  while(isdigit(*p))
4762  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4763  }
4764  }
4765  else if (*p == '*')
4766  {
4767  int sign = 1;
4768  callout_data = 0;
4769  if (*(++p) == '-') { sign = -1; p++; }
4770  while(isdigit(*p))
4771  callout_data = callout_data * 10 + *p++ - '0';
4772  callout_data *= sign;
4773  callout_data_set = 1;
4774  }
4775  continue;
4776 
4777 #if !defined NODFA
4778  case 'D':
4779 #if !defined NOPOSIX
4780  if (posix || do_posix)
4781  printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4782  else
4783 #endif
4784  use_dfa = 1;
4785  continue;
4786 #endif
4787 
4788 #if !defined NODFA
4789  case 'F':
4790  options |= PCRE_DFA_SHORTEST;
4791  continue;
4792 #endif
4793 
4794  case 'G':
4795  if (isdigit(*p))
4796  {
4797  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4798  getstrings |= 1 << n;
4799  }
4800  else if (isalnum(*p))
4801  {
4802  READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4803  }
4804  continue;
4805 
4806  case 'J':
4807  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4808  if (extra != NULL
4809  && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4810  && extra->executable_jit != NULL)
4811  {
4812  if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4813  jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4814  PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4815  }
4816  continue;
4817 
4818  case 'L':
4819  getlist = 1;
4820  continue;
4821 
4822  case 'M':
4823  find_match_limit = 1;
4824  continue;
4825 
4826  case 'N':
4827  if ((options & PCRE_NOTEMPTY) != 0)
4828  options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4829  else
4830  options |= PCRE_NOTEMPTY;
4831  continue;
4832 
4833  case 'O':
4834  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4835  if (n > size_offsets_max)
4836  {
4837  size_offsets_max = n;
4838  free(offsets);
4839  use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4840  if (offsets == NULL)
4841  {
4842  printf("** Failed to get %d bytes of memory for offsets vector\n",
4843  (int)(size_offsets_max * sizeof(int)));
4844  yield = 1;
4845  goto EXIT;
4846  }
4847  }
4848  use_size_offsets = n;
4849  if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4850  else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4851  continue;
4852 
4853  case 'P':
4854  options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4855  PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4856  continue;
4857 
4858  case 'Q':
4859  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4860  if (extra == NULL)
4861  {
4862  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4863  extra->flags = 0;
4864  }
4865  extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4866  extra->match_limit_recursion = n;
4867  continue;
4868 
4869  case 'q':
4870  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4871  if (extra == NULL)
4872  {
4873  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4874  extra->flags = 0;
4875  }
4876  extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4877  extra->match_limit = n;
4878  continue;
4879 
4880 #if !defined NODFA
4881  case 'R':
4882  options |= PCRE_DFA_RESTART;
4883  continue;
4884 #endif
4885 
4886  case 'S':
4887  show_malloc = 1;
4888  continue;
4889 
4890  case 'Y':
4891  options |= PCRE_NO_START_OPTIMIZE;
4892  continue;
4893 
4894  case 'Z':
4895  options |= PCRE_NOTEOL;
4896  continue;
4897 
4898  case '?':
4899  options |= PCRE_NO_UTF8_CHECK;
4900  continue;
4901 
4902  case '<':
4903  {
4904  int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4905  if (x == 0) goto NEXT_DATA;
4906  options |= x;
4907  while (*p++ != '>');
4908  }
4909  continue;
4910  }
4911 
4912  /* We now have a character value in c that may be greater than 255.
4913  In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4914  than 127 in UTF mode must have come from \x{...} or octal constructs
4915  because values from \x.. get this far only in non-UTF mode. */
4916 
4917 #ifdef SUPPORT_PCRE8
4918  if (pcre_mode == PCRE8_MODE)
4919  {
4920 #ifndef NOUTF
4921  if (use_utf)
4922  {
4923  if (c > 0x7fffffff)
4924  {
4925  fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4926  "and so cannot be converted to UTF-8\n", c);
4927  goto NEXT_DATA;
4928  }
4929  q8 += ord2utf8(c, q8);
4930  }
4931  else
4932 #endif
4933  {
4934  if (c > 0xffu)
4935  {
4936  fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4937  "and UTF-8 mode is not enabled.\n", c);
4938  fprintf(outfile, "** Truncation will probably give the wrong "
4939  "result.\n");
4940  }
4941  *q8++ = c;
4942  }
4943  }
4944 #endif
4945 #ifdef SUPPORT_PCRE16
4946  if (pcre_mode == PCRE16_MODE)
4947  {
4948 #ifndef NOUTF
4949  if (use_utf)
4950  {
4951  if (c > 0x10ffffu)
4952  {
4953  fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4954  "0x10ffff and so cannot be converted to UTF-16\n", c);
4955  goto NEXT_DATA;
4956  }
4957  else if (c >= 0x10000u)
4958  {
4959  c-= 0x10000u;
4960  *q16++ = 0xD800 | (c >> 10);
4961  *q16++ = 0xDC00 | (c & 0x3ff);
4962  }
4963  else
4964  *q16++ = c;
4965  }
4966  else
4967 #endif
4968  {
4969  if (c > 0xffffu)
4970  {
4971  fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4972  "and UTF-16 mode is not enabled.\n", c);
4973  fprintf(outfile, "** Truncation will probably give the wrong "
4974  "result.\n");
4975  }
4976 
4977  *q16++ = c;
4978  }
4979  }
4980 #endif
4981 #ifdef SUPPORT_PCRE32
4982  if (pcre_mode == PCRE32_MODE)
4983  {
4984  *q32++ = c;
4985  }
4986 #endif
4987 
4988  }
4989 
4990  /* Reached end of subject string */
4991 
4992 #ifdef SUPPORT_PCRE8
4993  if (pcre_mode == PCRE8_MODE)
4994  {
4995  *q8 = 0;
4996  len = (int)(q8 - (pcre_uint8 *)dbuffer);
4997  }
4998 #endif
4999 #ifdef SUPPORT_PCRE16
5000  if (pcre_mode == PCRE16_MODE)
5001  {
5002  *q16 = 0;
5003  len = (int)(q16 - (pcre_uint16 *)dbuffer);
5004  }
5005 #endif
5006 #ifdef SUPPORT_PCRE32
5007  if (pcre_mode == PCRE32_MODE)
5008  {
5009  *q32 = 0;
5010  len = (int)(q32 - (pcre_uint32 *)dbuffer);
5011  }
5012 #endif
5013 
5014  /* If we're compiling with explicit valgrind support, mark the data from after
5015  its end to the end of the buffer as unaddressable, so that a read over the end
5016  of the buffer will be seen by valgrind, even if it doesn't cause a crash.
5017  If we're not building with valgrind support, at least move the data to the end
5018  of the buffer so that it might at least cause a crash.
5019  If we are using the POSIX interface, we must include the terminating zero. */
5020 
5021  bptr = dbuffer;
5022 
5023 #if !defined NOPOSIX
5024  if (posix || do_posix)
5025  {
5026 #ifdef SUPPORT_VALGRIND
5027  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
5028 #else
5029  memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5030  bptr += dbuffer_size - len - 1;
5031 #endif
5032  }
5033  else
5034 #endif
5035  {
5036 #ifdef SUPPORT_VALGRIND
5037  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
5038 #else
5039  bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5040 #endif
5041  }
5042 
5043  if ((all_use_dfa || use_dfa) && find_match_limit)
5044  {
5045  printf("**Match limit not relevant for DFA matching: ignored\n");
5046  find_match_limit = 0;
5047  }
5048 
5049  /* Handle matching via the POSIX interface, which does not
5050  support timing or playing with the match limit or callout data. */
5051 
5052 #if !defined NOPOSIX
5053  if (posix || do_posix)
5054  {
5055  int rc;
5056  int eflags = 0;
5057  regmatch_t *pmatch = NULL;
5058  if (use_size_offsets > 0)
5059  pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5060  if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5061  if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5062  if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5063 
5064  rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5065 
5066  if (rc != 0)
5067  {
5068  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5069  fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5070  }
5071  else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5072  {
5073  fprintf(outfile, "Matched with REG_NOSUB\n");
5074  }
5075  else
5076  {
5077  size_t i;
5078  for (i = 0; i < (size_t)use_size_offsets; i++)
5079  {
5080  if (pmatch[i].rm_so >= 0)
5081  {
5082  fprintf(outfile, "%2d: ", (int)i);
5083  PCHARSV(dbuffer, pmatch[i].rm_so,
5084  pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5085  fprintf(outfile, "\n");
5086  if (do_showcaprest || (i == 0 && do_showrest))
5087  {
5088  fprintf(outfile, "%2d+ ", (int)i);
5089  PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5090  outfile);
5091  fprintf(outfile, "\n");
5092  }
5093  }
5094  }
5095  }
5096  free(pmatch);
5097  goto NEXT_DATA;
5098  }
5099 
5100 #endif /* !defined NOPOSIX */
5101 
5102  /* Handle matching via the native interface - repeats for /g and /G */
5103 
5104  /* Ensure that there is a JIT callback if we want to verify that JIT was
5105  actually used. If jit_stack == NULL, no stack has yet been assigned. */
5106 
5107  if (verify_jit && jit_stack == NULL && extra != NULL)
5108  { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5109 
5110  for (;; gmatched++) /* Loop for /g or /G */
5111  {
5112  markptr = NULL;
5113  jit_was_used = FALSE;
5114 
5115  if (timeitm > 0)
5116  {
5117  register int i;
5118  clock_t time_taken;
5119  clock_t start_time = clock();
5120 
5121 #if !defined NODFA
5122  if (all_use_dfa || use_dfa)
5123  {
5124  if ((options & PCRE_DFA_RESTART) != 0)
5125  {
5126  fprintf(outfile, "Timing DFA restarts is not supported\n");
5127  break;
5128  }
5129  if (dfa_workspace == NULL)
5130  dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5131  for (i = 0; i < timeitm; i++)
5132  {
5133  PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5134  (options | g_notempty), use_offsets, use_size_offsets,
5135  dfa_workspace, DFA_WS_DIMENSION);
5136  }
5137  }
5138  else
5139 #endif
5140 
5141  for (i = 0; i < timeitm; i++)
5142  {
5143  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5144  (options | g_notempty), use_offsets, use_size_offsets);
5145  }
5146  total_match_time += (time_taken = clock() - start_time);
5147  fprintf(outfile, "Execute time %.4f milliseconds\n",
5148  (((double)time_taken * 1000.0) / (double)timeitm) /
5149  (double)CLOCKS_PER_SEC);
5150  }
5151 
5152  /* If find_match_limit is set, we want to do repeated matches with
5153  varying limits in order to find the minimum value for the match limit and
5154  for the recursion limit. The match limits are relevant only to the normal
5155  running of pcre_exec(), so disable the JIT optimization. This makes it
5156  possible to run the same set of tests with and without JIT externally
5157  requested. */
5158 
5159  if (find_match_limit)
5160  {
5161  if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5162  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5163  extra->flags = 0;
5164 
5165  (void)check_match_limit(re, extra, bptr, len, start_offset,
5166  options|g_notempty, use_offsets, use_size_offsets,
5167  PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5168  PCRE_ERROR_MATCHLIMIT, "match()");
5169 
5170  count = check_match_limit(re, extra, bptr, len, start_offset,
5171  options|g_notempty, use_offsets, use_size_offsets,
5172  PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5173  PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5174  }
5175 
5176  /* If callout_data is set, use the interface with additional data */
5177 
5178  else if (callout_data_set)
5179  {
5180  if (extra == NULL)
5181  {
5182  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5183  extra->flags = 0;
5184  }
5185  extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5186  extra->callout_data = &callout_data;
5187  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5188  options | g_notempty, use_offsets, use_size_offsets);
5189  extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5190  }
5191 
5192  /* The normal case is just to do the match once, with the default
5193  value of match_limit. */
5194 
5195 #if !defined NODFA
5196  else if (all_use_dfa || use_dfa)
5197  {
5198  if (dfa_workspace == NULL)
5199  dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5200  if (dfa_matched++ == 0)
5201  dfa_workspace[0] = -1; /* To catch bad restart */
5202  PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5203  (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5205  if (count == 0)
5206  {
5207  fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5208  count = use_size_offsets/2;
5209  }
5210  }
5211 #endif
5212 
5213  else
5214  {
5215  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5216  options | g_notempty, use_offsets, use_size_offsets);
5217  if (count == 0)
5218  {
5219  fprintf(outfile, "Matched, but too many substrings\n");
5220  /* 2 is a special case; match can be returned */
5221  count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5222  }
5223  }
5224 
5225  /* Matched */
5226 
5227  if (count >= 0)
5228  {
5229  int i, maxcount;
5230  void *cnptr, *gnptr;
5231 
5232 #if !defined NODFA
5233  if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5234 #endif
5235  /* 2 is a special case; match can be returned */
5236  maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5237 
5238  /* This is a check against a lunatic return value. */
5239 
5240  if (count > maxcount)
5241  {
5242  fprintf(outfile,
5243  "** PCRE error: returned count %d is too big for offset size %d\n",
5244  count, use_size_offsets);
5245  count = use_size_offsets/3;
5246  if (do_g || do_G)
5247  {
5248  fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5249  do_g = do_G = FALSE; /* Break g/G loop */
5250  }
5251  }
5252 
5253  /* do_allcaps requests showing of all captures in the pattern, to check
5254  unset ones at the end. */
5255 
5256  if (do_allcaps)
5257  {
5258  if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5259  goto SKIP_DATA;
5260  count++; /* Allow for full match */
5261  if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5262  }
5263 
5264  /* Output the captured substrings. Note that, for the matched string,
5265  the use of \K in an assertion can make the start later than the end. */
5266 
5267  for (i = 0; i < count * 2; i += 2)
5268  {
5269  if (use_offsets[i] < 0)
5270  {
5271  if (use_offsets[i] != -1)
5272  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5273  use_offsets[i], i);
5274  if (use_offsets[i+1] != -1)
5275  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5276  use_offsets[i+1], i+1);
5277  fprintf(outfile, "%2d: <unset>\n", i/2);
5278  }
5279  else
5280  {
5281  int start = use_offsets[i];
5282  int end = use_offsets[i+1];
5283 
5284  if (start > end)
5285  {
5286  start = use_offsets[i+1];
5287  end = use_offsets[i];
5288  fprintf(outfile, "Start of matched string is beyond its end - "
5289  "displaying from end to start.\n");
5290  }
5291 
5292  fprintf(outfile, "%2d: ", i/2);
5293  PCHARSV(bptr, start, end - start, outfile);
5294  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5295  fprintf(outfile, "\n");
5296 
5297  /* Note: don't use the start/end variables here because we want to
5298  show the text from what is reported as the end. */
5299 
5300  if (do_showcaprest || (i == 0 && do_showrest))
5301  {
5302  fprintf(outfile, "%2d+ ", i/2);
5303  PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5304  outfile);
5305  fprintf(outfile, "\n");
5306  }
5307  }
5308  }
5309 
5310  if (markptr != NULL)
5311  {
5312  fprintf(outfile, "MK: ");
5313  PCHARSV(markptr, 0, -1, outfile);
5314  fprintf(outfile, "\n");
5315  }
5316 
5317  for (i = 0; i < 32; i++)
5318  {
5319  if ((copystrings & (1 << i)) != 0)
5320  {
5321  int rc;
5322  char copybuffer[256];
5323  PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5324  copybuffer, sizeof(copybuffer));
5325  if (rc < 0)
5326  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5327  else
5328  {
5329  fprintf(outfile, "%2dC ", i);
5330  PCHARSV(copybuffer, 0, rc, outfile);
5331  fprintf(outfile, " (%d)\n", rc);
5332  }
5333  }
5334  }
5335 
5336  cnptr = copynames;
5337  for (;;)
5338  {
5339  int rc;
5340  char copybuffer[256];
5341 
5342 #ifdef SUPPORT_PCRE32
5343  if (pcre_mode == PCRE32_MODE)
5344  {
5345  if (*(pcre_uint32 *)cnptr == 0) break;
5346  }
5347 #endif
5348 #ifdef SUPPORT_PCRE16
5349  if (pcre_mode == PCRE16_MODE)
5350  {
5351  if (*(pcre_uint16 *)cnptr == 0) break;
5352  }
5353 #endif
5354 #ifdef SUPPORT_PCRE8
5355  if (pcre_mode == PCRE8_MODE)
5356  {
5357  if (*(pcre_uint8 *)cnptr == 0) break;
5358  }
5359 #endif
5360 
5361  PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5362  cnptr, copybuffer, sizeof(copybuffer));
5363 
5364  if (rc < 0)
5365  {
5366  fprintf(outfile, "copy substring ");
5367  PCHARSV(cnptr, 0, -1, outfile);
5368  fprintf(outfile, " failed %d\n", rc);
5369  }
5370  else
5371  {
5372  fprintf(outfile, " C ");
5373  PCHARSV(copybuffer, 0, rc, outfile);
5374  fprintf(outfile, " (%d) ", rc);
5375  PCHARSV(cnptr, 0, -1, outfile);
5376  putc('\n', outfile);
5377  }
5378 
5379  cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5380  }
5381 
5382  for (i = 0; i < 32; i++)
5383  {
5384  if ((getstrings & (1 << i)) != 0)
5385  {
5386  int rc;
5387  const char *substring;
5388  PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5389  if (rc < 0)
5390  fprintf(outfile, "get substring %d failed %d\n", i, rc);
5391  else
5392  {
5393  fprintf(outfile, "%2dG ", i);
5394  PCHARSV(substring, 0, rc, outfile);
5395  fprintf(outfile, " (%d)\n", rc);
5396  PCRE_FREE_SUBSTRING(substring);
5397  }
5398  }
5399  }
5400 
5401  gnptr = getnames;
5402  for (;;)
5403  {
5404  int rc;
5405  const char *substring;
5406 
5407 #ifdef SUPPORT_PCRE32
5408  if (pcre_mode == PCRE32_MODE)
5409  {
5410  if (*(pcre_uint32 *)gnptr == 0) break;
5411  }
5412 #endif
5413 #ifdef SUPPORT_PCRE16
5414  if (pcre_mode == PCRE16_MODE)
5415  {
5416  if (*(pcre_uint16 *)gnptr == 0) break;
5417  }
5418 #endif
5419 #ifdef SUPPORT_PCRE8
5420  if (pcre_mode == PCRE8_MODE)
5421  {
5422  if (*(pcre_uint8 *)gnptr == 0) break;
5423  }
5424 #endif
5425 
5426  PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5427  gnptr, &substring);
5428  if (rc < 0)
5429  {
5430  fprintf(outfile, "get substring ");
5431  PCHARSV(gnptr, 0, -1, outfile);
5432  fprintf(outfile, " failed %d\n", rc);
5433  }
5434  else
5435  {
5436  fprintf(outfile, " G ");
5437  PCHARSV(substring, 0, rc, outfile);
5438  fprintf(outfile, " (%d) ", rc);
5439  PCHARSV(gnptr, 0, -1, outfile);
5440  PCRE_FREE_SUBSTRING(substring);
5441  putc('\n', outfile);
5442  }
5443 
5444  gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5445  }
5446 
5447  if (getlist)
5448  {
5449  int rc;
5450  const char **stringlist;
5451  PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5452  if (rc < 0)
5453  fprintf(outfile, "get substring list failed %d\n", rc);
5454  else
5455  {
5456  for (i = 0; i < count; i++)
5457  {
5458  fprintf(outfile, "%2dL ", i);
5459  PCHARSV(stringlist[i], 0, -1, outfile);
5460  putc('\n', outfile);
5461  }
5462  if (stringlist[i] != NULL)
5463  fprintf(outfile, "string list not terminated by NULL\n");
5464  PCRE_FREE_SUBSTRING_LIST(stringlist);
5465  }
5466  }
5467  }
5468 
5469  /* There was a partial match. If the bumpalong point is not the same as
5470  the first inspected character, show the offset explicitly. */
5471 
5472  else if (count == PCRE_ERROR_PARTIAL)
5473  {
5474  fprintf(outfile, "Partial match");
5475  if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5476  fprintf(outfile, " at offset %d", use_offsets[2]);
5477  if (markptr != NULL)
5478  {
5479  fprintf(outfile, ", mark=");
5480  PCHARSV(markptr, 0, -1, outfile);
5481  }
5482  if (use_size_offsets > 1)
5483  {
5484  fprintf(outfile, ": ");
5485  PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5486  outfile);
5487  }
5488  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5489  fprintf(outfile, "\n");
5490  break; /* Out of the /g loop */
5491  }
5492 
5493  /* Failed to match. If this is a /g or /G loop and we previously set
5494  g_notempty after a null match, this is not necessarily the end. We want
5495  to advance the start offset, and continue. We won't be at the end of the
5496  string - that was checked before setting g_notempty.
5497 
5498  Complication arises in the case when the newline convention is "any",
5499  "crlf", or "anycrlf". If the previous match was at the end of a line
5500  terminated by CRLF, an advance of one character just passes the \r,
5501  whereas we should prefer the longer newline sequence, as does the code in
5502  pcre_exec(). Fudge the offset value to achieve this. We check for a
5503  newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5504  find the default.
5505 
5506  Otherwise, in the case of UTF-8 matching, the advance must be one
5507  character, not one byte. */
5508 
5509  else
5510  {
5511  if (g_notempty != 0)
5512  {
5513  int onechar = 1;
5514  unsigned int obits = REAL_PCRE_OPTIONS(re);
5515  use_offsets[0] = start_offset;
5516  if ((obits & PCRE_NEWLINE_BITS) == 0)
5517  {
5518  int d;
5519  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5520  /* Note that these values are always the ASCII ones, even in
5521  EBCDIC environments. CR = 13, NL = 10. */
5522  obits = (d == 13)? PCRE_NEWLINE_CR :
5523  (d == 10)? PCRE_NEWLINE_LF :
5524  (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5525  (d == -2)? PCRE_NEWLINE_ANYCRLF :
5526  (d == -1)? PCRE_NEWLINE_ANY : 0;
5527  }
5528  if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5529  (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5530  (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5531  &&
5532  start_offset < len - 1 && (
5533 #ifdef SUPPORT_PCRE8
5534  (pcre_mode == PCRE8_MODE &&
5535  bptr[start_offset] == '\r' &&
5536  bptr[start_offset + 1] == '\n') ||
5537 #endif
5538 #ifdef SUPPORT_PCRE16
5539  (pcre_mode == PCRE16_MODE &&
5540  ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5541  ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5542 #endif
5543 #ifdef SUPPORT_PCRE32
5544  (pcre_mode == PCRE32_MODE &&
5545  ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5546  ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5547 #endif
5548  0))
5549  onechar++;
5550  else if (use_utf)
5551  {
5552  while (start_offset + onechar < len)
5553  {
5554  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5555  onechar++;
5556  }
5557  }
5558  use_offsets[1] = start_offset + onechar;
5559  }
5560  else
5561  {
5562  switch(count)
5563  {
5564  case PCRE_ERROR_NOMATCH:
5565  if (gmatched == 0)
5566  {
5567  if (markptr == NULL)
5568  {
5569  fprintf(outfile, "No match");
5570  }
5571  else
5572  {
5573  fprintf(outfile, "No match, mark = ");
5574  PCHARSV(markptr, 0, -1, outfile);
5575  }
5576  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5577  putc('\n', outfile);
5578  }
5579  break;
5580 
5581  case PCRE_ERROR_BADUTF8:
5582  case PCRE_ERROR_SHORTUTF8:
5583  fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5584  (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5585  8 * CHAR_SIZE);
5586  if (use_size_offsets >= 2)
5587  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5588  use_offsets[1]);
5589  fprintf(outfile, "\n");
5590  break;
5591 
5592  case PCRE_ERROR_BADUTF8_OFFSET:
5593  fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5594  8 * CHAR_SIZE);
5595  break;
5596 
5597  default:
5598  if (count < 0 &&
5599  (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5600  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5601  else
5602  fprintf(outfile, "Error %d (Unexpected value)\n", count);
5603  break;
5604  }
5605 
5606  break; /* Out of the /g loop */
5607  }
5608  }
5609 
5610  /* If not /g or /G we are done */
5611 
5612  if (!do_g && !do_G) break;
5613 
5614  if (use_offsets == NULL)
5615  {
5616  fprintf(outfile, "Cannot do global matching without an ovector\n");
5617  break;
5618  }
5619 
5620  if (use_size_offsets < 2)
5621  {
5622  fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
5623  break;
5624  }
5625 
5626  /* If we have matched an empty string, first check to see if we are at
5627  the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5628  Perl's /g options does. This turns out to be rather cunning. First we set
5629  PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5630  same point. If this fails (picked up above) we advance to the next
5631  character. */
5632 
5633  g_notempty = 0;
5634 
5635  if (use_offsets[0] == use_offsets[1])
5636  {
5637  if (use_offsets[0] == len) break;
5638  g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5639  }
5640 
5641  /* For /g, update the start offset, leaving the rest alone. There is a
5642  tricky case when \K is used in a positive lookbehind assertion. This can
5643  cause the end of the match to be less than or equal to the start offset.
5644  In this case we restart at one past the start offset. This may return the
5645  same match if the original start offset was bumped along during the
5646  match, but eventually the new start offset will hit the actual start
5647  offset. (In PCRE2 the true start offset is available, and this can be
5648  done better. It is not worth doing more than making sure we do not loop
5649  at this stage in the life of PCRE1.) */
5650 
5651  if (do_g)
5652  {
5653  if (g_notempty == 0 && use_offsets[1] <= start_offset)
5654  {
5655  if (start_offset >= len) break; /* End of subject */
5656  start_offset++;
5657  if (use_utf)
5658  {
5659  while (start_offset < len)
5660  {
5661  if ((bptr[start_offset] & 0xc0) != 0x80) break;
5662  start_offset++;
5663  }
5664  }
5665  }
5666  else start_offset = use_offsets[1];
5667  }
5668 
5669  /* For /G, update the pointer and length */
5670 
5671  else
5672  {
5673  bptr += use_offsets[1] * CHAR_SIZE;
5674  len -= use_offsets[1];
5675  }
5676  } /* End of loop for /g and /G */
5677 
5678  NEXT_DATA: continue;
5679  } /* End of loop for data lines */
5680 
5681  CONTINUE:
5682 
5683 #if !defined NOPOSIX
5684  if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg);
5685 #endif
5686 
5687  if (re != NULL) new_free(re);
5688  if (extra != NULL)
5689  {
5690  PCRE_FREE_STUDY(extra);
5691  }
5692  if (locale_set)
5693  {
5694  new_free((void *)tables);
5695  setlocale(LC_CTYPE, "C");
5696  locale_set = 0;
5697  }
5698  if (jit_stack != NULL)
5699  {
5700  PCRE_JIT_STACK_FREE(jit_stack);
5701  jit_stack = NULL;
5702  }
5703  }
5704 
5705 if (infile == stdin) fprintf(outfile, "\n");
5706 
5707 if (showtotaltimes)
5708  {
5709  fprintf(outfile, "--------------------------------------\n");
5710  if (timeit > 0)
5711  {
5712  fprintf(outfile, "Total compile time %.4f milliseconds\n",
5713  (((double)total_compile_time * 1000.0) / (double)timeit) /
5714  (double)CLOCKS_PER_SEC);
5715  fprintf(outfile, "Total study time %.4f milliseconds\n",
5716  (((double)total_study_time * 1000.0) / (double)timeit) /
5717  (double)CLOCKS_PER_SEC);
5718  }
5719  fprintf(outfile, "Total execute time %.4f milliseconds\n",
5720  (((double)total_match_time * 1000.0) / (double)timeitm) /
5721  (double)CLOCKS_PER_SEC);
5722  }
5723 
5724 EXIT:
5725 
5726 if (infile != NULL && infile != stdin) fclose(infile);
5727 if (outfile != NULL && outfile != stdout) fclose(outfile);
5728 
5729 free(buffer);
5730 free(dbuffer);
5731 free(pbuffer);
5732 free(offsets);
5733 
5734 #ifdef SUPPORT_PCRE16
5735 if (buffer16 != NULL) free(buffer16);
5736 #endif
5737 #ifdef SUPPORT_PCRE32
5738 if (buffer32 != NULL) free(buffer32);
5739 #endif
5740 
5741 #if !defined NODFA
5742 if (dfa_workspace != NULL)
5743  free(dfa_workspace);
5744 #endif
5745 
5746 #if defined(__VMS)
5747  yield = SS$_NORMAL; /* Return values via DCL symbols */
5748 #endif
5749 
5750 return yield;
5751 }
5752 
5753 /* End of pcretest.c */
5754 
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what, void *where)
Definition: pcre_fullinfo.c:70
#define memmove(a, b, c)
pcre_uint16 name_table_offset
#define INPUT_MODE
Definition: pcretest.c:119
#define REAL_PCRE_SIZE(re)
unsigned char pcre_uint8
pcre_uint16 req_char
pcre_uint32 limit_match
pcre_uint32 limit_match
pcre_uint16 name_entry_size
pcre_uint16 name_count
pcre_uint32 size
#define CHAR_ESC
pcre_uint32 size
pcre_uint16 top_bracket
pcre_uint16 top_backref
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION regfree(regex_t *preg)
Definition: pcreposix.c:246
#define CHAR_BEL
#define REG_UNGREEDY
Definition: pcreposix.h:64
pcre_uint16 max_lookbehind
#define OUTPUT_MODE
Definition: pcretest.c:120
int BOOL
#define LOOPREPEAT
Definition: pcretest.c:1029
pcre_uint16 ref_count
#define PCRE_FCH_CASELESS
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: pcreposix.c:211
pcre_uint32 flags
#define CHAR_CR
int sys_nerr
pcre_uint16 max_lookbehind
pcre_uint32 limit_recursion
#define CHAR_LF
#define REG_NOTEMPTY
Definition: pcreposix.h:63
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION regcomp(regex_t *preg, const char *pattern, int cflags)
Definition: pcreposix.c:269
#define PCRE_MODE_MASK
pcre_uint32 flags
#define REAL_PCRE_MAGIC(re)
#define REVERSED_MAGIC_NUMBER
#define REG_NOTBOL
Definition: pcreposix.h:57
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
Definition: pcreposix.c:323
#define PCRE_NEWLINE_BITS
const int utf8_table3[]
Definition: pcregrep.c:413
pcre_uint32 first_char
pcre_uint32 options
int main(int argc, char **argv)
Definition: pcretest.c:2983
#define CLOCKS_PER_SEC
Definition: pcretest.c:1019
#define REG_ICASE
Definition: pcreposix.h:55
pcre_uint16 name_table_offset
char * strerror(int n)
Definition: pcretest.c:1509
pcre_uint16 name_count
#define XCL_MAP
pcre_uint32 size
#define REAL_PCRE_OPTIONS(re)
#define OP_LENGTHS
#define REG_UCP
Definition: pcreposix.h:65
#define REG_NEWLINE
Definition: pcreposix.h:56
struct real_pcre8_or_16 real_pcre
pcre_uint32 minlength
#define PCRE_MODE32
pcre_uint32 flags
pcre_uint32 magic_number
#define PCRE_MODE8
char * sys_errlist[]
#define PCRE_RCH_CASELESS
#define TRUE
PCRE_EXP_DEFN const char *PCRE_CALL_CONVENTION pcre_version(void)
Definition: pcre_version.c:84
pcre_uint16 name_entry_size
#define PRINTOK(c)
Definition: pcretest.c:180
pcre_uint16 ref_count
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
#define PCRE_MODE16
void * re_pcre
Definition: pcreposix.h:98
#define REAL_PCRE_FLAGS(re)
#define REG_NOTEOL
Definition: pcreposix.h:58
#define MAGIC_NUMBER
pcre_uint16 top_bracket
pcre_uint16 top_backref
pcre_uint32 limit_recursion
#define REG_UTF8
Definition: pcreposix.h:61
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_config(int what, void *where)
Definition: pcre_config.c:70
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_stringnumber(const pcre *code, const char *stringname)
Definition: pcre_get.c:70
pcre_uint32 options
pcre_uint16 first_char
#define DFA_WS_DIMENSION
Definition: pcretest.c:1024
#define PCRE_STUDY_ALLJIT
Definition: pcretest.c:1127
#define REG_DOTALL
Definition: pcreposix.h:59
#define FALSE
#define REG_NOSUB
Definition: pcreposix.h:60
pcre_uint32 req_char
pcre_uint32 magic_number