pcre  8.38
About: The PCRE library implements Perl compatible regular expression pattern matching.
  Fossies Dox: pcre-8.38.tar.gz  ("inofficial" and yet experimental doxygen-generated source code documentation)  

pcre_jit_test.c
Go to the documentation of this file.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Main Library written by Philip Hazel
9  Copyright (c) 1997-2012 University of Cambridge
10 
11  This JIT compiler regression test program was written by Zoltan Herczeg
12  Copyright (c) 2010-2012
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18  * Redistributions of source code must retain the above copyright notice,
19  this list of conditions and the following disclaimer.
20 
21  * Redistributions in binary form must reproduce the above copyright
22  notice, this list of conditions and the following disclaimer in the
23  documentation and/or other materials provided with the distribution.
24 
25  * Neither the name of the University of Cambridge nor the names of its
26  contributors may be used to endorse or promote products derived from
27  this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50 
51 
52 #include "pcre_internal.h"
53 
54 /*
55  Letter characters:
56  \xe6\x92\xad = 0x64ad = 25773 (kanji)
57  Non-letter characters:
58  \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
59  \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
60  \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
61  \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
62  Newlines:
63  \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
64  \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
65  Othercase pairs:
66  \xc3\xa9 = 0xe9 = 233 (e')
67  \xc3\x89 = 0xc9 = 201 (E')
68  \xc3\xa1 = 0xe1 = 225 (a')
69  \xc3\x81 = 0xc1 = 193 (A')
70  \x53 = 0x53 = S
71  \x73 = 0x73 = s
72  \xc5\xbf = 0x17f = 383 (long S)
73  \xc8\xba = 0x23a = 570
74  \xe2\xb1\xa5 = 0x2c65 = 11365
75  \xe1\xbd\xb8 = 0x1f78 = 8056
76  \xe1\xbf\xb8 = 0x1ff8 = 8184
77  \xf0\x90\x90\x80 = 0x10400 = 66560
78  \xf0\x90\x90\xa8 = 0x10428 = 66600
79  \xc7\x84 = 0x1c4 = 452
80  \xc7\x85 = 0x1c5 = 453
81  \xc7\x86 = 0x1c6 = 454
82  Caseless sets:
83  ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
84  ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
85  ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
86 
87  Mark property:
88  \xcc\x8d = 0x30d = 781
89  Special:
90  \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
91  \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
92  \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
93  \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
94  \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
95  \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
96 */
97 
/* Runs the regression tests; defined elsewhere in this file. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Asks the library whether JIT support is available, using the config
 * function of the first enabled character width (8, then 16, then 32 bit).
 * Without JIT a message is printed and 1 is returned; otherwise the exit
 * status is whatever regression_tests() returns.
 */
int main(void)
{
	/* Stays 0 if no config call is compiled in or the call leaves it untouched. */
	int jit_available = 0;

#if defined SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined SUPPORT_PCRE32
	pcre32_config(PCRE_CONFIG_JIT, &jit_available);
#endif

	if (jit_available)
		return regression_tests();

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
116 
117 /* --------------------------------------------------------------------------------------- */
118 
119 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
120 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
121 #endif
122 
/*
 * Compile-option presets used in the first column of the test table.
 * Letters in a preset name: C = caseless, M = multiline, U = UTF-8,
 * A = ANYCRLF newline convention, P = UCP.
 */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)

/*
 * Values for the second column of the test table: a small number in the
 * low 16 bits (the entries under "Start offset." use 1..4 there) OR-ed
 * with the F_* flag bits below.
 */
#define OFFSET_MASK	0xffff

/* NOTE(review): F_NO16 and F_NO32 share one bit value. Kept exactly as
   found; confirm against the code that consumes these flags. */
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NO32		(1 << 17)
/* Set on entries whose pattern does not match the input (e.g. "^ab" vs "aab"). */
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
/* Set on entries whose pattern uses \p, \P or \X. */
#define F_PROPERTY	(1 << 21)
#define F_STUDY		(1 << 22)
140 
/*
 * One row of the regression test table.
 *
 * The table initializers supply four values per row, in this order:
 *   flags        - PCRE compile options (e.g. the MUA / CMA presets).
 *   start_offset - small start offset in the low bits, OR-ed with the
 *                  F_* flag bits (e.g. "1 | F_NOMATCH").
 *   pattern      - the regular expression, as a C string.
 *   input        - the subject string the pattern is run against.
 */
struct regression_test_case {
	int flags;
	int start_offset;
	const char *pattern;
	const char *input;
};
147 
148 static struct regression_test_case regression_test_cases[] = {
149  /* Constant strings. */
150  { MUA, 0, "AbC", "AbAbC" },
151  { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
152  { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
153  { MA, 0, "[^a]", "aAbB" },
154  { CMA, 0, "[^m]", "mMnN" },
155  { MA, 0, "a[^b][^#]", "abacd" },
156  { CMA, 0, "A[^B][^E]", "abacd" },
157  { CMUA, 0, "[^x][^#]", "XxBll" },
158  { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
159  { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
160  { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
161  { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
162  { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
163  { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
164  { MUA, 0, "[axd]", "sAXd" },
165  { CMUA, 0, "[axd]", "sAXd" },
166  { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
167  { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
168  { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
169  { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
170  { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
171  { MUA, 0, "[^a]", "\xc2\x80[]" },
172  { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
173  { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
174  { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
175  { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
176  { PCRE_CASELESS, 0, "a1", "Aa1" },
177  { MA, 0, "\\Ca", "cda" },
178  { CMA, 0, "\\Ca", "CDA" },
179  { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
180  { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
181  { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
182  { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
183  { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
184  { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
185  { MA, 0, "[3-57-9]", "5" },
186 
187  /* Assertions. */
188  { MUA, 0, "\\b[^A]", "A_B#" },
189  { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
190  { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
191  { MAP, 0, "\\B", "_\xa1" },
192  { MAP, 0, "\\b_\\b[,A]\\B", "_," },
193  { MUAP, 0, "\\b", "\xe6\x92\xad!" },
194  { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
195  { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
196  { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
197  { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
198  { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
199  { MA, 0 | F_NOMATCH, "\\R^", "\n" },
200  { MA, 1 | F_NOMATCH, "^", "\n" },
201  { 0, 0, "^ab", "ab" },
202  { 0, 0 | F_NOMATCH, "^ab", "aab" },
203  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
204  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
205  { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
206  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
207  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
208  { 0, 0, "ab$", "ab" },
209  { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
210  { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
211  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
212  { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
213  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
214  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
215  { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
216  { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
217  { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
218  { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
219  { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
220  { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
221  { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
222  { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
223  { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
224  { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
225  { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
226  { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
227  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
228  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
229  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
230  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
231  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
232  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
233  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
234  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
235  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
236  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
237  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
238  { MA, 0, "\\Aa", "aaa" },
239  { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
240  { MA, 1, "\\Ga", "aaa" },
241  { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
242  { MA, 0, "a\\z", "aaa" },
243  { MA, 0 | F_NOMATCH, "a\\z", "aab" },
244 
245  /* Brackets. */
246  { MUA, 0, "(ab|bb|cd)", "bacde" },
247  { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
248  { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
249  { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
250  { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
251  { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
252 
253  /* Greedy and non-greedy ? operators. */
254  { MUA, 0, "(?:a)?a", "laab" },
255  { CMUA, 0, "(A)?A", "llaab" },
256  { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
257  { MUA, 0, "(a)?a", "manm" },
258  { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
259  { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
260  { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
261 
262  /* Greedy and non-greedy + operators */
263  { MUA, 0, "(aa)+aa", "aaaaaaa" },
264  { MUA, 0, "(aa)+?aa", "aaaaaaa" },
265  { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
266  { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
267  { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
268  { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
269  { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
270 
271  /* Greedy and non-greedy * operators */
272  { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
273  { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
274  { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
275  { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
276  { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
277  { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
278  { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
279  { MA, 0, "((?:a|)*){0}a", "a" },
280 
281  /* Combining ? + * operators */
282  { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
283  { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
284  { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
285  { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
286  { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
287 
288  /* Single character iterators. */
289  { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
290  { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
291  { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
292  { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
293  { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
294  { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
295  { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
296  { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
297  { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
298  { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
299  { MUA, 0, "(a?+[^b])+", "babaacacb" },
300  { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
301  { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
302  { CMUA, 0, "[c-f]+k", "DemmFke" },
303  { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
304  { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
305  { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
306  { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
307  { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
308  { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
309  { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
310  { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
311  { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
312  { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
313  { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
314  { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
315  { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
316  { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
317  { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
318  { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
319  { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
320  { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
321 
322  /* Bracket repeats with limit. */
323  { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
324  { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
325  { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
326  { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
327  { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
328  { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
329  { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
330  { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
331  { MUA, 0, "(ab){4,6}?M", "abababababababM" },
332 
333  /* Basic character sets. */
334  { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
335  { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
336  { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
337  { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
338  { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
339  { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
340  { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
341  { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
342  { MUA, 0, "x[^befg]+", "xbxexacdhg" },
343  { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
344  { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
345  { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
346  { CMUA, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
347  { CMUA, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
348  { MUA, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
349  { MUA, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
350  { MUA, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
351  { MUA, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
352  { MUA, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
353  { MUA, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
354  { MUA, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
355  { MUA, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
356 
357  /* Unicode properties. */
358  { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
359  { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
360  { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
361  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
362  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
363  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
364  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
365  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
366  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
367  { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
368  { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
369  { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
370  { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
371  { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
372  { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
373  { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
374  { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
375  { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
376  { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
377  { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
378 
379  /* Possible empty brackets. */
380  { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
381  { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
382  { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
383  { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
384  { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
385  { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
386  { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
387  { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
388  { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
389  { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
390 
391  /* Start offset. */
392  { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
393  { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
394  { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
395  { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
396 
397  /* Newline. */
398  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
399  { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
400  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
401  { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
402  { MUA, 1, "^", "\r\n" },
403  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
404  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
405 
406  /* Any character except newline or any newline. */
407  { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
408  { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
409  { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
410  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
411  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
412  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
413  { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
414  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
415  { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
416  { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
417  { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
418  { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
419  { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
420  { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
421  { MUA, 0, "\\R+", "ab\r\n\r" },
422  { MUA, 0, "\\R*", "ab\r\n\r" },
423  { MUA, 0, "\\R*", "\r\n\r" },
424  { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
425  { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
426  { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
427  { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
428  { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
429  { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
430  { MUA, 0, "\\R*\\R\\R", "\n\r" },
431  { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
432  { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
433 
434  /* Atomic groups (no fallback from "next" direction). */
435  { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
436  { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
437  { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
438  "bababcdedefgheijijklmlmnop" },
439  { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
440  { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
441  { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
442  { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
443  { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
444  { MUA, 0, "(?>x|)*$", "aaa" },
445  { MUA, 0, "(?>(x)|)*$", "aaa" },
446  { MUA, 0, "(?>x|())*$", "aaa" },
447  { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
448  { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
449  { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
450  { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
451  { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
452  { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
453  { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
454  { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
455  { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
456  { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
457  { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
458  { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
459  { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
460  { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
461  { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
462  { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
463  { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
464  { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
465  { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
466  { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
467  { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
468  { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
469  { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
470  { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
471  { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
472  { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
473 
474  /* Possessive quantifiers. */
475  { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
476  { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
477  { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
478  { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
479  { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
480  { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
481  { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
482  { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
483  { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
484  { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
485  { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
486  { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
487  { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
488  { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
489  { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
490  { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
491  { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
492  { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
493  { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
494  { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
495  { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
496  { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
497  { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
498  { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
499  { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
500  { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
501  { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
502  { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
503  { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
504  { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
505  { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
506  { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
507  { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
508  { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
509  { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
510 
511  /* Back references. */
512  { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
513  { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
514  { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
515  { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
516  { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
517  { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
518  { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
519  { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
520  { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
521  { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
522  { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
523  { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
524  { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
525  { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
526  { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
527  { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
528  { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
529  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
530  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
531  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
532  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
533  { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
534  { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
535  { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
536  { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
537  { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
538  { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
539  { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
540  { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
541  { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
542  { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
543  { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
544  { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
545  { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
546  { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
547  { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
548  { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
549  { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
550  { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
551 
552  /* Assertions. */
553  { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
554  { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
555  { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
556  { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
557  { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
558  { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
559  { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
560  { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
561  { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
562  { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
563  { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
564  { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
565  { MUA, 0, "((?(?=a)a)+k)", "bbak" },
566  { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
567  { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
568  { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
569  { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
570  { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
571  { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
572  { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
573  { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
574  { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
575  { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
576  { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
577 
578  /* Not empty, ACCEPT, FAIL */
579  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
580  { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
581  { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
582  { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
583  { MUA, 0, "a(*ACCEPT)b", "ab" },
584  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
585  { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
586  { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
587  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
588  { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
589  { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
590  { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
591  { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
592  { MUA, 0, "((a(*ACCEPT)b))", "ab" },
593  { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
594  { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
595  { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
596  { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
597  { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
598 
599  /* Conditional blocks. */
600  { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
601  { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
602  { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
603  { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
604  { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
605  { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
606  { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
607  { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
608  { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
609  { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
610  { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
611  { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
612  { MUA, 0, "(?(?=a)ab)", "a" },
613  { MUA, 0, "(?(?<!b)c)", "b" },
614  { MUA, 0, "(?(DEFINE)a(b))", "a" },
615  { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
616  { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
617  { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
618  { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
619  { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
620  { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
621  { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
622  { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
623  { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
624  { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
625  { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
626  { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
627  { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
628  { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
629  { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
630  { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
631  { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
632  { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
633  { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
634  { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
635  { MUA, 0, "(?(?!)a|b)", "ab" },
636  { MUA, 0, "(?(?!)a)", "ab" },
637  { MUA, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
638 
639  /* Set start of match. */
640  { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
641  { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
642  { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
643  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
644  { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
645 
646  /* First line. */
647  { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
648  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
649  { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
650  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
651  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
652  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
653  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
654  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
655  { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
656  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
657  { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
658  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
659  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
660  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
661  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
662  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
663  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
664  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
665  { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
666  { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
667 
668  /* Recurse. */
669  { MUA, 0, "(a)(?1)", "aa" },
670  { MUA, 0, "((a))(?1)", "aa" },
671  { MUA, 0, "(b|a)(?1)", "aa" },
672  { MUA, 0, "(b|(a))(?1)", "aa" },
673  { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
674  { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
675  { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
676  { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
677  { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
678  { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
679  { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
680  { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
681  { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
682  { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
683  { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
684  { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
685  { MUA, 0, "b|<(?R)*>", "<<b>" },
686  { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
687  { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
688  { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
689  { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
690  { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
691  { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
692  { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
693  { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
694  { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
695  { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
696  { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
697  { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
698 
699  /* 16 bit specific tests. */
700  { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
701  { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
702  { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
703  { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
704  { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
705  { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
706  { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
707  { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
708  { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
709  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
710  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
711  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
712  { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
713  { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
714  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
715  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
716  { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
717  { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
718  { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
719  { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
720  { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
721  { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
722  { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
723  { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
724  { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
725  { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
726  { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
727  { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
728  { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
729  { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
730  { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
731 
732  /* Partial matching. */
733  { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
734  { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
735  { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
736  { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
737  { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
738  { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
739  { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
740  { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
741 
742  /* (*MARK) verb. */
743  { MUA, 0, "a(*MARK:aa)a", "ababaa" },
744  { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
745  { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
746  { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
747  { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
748  { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
749  { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
750  { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
751  { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
752  { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
753  { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
754  { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
755  { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
756  { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
757  { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
758 
759  /* (*COMMIT) verb. */
760  { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
761  { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
762  { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
763  { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
764  { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
765  { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
766 
767  /* (*PRUNE) verb. */
768  { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
769  { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
770  { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
771  { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
772  { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
773  { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
774  { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
775  { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
776  { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
777  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
778  { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
779  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
780  { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
781  { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
782  { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
783  { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
784  { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
785  { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
786  { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
787  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
788  { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
789  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
790  { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
791  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
792  { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
793  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
794  { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
795  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
796  { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
797  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
798 
799  /* (*SKIP) verb. */
800  { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
801 
802  /* (*THEN) verb. */
803  { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
804  { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
805  { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
806  { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
807  { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
808  { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
809  { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
810  { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
811  { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
812 
813  /* Deep recursion. */
814  { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
815  { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
816  { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
817 
818  /* Deep recursion: Stack limit reached. */
819  { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
820  { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
821  { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
822  { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
823  { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
824 
825  { 0, 0, NULL, NULL }
826 };
827 
/* Hands out a heap-allocated copy of PCRE's default character tables.

   mode != 0: releases the cached copy (if any) and returns NULL.
   mode == 0: returns the cached copy, creating it on first use.

   The default tables are located by compiling an empty pattern and asking
   for PCRE_INFO_DEFAULT_TABLES. According to the original author's note the
   copy exists so that valgrind can report invalid reads and writes.

   Returns NULL when the default tables cannot be obtained or when the
   allocation fails. */
static const unsigned char *tables(int mode)
{
	/* Size of the character tables in bytes. It is hard-coded because
	   pcre_fullinfo offers no way to query it; tolerated here since this
	   is only a test program. */
	enum { TABLES_LENGTH = 1088 };

	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
	unsigned char *fresh_copy;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* free(NULL) is a no-op, so no guard is required. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile the empty pattern with whichever library width comes first
	   and read the address of its default tables. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif

	/* Not expected to happen. */
	if (default_tables == NULL)
		return NULL;

	fresh_copy = malloc(TABLES_LENGTH);
	if (fresh_copy != NULL) {
		memcpy(fresh_copy, default_tables, TABLES_LENGTH);
		tables_copy = fresh_copy;
	}
	return fresh_copy;
}
890 
#ifdef SUPPORT_PCRE8
/* Stack callback passed to pcre_assign_jit_stack by setstack8: the opaque
   argument already is the JIT stack, so it is returned unchanged. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
897 
#ifdef SUPPORT_PCRE16
/* Stack callback passed to pcre16_assign_jit_stack by setstack16: the opaque
   argument already is the JIT stack, so it is returned unchanged. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
904 
#ifdef SUPPORT_PCRE32
/* Stack callback passed to pcre32_assign_jit_stack by setstack32: the opaque
   argument already is the JIT stack, so it is returned unchanged. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = (pcre32_jit_stack *)arg;

	return jit_stack;
}
#endif
911 
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL until getstack8 creates it. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it on first use with the
   arguments (1, 1024 * 1024). A failed allocation leaves the pointer NULL,
   so the next call tries again. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 == NULL)
		stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* With a study block: registers callback8 together with the shared stack
   on `extra`. With NULL: frees the shared stack (if one exists) and
   forgets it. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL)
		pcre_jit_stack_free(stack8);
	stack8 = NULL;
}
#endif /* SUPPORT_PCRE8 */
934 
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL until getstack16 creates it. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it on first use with the
   arguments (1, 1024 * 1024). A failed allocation leaves the pointer NULL,
   so the next call tries again. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16 == NULL)
		stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* With a study block: registers callback16 together with the shared stack
   on `extra`. With NULL: frees the shared stack (if one exists) and
   forgets it. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL)
		pcre16_jit_stack_free(stack16);
	stack16 = NULL;
}
#endif /* SUPPORT_PCRE16 */
957 
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL until getstack32 creates it. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it on first use with the
   arguments (1, 1024 * 1024). A failed allocation leaves the pointer NULL,
   so the next call tries again. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32 == NULL)
		stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* With a study block: registers callback32 together with the shared stack
   on `extra`. With NULL: frees the shared stack (if one exists) and
   forgets it. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL)
		pcre32_jit_stack_free(stack32);
	stack32 = NULL;
}
#endif /* SUPPORT_PCRE32 */
980 
981 #ifdef SUPPORT_PCRE16
982 
983 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
984 {
985  unsigned char *iptr = (unsigned char*)input;
986  PCRE_UCHAR16 *optr = output;
987  unsigned int c;
988 
989  if (max_length == 0)
990  return 0;
991 
992  while (*iptr && max_length > 1) {
993  c = 0;
994  if (offsetmap)
995  *offsetmap++ = (int)(iptr - (unsigned char*)input);
996 
997  if (*iptr < 0xc0)
998  c = *iptr++;
999  else if (!(*iptr & 0x20)) {
1000  c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1001  iptr += 2;
1002  } else if (!(*iptr & 0x10)) {
1003  c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1004  iptr += 3;
1005  } else if (!(*iptr & 0x08)) {
1006  c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1007  iptr += 4;
1008  }
1009 
1010  if (c < 65536) {
1011  *optr++ = c;
1012  max_length--;
1013  } else if (max_length <= 2) {
1014  *optr = '\0';
1015  return (int)(optr - output);
1016  } else {
1017  c -= 0x10000;
1018  *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1019  *optr++ = 0xdc00 | (c & 0x3ff);
1020  max_length -= 2;
1021  if (offsetmap)
1022  offsetmap++;
1023  }
1024  }
1025  if (offsetmap)
1026  *offsetmap = (int)(iptr - (unsigned char*)input);
1027  *optr = '\0';
1028  return (int)(optr - output);
1029 }
1030 
1031 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
1032 {
1033  unsigned char *iptr = (unsigned char*)input;
1034  PCRE_UCHAR16 *optr = output;
1035 
1036  if (max_length == 0)
1037  return 0;
1038 
1039  while (*iptr && max_length > 1) {
1040  *optr++ = *iptr++;
1041  max_length--;
1042  }
1043  *optr = '\0';
1044  return (int)(optr - output);
1045 }
1046 
/* Capacity, in elements, of the two 16 bit scratch arrays below. */
1047 #define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer into which regression_tests converts first the pattern and
   later the subject string before calling the 16 bit library. */
1048 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Offset map handed to convert_utf8_to_utf16 when the subject string is
   converted. */
1049 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1050 
1051 #endif /* SUPPORT_PCRE16 */
1052 
1053 #ifdef SUPPORT_PCRE32
1054 
1055 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1056 {
1057  unsigned char *iptr = (unsigned char*)input;
1058  PCRE_UCHAR32 *optr = output;
1059  unsigned int c;
1060 
1061  if (max_length == 0)
1062  return 0;
1063 
1064  while (*iptr && max_length > 1) {
1065  c = 0;
1066  if (offsetmap)
1067  *offsetmap++ = (int)(iptr - (unsigned char*)input);
1068 
1069  if (*iptr < 0xc0)
1070  c = *iptr++;
1071  else if (!(*iptr & 0x20)) {
1072  c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1073  iptr += 2;
1074  } else if (!(*iptr & 0x10)) {
1075  c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1076  iptr += 3;
1077  } else if (!(*iptr & 0x08)) {
1078  c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1079  iptr += 4;
1080  }
1081 
1082  *optr++ = c;
1083  max_length--;
1084  }
1085  if (offsetmap)
1086  *offsetmap = (int)(iptr - (unsigned char*)input);
1087  *optr = 0;
1088  return (int)(optr - output);
1089 }
1090 
1091 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1092 {
1093  unsigned char *iptr = (unsigned char*)input;
1094  PCRE_UCHAR32 *optr = output;
1095 
1096  if (max_length == 0)
1097  return 0;
1098 
1099  while (*iptr && max_length > 1) {
1100  *optr++ = *iptr++;
1101  max_length--;
1102  }
1103  *optr = '\0';
1104  return (int)(optr - output);
1105 }
1106 
/* Capacity, in elements, of the two 32 bit scratch arrays below. */
1107 #define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer into which regression_tests converts first the pattern and
   later the subject string before calling the 32 bit library. */
1108 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Offset map handed to convert_utf8_to_utf32 when the subject string is
   converted. */
1109 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1110 
1111 #endif /* SUPPORT_PCRE32 */
1112 
/* Reports whether a NUL-terminated string is pure 7 bit ASCII.

   Returns 1 when every byte before the terminator is at most 127 (the
   empty string included) and 0 as soon as a larger byte is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
1123 
1124 static int regression_tests(void)
1125 {
1126  struct regression_test_case *current = regression_test_cases;
1127  const char *error;
1128  char *cpu_info;
1129  int i, err_offs;
1130  int is_successful, is_ascii;
1131  int total = 0;
1132  int successful = 0;
1133  int successful_row = 0;
1134  int counter = 0;
1135  int study_mode;
1136  int utf = 0, ucp = 0;
1137  int disabled_flags = 0;
1138 #ifdef SUPPORT_PCRE8
1139  pcre *re8;
1140  pcre_extra *extra8;
1141  pcre_extra dummy_extra8;
1142  int ovector8_1[32];
1143  int ovector8_2[32];
1144  int return_value8[2];
1145  unsigned char *mark8_1, *mark8_2;
1146 #endif
1147 #ifdef SUPPORT_PCRE16
1148  pcre16 *re16;
1149  pcre16_extra *extra16;
1150  pcre16_extra dummy_extra16;
1151  int ovector16_1[32];
1152  int ovector16_2[32];
1153  int return_value16[2];
1154  PCRE_UCHAR16 *mark16_1, *mark16_2;
1155  int length16;
1156 #endif
1157 #ifdef SUPPORT_PCRE32
1158  pcre32 *re32;
1159  pcre32_extra *extra32;
1160  pcre32_extra dummy_extra32;
1161  int ovector32_1[32];
1162  int ovector32_2[32];
1163  int return_value32[2];
1164  PCRE_UCHAR32 *mark32_1, *mark32_2;
1165  int length32;
1166 #endif
1167 
1168  /* This test compares the behaviour of interpreter and JIT. Although disabling
1169  utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1170  still considered successful from pcre_jit_test point of view. */
1171 
1172 #if defined SUPPORT_PCRE8
1173  pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1174 #elif defined SUPPORT_PCRE16
1175  pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1176 #elif defined SUPPORT_PCRE32
1177  pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1178 #endif
1179 
1180  printf("Running JIT regression tests\n");
1181  printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1182 
1183 #if defined SUPPORT_PCRE8
1184  pcre_config(PCRE_CONFIG_UTF8, &utf);
1185  pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1186 #elif defined SUPPORT_PCRE16
1187  pcre16_config(PCRE_CONFIG_UTF16, &utf);
1188  pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1189 #elif defined SUPPORT_PCRE32
1190  pcre32_config(PCRE_CONFIG_UTF32, &utf);
1191  pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1192 #endif
1193 
1194  if (!utf)
1195  disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1196  if (!ucp)
1197  disabled_flags |= PCRE_UCP;
1198 #ifdef SUPPORT_PCRE8
1199  printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1200 #endif
1201 #ifdef SUPPORT_PCRE16
1202  printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1203 #endif
1204 #ifdef SUPPORT_PCRE32
1205  printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1206 #endif
1207 
1208  while (current->pattern) {
1209  /* printf("\nPattern: %s :\n", current->pattern); */
1210  total++;
1211  is_ascii = 0;
1212  if (!(current->start_offset & F_PROPERTY))
1213  is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1214 
1215  if (current->flags & PCRE_PARTIAL_SOFT)
1216  study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1217  else if (current->flags & PCRE_PARTIAL_HARD)
1218  study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1219  else
1220  study_mode = PCRE_STUDY_JIT_COMPILE;
1221  error = NULL;
1222 #ifdef SUPPORT_PCRE8
1223  re8 = NULL;
1224  if (!(current->start_offset & F_NO8))
1225  re8 = pcre_compile(current->pattern,
1226  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1227  &error, &err_offs, tables(0));
1228 
1229  extra8 = NULL;
1230  if (re8) {
1231  error = NULL;
1232  extra8 = pcre_study(re8, study_mode, &error);
1233  if (!extra8) {
1234  printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1235  pcre_free(re8);
1236  re8 = NULL;
1237  }
1238  else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1239  printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1240  pcre_free_study(extra8);
1241  pcre_free(re8);
1242  re8 = NULL;
1243  }
1244  extra8->flags |= PCRE_EXTRA_MARK;
1245  } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO8))
1246  printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1247 #endif
1248 #ifdef SUPPORT_PCRE16
1249  if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1250  convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1251  else
1252  copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1253 
1254  re16 = NULL;
1255  if (!(current->start_offset & F_NO16))
1256  re16 = pcre16_compile(regtest_buf16,
1257  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1258  &error, &err_offs, tables(0));
1259 
1260  extra16 = NULL;
1261  if (re16) {
1262  error = NULL;
1263  extra16 = pcre16_study(re16, study_mode, &error);
1264  if (!extra16) {
1265  printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1266  pcre16_free(re16);
1267  re16 = NULL;
1268  }
1269  else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1270  printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1271  pcre16_free_study(extra16);
1272  pcre16_free(re16);
1273  re16 = NULL;
1274  }
1275  extra16->flags |= PCRE_EXTRA_MARK;
1276  } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO16))
1277  printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1278 #endif
1279 #ifdef SUPPORT_PCRE32
1280  if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1281  convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1282  else
1283  copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1284 
1285  re32 = NULL;
1286  if (!(current->start_offset & F_NO32))
1287  re32 = pcre32_compile(regtest_buf32,
1288  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1289  &error, &err_offs, tables(0));
1290 
1291  extra32 = NULL;
1292  if (re32) {
1293  error = NULL;
1294  extra32 = pcre32_study(re32, study_mode, &error);
1295  if (!extra32) {
1296  printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1297  pcre32_free(re32);
1298  re32 = NULL;
1299  }
1300  if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1301  printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1302  pcre32_free_study(extra32);
1303  pcre32_free(re32);
1304  re32 = NULL;
1305  }
1306  extra32->flags |= PCRE_EXTRA_MARK;
1307  } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO32))
1308  printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1309 #endif
1310 
1311  counter++;
1312  if ((counter & 0x3) != 0) {
1313 #ifdef SUPPORT_PCRE8
1314  setstack8(NULL);
1315 #endif
1316 #ifdef SUPPORT_PCRE16
1317  setstack16(NULL);
1318 #endif
1319 #ifdef SUPPORT_PCRE32
1320  setstack32(NULL);
1321 #endif
1322  }
1323 
1324 #ifdef SUPPORT_PCRE8
1325  return_value8[0] = -1000;
1326  return_value8[1] = -1000;
1327  for (i = 0; i < 32; ++i)
1328  ovector8_1[i] = -2;
1329  for (i = 0; i < 32; ++i)
1330  ovector8_2[i] = -2;
1331  if (re8) {
1332  mark8_1 = NULL;
1333  mark8_2 = NULL;
1334  extra8->mark = &mark8_1;
1335 
1336  if ((counter & 0x1) != 0) {
1337  setstack8(extra8);
1338  return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1339  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
1340  } else
1341  return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1342  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
1343  memset(&dummy_extra8, 0, sizeof(pcre_extra));
1344  dummy_extra8.flags = PCRE_EXTRA_MARK;
1345  if (current->start_offset & F_STUDY) {
1346  dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1347  dummy_extra8.study_data = extra8->study_data;
1348  }
1349  dummy_extra8.mark = &mark8_2;
1350  return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1351  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
1352  }
1353 #endif
1354 
1355 #ifdef SUPPORT_PCRE16
1356  return_value16[0] = -1000;
1357  return_value16[1] = -1000;
1358  for (i = 0; i < 32; ++i)
1359  ovector16_1[i] = -2;
1360  for (i = 0; i < 32; ++i)
1361  ovector16_2[i] = -2;
1362  if (re16) {
1363  mark16_1 = NULL;
1364  mark16_2 = NULL;
1365  if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1366  length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1367  else
1368  length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1369  extra16->mark = &mark16_1;
1370  if ((counter & 0x1) != 0) {
1371  setstack16(extra16);
1372  return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1373  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
1374  } else
1375  return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1376  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
1377  memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1378  dummy_extra16.flags = PCRE_EXTRA_MARK;
1379  if (current->start_offset & F_STUDY) {
1380  dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1381  dummy_extra16.study_data = extra16->study_data;
1382  }
1383  dummy_extra16.mark = &mark16_2;
1384  return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1385  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
1386  }
1387 #endif
1388 
1389 #ifdef SUPPORT_PCRE32
1390  return_value32[0] = -1000;
1391  return_value32[1] = -1000;
1392  for (i = 0; i < 32; ++i)
1393  ovector32_1[i] = -2;
1394  for (i = 0; i < 32; ++i)
1395  ovector32_2[i] = -2;
1396  if (re32) {
1397  mark32_1 = NULL;
1398  mark32_2 = NULL;
1399  if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1400  length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1401  else
1402  length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1403  extra32->mark = &mark32_1;
1404  if ((counter & 0x1) != 0) {
1405  setstack32(extra32);
1406  return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1407  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
1408  } else
1409  return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1410  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
1411  memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1412  dummy_extra32.flags = PCRE_EXTRA_MARK;
1413  if (current->start_offset & F_STUDY) {
1414  dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1415  dummy_extra32.study_data = extra32->study_data;
1416  }
1417  dummy_extra32.mark = &mark32_2;
1418  return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1419  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
1420  }
1421 #endif
1422 
1423  /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1424  return_value8[0], return_value16[0], return_value32[0],
1425  ovector8_1[0], ovector8_1[1],
1426  ovector16_1[0], ovector16_1[1],
1427  ovector32_1[0], ovector32_1[1],
1428  (current->flags & PCRE_CASELESS) ? "C" : ""); */
1429 
1430  /* If F_DIFF is set, just run the test, but do not compare the results.
1431  Segfaults can still be captured. */
1432 
1433  is_successful = 1;
1434  if (!(current->start_offset & F_DIFF)) {
1435 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1436  if (!(current->start_offset & F_FORCECONV)) {
1437  int return_value;
1438 
1439  /* All results must be the same. */
1440 #ifdef SUPPORT_PCRE8
1441  if ((return_value = return_value8[0]) != return_value8[1]) {
1442  printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1443  return_value8[0], return_value8[1], total, current->pattern, current->input);
1444  is_successful = 0;
1445  } else
1446 #endif
1447 #ifdef SUPPORT_PCRE16
1448  if ((return_value = return_value16[0]) != return_value16[1]) {
1449  printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1450  return_value16[0], return_value16[1], total, current->pattern, current->input);
1451  is_successful = 0;
1452  } else
1453 #endif
1454 #ifdef SUPPORT_PCRE32
1455  if ((return_value = return_value32[0]) != return_value32[1]) {
1456  printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1457  return_value32[0], return_value32[1], total, current->pattern, current->input);
1458  is_successful = 0;
1459  } else
1460 #endif
1461 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1462  if (return_value8[0] != return_value16[0]) {
1463  printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1464  return_value8[0], return_value16[0],
1465  total, current->pattern, current->input);
1466  is_successful = 0;
1467  } else
1468 #endif
1469 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1470  if (return_value8[0] != return_value32[0]) {
1471  printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1472  return_value8[0], return_value32[0],
1473  total, current->pattern, current->input);
1474  is_successful = 0;
1475  } else
1476 #endif
1477 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1478  if (return_value16[0] != return_value32[0]) {
1479  printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1480  return_value16[0], return_value32[0],
1481  total, current->pattern, current->input);
1482  is_successful = 0;
1483  } else
1484 #endif
1485  if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1486  if (return_value == PCRE_ERROR_PARTIAL) {
1487  return_value = 2;
1488  } else {
1489  return_value *= 2;
1490  }
1491 #ifdef SUPPORT_PCRE8
1492  return_value8[0] = return_value;
1493 #endif
1494 #ifdef SUPPORT_PCRE16
1495  return_value16[0] = return_value;
1496 #endif
1497 #ifdef SUPPORT_PCRE32
1498  return_value32[0] = return_value;
1499 #endif
1500  /* Transform back the results. */
1501  if (current->flags & PCRE_UTF8) {
1502 #ifdef SUPPORT_PCRE16
1503  for (i = 0; i < return_value; ++i) {
1504  if (ovector16_1[i] >= 0)
1505  ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1506  if (ovector16_2[i] >= 0)
1507  ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1508  }
1509 #endif
1510 #ifdef SUPPORT_PCRE32
1511  for (i = 0; i < return_value; ++i) {
1512  if (ovector32_1[i] >= 0)
1513  ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1514  if (ovector32_2[i] >= 0)
1515  ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1516  }
1517 #endif
1518  }
1519 
1520  for (i = 0; i < return_value; ++i) {
1521 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1522  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1523  printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1524  i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1525  total, current->pattern, current->input);
1526  is_successful = 0;
1527  }
1528 #endif
1529 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1530  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1531  printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1532  i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1533  total, current->pattern, current->input);
1534  is_successful = 0;
1535  }
1536 #endif
1537 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1538  if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1539  printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1540  i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1541  total, current->pattern, current->input);
1542  is_successful = 0;
1543  }
1544 #endif
1545  }
1546  }
1547  } else
1548 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1549  {
1550  /* Only the 8 bit and 16 bit results must be equal. */
1551 #ifdef SUPPORT_PCRE8
1552  if (return_value8[0] != return_value8[1]) {
1553  printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1554  return_value8[0], return_value8[1], total, current->pattern, current->input);
1555  is_successful = 0;
1556  } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1557  if (return_value8[0] == PCRE_ERROR_PARTIAL)
1558  return_value8[0] = 2;
1559  else
1560  return_value8[0] *= 2;
1561 
1562  for (i = 0; i < return_value8[0]; ++i)
1563  if (ovector8_1[i] != ovector8_2[i]) {
1564  printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1565  i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1566  is_successful = 0;
1567  }
1568  }
1569 #endif
1570 
1571 #ifdef SUPPORT_PCRE16
1572  if (return_value16[0] != return_value16[1]) {
1573  printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1574  return_value16[0], return_value16[1], total, current->pattern, current->input);
1575  is_successful = 0;
1576  } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1577  if (return_value16[0] == PCRE_ERROR_PARTIAL)
1578  return_value16[0] = 2;
1579  else
1580  return_value16[0] *= 2;
1581 
1582  for (i = 0; i < return_value16[0]; ++i)
1583  if (ovector16_1[i] != ovector16_2[i]) {
1584  printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1585  i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1586  is_successful = 0;
1587  }
1588  }
1589 #endif
1590 
1591 #ifdef SUPPORT_PCRE32
1592  if (return_value32[0] != return_value32[1]) {
1593  printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1594  return_value32[0], return_value32[1], total, current->pattern, current->input);
1595  is_successful = 0;
1596  } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1597  if (return_value32[0] == PCRE_ERROR_PARTIAL)
1598  return_value32[0] = 2;
1599  else
1600  return_value32[0] *= 2;
1601 
1602  for (i = 0; i < return_value32[0]; ++i)
1603  if (ovector32_1[i] != ovector32_2[i]) {
1604  printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1605  i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1606  is_successful = 0;
1607  }
1608  }
1609 #endif
1610  }
1611  }
1612 
1613  if (is_successful) {
1614 #ifdef SUPPORT_PCRE8
1615  if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii)) {
1616  if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1617  printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1618  total, current->pattern, current->input);
1619  is_successful = 0;
1620  }
1621 
1622  if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1623  printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1624  total, current->pattern, current->input);
1625  is_successful = 0;
1626  }
1627  }
1628 #endif
1629 #ifdef SUPPORT_PCRE16
1630  if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii)) {
1631  if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1632  printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1633  total, current->pattern, current->input);
1634  is_successful = 0;
1635  }
1636 
1637  if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1638  printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1639  total, current->pattern, current->input);
1640  is_successful = 0;
1641  }
1642  }
1643 #endif
1644 #ifdef SUPPORT_PCRE32
1645  if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii)) {
1646  if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1647  printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1648  total, current->pattern, current->input);
1649  is_successful = 0;
1650  }
1651 
1652  if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1653  printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1654  total, current->pattern, current->input);
1655  is_successful = 0;
1656  }
1657  }
1658 #endif
1659  }
1660 
1661  if (is_successful) {
1662 #ifdef SUPPORT_PCRE8
1663  if (mark8_1 != mark8_2) {
1664  printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1665  total, current->pattern, current->input);
1666  is_successful = 0;
1667  }
1668 #endif
1669 #ifdef SUPPORT_PCRE16
1670  if (mark16_1 != mark16_2) {
1671  printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1672  total, current->pattern, current->input);
1673  is_successful = 0;
1674  }
1675 #endif
1676 #ifdef SUPPORT_PCRE32
1677  if (mark32_1 != mark32_2) {
1678  printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1679  total, current->pattern, current->input);
1680  is_successful = 0;
1681  }
1682 #endif
1683  }
1684 
1685 #ifdef SUPPORT_PCRE8
1686  if (re8) {
1687  pcre_free_study(extra8);
1688  pcre_free(re8);
1689  }
1690 #endif
1691 #ifdef SUPPORT_PCRE16
1692  if (re16) {
1693  pcre16_free_study(extra16);
1694  pcre16_free(re16);
1695  }
1696 #endif
1697 #ifdef SUPPORT_PCRE32
1698  if (re32) {
1699  pcre32_free_study(extra32);
1700  pcre32_free(re32);
1701  }
1702 #endif
1703 
1704  if (is_successful) {
1705  successful++;
1706  successful_row++;
1707  printf(".");
1708  if (successful_row >= 60) {
1709  successful_row = 0;
1710  printf("\n");
1711  }
1712  } else
1713  successful_row = 0;
1714 
1715  fflush(stdout);
1716  current++;
1717  }
1718  tables(1);
1719 #ifdef SUPPORT_PCRE8
1720  setstack8(NULL);
1721 #endif
1722 #ifdef SUPPORT_PCRE16
1723  setstack16(NULL);
1724 #endif
1725 #ifdef SUPPORT_PCRE32
1726  setstack32(NULL);
1727 #endif
1728 
1729  if (total == successful) {
1730  printf("\nAll JIT regression tests are successfully passed.\n");
1731  return 0;
1732  } else {
1733  printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1734  return 1;
1735  }
1736 }
1737 
1738 /* End of pcre_jit_test.c */
#define CMUA
void pcre16_jit_stack_free(pcre16_jit_stack *stack)
#define MUA
it is not the default The Unicode tables correspond to Unicode release In addition to the Perl compatible matching PCRE contains an alternative function that matches the same compiled patterns in a different way In certain the alternative function has some advantages For a discussion of the two matching see the pcrematching page PCRE is written in C and released as a C library A number of people have written wrappers and interfaces of various kinds In Google Inc have provided a comprehensive C wrapper for the bit library This is now included as part of the PCRE distribution The pcrecpp page has details of this interface Other people's contributions can be found in the Contrib directory at the primary FTP which or changed when the library is built The pcre_config() function makes it possible for a client to discover which features are available. The features themselves are described in the pcrebuild page. Documentation about building PCRE for various operating systems can be found in the README and NON-AUTOTOOLS_BUILD files in the source distribution. The libraries contain a number of undocumented internal functions and data tables that are used by more than one of the exported external functions
it defaults to the value that is set for with match which imposes no additional constraints you can set a lower limit by for with match limit PCRE is built with a set of tables that are distributed in the file pcre_chartables c dist These tables are for ASCII codes only If you add enable rebuild chartables to the configure the distributed tables are no longer used a program called dftables is compiled and run This outputs the source for new set of tables
Definition: pcre.txt:1159
the input buffer is automatically extended if it is too small An empty line signals the end of the data at which point a new regular expression is read The regular expressions are given enclosed in any non alphanumeric delimiters other than for in which case the new line characters are included within it It is possible to include the delimiter within the pattern by escaping for example abc def If you do the escape and the delimiter form part of the but since delimiters are always non this does not affect its interpretation If the terminating delimiter is immediately followed by a for abc then a backslash is added to the end of the pattern This is done to provide a way of testing the error condition that arises if a pattern finishes with a because abc is interpreted as the first line of a pattern that starts with causing pcretest to read the next line as a continuation of the regular expression PATTERN MODIFIERS A pattern may be followed by any number of which are mostly single though some of these can be qualified by further characters Following Perl these are referred to below for the i even though the delimiter of the pattern need not always be a and no slash is used when writing modifiers White space may appear between the final pattern delimiter and the first and between the modifiers themselves For reference here is a complete list of modifiers They fall into several groups that are described in detail in the following sections set UTF mode set and x modifiers set the PCRE_DOTALL
Definition: pcretest.txt:345
void pcre_jit_stack_free(pcre_jit_stack *stack)
void pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *data)
#define PCRE_STUDY_JIT_COMPILE
in the same way as other grep commands but it uses the PCRE regular expression library to support patterns that are compatible with the regular expressions of Perl See or whether supplied on the command line or in a separate are given without delimiters For by surrounding a pattern with as is common in Perl they are interpreted as part of the pattern Quotes can of course be used to delimit patterns on the command line because they are interpreted by the and indeed quotes are required if a pattern contains white space or shell metacharacters The first argument that follows any option settings is treated as the single pattern to be matched when neither e nor f is present Conversely when one or both of these options are used to specify patterns all arguments are treated as path names At least one of or an argument pattern must be provided If no files are pcregrep reads the standard input The standard input can also be referenced by a name consisting of a single hyphen For each line that matches a pattern is copied to the standard output
Definition: pcregrep.txt:21
void(* pcre_free)(void *)
Definition: pcre.txt:1682
PCRE BIT API AUXILIARY FUNCTIONS pcre32_jit_stack * pcre32_jit_stack_alloc(int startsize, int maxsize)
void(* pcre32_free)(void *)
Definition: pcre.txt:627
#define MA
#define F_NO8
pcre16_extra * pcre16_study(const pcre16 *code, int options, const char **errptr)
void pcre32_free_study(pcre32_extra *extra)
THE PCRE BIT LIBRARY Starting with it is possible to compile a PCRE library that supports bit character including UTF as well as or instead of the original bit library The majority of the work to make this possible was done by Zoltan Herczeg The two libraries contain identical sets of functions used in exactly the same way Only the names of the functions and the data types of their arguments and results are different To avoid over complication and reduce the documentation maintenance most of the PCRE documentation describes the bit with only occasional references to the bit library This page describes what is different when you use the bit library but you must take care when processing any particular pattern to use functions from just one library For if you want to study a pattern that was compiled with pcre16_compile()
wintestinput3 sets the locale to french rather than and there some minor output the JIT features will be tested by the testdata files you might also like to build and run the freestanding JIT test pcre_jit_test compile and link pcregrep c
int pcre32_fullinfo(const pcre32 *code, const pcre32_extra *extra, int what, void *where)
#define CMA
#define MAP
building the test possibly other test programs in the build directory Pcre_test bat runs RunTest Bat with correct source and exe paths For manual testing with RunTest provided the build dir is a subdirectory of the source e chdir to the location of your bat and exe programs Run RunTest bat Test outputs will automatically be compared to expected and discrepancies will be identified in the console output To independently test the just in time run pcre_jit_test exe To test run pcrecpp_unittest pcre_stringpiece_unittest exe and pcre_scanner_unittest exe BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO Vincent Richomme sent a zip archive of files to help with this process They can be found in the file pcre vsbuild zip in the Contrib directory of the FTP site BUILDING UNDER WINDOWS WITH BCC5 Michael Roy sent these comments about building PCRE under Windows with which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version mismatch.I'm including an easy workaround below
int pcre_fullinfo(const pcre *code, const pcre_extra *extra, int what, void *where)
Definition: pcre_fullinfo.c:70
#define CMUAP
#define OFFSET_MASK
it must not be given immediately after S or S because this will be misinterpreted If JIT studying is successful
Definition: pcretest.txt:543
void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *data)
running out of stack can cause programs to crash in strange ways There is a discussion about stack sizes in the pcrestack man page The default maximum compiled pattern size is around You can increase this by adding with link PCRE then uses three bytes instead of two for offsets to different parts of the compiled pattern In the bit with link the only supported link size is You can build PCRE so that its internal it uses memory blocks obtained from the heap via the special functions use disable stack for recursion on the configure command PCRE runs more slowly in this mode
Definition: README.txt:328
#define F_NO16
void pcre32_jit_stack_free(pcre32_jit_stack *stack)
THE PCRE BIT LIBRARY Starting with it is possible to compile a PCRE library that supports bit character including UTF as well as or instead of the original bit library This work was done by Christian based on the work done by Zoltan Herczeg for the bit library All three libraries contain identical sets of functions used in exactly the same way Only the names of the functions and the data types of their arguments and results are different To avoid over complication and reduce the documentation maintenance most of the PCRE documentation describes the bit with only occasional references to the bit and bit libraries This page describes what is different when you use the bit library but you must take care when processing any particular pattern to use functions from just one library For if you want to study a pattern that was compiled with pcre32_compile()
it is run only when the default link size of is and the thirteenth test is run only when JIT support is not available They test some JIT specific features such as information output from pcretest about JIT compilation The and sixteenth tests are run only in bit and the and nineteenth tests are run only in bit mode These are tests that generate different output in the two modes They are for general UTF and Unicode property respectively The twentieth test is run only in bit mode It tests some specific bit features of the DFA matching engine The twenty first and twenty second tests are run only in bit when the link size is set to for the bit library They test reloading pre compiled patterns The twenty third and twenty fourth tests are run only in bit mode They are for general and UTF respectively The twenty fifth and twenty sixth tests are run only in bit mode They are for general and UTF respectively Character tables For PCRE uses four tables for manipulating and identifying characters whose code point values are less than The final argument of the pcre_compile() function is a pointer to a block of memory containing the concatenated tables.A call to pcre_maketables() can be used to generate a set of tables in the current locale.If the final argument for pcre_compile() is passed as NULL
const char * error
Definition: pcre.txt:2104
int main(void)
pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize)
void pcre_free_study(pcre_extra *extra)
Definition: pcre_study.c:1667
#define F_FORCECONV
PCRE NATIVE API AUXILIARY FUNCTIONS int pcre_jit_exec(const pcre *code, const pcre_extra *extra, const char *subject, int length, int startoffset, int options, int *ovector, int ovecsize, pcre_jit_stack *jstack)
the input buffer is automatically extended if it is too small An empty line signals the end of the data at which point a new regular expression is read The regular expressions are given enclosed in any non alphanumeric delimiters other than for in which case the new line characters are included within it It is possible to include the delimiter within the pattern by escaping for example abc def If you do the escape and the delimiter form part of the but since delimiters are always non this does not affect its interpretation If the terminating delimiter is immediately followed by a for abc then a backslash is added to the end of the pattern This is done to provide a way of testing the error condition that arises if a pattern finishes with a because abc is interpreted as the first line of a pattern that starts with causing pcretest to read the next line as a continuation of the regular expression PATTERN MODIFIERS A pattern may be followed by any number of which are mostly single though some of these can be qualified by further characters Following Perl these are referred to below for the i even though the delimiter of the pattern need not always be a and no slash is used when writing modifiers White space may appear between the final pattern delimiter and the first and between the modifiers themselves For reference here is a complete list of modifiers They fall into several groups that are described in detail in the following sections set UTF mode set and x modifiers set the PCRE_CASELESS
Definition: pcretest.txt:345
#define F_DIFF
#define F_PROPERTY
the input buffer is automatically extended if it is too small An empty line signals the end of the data at which point a new regular expression is read The regular expressions are given enclosed in any non alphanumeric delimiters other than for in which case the new line characters are included within it It is possible to include the delimiter within the pattern by escaping for example abc def If you do the escape and the delimiter form part of the but since delimiters are always non this does not affect its interpretation If the terminating delimiter is immediately followed by a for abc then a backslash is added to the end of the pattern This is done to provide a way of testing the error condition that arises if a pattern finishes with a because abc is interpreted as the first line of a pattern that starts with causing pcretest to read the next line as a continuation of the regular expression PATTERN MODIFIERS A pattern may be followed by any number of which are mostly single though some of these can be qualified by further characters Following Perl these are referred to below for the i even though the delimiter of the pattern need not always be a and no slash is used when writing modifiers White space may appear between the final pattern delimiter and the first and between the modifiers themselves For reference here is a complete list of modifiers They fall into several groups that are described in detail in the following sections set UTF mode set and x modifiers set the PCRE_MULTILINE
Definition: pcretest.txt:345
int pcre32_exec(const pcre32 *code, const pcre32_extra *extra, PCRE_SPTR32 subject, int length, int startoffset, int options, int *ovector, int ovecsize)
pcre32_extra * pcre32_study(const pcre32 *code, int options, const char **errptr)
the caller of PCRE can change the selection at run time The default newline indicator is a single LF respectively If you specify enable newline is cr or enable newline is some of the standard tests will because the lines in the test files end with LF Even if the files are edited to change the line there are likely to be some failures With enable newline is anycrlf or enable newline is many tests should but there may be some failures By the sequence R in a pattern matches any Unicode line ending sequence This is independent of the option specifying what PCRE considers to be the end of a the caller of PCRE can restrict R to match only or CRLF You can make this the default by adding enable bsr anycrlf to the configure PCRE uses malloc() to get additional storage for processing capturing parentheses if there are more than 10 of them in a pattern.You can increase this threshold by setting
this is a typedef for a structure whose contents are not externally defined It is up to the caller to free the that it does not depend on memory the complete pcre data block is not fully because it may contain a copy of the tableptr argument which is an those that are compatible with but some others as well can also be set and unset from within the the contents of the options argument specifies their settings at the start of compilation and execution The PCRE_NO_UTF8_CHECK
Definition: pcre.txt:2066
int pcre16_exec(const pcre16 *code, const pcre16_extra *extra, PCRE_SPTR16 subject, int length, int startoffset, int options, int *ovector, int ovecsize)
void(* pcre16_free)(void *)
Definition: pcre.txt:296
THE PCRE BIT LIBRARY Starting with it is possible to compile a PCRE library that supports bit character including UTF as well as or instead of the original bit library The majority of the work to make this possible was done by Zoltan Herczeg The two libraries contain identical sets of functions used in exactly the same way Only the names of the functions and the data types of their arguments and results are different To avoid over complication and reduce the documentation maintenance most of the PCRE documentation describes the bit with only occasional references to the bit library This page describes what is different when you use the bit library but you must take care when processing any particular pattern to use functions from just one library For if you want to study a pattern that was compiled with you must do so with not pcre_study()
int pcre32_config(int what, void *where)
#define F_NOMATCH
int pcre16_config(int what, void *where)
THE PCRE BIT LIBRARY Starting with it is possible to compile a PCRE library that supports bit character including UTF as well as or instead of the original bit library The majority of the work to make this possible was done by Zoltan Herczeg The two libraries contain identical sets of used in exactly the same way Only the names of the functions and the data types of their argu ments and results are different To avoid over complication and reduce the documentation maintenance most of the PCRE documentation describes the bit with only occasional references to the bit library This page describes what is different when you use the bit library but you must take care when processing any particular pattern to use func tions from just one library For if you want to study a pat tern that was compiled with you must do so with not and you must free the study data with pcre h It contains prototypes for all the functions in all as well as definitions of struc error etc THE LIBRARY NAME In Unix like the bit library is called and can normally be accesss by adding lpcre16 to the command for linking an application that uses PCRE STRING TYPES In the bit strings are passed to PCRE library functions as vectors of bytes with the C type char *In the bit strings are passed as vectors of unsigned bit quantities The macro PCRE_UCHAR16 specifies an appropriate data and PCRE_SPTR16 is defined as const PCRE_UCHAR16 *In very many short int is a bit data type When PCRE is it defines PCRE_UCHAR16 as unsigned short int
Definition: pcre.txt:337
this is a typedef for a structure whose contents are not externally defined It is up to the caller to free the that it does not depend on memory the complete pcre data block is not fully because it may contain a copy of the tableptr argu which is an those that are compatible with but some others as well can also be set and unset from within the the contents of the options argument specifies their settings at the start of compilation and execution The and PCRE_NO_START_OPTIMIZE options can be set at the time of matching as well as at compile time If errptr is NULL
Definition: pcre.txt:2066
#define F_STUDY
#define F_NO32
in other force each pattern to be studied If s is all the JIT compile options are passed to pcre[16|32] causing just in time optimization to be set up if it is for both full and partial matching Specific JIT compile options can be selected by following s with a digit in the range which selects the JIT compile modes as the either specifying no studying at or suppressing JIT com pilation If the I or D option is present on a information about the result of studying is not included when studying is caused only by s and neither i nor d is present on the command line This behaviour means that the output from tests that are run with and without s should be except when options that output information about the actual running of a match are set The and tm which give information about resources are likely to produce different output with and without s Output may also differ if the C option is present on an individual pattern This uses callouts to trace the the matching and this may be different between studied and non studied patterns If the pattern contains MARK items there may also be for the same reason The s command line option can be overridden for spe cific patterns that should never be and match many times with a and output the resulting times per or because you will then get the size output a zillion and the timing will be distorted You can control the number of iterations that are used for timing by following t with a t iter ates times The default is to iterate times tm This is like t except that it times only the matching not the compile or study phases T TM These behave like t and but in at the end of a the total times for all and matches are output DESCRIPTION If pcretest is given two filename it reads from the first and writes to the second If it is given only one filename it reads from that file and writes to stdout it reads from stdin and writes to stdout
Definition: pcretest.txt:220
#define MUAP
int pcre16_fullinfo(const pcre16 *code, const pcre16_extra *extra, int what, void *where)
void pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *data)
const char * pattern
in others it may provoke an error e regex
Definition: pcregrep.txt:225
void pcre16_free_study(pcre16_extra *extra)
PCRE BIT API AUXILIARY FUNCTIONS pcre16_jit_stack * pcre16_jit_stack_alloc(int startsize, int maxsize)