pcre  8.33
About: PCRE - Perl-compatible regular expressions
  Fossies Dox: pcre-8.33.tar.gz  ("inofficial" and yet experimental doxygen-generated source code documentation)  

 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pcre_jit_test.c
Go to the documentation of this file.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Main Library written by Philip Hazel
9  Copyright (c) 1997-2012 University of Cambridge
10 
11  This JIT compiler regression test program was written by Zoltan Herczeg
12  Copyright (c) 2010-2012
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18  * Redistributions of source code must retain the above copyright notice,
19  this list of conditions and the following disclaimer.
20 
21  * Redistributions in binary form must reproduce the above copyright
22  notice, this list of conditions and the following disclaimer in the
23  documentation and/or other materials provided with the distribution.
24 
25  * Neither the name of the University of Cambridge nor the names of its
26  contributors may be used to endorse or promote products derived from
27  this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50 
51 
52 #include "pcre_internal.h"
53 
54 #define PCRE_BUG 0x80000000
55 
56 /*
57  Letter characters:
58  \xe6\x92\xad = 0x64ad = 25773 (kanji)
59  Non-letter characters:
60  \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61  \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62  \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63  \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
64  Newlines:
65  \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
66  \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
67  Othercase pairs:
68  \xc3\xa9 = 0xe9 = 233 (e')
69  \xc3\x89 = 0xc9 = 201 (E')
70  \xc3\xa1 = 0xe1 = 225 (a')
71  \xc3\x81 = 0xc1 = 193 (A')
72  \xc8\xba = 0x23a = 570
73  \xe2\xb1\xa5 = 0x2c65 = 11365
74  \xe1\xbd\xb8 = 0x1f78 = 8056
75  \xe1\xbf\xb8 = 0x1ff8 = 8184
76  \xf0\x90\x90\x80 = 0x10400 = 66560
77  \xf0\x90\x90\xa8 = 0x10428 = 66600
78  Mark property:
79  \xcc\x8d = 0x30d = 781
80  Special:
81  \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
82  \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
83  \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
84  \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
85  \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
86 */
87 
88 static int regression_tests(void);
89 
/* Program entry point.

   Queries PCRE_CONFIG_JIT through the first available library flavour
   (8-bit, then 16-bit, then 32-bit, selected at compile time). When the
   query reports a non-zero value the regression tests are run and their
   result becomes the exit status; otherwise a notice is printed and the
   program exits with status 1. */
int main(void)
{
	/* Stays 0 if no pcre*_config() call is compiled in or the call
	   leaves it untouched. */
	int jit_available = 0;

#if defined(SUPPORT_PCRE8)
	pcre_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined(SUPPORT_PCRE16)
	pcre16_config(PCRE_CONFIG_JIT, &jit_available);
#elif defined(SUPPORT_PCRE32)
	pcre32_config(PCRE_CONFIG_JIT, &jit_available);
#endif

	if (jit_available)
		return regression_tests();

	printf("JIT must be enabled to run pcre_jit_test\n");
	return 1;
}
106 
107 /* --------------------------------------------------------------------------------------- */
108 
/* Build-time guard: stop compilation with an explicit message unless at
   least one of SUPPORT_PCRE8, SUPPORT_PCRE16 or SUPPORT_PCRE32 is defined. */
109 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
110 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
111 #endif
112 
/* Shorthand combinations of PCRE options, used as the first field of the
   entries in the regression test table. Each letter of a macro name stands
   for one option bit:
     C = PCRE_CASELESS
     M = PCRE_MULTILINE
     U = PCRE_UTF8
     A = PCRE_NEWLINE_ANYCRLF
     P = PCRE_UCP */
113 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
114 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
115 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
116 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
117 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
118 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
119 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
120 
/* Bits that are OR-ed into the second field of each regression test table
   entry (for example "1 | F_NOMATCH" or "0 | F_PROPERTY"). The F_* values
   all lie above the 16 bits covered by OFFSET_MASK, so a small number and
   any set of F_* modifiers can share one int. */

/* Covers the low 16 bits; presumably used to extract the numeric part of
   the field -- confirm against the code that reads the table. */
121 #define OFFSET_MASK 0x00ffff
/* NOTE(review): F_NO8 / F_NO16 / F_NO32 look like per-library-width
   exclusions; their consumers are not visible in this part of the file. */
122 #define F_NO8 0x010000
123 #define F_NO16 0x020000
/* NOTE(review): F_NO32 has the same value as F_NO16, so the two flags are
   indistinguishable once stored. This may be deliberate -- confirm before
   changing either value. */
124 #define F_NO32 0x020000
/* Set on table entries whose pattern is not expected to match the input
   (e.g. "^ab" against "aab"). */
125 #define F_NOMATCH 0x040000
/* NOTE(review): meaning of F_DIFF is not visible in this part of the file. */
126 #define F_DIFF 0x080000
/* NOTE(review): meaning of F_FORCECONV is not visible in this part of the
   file. */
127 #define F_FORCECONV 0x100000
/* Set on table entries whose pattern uses Unicode property constructs
   (\p, \P or \X). */
128 #define F_PROPERTY 0x200000
/* NOTE(review): meaning of F_STUDY is not visible in this part of the file. */
129 #define F_STUDY 0x400000
130 
/* One entry of the regression test table (regression_test_cases).

   The table initialisers supply four values per entry, in this order:
     flags        - PCRE option bits (MUA, CMA, PCRE_NOTEMPTY, ...).
     start_offset - a small number in the low 16 bits (see OFFSET_MASK)
                    with F_* modifier bits OR-ed in above it.
     pattern      - the regular expression, as a C string.
     input        - the subject string the pattern is applied to.

   NOTE(review): the struct header and the second member were missing from
   this listing and have been restored. The member's type and position
   follow from the table initialisers; its name, start_offset, is inferred
   from the table's "Start offset." section and should be confirmed
   against the code that reads the table. */
struct regression_test_case {
	int flags;
	int start_offset;
	const char *pattern;
	const char *input;
};
137 
138 static struct regression_test_case regression_test_cases[] = {
139  /* Constant strings. */
140  { MUA, 0, "AbC", "AbAbC" },
141  { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
142  { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
143  { MA, 0, "[^a]", "aAbB" },
144  { CMA, 0, "[^m]", "mMnN" },
145  { MA, 0, "a[^b][^#]", "abacd" },
146  { CMA, 0, "A[^B][^E]", "abacd" },
147  { CMUA, 0, "[^x][^#]", "XxBll" },
148  { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
149  { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
150  { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
151  { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
152  { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
153  { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
154  { MUA, 0, "[axd]", "sAXd" },
155  { CMUA, 0, "[axd]", "sAXd" },
156  { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
157  { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
158  { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
159  { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
160  { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
161  { MUA, 0, "[^a]", "\xc2\x80[]" },
162  { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
163  { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
164  { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
165  { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
166  { PCRE_CASELESS, 0, "a1", "Aa1" },
167  { MA, 0, "\\Ca", "cda" },
168  { CMA, 0, "\\Ca", "CDA" },
169  { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
170  { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
171  { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
172  { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
173  { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
174  { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
175 
176  /* Assertions. */
177  { MUA, 0, "\\b[^A]", "A_B#" },
178  { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
179  { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
180  { MAP, 0, "\\B", "_\xa1" },
181  { MAP, 0, "\\b_\\b[,A]\\B", "_," },
182  { MUAP, 0, "\\b", "\xe6\x92\xad!" },
183  { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
184  { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
185  { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
186  { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
187  { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
188  { MA, 0 | F_NOMATCH, "\\R^", "\n" },
189  { MA, 1 | F_NOMATCH, "^", "\n" },
190  { 0, 0, "^ab", "ab" },
191  { 0, 0 | F_NOMATCH, "^ab", "aab" },
192  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
193  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
194  { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
195  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
196  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
197  { 0, 0, "ab$", "ab" },
198  { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
199  { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
200  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
201  { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
202  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
203  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
204  { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
205  { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
206  { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
207  { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
208  { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
209  { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
210  { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
211  { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
212  { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
213  { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
214  { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
215  { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
216  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
217  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
218  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
219  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
220  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
221  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
226  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
227  { MA, 0, "\\Aa", "aaa" },
228  { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
229  { MA, 1, "\\Ga", "aaa" },
230  { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
231  { MA, 0, "a\\z", "aaa" },
232  { MA, 0 | F_NOMATCH, "a\\z", "aab" },
233 
234  /* Brackets. */
235  { MUA, 0, "(ab|bb|cd)", "bacde" },
236  { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
237  { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
238  { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
239  { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
240  { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
241 
242  /* Greedy and non-greedy ? operators. */
243  { MUA, 0, "(?:a)?a", "laab" },
244  { CMUA, 0, "(A)?A", "llaab" },
245  { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
246  { MUA, 0, "(a)?a", "manm" },
247  { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
248  { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
249  { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
250 
251  /* Greedy and non-greedy + operators */
252  { MUA, 0, "(aa)+aa", "aaaaaaa" },
253  { MUA, 0, "(aa)+?aa", "aaaaaaa" },
254  { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
255  { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
256  { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
257  { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
258  { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
259 
260  /* Greedy and non-greedy * operators */
261  { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
262  { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
263  { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
264  { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
265  { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
266  { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
267  { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
268  { MA, 0, "((?:a|)*){0}a", "a" },
269 
270  /* Combining ? + * operators */
271  { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
272  { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
273  { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
274  { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
275  { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
276 
277  /* Single character iterators. */
278  { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
279  { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
280  { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
281  { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
282  { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
283  { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
284  { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
285  { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
286  { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
287  { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
288  { MUA, 0, "(a?+[^b])+", "babaacacb" },
289  { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
290  { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
291  { CMUA, 0, "[c-f]+k", "DemmFke" },
292  { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
293  { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
294  { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
295  { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
296  { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
297  { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
298  { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
299  { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
300  { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
301  { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
302  { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
303  { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
304  { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
305  { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
306  { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
307  { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
308  { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
309  { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
310 
311  /* Bracket repeats with limit. */
312  { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
313  { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
314  { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
315  { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
316  { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
317  { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
318  { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
319  { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
320  { MUA, 0, "(ab){4,6}?M", "abababababababM" },
321 
322  /* Basic character sets. */
323  { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
324  { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
325  { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
326  { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
327  { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
328  { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
329 
330  /* Unicode properties. */
331  { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
332  { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
333  { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
334  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
335  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
336  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
337  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
338  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
339  { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
340  { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
341  { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
342  { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
343  { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
344  { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
345  { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
346  { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
347  { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
348  { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
349  { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
350  { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
351 
352  /* Possible empty brackets. */
353  { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
354  { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
355  { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
356  { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
357  { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
358  { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
359  { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
360  { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
361  { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
362  { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
363 
364  /* Start offset. */
365  { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
366  { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
367  { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
368  { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
369 
370  /* Newline. */
371  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
372  { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
373  { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
374 
375  /* Any character except newline or any newline. */
376  { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
377  { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
378  { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
379  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
380  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
381  { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
382  { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
383  { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
384  { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
385  { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
386  { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
387  { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
388  { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
389  { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
390  { MUA, 0, "\\R+", "ab\r\n\r" },
391  { MUA, 0, "\\R*", "ab\r\n\r" },
392  { MUA, 0, "\\R*", "\r\n\r" },
393  { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
394  { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
395  { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
396  { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
397  { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
398  { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
399  { MUA, 0, "\\R*\\R\\R", "\n\r" },
400  { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
401  { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
402 
403  /* Atomic groups (no fallback from "next" direction). */
404  { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
405  { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
406  { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
407  "bababcdedefgheijijklmlmnop" },
408  { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
409  { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
410  { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
411  { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
412  { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
413  { MUA, 0, "(?>x|)*$", "aaa" },
414  { MUA, 0, "(?>(x)|)*$", "aaa" },
415  { MUA, 0, "(?>x|())*$", "aaa" },
416  { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
417  { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
418  { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
419  { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
420  { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
421  { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
422  { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
423  { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
424  { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
425  { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
426  { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
427  { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
428  { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
429  { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
430  { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
431  { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
432  { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
433  { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
434  { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
435  { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
436  { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
437  { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
438  { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
439  { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
440  { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
441  { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
442 
443  /* Possessive quantifiers. */
444  { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
445  { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
446  { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
447  { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
448  { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
449  { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
450  { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
451  { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
452  { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
453  { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
454  { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
455  { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
456  { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
457  { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
458  { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
459  { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
460  { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
461  { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
462  { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
463  { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
464  { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
465  { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
466  { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
467  { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
468  { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
469  { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
470  { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
471  { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
472  { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
473  { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
474  { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
475  { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
476  { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
477  { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
478  { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
479 
480  /* Back references. */
481  { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
482  { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
483  { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
484  { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
485  { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
486  { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
487  { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
488  { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
489  { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
490  { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
491  { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
492  { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
493  { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
494  { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
495  { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
496  { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
497  { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
498  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
499  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
500  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
501  { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
502  { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
503  { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
504 
505  /* Assertions. */
506  { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
507  { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
508  { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
509  { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
510  { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
511  { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
512  { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
513  { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
514  { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
515  { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
516  { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
517  { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
518  { MUA, 0, "((?(?=a)a)+k)", "bbak" },
519  { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
520  { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
521  { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
522  { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
523  { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
524  { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
525  { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
526  { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
527  { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
528  { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
529  { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
530 
531  /* Not empty, ACCEPT, FAIL */
532  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
533  { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
534  { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
535  { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
536  { MUA, 0, "a(*ACCEPT)b", "ab" },
537  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
538  { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
539  { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
540  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
541  { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
542  { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
543  { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
544  { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
545  { MUA, 0, "((a(*ACCEPT)b))", "ab" },
546  { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
547  { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
548  { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
549  { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
550  { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
551 
552  /* Conditional blocks. */
553  { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
554  { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
555  { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
556  { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
557  { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
558  { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
559  { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
560  { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
561  { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
562  { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
563  { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
564  { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
565  { MUA, 0, "(?(?=a)ab)", "a" },
566  { MUA, 0, "(?(?<!b)c)", "b" },
567  { MUA, 0, "(?(DEFINE)a(b))", "a" },
568  { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
569  { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
570  { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
571  { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
572  { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
573  { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
574  { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
575  { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
576  { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
577  { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
578  { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
579  { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
580  { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
581  { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
582  { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
583  { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
584  { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
585  { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
586  { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
587  { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
588 
589  /* Set start of match. */
590  { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
591  { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
592  { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
593  { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
594  { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
595 
596  /* First line. */
597  { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
598  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
599  { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
600  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
601  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
602  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
603  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
604  { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
605  { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
606  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
607  { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
608  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
609  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
610  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
611  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
612  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
613  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
614  { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
615  { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
616 
617  /* Recurse. */
618  { MUA, 0, "(a)(?1)", "aa" },
619  { MUA, 0, "((a))(?1)", "aa" },
620  { MUA, 0, "(b|a)(?1)", "aa" },
621  { MUA, 0, "(b|(a))(?1)", "aa" },
622  { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
623  { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
624  { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
625  { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
626  { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
627  { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
628  { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
629  { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
630  { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
631  { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
632  { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
633  { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
634  { MUA, 0, "b|<(?R)*>", "<<b>" },
635  { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
636  { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
637  { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
638  { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
639  { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
640  { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
641  { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
642  { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
643  { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
644  { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
645  { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
646  { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
647 
648  /* 16 bit specific tests. */
649  { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
650  { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
651  { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
652  { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
653  { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
654  { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
655  { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
656  { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
657  { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
658  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
659  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
660  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
661  { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
662  { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
663  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
664  { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
665  { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
666  { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
667  { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
668  { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
669  { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
670  { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
671  { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
672  { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
673  { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
674  { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
675  { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
676  { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
677  { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
678  { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
679  { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
680 
681  /* Partial matching. */
682  { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
683  { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
684  { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
685  { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
686  { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
687  { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
688  { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
689  { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
690 
691  /* (*MARK) verb. */
692  { MUA, 0, "a(*MARK:aa)a", "ababaa" },
693  { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
694  { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
695  { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
696  { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
697  { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
698  { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
699  { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
700  { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
701  { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
702  { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
703  { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
704  { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
705  { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
706  { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
707 
708  /* (*COMMIT) verb. */
709  { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
710  { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
711  { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
712  { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
713  { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
714  { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
715 
716  /* (*PRUNE) verb. */
717  { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
718  { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
719  { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
720  { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
721  { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
722  { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
723  { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
724  { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
725  { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
726  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
727  { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
728  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
729  { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
730  { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
731  { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
732  { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
733  { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
734  { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
735  { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
736  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
737  { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
738  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
739  { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
740  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
741  { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
742  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
743  { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
744  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
745  { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
746  { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
747 
748  /* (*SKIP) verb. */
749  { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
750 
751  /* (*THEN) verb. */
752  { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
753  { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
754  { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
755  { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
756  { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
757  { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
758  { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
759  { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
760  { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
761 
762  /* Deep recursion. */
763  { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
764  { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
765  { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
766 
767  /* Deep recursion: Stack limit reached. */
768  { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
769  { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
770  { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
771  { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
772  { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
773 
774  { 0, 0, NULL, NULL }
775 };
776 
/* Returns a private heap copy of PCRE's built-in character tables. Working on
   a heap copy lets valgrind report invalid reads and writes on the tables.

   mode == 0: return the copy, creating it on the first successful call.
   mode != 0: release the copy (if any) and return NULL.

   NULL is also returned when the default tables cannot be obtained or when
   the allocation fails. */
static const unsigned char *tables(int mode)
{
	/* Unfortunately the table size cannot be obtained from pcre_fullinfo.
	Since this is a test program, hard-coding it is acceptable for now. */
	enum { TABLES_LENGTH = 1088 };
	static unsigned char *tables_copy;
	const char *errorptr;
	int erroroffset;
	unsigned char *default_tables = NULL;
#if defined SUPPORT_PCRE8
	pcre *regex;
	char null_str[1] = { 0 };
#elif defined SUPPORT_PCRE16
	pcre16 *regex;
	PCRE_UCHAR16 null_str[1] = { 0 };
#elif defined SUPPORT_PCRE32
	pcre32 *regex;
	PCRE_UCHAR32 null_str[1] = { 0 };
#endif

	if (mode != 0) {
		/* free(NULL) is a no-op, so no guard is needed. */
		free(tables_copy);
		tables_copy = NULL;
		return NULL;
	}

	if (tables_copy != NULL)
		return tables_copy;

	/* Compile an empty pattern with no table argument, then ask the
	library which default tables it used. */
#if defined SUPPORT_PCRE8
	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre_free(regex);
	}
#elif defined SUPPORT_PCRE16
	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre16_free(regex);
	}
#elif defined SUPPORT_PCRE32
	regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
	if (regex != NULL) {
		pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
		pcre32_free(regex);
	}
#endif
	/* Shouldn't ever happen. */
	if (default_tables == NULL)
		return NULL;

	tables_copy = malloc(TABLES_LENGTH);
	if (tables_copy != NULL)
		memcpy(tables_copy, default_tables, TABLES_LENGTH);
	return tables_copy;
}
839 
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument already is
   the stack, so it is handed back unchanged. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = arg;

	return jit_stack;
}
#endif
846 
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument already is
   the stack, so it is handed back unchanged. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = arg;

	return jit_stack;
}
#endif
853 
#ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library: the opaque argument already is
   the stack, so it is handed back unchanged. */
static pcre32_jit_stack* callback32(void *arg)
{
	pcre32_jit_stack *jit_stack = arg;

	return jit_stack;
}
#endif
860 
#ifdef SUPPORT_PCRE8
/* JIT stack shared by the 8 bit tests; NULL while none is allocated. */
static pcre_jit_stack *stack8;

/* Returns the shared 8 bit JIT stack, allocating it first when none exists. */
static pcre_jit_stack *getstack8(void)
{
	if (stack8 == NULL)
		stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
	return stack8;
}

/* With a non-NULL extra: assigns the shared stack to it through callback8.
   With a NULL extra: frees the shared stack, if any, and forgets it. */
static void setstack8(pcre_extra *extra)
{
	if (extra != NULL) {
		pcre_assign_jit_stack(extra, callback8, getstack8());
		return;
	}

	if (stack8 != NULL)
		pcre_jit_stack_free(stack8);
	stack8 = NULL;
}
#endif /* SUPPORT_PCRE8 */
883 
#ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16 bit tests; NULL while none is allocated. */
static pcre16_jit_stack *stack16;

/* Returns the shared 16 bit JIT stack, allocating it first when none exists. */
static pcre16_jit_stack *getstack16(void)
{
	if (stack16 == NULL)
		stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
	return stack16;
}

/* With a non-NULL extra: assigns the shared stack to it through callback16.
   With a NULL extra: frees the shared stack, if any, and forgets it. */
static void setstack16(pcre16_extra *extra)
{
	if (extra != NULL) {
		pcre16_assign_jit_stack(extra, callback16, getstack16());
		return;
	}

	if (stack16 != NULL)
		pcre16_jit_stack_free(stack16);
	stack16 = NULL;
}
#endif /* SUPPORT_PCRE16 */
906 
#ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32 bit tests; NULL while none is allocated. */
static pcre32_jit_stack *stack32;

/* Returns the shared 32 bit JIT stack, allocating it first when none exists. */
static pcre32_jit_stack *getstack32(void)
{
	if (stack32 == NULL)
		stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
	return stack32;
}

/* With a non-NULL extra: assigns the shared stack to it through callback32.
   With a NULL extra: frees the shared stack, if any, and forgets it. */
static void setstack32(pcre32_extra *extra)
{
	if (extra != NULL) {
		pcre32_assign_jit_stack(extra, callback32, getstack32());
		return;
	}

	if (stack32 != NULL)
		pcre32_jit_stack_free(stack32);
	stack32 = NULL;
}
#endif /* SUPPORT_PCRE32 */
929 
930 #ifdef SUPPORT_PCRE16
931 
932 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
933 {
934  unsigned char *iptr = (unsigned char*)input;
935  PCRE_UCHAR16 *optr = output;
936  unsigned int c;
937 
938  if (max_length == 0)
939  return 0;
940 
941  while (*iptr && max_length > 1) {
942  c = 0;
943  if (offsetmap)
944  *offsetmap++ = (int)(iptr - (unsigned char*)input);
945 
946  if (!(*iptr & 0x80))
947  c = *iptr++;
948  else if (!(*iptr & 0x20)) {
949  c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
950  iptr += 2;
951  } else if (!(*iptr & 0x10)) {
952  c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
953  iptr += 3;
954  } else if (!(*iptr & 0x08)) {
955  c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
956  iptr += 4;
957  }
958 
959  if (c < 65536) {
960  *optr++ = c;
961  max_length--;
962  } else if (max_length <= 2) {
963  *optr = '\0';
964  return (int)(optr - output);
965  } else {
966  c -= 0x10000;
967  *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
968  *optr++ = 0xdc00 | (c & 0x3ff);
969  max_length -= 2;
970  if (offsetmap)
971  offsetmap++;
972  }
973  }
974  if (offsetmap)
975  *offsetmap = (int)(iptr - (unsigned char*)input);
976  *optr = '\0';
977  return (int)(optr - output);
978 }
979 
980 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
981 {
982  unsigned char *iptr = (unsigned char*)input;
983  PCRE_UCHAR16 *optr = output;
984 
985  if (max_length == 0)
986  return 0;
987 
988  while (*iptr && max_length > 1) {
989  *optr++ = *iptr++;
990  max_length--;
991  }
992  *optr = '\0';
993  return (int)(optr - output);
994 }
995 
996 #define REGTEST_MAX_LENGTH16 4096
997 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
998 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
999 
1000 #endif /* SUPPORT_PCRE16 */
1001 
1002 #ifdef SUPPORT_PCRE32
1003 
1004 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1005 {
1006  unsigned char *iptr = (unsigned char*)input;
1007  PCRE_UCHAR32 *optr = output;
1008  unsigned int c;
1009 
1010  if (max_length == 0)
1011  return 0;
1012 
1013  while (*iptr && max_length > 1) {
1014  c = 0;
1015  if (offsetmap)
1016  *offsetmap++ = (int)(iptr - (unsigned char*)input);
1017 
1018  if (!(*iptr & 0x80))
1019  c = *iptr++;
1020  else if (!(*iptr & 0x20)) {
1021  c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1022  iptr += 2;
1023  } else if (!(*iptr & 0x10)) {
1024  c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1025  iptr += 3;
1026  } else if (!(*iptr & 0x08)) {
1027  c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1028  iptr += 4;
1029  }
1030 
1031  *optr++ = c;
1032  max_length--;
1033  }
1034  if (offsetmap)
1035  *offsetmap = (int)(iptr - (unsigned char*)input);
1036  *optr = 0;
1037  return (int)(optr - output);
1038 }
1039 
1040 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1041 {
1042  unsigned char *iptr = (unsigned char*)input;
1043  PCRE_UCHAR32 *optr = output;
1044 
1045  if (max_length == 0)
1046  return 0;
1047 
1048  while (*iptr && max_length > 1) {
1049  *optr++ = *iptr++;
1050  max_length--;
1051  }
1052  *optr = '\0';
1053  return (int)(optr - output);
1054 }
1055 
1056 #define REGTEST_MAX_LENGTH32 4096
1057 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1058 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1059 
1060 #endif /* SUPPORT_PCRE32 */
1061 
/* Returns 1 when every byte of the NUL terminated `input` is 7 bit ASCII
   (an empty string qualifies), and 0 as soon as a byte above 127 is met. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if ((*cursor & 0x80) != 0)
			return 0;
	}
	return 1;
}
1072 
1073 static int regression_tests(void)
1074 {
1075  struct regression_test_case *current = regression_test_cases;
1076  const char *error;
1077  char *cpu_info;
1078  int i, err_offs;
1079  int is_successful, is_ascii_pattern, is_ascii_input;
1080  int total = 0;
1081  int successful = 0;
1082  int successful_row = 0;
1083  int counter = 0;
1084  int study_mode;
1085  int utf = 0, ucp = 0;
1086  int disabled_flags = 0;
1087 #ifdef SUPPORT_PCRE8
1088  pcre *re8;
1089  pcre_extra *extra8;
1090  pcre_extra dummy_extra8;
1091  int ovector8_1[32];
1092  int ovector8_2[32];
1093  int return_value8[2];
1094  unsigned char *mark8_1, *mark8_2;
1095 #endif
1096 #ifdef SUPPORT_PCRE16
1097  pcre16 *re16;
1098  pcre16_extra *extra16;
1099  pcre16_extra dummy_extra16;
1100  int ovector16_1[32];
1101  int ovector16_2[32];
1102  int return_value16[2];
1103  PCRE_UCHAR16 *mark16_1, *mark16_2;
1104  int length16;
1105 #endif
1106 #ifdef SUPPORT_PCRE32
1107  pcre32 *re32;
1108  pcre32_extra *extra32;
1109  pcre32_extra dummy_extra32;
1110  int ovector32_1[32];
1111  int ovector32_2[32];
1112  int return_value32[2];
1113  PCRE_UCHAR32 *mark32_1, *mark32_2;
1114  int length32;
1115 #endif
1116 
1117  /* This test compares the behaviour of interpreter and JIT. Although disabling
1118  utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1119  still considered successful from pcre_jit_test point of view. */
1120 
1121 #if defined SUPPORT_PCRE8
1122  pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1123 #elif defined SUPPORT_PCRE16
1124  pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1125 #elif defined SUPPORT_PCRE32
1126  pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1127 #endif
1128 
1129  printf("Running JIT regression tests\n");
1130  printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1131 
1132 #if defined SUPPORT_PCRE8
1133  pcre_config(PCRE_CONFIG_UTF8, &utf);
1134  pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1135 #elif defined SUPPORT_PCRE16
1136  pcre16_config(PCRE_CONFIG_UTF16, &utf);
1137  pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1138 #elif defined SUPPORT_PCRE16
1139  pcre32_config(PCRE_CONFIG_UTF32, &utf);
1140  pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1141 #endif
1142 
1143  if (!utf)
1144  disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1145  if (!ucp)
1146  disabled_flags |= PCRE_UCP;
1147 #ifdef SUPPORT_PCRE8
1148  printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1149 #endif
1150 #ifdef SUPPORT_PCRE16
1151  printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1152 #endif
1153 #ifdef SUPPORT_PCRE32
1154  printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1155 #endif
1156 
1157  while (current->pattern) {
1158  /* printf("\nPattern: %s :\n", current->pattern); */
1159  total++;
1160  if (current->start_offset & F_PROPERTY) {
1161  is_ascii_pattern = 0;
1162  is_ascii_input = 0;
1163  } else {
1164  is_ascii_pattern = check_ascii(current->pattern);
1165  is_ascii_input = check_ascii(current->input);
1166  }
1167 
1168  if (current->flags & PCRE_PARTIAL_SOFT)
1169  study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1170  else if (current->flags & PCRE_PARTIAL_HARD)
1171  study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1172  else
1173  study_mode = PCRE_STUDY_JIT_COMPILE;
1174  error = NULL;
1175 #ifdef SUPPORT_PCRE8
1176  re8 = NULL;
1177  if (!(current->start_offset & F_NO8))
1178  re8 = pcre_compile(current->pattern,
1179  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1180  &error, &err_offs, tables(0));
1181 
1182  extra8 = NULL;
1183  if (re8) {
1184  error = NULL;
1185  extra8 = pcre_study(re8, study_mode, &error);
1186  if (!extra8) {
1187  printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1188  pcre_free(re8);
1189  re8 = NULL;
1190  }
1191  else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1192  printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1193  pcre_free_study(extra8);
1194  pcre_free(re8);
1195  re8 = NULL;
1196  }
1197  extra8->flags |= PCRE_EXTRA_MARK;
1198  } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1199  printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1200 #endif
1201 #ifdef SUPPORT_PCRE16
1202  if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1203  convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1204  else
1205  copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1206 
1207  re16 = NULL;
1208  if (!(current->start_offset & F_NO16))
1209  re16 = pcre16_compile(regtest_buf16,
1210  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1211  &error, &err_offs, tables(0));
1212 
1213  extra16 = NULL;
1214  if (re16) {
1215  error = NULL;
1216  extra16 = pcre16_study(re16, study_mode, &error);
1217  if (!extra16) {
1218  printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1219  pcre16_free(re16);
1220  re16 = NULL;
1221  }
1222  else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1223  printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1224  pcre16_free_study(extra16);
1225  pcre16_free(re16);
1226  re16 = NULL;
1227  }
1228  extra16->flags |= PCRE_EXTRA_MARK;
1229  } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1230  printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1231 #endif
1232 #ifdef SUPPORT_PCRE32
1233  if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1234  convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1235  else
1236  copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1237 
1238  re32 = NULL;
1239  if (!(current->start_offset & F_NO32))
1240  re32 = pcre32_compile(regtest_buf32,
1241  current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1242  &error, &err_offs, tables(0));
1243 
1244  extra32 = NULL;
1245  if (re32) {
1246  error = NULL;
1247  extra32 = pcre32_study(re32, study_mode, &error);
1248  if (!extra32) {
1249  printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1250  pcre32_free(re32);
1251  re32 = NULL;
1252  }
1253  if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1254  printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1255  pcre32_free_study(extra32);
1256  pcre32_free(re32);
1257  re32 = NULL;
1258  }
1259  extra32->flags |= PCRE_EXTRA_MARK;
1260  } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
1261  printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1262 #endif
1263 
1264  counter++;
1265  if ((counter & 0x3) != 0) {
1266 #ifdef SUPPORT_PCRE8
1267  setstack8(NULL);
1268 #endif
1269 #ifdef SUPPORT_PCRE16
1270  setstack16(NULL);
1271 #endif
1272 #ifdef SUPPORT_PCRE32
1273  setstack32(NULL);
1274 #endif
1275  }
1276 
1277 #ifdef SUPPORT_PCRE8
1278  return_value8[0] = -1000;
1279  return_value8[1] = -1000;
1280  for (i = 0; i < 32; ++i)
1281  ovector8_1[i] = -2;
1282  for (i = 0; i < 32; ++i)
1283  ovector8_2[i] = -2;
1284  if (re8) {
1285  mark8_1 = NULL;
1286  mark8_2 = NULL;
1287  extra8->mark = &mark8_1;
1288 
1289  if ((counter & 0x1) != 0) {
1290  setstack8(extra8);
1291  return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1292  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1293  } else
1294  return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1295  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
1296  memset(&dummy_extra8, 0, sizeof(pcre_extra));
1297  dummy_extra8.flags = PCRE_EXTRA_MARK;
1298  if (current->start_offset & F_STUDY) {
1299  dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1300  dummy_extra8.study_data = extra8->study_data;
1301  }
1302  dummy_extra8.mark = &mark8_2;
1303  return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1304  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1305  }
1306 #endif
1307 
1308 #ifdef SUPPORT_PCRE16
1309  return_value16[0] = -1000;
1310  return_value16[1] = -1000;
1311  for (i = 0; i < 32; ++i)
1312  ovector16_1[i] = -2;
1313  for (i = 0; i < 32; ++i)
1314  ovector16_2[i] = -2;
1315  if (re16) {
1316  mark16_1 = NULL;
1317  mark16_2 = NULL;
1318  if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1319  length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1320  else
1321  length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1322  extra16->mark = &mark16_1;
1323  if ((counter & 0x1) != 0) {
1324  setstack16(extra16);
1325  return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1326  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1327  } else
1328  return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1329  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
1330  memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1331  dummy_extra16.flags = PCRE_EXTRA_MARK;
1332  if (current->start_offset & F_STUDY) {
1333  dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1334  dummy_extra16.study_data = extra16->study_data;
1335  }
1336  dummy_extra16.mark = &mark16_2;
1337  return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1338  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1339  }
1340 #endif
1341 
1342 #ifdef SUPPORT_PCRE32
1343  return_value32[0] = -1000;
1344  return_value32[1] = -1000;
1345  for (i = 0; i < 32; ++i)
1346  ovector32_1[i] = -2;
1347  for (i = 0; i < 32; ++i)
1348  ovector32_2[i] = -2;
1349  if (re32) {
1350  mark32_1 = NULL;
1351  mark32_2 = NULL;
1352  if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1353  length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1354  else
1355  length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1356  extra32->mark = &mark32_1;
1357  if ((counter & 0x1) != 0) {
1358  setstack32(extra32);
1359  return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1360  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
1361  } else
1362  return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1363  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
1364  memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1365  dummy_extra32.flags = PCRE_EXTRA_MARK;
1366  if (current->start_offset & F_STUDY) {
1367  dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1368  dummy_extra32.study_data = extra32->study_data;
1369  }
1370  dummy_extra32.mark = &mark32_2;
1371  return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1372  current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
1373  }
1374 #endif
1375 
1376  /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1377  return_value8[0], return_value16[0],
1378  ovector8_1[0], ovector8_1[1],
1379  ovector16_1[0], ovector16_1[1],
1380  ovector32_1[0], ovector32_1[1],
1381  (current->flags & PCRE_CASELESS) ? "C" : ""); */
1382 
1383  /* If F_DIFF is set, just run the test, but do not compare the results.
1384  Segfaults can still be captured. */
1385 
1386  is_successful = 1;
1387  if (!(current->start_offset & F_DIFF)) {
1388 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1389  if (!(current->start_offset & F_FORCECONV)) {
1390  int return_value;
1391 
1392  /* All results must be the same. */
1393 #ifdef SUPPORT_PCRE8
1394  if ((return_value = return_value8[0]) != return_value8[1]) {
1395  printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1396  return_value8[0], return_value8[1], total, current->pattern, current->input);
1397  is_successful = 0;
1398  } else
1399 #endif
1400 #ifdef SUPPORT_PCRE16
1401  if ((return_value = return_value16[0]) != return_value16[1]) {
1402  printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1403  return_value16[0], return_value16[1], total, current->pattern, current->input);
1404  is_successful = 0;
1405  } else
1406 #endif
1407 #ifdef SUPPORT_PCRE32
1408  if ((return_value = return_value32[0]) != return_value32[1]) {
1409  printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1410  return_value32[0], return_value32[1], total, current->pattern, current->input);
1411  is_successful = 0;
1412  } else
1413 #endif
1414 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1415  if (return_value8[0] != return_value16[0]) {
1416  printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1417  return_value8[0], return_value16[0],
1418  total, current->pattern, current->input);
1419  is_successful = 0;
1420  } else
1421 #endif
1422 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1423  if (return_value8[0] != return_value32[0]) {
1424  printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1425  return_value8[0], return_value32[0],
1426  total, current->pattern, current->input);
1427  is_successful = 0;
1428  } else
1429 #endif
1430 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1431  if (return_value16[0] != return_value32[0]) {
1432  printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1433  return_value16[0], return_value32[0],
1434  total, current->pattern, current->input);
1435  is_successful = 0;
1436  } else
1437 #endif
1438  if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1439  if (return_value == PCRE_ERROR_PARTIAL) {
1440  return_value = 2;
1441  } else {
1442  return_value *= 2;
1443  }
1444 #ifdef SUPPORT_PCRE8
1445  return_value8[0] = return_value;
1446 #endif
1447 #ifdef SUPPORT_PCRE16
1448  return_value16[0] = return_value;
1449 #endif
1450 #ifdef SUPPORT_PCRE32
1451  return_value32[0] = return_value;
1452 #endif
1453  /* Transform back the results. */
1454  if (current->flags & PCRE_UTF8) {
1455 #ifdef SUPPORT_PCRE16
1456  for (i = 0; i < return_value; ++i) {
1457  if (ovector16_1[i] >= 0)
1458  ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1459  if (ovector16_2[i] >= 0)
1460  ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1461  }
1462 #endif
1463 #ifdef SUPPORT_PCRE32
1464  for (i = 0; i < return_value; ++i) {
1465  if (ovector32_1[i] >= 0)
1466  ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1467  if (ovector32_2[i] >= 0)
1468  ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1469  }
1470 #endif
1471  }
1472 
1473  for (i = 0; i < return_value; ++i) {
1474 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1475  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1476  printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1477  i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1478  total, current->pattern, current->input);
1479  is_successful = 0;
1480  }
1481 #endif
1482 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1483  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1484  printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1485  i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1486  total, current->pattern, current->input);
1487  is_successful = 0;
1488  }
1489 #endif
1490 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1491  if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1492  printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1493  i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1494  total, current->pattern, current->input);
1495  is_successful = 0;
1496  }
1497 #endif
1498  }
1499  }
1500  } else
1501 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1502  {
1503  /* Only the JIT and interpreter results of each enabled bit width (8, 16, 32) must be equal; results are not compared across widths here. */
1504 #ifdef SUPPORT_PCRE8
1505  if (return_value8[0] != return_value8[1]) {
1506  printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1507  return_value8[0], return_value8[1], total, current->pattern, current->input);
1508  is_successful = 0;
1509  } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1510  if (return_value8[0] == PCRE_ERROR_PARTIAL)
1511  return_value8[0] = 2;
1512  else
1513  return_value8[0] *= 2;
1514 
1515  for (i = 0; i < return_value8[0]; ++i)
1516  if (ovector8_1[i] != ovector8_2[i]) {
1517  printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1518  i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1519  is_successful = 0;
1520  }
1521  }
1522 #endif
1523 
1524 #ifdef SUPPORT_PCRE16
1525  if (return_value16[0] != return_value16[1]) {
1526  printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1527  return_value16[0], return_value16[1], total, current->pattern, current->input);
1528  is_successful = 0;
1529  } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1530  if (return_value16[0] == PCRE_ERROR_PARTIAL)
1531  return_value16[0] = 2;
1532  else
1533  return_value16[0] *= 2;
1534 
1535  for (i = 0; i < return_value16[0]; ++i)
1536  if (ovector16_1[i] != ovector16_2[i]) {
1537  printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1538  i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1539  is_successful = 0;
1540  }
1541  }
1542 #endif
1543 
1544 #ifdef SUPPORT_PCRE32
1545  if (return_value32[0] != return_value32[1]) {
1546  printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1547  return_value32[0], return_value32[1], total, current->pattern, current->input);
1548  is_successful = 0;
1549  } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1550  if (return_value32[0] == PCRE_ERROR_PARTIAL)
1551  return_value32[0] = 2;
1552  else
1553  return_value32[0] *= 2;
1554 
1555  for (i = 0; i < return_value32[0]; ++i)
1556  if (ovector32_1[i] != ovector32_2[i]) {
1557  printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1558  i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1559  is_successful = 0;
1560  }
1561  }
1562 #endif
1563  }
1564  }
1565 
1566  if (is_successful) {
1567 #ifdef SUPPORT_PCRE8
1568  if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
1569  if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1570  printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1571  total, current->pattern, current->input);
1572  is_successful = 0;
1573  }
1574 
1575  if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1576  printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1577  total, current->pattern, current->input);
1578  is_successful = 0;
1579  }
1580  }
1581 #endif
1582 #ifdef SUPPORT_PCRE16
1583  if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
1584  if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1585  printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1586  total, current->pattern, current->input);
1587  is_successful = 0;
1588  }
1589 
1590  if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1591  printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1592  total, current->pattern, current->input);
1593  is_successful = 0;
1594  }
1595  }
1596 #endif
1597 #ifdef SUPPORT_PCRE32
1598  if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
1599  if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1600  printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1601  total, current->pattern, current->input);
1602  is_successful = 0;
1603  }
1604 
1605  if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1606  printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1607  total, current->pattern, current->input);
1608  is_successful = 0;
1609  }
1610  }
1611 #endif
1612  }
1613 
1614  if (is_successful) {
1615 #ifdef SUPPORT_PCRE8
1616  if (mark8_1 != mark8_2) {
1617  printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1618  total, current->pattern, current->input);
1619  is_successful = 0;
1620  }
1621 #endif
1622 #ifdef SUPPORT_PCRE16
1623  if (mark16_1 != mark16_2) {
1624  printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1625  total, current->pattern, current->input);
1626  is_successful = 0;
1627  }
1628 #endif
1629 #ifdef SUPPORT_PCRE32
1630  if (mark32_1 != mark32_2) {
1631  printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1632  total, current->pattern, current->input);
1633  is_successful = 0;
1634  }
1635 #endif
1636  }
1637 
1638 #ifdef SUPPORT_PCRE8
1639  if (re8) {
1640  pcre_free_study(extra8);
1641  pcre_free(re8);
1642  }
1643 #endif
1644 #ifdef SUPPORT_PCRE16
1645  if (re16) {
1646  pcre16_free_study(extra16);
1647  pcre16_free(re16);
1648  }
1649 #endif
1650 #ifdef SUPPORT_PCRE32
1651  if (re32) {
1652  pcre32_free_study(extra32);
1653  pcre32_free(re32);
1654  }
1655 #endif
1656 
1657  if (is_successful) {
1658  successful++;
1659  successful_row++;
1660  printf(".");
1661  if (successful_row >= 60) {
1662  successful_row = 0;
1663  printf("\n");
1664  }
1665  } else
1666  successful_row = 0;
1667 
1668  fflush(stdout);
1669  current++;
1670  }
1671  tables(1);
1672 #ifdef SUPPORT_PCRE8
1673  setstack8(NULL);
1674 #endif
1675 #ifdef SUPPORT_PCRE16
1676  setstack16(NULL);
1677 #endif
1678 #ifdef SUPPORT_PCRE32
1679  setstack32(NULL);
1680 #endif
1681 
1682  if (total == successful) {
1683  printf("\nAll JIT regression tests are successfully passed.\n");
1684  return 0;
1685  } else {
1686  printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1687  return 1;
1688  }
1689 }
1690 
1691 /* End of pcre_jit_test.c */