"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.43/pcrecpp_unittest.cc" (25 Jan 2019, 39873 Bytes) of package /linux/misc/pcre-8.43.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pcrecpp_unittest.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 8.42_vs_8.43.

    1 // -*- coding: utf-8 -*-
    2 //
    3 // Copyright (c) 2005 - 2010, Google Inc.
    4 // All rights reserved.
    5 //
    6 // Redistribution and use in source and binary forms, with or without
    7 // modification, are permitted provided that the following conditions are
    8 // met:
    9 //
   10 //     * Redistributions of source code must retain the above copyright
   11 // notice, this list of conditions and the following disclaimer.
   12 //     * Redistributions in binary form must reproduce the above
   13 // copyright notice, this list of conditions and the following disclaimer
   14 // in the documentation and/or other materials provided with the
   15 // distribution.
   16 //     * Neither the name of Google Inc. nor the names of its
   17 // contributors may be used to endorse or promote products derived from
   18 // this software without specific prior written permission.
   19 //
   20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   31 //
   32 // Author: Sanjay Ghemawat
   33 //
   34 // TODO: Test extractions for PartialMatch/Consume
   35 
   36 #ifdef HAVE_CONFIG_H
   37 #include "config.h"
   38 #endif
   39 
   40 #include <stdio.h>
   41 #include <string.h>      /* for memset and strcmp */
   42 #include <cassert>
   43 #include <vector>
   44 #include "pcrecpp.h"
   45 
   46 using std::string;
   47 using pcrecpp::StringPiece;
   48 using pcrecpp::RE;
   49 using pcrecpp::RE_Options;
   50 using pcrecpp::Hex;
   51 using pcrecpp::Octal;
   52 using pcrecpp::CRadix;
   53 
   54 static bool VERBOSE_TEST  = false;
   55 
   // CHECK terminates the test program with a diagnostic on stderr when
   // `condition` evaluates to false.  It is *not* controlled by NDEBUG, so
   // the condition is evaluated in every compilation mode.  Therefore it is
   // safe to put expressions with side effects inside it, e.g.:
   //    CHECK_EQ(fp->Write(x), 4)
   #define CHECK(condition) do {                           \
     if (!(condition)) {                                   \
       fprintf(stderr, "%s:%d: Check failed: %s\n",        \
               __FILE__, __LINE__, #condition);            \
       exit(1);                                            \
     }                                                     \
   } while (0)

   // CHECK_EQ dies unless a == b.  Both operands are parenthesized so that
   // an argument containing an operator of lower precedence than == (for
   // example `x & mask` or `c ? a : b`) is compared as a whole expression
   // instead of being re-associated around the ==.
   #define CHECK_EQ(a, b)   CHECK((a) == (b))
   69 
   70 static void Timing1(int num_iters) {
   71   // Same pattern lots of times
   72   RE pattern("ruby:\\d+");
   73   StringPiece p("ruby:1234");
   74   for (int j = num_iters; j > 0; j--) {
   75     CHECK(pattern.FullMatch(p));
   76   }
   77 }
   78 
   79 static void Timing2(int num_iters) {
   80   // Same pattern lots of times
   81   RE pattern("ruby:(\\d+)");
   82   int i;
   83   for (int j = num_iters; j > 0; j--) {
   84     CHECK(pattern.FullMatch("ruby:1234", &i));
   85     CHECK_EQ(i, 1234);
   86   }
   87 }
   88 
   89 static void Timing3(int num_iters) {
   90   string text_string;
   91   for (int j = num_iters; j > 0; j--) {
   92     text_string += "this is another line\n";
   93   }
   94 
   95   RE line_matcher(".*\n");
   96   string line;
   97   StringPiece text(text_string);
   98   int counter = 0;
   99   while (line_matcher.Consume(&text)) {
  100     counter++;
  101   }
  102   printf("Matched %d lines\n", counter);
  103 }
  104 
  #if 0  // uncomment this if you have a way of defining VirtualProcessSize()

  // Leak check: constructs (and destroys) 100000 RE objects with distinct
  // patterns, sampling the process size after the first 50000 and again at
  // the end, and requires the relative growth between the two samples to
  // stay below 2%.
  // NOTE(review): VirtualProcessSize() is not defined in this file; it is
  // assumed to return a value convertible to unsigned long long.
  static void LeakTest() {
    // Check for memory leaks
    unsigned long long initial_size = 0;
    for (int i = 0; i < 100000; i++) {
      if (i == 50000) {
        initial_size = VirtualProcessSize();
        printf("Size after 50000: %llu\n", initial_size);
      }
      char buf[100];  // definitely big enough
      sprintf(buf, "pat%09d", i);
      RE newre(buf);
    }
    // Same type as initial_size, so it matches the %llu conversion below.
    const unsigned long long final_size = VirtualProcessSize();
    printf("Size after 100000: %llu\n", final_size);
    // Convert to double before subtracting: if the process shrank, an
    // unsigned subtraction would wrap around and report enormous growth.
    const double growth =
        (double(final_size) - double(initial_size)) / double(final_size);
    printf("Growth: %0.2f%%", growth * 100);
    CHECK(growth < 0.02);       // Allow < 2% growth
  }

  #endif
  127 
  128 static void RadixTests() {
  129   printf("Testing hex\n");
  130 
  131 #define CHECK_HEX(type, value) \
  132   do { \
  133     type v; \
  134     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
  135     CHECK_EQ(v, 0x ## value); \
  136     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
  137     CHECK_EQ(v, 0x ## value); \
  138   } while(0)
  139 
  140   CHECK_HEX(short,              2bad);
  141   CHECK_HEX(unsigned short,     2badU);
  142   CHECK_HEX(int,                dead);
  143   CHECK_HEX(unsigned int,       deadU);
  144   CHECK_HEX(long,               7eadbeefL);
  145   CHECK_HEX(unsigned long,      deadbeefUL);
  146 #ifdef HAVE_LONG_LONG
  147   CHECK_HEX(long long,          12345678deadbeefLL);
  148 #endif
  149 #ifdef HAVE_UNSIGNED_LONG_LONG
  150   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
  151 #endif
  152 
  153 #undef CHECK_HEX
  154 
  155   printf("Testing octal\n");
  156 
  157 #define CHECK_OCTAL(type, value) \
  158   do { \
  159     type v; \
  160     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
  161     CHECK_EQ(v, 0 ## value); \
  162     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
  163     CHECK_EQ(v, 0 ## value); \
  164   } while(0)
  165 
  166   CHECK_OCTAL(short,              77777);
  167   CHECK_OCTAL(unsigned short,     177777U);
  168   CHECK_OCTAL(int,                17777777777);
  169   CHECK_OCTAL(unsigned int,       37777777777U);
  170   CHECK_OCTAL(long,               17777777777L);
  171   CHECK_OCTAL(unsigned long,      37777777777UL);
  172 #ifdef HAVE_LONG_LONG
  173   CHECK_OCTAL(long long,          777777777777777777777LL);
  174 #endif
  175 #ifdef HAVE_UNSIGNED_LONG_LONG
  176   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
  177 #endif
  178 
  179 #undef CHECK_OCTAL
  180 
  181   printf("Testing decimal\n");
  182 
  183 #define CHECK_DECIMAL(type, value) \
  184   do { \
  185     type v; \
  186     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
  187     CHECK_EQ(v, value); \
  188     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
  189     CHECK_EQ(v, value); \
  190   } while(0)
  191 
  192   CHECK_DECIMAL(short,              -1);
  193   CHECK_DECIMAL(unsigned short,     9999);
  194   CHECK_DECIMAL(int,                -1000);
  195   CHECK_DECIMAL(unsigned int,       12345U);
  196   CHECK_DECIMAL(long,               -10000000L);
  197   CHECK_DECIMAL(unsigned long,      3083324652U);
  198 #ifdef HAVE_LONG_LONG
  199   CHECK_DECIMAL(long long,          -100000000000000LL);
  200 #endif
  201 #ifdef HAVE_UNSIGNED_LONG_LONG
  202   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
  203 #endif
  204 
  205 #undef CHECK_DECIMAL
  206 
  207 }
  208 
  209 static void TestReplace() {
  210   printf("Testing Replace\n");
  211 
  212   struct ReplaceTest {
  213     const char *regexp;
  214     const char *rewrite;
  215     const char *original;
  216     const char *single;
  217     const char *global;
  218     int global_count;         // the expected return value from ReplaceAll
  219   };
  220   static const ReplaceTest tests[] = {
  221     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
  222       "\\2\\1ay",
  223       "the quick brown fox jumps over the lazy dogs.",
  224       "ethay quick brown fox jumps over the lazy dogs.",
  225       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
  226       9 },
  227     { "\\w+",
  228       "\\0-NOSPAM",
  229       "paul.haahr@google.com",
  230       "paul-NOSPAM.haahr@google.com",
  231       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
  232       4 },
  233     { "^",
  234       "(START)",
  235       "foo",
  236       "(START)foo",
  237       "(START)foo",
  238       1 },
  239     { "^",
  240       "(START)",
  241       "",
  242       "(START)",
  243       "(START)",
  244       1 },
  245     { "$",
  246       "(END)",
  247       "",
  248       "(END)",
  249       "(END)",
  250       1 },
  251     { "b",
  252       "bb",
  253       "ababababab",
  254       "abbabababab",
  255       "abbabbabbabbabb",
  256        5 },
  257     { "b",
  258       "bb",
  259       "bbbbbb",
  260       "bbbbbbb",
  261       "bbbbbbbbbbbb",
  262       6 },
  263     { "b+",
  264       "bb",
  265       "bbbbbb",
  266       "bb",
  267       "bb",
  268       1 },
  269     { "b*",
  270       "bb",
  271       "bbbbbb",
  272       "bb",
  273       "bbbb",
  274       2 },
  275     { "b*",
  276       "bb",
  277       "aaaaa",
  278       "bbaaaaa",
  279       "bbabbabbabbabbabb",
  280       6 },
  281     { "b*",
  282       "bb",
  283       "aa\naa\n",
  284       "bbaa\naa\n",
  285       "bbabbabb\nbbabbabb\nbb",
  286       7 },
  287     { "b*",
  288       "bb",
  289       "aa\raa\r",
  290       "bbaa\raa\r",
  291       "bbabbabb\rbbabbabb\rbb",
  292       7 },
  293     { "b*",
  294       "bb",
  295       "aa\r\naa\r\n",
  296       "bbaa\r\naa\r\n",
  297       "bbabbabb\r\nbbabbabb\r\nbb",
  298       7 },
  299     // Check empty-string matching (it's tricky!)
  300     { "aa|b*",
  301       "@",
  302       "aa",
  303       "@",
  304       "@@",
  305       2 },
  306     { "b*|aa",
  307       "@",
  308       "aa",
  309       "@aa",
  310       "@@@",
  311       3 },
  312 #ifdef SUPPORT_UTF
  313     { "b*",
  314       "bb",
  315       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
  316       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
  317       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
  318       5 },
  319     { "b*",
  320       "bb",
  321       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
  322       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
  323       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
  324        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
  325       9 },
  326 #endif
  327     { "", NULL, NULL, NULL, NULL, 0 }
  328   };
  329 
  330 #ifdef SUPPORT_UTF
  331   const bool support_utf8 = true;
  332 #else
  333   const bool support_utf8 = false;
  334 #endif
  335 
  336   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
  337     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
  338     assert(re.error().empty());
  339     string one(t->original);
  340     CHECK(re.Replace(t->rewrite, &one));
  341     CHECK_EQ(one, t->single);
  342     string all(t->original);
  343     const int replace_count = re.GlobalReplace(t->rewrite, &all);
  344     CHECK_EQ(all, t->global);
  345     CHECK_EQ(replace_count, t->global_count);
  346   }
  347 
  348   // One final test: test \r\n replacement when we're not in CRLF mode
  349   {
  350     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
  351     assert(re.error().empty());
  352     string all("aa\r\naa\r\n");
  353     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
  354     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  355   }
  356   {
  357     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
  358     assert(re.error().empty());
  359     string all("aa\r\naa\r\n");
  360     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
  361     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  362   }
  363   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
  364   //       Alas, the answer depends on how pcre was compiled.
  365 }
  366 
  367 static void TestExtract() {
  368   printf("Testing Extract\n");
  369 
  370   string s;
  371 
  372   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
  373   CHECK_EQ(s, "kremvax!boris");
  374 
  375   // check the RE interface as well
  376   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
  377   CHECK_EQ(s, "'foo'");
  378   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
  379   CHECK_EQ(s, "'foo'");
  380 }
  381 
  382 static void TestConsume() {
  383   printf("Testing Consume\n");
  384 
  385   string word;
  386 
  387   string s("   aaa b!@#$@#$cccc");
  388   StringPiece input(s);
  389 
  390   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
  391   CHECK(r.Consume(&input, &word));
  392   CHECK_EQ(word, "aaa");
  393   CHECK(r.Consume(&input, &word));
  394   CHECK_EQ(word, "b");
  395   CHECK(! r.Consume(&input, &word));
  396 }
  397 
  398 static void TestFindAndConsume() {
  399   printf("Testing FindAndConsume\n");
  400 
  401   string word;
  402 
  403   string s("   aaa b!@#$@#$cccc");
  404   StringPiece input(s);
  405 
  406   RE r("(\\w+)");      // matches a word
  407   CHECK(r.FindAndConsume(&input, &word));
  408   CHECK_EQ(word, "aaa");
  409   CHECK(r.FindAndConsume(&input, &word));
  410   CHECK_EQ(word, "b");
  411   CHECK(r.FindAndConsume(&input, &word));
  412   CHECK_EQ(word, "cccc");
  413   CHECK(! r.FindAndConsume(&input, &word));
  414 }
  415 
  416 static void TestMatchNumberPeculiarity() {
  417   printf("Testing match-number peculiarity\n");
  418 
  419   string word1;
  420   string word2;
  421   string word3;
  422 
  423   RE r("(foo)|(bar)|(baz)");
  424   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
  425   CHECK_EQ(word1, "foo");
  426   CHECK_EQ(word2, "");
  427   CHECK_EQ(word3, "");
  428   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
  429   CHECK_EQ(word1, "");
  430   CHECK_EQ(word2, "bar");
  431   CHECK_EQ(word3, "");
  432   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
  433   CHECK_EQ(word1, "");
  434   CHECK_EQ(word2, "");
  435   CHECK_EQ(word3, "baz");
  436   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
  437 
  438   string a;
  439   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
  440   CHECK_EQ(a, "");
  441 }
  442 
  443 static void TestRecursion() {
  444   printf("Testing recursion\n");
  445 
  446   // Get one string that passes (sometimes), one that never does.
  447   string text_good("abcdefghijk");
  448   string text_bad("acdefghijkl");
  449 
  450   // According to pcretest, matching text_good against (\w+)*b
  451   // requires match_limit of at least 8192, and match_recursion_limit
  452   // of at least 37.
  453 
  454   RE_Options options_ml;
  455   options_ml.set_match_limit(8192);
  456   RE re("(\\w+)*b", options_ml);
  457   CHECK(re.PartialMatch(text_good) == true);
  458   CHECK(re.PartialMatch(text_bad) == false);
  459   CHECK(re.FullMatch(text_good) == false);
  460   CHECK(re.FullMatch(text_bad) == false);
  461 
  462   options_ml.set_match_limit(1024);
  463   RE re2("(\\w+)*b", options_ml);
  464   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
  465   CHECK(re2.PartialMatch(text_bad) == false);
  466   CHECK(re2.FullMatch(text_good) == false);
  467   CHECK(re2.FullMatch(text_bad) == false);
  468 
  469   RE_Options options_mlr;
  470   options_mlr.set_match_limit_recursion(50);
  471   RE re3("(\\w+)*b", options_mlr);
  472   CHECK(re3.PartialMatch(text_good) == true);
  473   CHECK(re3.PartialMatch(text_bad) == false);
  474   CHECK(re3.FullMatch(text_good) == false);
  475   CHECK(re3.FullMatch(text_bad) == false);
  476 
  477   options_mlr.set_match_limit_recursion(10);
  478   RE re4("(\\w+)*b", options_mlr);
  479   CHECK(re4.PartialMatch(text_good) == false);
  480   CHECK(re4.PartialMatch(text_bad) == false);
  481   CHECK(re4.FullMatch(text_good) == false);
  482   CHECK(re4.FullMatch(text_bad) == false);
  483 }
  484 
  485 // A meta-quoted string, interpreted as a pattern, should always match
  486 // the original unquoted string.
  487 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
  488   string quoted = RE::QuoteMeta(unquoted);
  489   RE re(quoted, options);
  490   CHECK(re.FullMatch(unquoted));
  491 }
  492 
  493 // A string containing meaningful regexp characters, which is then meta-
  494 // quoted, should not generally match a string the unquoted string does.
  495 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
  496                                   RE_Options options = RE_Options()) {
  497   string quoted = RE::QuoteMeta(unquoted);
  498   RE re(quoted, options);
  499   CHECK(!re.FullMatch(should_not_match));
  500 }
  501 
  502 // Tests that quoted meta characters match their original strings,
  503 // and that a few things that shouldn't match indeed do not.
  504 static void TestQuotaMetaSimple() {
  505   TestQuoteMeta("foo");
  506   TestQuoteMeta("foo.bar");
  507   TestQuoteMeta("foo\\.bar");
  508   TestQuoteMeta("[1-9]");
  509   TestQuoteMeta("1.5-2.0?");
  510   TestQuoteMeta("\\d");
  511   TestQuoteMeta("Who doesn't like ice cream?");
  512   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
  513   TestQuoteMeta("((?!)xxx).*yyy");
  514   TestQuoteMeta("([");
  515   TestQuoteMeta(string("foo\0bar", 7));
  516 }
  517 
  518 static void TestQuoteMetaSimpleNegative() {
  519   NegativeTestQuoteMeta("foo", "bar");
  520   NegativeTestQuoteMeta("...", "bar");
  521   NegativeTestQuoteMeta("\\.", ".");
  522   NegativeTestQuoteMeta("\\.", "..");
  523   NegativeTestQuoteMeta("(a)", "a");
  524   NegativeTestQuoteMeta("(a|b)", "a");
  525   NegativeTestQuoteMeta("(a|b)", "(a)");
  526   NegativeTestQuoteMeta("(a|b)", "a|b");
  527   NegativeTestQuoteMeta("[0-9]", "0");
  528   NegativeTestQuoteMeta("[0-9]", "0-9");
  529   NegativeTestQuoteMeta("[0-9]", "[9]");
  530   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
  531 }
  532 
  533 static void TestQuoteMetaLatin1() {
  534   TestQuoteMeta("3\xb2 = 9");
  535 }
  536 
  // QuoteMeta checks on multi-byte UTF-8 sequences.  The body is compiled
  // only when SUPPORT_UTF is defined; otherwise the function does nothing.
  static void TestQuoteMetaUtf8() {
  #ifdef SUPPORT_UTF
    const RE_Options utf8_mode = pcrecpp::UTF8();

    TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
    TestQuoteMeta("xyz", utf8_mode);                 // No fancy utf8
    TestQuoteMeta("\xc2\xb0", utf8_mode);            // 2-byte utf8 (degree symbol)
    TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);  // As a middle character
    TestQuoteMeta("\xe2\x80\xb3", utf8_mode);        // 3-byte utf8 (double prime)
    TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);    // 4-byte utf8 (music note)

    // Without the UTF8 option: interpreted as Latin-1, but should still work
    TestQuoteMeta("27\xc2\xb0");

    // 2-byte utf (degree symbol): the candidate has a backslash in front of
    // each of the two bytes and must not be matched.
    NegativeTestQuoteMeta("27\xc2\xb0",
                          "27\\\xc2\\\xb0",
                          utf8_mode);
  #endif
  }
  551 
  552 static void TestQuoteMetaAll() {
  553   printf("Testing QuoteMeta\n");
  554   TestQuotaMetaSimple();
  555   TestQuoteMetaSimpleNegative();
  556   TestQuoteMetaLatin1();
  557   TestQuoteMetaUtf8();
  558 }
  559 
  560 //
  561 // Options tests contributed by
  562 // Giuseppe Maxia, CTO, Stardata s.r.l.
  563 // July 2005
  564 //
  565 static void GetOneOptionResult(
  566                 const char *option_name,
  567                 const char *regex,
  568                 const char *str,
  569                 RE_Options options,
  570                 bool full,
  571                 string expected) {
  572 
  573   printf("Testing Option <%s>\n", option_name);
  574   if(VERBOSE_TEST)
  575     printf("/%s/ finds \"%s\" within \"%s\" \n",
  576                     regex,
  577                     expected.c_str(),
  578                     str);
  579   string captured("");
  580   if (full)
  581     RE(regex,options).FullMatch(str, &captured);
  582   else
  583     RE(regex,options).PartialMatch(str, &captured);
  584   CHECK_EQ(captured, expected);
  585 }
  586 
  587 static void TestOneOption(
  588                 const char *option_name,
  589                 const char *regex,
  590                 const char *str,
  591                 RE_Options options,
  592                 bool full,
  593                 bool assertive = true) {
  594 
  595   printf("Testing Option <%s>\n", option_name);
  596   if (VERBOSE_TEST)
  597     printf("'%s' %s /%s/ \n",
  598                   str,
  599                   (assertive? "matches" : "doesn't match"),
  600                   regex);
  601   if (assertive) {
  602     if (full)
  603       CHECK(RE(regex,options).FullMatch(str));
  604     else
  605       CHECK(RE(regex,options).PartialMatch(str));
  606   } else {
  607     if (full)
  608       CHECK(!RE(regex,options).FullMatch(str));
  609     else
  610       CHECK(!RE(regex,options).PartialMatch(str));
  611   }
  612 }
  613 
  614 static void Test_CASELESS() {
  615   RE_Options options;
  616   RE_Options options2;
  617 
  618   options.set_caseless(true);
  619   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
  620   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
  621   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
  622 
  623   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
  624   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
  625   options.set_caseless(false);
  626   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
  627 }
  628 
  629 static void Test_MULTILINE() {
  630   RE_Options options;
  631   RE_Options options2;
  632   const char *str = "HELLO\n" "cruel\n" "world\n";
  633 
  634   options.set_multiline(true);
  635   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
  636   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
  637   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
  638   options.set_multiline(false);
  639   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
  640 }
  641 
  642 static void Test_DOTALL() {
  643   RE_Options options;
  644   RE_Options options2;
  645   const char *str = "HELLO\n" "cruel\n" "world";
  646 
  647   options.set_dotall(true);
  648   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
  649   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
  650   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
  651   options.set_dotall(false);
  652   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
  653 }
  654 
  655 static void Test_DOLLAR_ENDONLY() {
  656   RE_Options options;
  657   RE_Options options2;
  658   const char *str = "HELLO world\n";
  659 
  660   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
  661   options.set_dollar_endonly(true);
  662   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
  663   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
  664 }
  665 
  666 static void Test_EXTRA() {
  667   RE_Options options;
  668   const char *str = "HELLO";
  669 
  670   options.set_extra(true);
  671   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
  672   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
  673   options.set_extra(false);
  674   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
  675 }
  676 
  677 static void Test_EXTENDED() {
  678   RE_Options options;
  679   RE_Options options2;
  680   const char *str = "HELLO world";
  681 
  682   options.set_extended(true);
  683   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
  684   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
  685   TestOneOption("EXTENDED (class)",
  686                     "^ HE L{2} O "
  687                     "\\s+        "
  688                     "\\w+ $      ",
  689                     str,
  690                     options,
  691                     false);
  692 
  693   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
  694   TestOneOption("EXTENDED (function)",
  695                     "^ HE L{2} O "
  696                     "\\s+        "
  697                     "\\w+ $      ",
  698                     str,
  699                     pcrecpp::EXTENDED(),
  700                     false);
  701 
  702   options.set_extended(false);
  703   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
  704 }
  705 
  706 static void Test_NO_AUTO_CAPTURE() {
  707   RE_Options options;
  708   const char *str = "HELLO world";
  709   string captured;
  710 
  711   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
  712   if (VERBOSE_TEST)
  713     printf("parentheses capture text\n");
  714   RE re("(world|universe)$", options);
  715   CHECK(re.Extract("\\1", str , &captured));
  716   CHECK_EQ(captured, "world");
  717   options.set_no_auto_capture(true);
  718   printf("testing Option <NO_AUTO_CAPTURE>\n");
  719   if (VERBOSE_TEST)
  720     printf("parentheses do not capture text\n");
  721   re.Extract("\\1",str, &captured );
  722   CHECK_EQ(captured, "world");
  723 }
  724 
  725 static void Test_UNGREEDY() {
  726   RE_Options options;
  727   const char *str = "HELLO, 'this' is the 'world'";
  728 
  729   options.set_ungreedy(true);
  730   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
  731   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
  732   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
  733 
  734   options.set_ungreedy(false);
  735   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
  736   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
  737 }
  738 
  739 static void Test_all_options() {
  740   const char *str = "HELLO\n" "cruel\n" "world";
  741   RE_Options options;
  742   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
  743 
  744   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
  745   options.set_all_options(0);
  746   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
  747   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
  748 
  749   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
  750   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
  751                   " ^ c r u e l $ ",
  752                   str,
  753                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
  754                   false);
  755 
  756   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
  757                   " ^ c r u e l $ ",
  758                   str,
  759                   RE_Options()
  760                        .set_multiline(true)
  761                        .set_extended(true),
  762                   false);
  763 
  764   options.set_all_options(0);
  765   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
  766 
  767 }
  768 
  769 static void TestOptions() {
  770   printf("Testing Options\n");
  771   Test_CASELESS();
  772   Test_MULTILINE();
  773   Test_DOTALL();
  774   Test_DOLLAR_ENDONLY();
  775   Test_EXTENDED();
  776   Test_NO_AUTO_CAPTURE();
  777   Test_UNGREEDY();
  778   Test_EXTRA();
  779   Test_all_options();
  780 }
  781 
  782 static void TestConstructors() {
  783   printf("Testing constructors\n");
  784 
  785   RE_Options options;
  786   options.set_dotall(true);
  787   const char *str = "HELLO\n" "cruel\n" "world";
  788 
  789   RE orig("HELLO.*world", options);
  790   CHECK(orig.FullMatch(str));
  791 
  792   RE copy1(orig);
  793   CHECK(copy1.FullMatch(str));
  794 
  795   RE copy2("not a match");
  796   CHECK(!copy2.FullMatch(str));
  797   copy2 = copy1;
  798   CHECK(copy2.FullMatch(str));
  799   copy2 = orig;
  800   CHECK(copy2.FullMatch(str));
  801 
  802   // Make sure when we assign to ourselves, nothing bad happens
  803   orig = orig;
  804   copy1 = copy1;
  805   copy2 = copy2;
  806   CHECK(orig.FullMatch(str));
  807   CHECK(copy1.FullMatch(str));
  808   CHECK(copy2.FullMatch(str));
  809 }
  810 
  811 int main(int argc, char** argv) {
  812   // Treat any flag as --help
  813   if (argc > 1 && argv[1][0] == '-') {
  814     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
  815            "       If 'timingX ###' is specified, run the given timing test\n"
  816            "       with the given number of iterations, rather than running\n"
  817            "       the default corectness test.\n", argv[0]);
  818     return 0;
  819   }
  820 
  821   if (argc > 1) {
  822     if ( argc == 2 || atoi(argv[2]) == 0) {
  823       printf("timing mode needs a num-iters argument\n");
  824       return 1;
  825     }
  826     if (!strcmp(argv[1], "timing1"))
  827       Timing1(atoi(argv[2]));
  828     else if (!strcmp(argv[1], "timing2"))
  829       Timing2(atoi(argv[2]));
  830     else if (!strcmp(argv[1], "timing3"))
  831       Timing3(atoi(argv[2]));
  832     else
  833       printf("Unknown argument '%s'\n", argv[1]);
  834     return 0;
  835   }
  836 
  837   printf("PCRE C++ wrapper tests\n");
  838   printf("Testing FullMatch\n");
  839 
  840   int i;
  841   string s;
  842 
  843   /***** FullMatch with no args *****/
  844 
  845   CHECK(RE("h.*o").FullMatch("hello"));
  846   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
  847   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
  848   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
  849   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
  850   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
  851 
  852   /***** FullMatch with args *****/
  853 
  854   // Zero-arg
  855   CHECK(RE("\\d+").FullMatch("1001"));
  856 
  857   // Single-arg
  858   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
  859   CHECK_EQ(i, 1001);
  860   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
  861   CHECK_EQ(i, -123);
  862   CHECK(!RE("()\\d+").FullMatch("10", &i));
  863   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
  864                                 &i));
  865 
  866   // Digits surrounding integer-arg
  867   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
  868   CHECK_EQ(i, 23);
  869   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
  870   CHECK_EQ(i, 1);
  871   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
  872   CHECK_EQ(i, -1);
  873   CHECK(RE("(\\d)").PartialMatch("1234", &i));
  874   CHECK_EQ(i, 1);
  875   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
  876   CHECK_EQ(i, -1);
  877 
  878   // String-arg
  879   CHECK(RE("h(.*)o").FullMatch("hello", &s));
  880   CHECK_EQ(s, string("ell"));
  881 
  882   // StringPiece-arg
  883   StringPiece sp;
  884   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
  885   CHECK_EQ(sp.size(), 4);
  886   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
  887   CHECK_EQ(i, 1234);
  888 
  889   // Multi-arg
  890   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
  891   CHECK_EQ(s, string("ruby"));
  892   CHECK_EQ(i, 1234);
  893 
  894   // Ignore non-void* NULL arg
  895   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
  896   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
  897   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
  898   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
  899 #ifdef HAVE_LONG_LONG
  900   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
  901 #endif
  902   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
  903   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
  904 
  905   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  906   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
  907   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
  908   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
  909   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
  910   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
  911 
  912   // Ignored arg
  913   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
  914   CHECK_EQ(s, string("ruby"));
  915   CHECK_EQ(i, 1234);
  916 
  917   // Type tests
  918   {
  919     char c;
  920     CHECK(RE("(H)ello").FullMatch("Hello", &c));
  921     CHECK_EQ(c, 'H');
  922   }
  923   {
  924     unsigned char c;
  925     CHECK(RE("(H)ello").FullMatch("Hello", &c));
  926     CHECK_EQ(c, static_cast<unsigned char>('H'));
  927   }
  928   {
  929     short v;
  930     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
  931     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
  932     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
  933     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
  934     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
  935     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
  936   }
  937   {
  938     unsigned short v;
  939     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
  940     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
  941     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
  942     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
  943   }
  944   {
  945     int v;
  946     static const int max_value = 0x7fffffff;
  947     static const int min_value = -max_value - 1;
  948     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
  949     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
  950     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
  951     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
  952     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
  953     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
  954   }
  955   {
  956     unsigned int v;
  957     static const unsigned int max_value = 0xfffffffful;
  958     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
  959     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
  960     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
  961   }
  962 #ifdef HAVE_LONG_LONG
  963 # if defined(__MINGW__) || defined(__MINGW32__)
  964 #   define LLD "%I64d"
  965 #   define LLU "%I64u"
  966 # else
  967 #   define LLD "%lld"
  968 #   define LLU "%llu"
  969 # endif
  970   {
  971     long long v;
  972     static const long long max_value = 0x7fffffffffffffffLL;
  973     static const long long min_value = -max_value - 1;
  974     char buf[32];  // definitely big enough for a long long
  975 
  976     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  977     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
  978 
  979     sprintf(buf, LLD, max_value);
  980     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
  981 
  982     sprintf(buf, LLD, min_value);
  983     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
  984 
  985     sprintf(buf, LLD, max_value);
  986     assert(buf[strlen(buf)-1] != '9');
  987     buf[strlen(buf)-1]++;
  988     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  989 
  990     sprintf(buf, LLD, min_value);
  991     assert(buf[strlen(buf)-1] != '9');
  992     buf[strlen(buf)-1]++;
  993     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  994   }
  995 #endif
  996 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
  997   {
  998     unsigned long long v;
  999     long long v2;
 1000     static const unsigned long long max_value = 0xffffffffffffffffULL;
 1001     char buf[32];  // definitely big enough for an unsigned long long
 1002 
 1003     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
 1004     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
 1005 
 1006     sprintf(buf, LLU, max_value);
 1007     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
 1008 
 1009     assert(buf[strlen(buf)-1] != '9');
 1010     buf[strlen(buf)-1]++;
 1011     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
 1012   }
 1013 #endif
 1014   {
 1015     float v;
 1016     CHECK(RE("(.*)").FullMatch("100", &v));
 1017     CHECK(RE("(.*)").FullMatch("-100.", &v));
 1018     CHECK(RE("(.*)").FullMatch("1e23", &v));
 1019   }
 1020   {
 1021     double v;
 1022     CHECK(RE("(.*)").FullMatch("100", &v));
 1023     CHECK(RE("(.*)").FullMatch("-100.", &v));
 1024     CHECK(RE("(.*)").FullMatch("1e23", &v));
 1025   }
 1026 
 1027   // Check that matching is fully anchored
 1028   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
 1029   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
 1030   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
 1031   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
 1032 
 1033   // Braces
 1034   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
 1035   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
 1036   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
 1037 
 1038   // Complicated RE
 1039   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
 1040   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
 1041   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
 1042   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
 1043 
 1044   // Check full-match handling (needs '$' tacked on internally)
 1045   CHECK(RE("fo|foo").FullMatch("fo"));
 1046   CHECK(RE("fo|foo").FullMatch("foo"));
 1047   CHECK(RE("fo|foo$").FullMatch("fo"));
 1048   CHECK(RE("fo|foo$").FullMatch("foo"));
 1049   CHECK(RE("foo$").FullMatch("foo"));
 1050   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
 1051   CHECK(!RE("fo|bar").FullMatch("fox"));
 1052 
 1053   // Uncomment the following if we change the handling of '$' to
 1054   // prevent it from matching a trailing newline
 1055   if (false) {
 1056     // Check that we don't get bitten by pcre's special handling of a
 1057     // '\n' at the end of the string matching '$'
 1058     CHECK(!RE("foo$").PartialMatch("foo\n"));
 1059   }
 1060 
 1061   // Number of args
 1062   int a[16];
 1063   CHECK(RE("").FullMatch(""));
 1064 
 1065   memset(a, 0, sizeof(0));
 1066   CHECK(RE("(\\d){1}").FullMatch("1",
 1067                                  &a[0]));
 1068   CHECK_EQ(a[0], 1);
 1069 
 1070   memset(a, 0, sizeof(0));
 1071   CHECK(RE("(\\d)(\\d)").FullMatch("12",
 1072                                    &a[0],  &a[1]));
 1073   CHECK_EQ(a[0], 1);
 1074   CHECK_EQ(a[1], 2);
 1075 
 1076   memset(a, 0, sizeof(0));
 1077   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
 1078                                         &a[0],  &a[1],  &a[2]));
 1079   CHECK_EQ(a[0], 1);
 1080   CHECK_EQ(a[1], 2);
 1081   CHECK_EQ(a[2], 3);
 1082 
 1083   memset(a, 0, sizeof(0));
 1084   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
 1085                                              &a[0],  &a[1],  &a[2],  &a[3]));
 1086   CHECK_EQ(a[0], 1);
 1087   CHECK_EQ(a[1], 2);
 1088   CHECK_EQ(a[2], 3);
 1089   CHECK_EQ(a[3], 4);
 1090 
 1091   memset(a, 0, sizeof(0));
 1092   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
 1093                                                   &a[0],  &a[1],  &a[2],
 1094                                                   &a[3],  &a[4]));
 1095   CHECK_EQ(a[0], 1);
 1096   CHECK_EQ(a[1], 2);
 1097   CHECK_EQ(a[2], 3);
 1098   CHECK_EQ(a[3], 4);
 1099   CHECK_EQ(a[4], 5);
 1100 
 1101   memset(a, 0, sizeof(0));
 1102   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
 1103                                                        &a[0],  &a[1],  &a[2],
 1104                                                        &a[3],  &a[4],  &a[5]));
 1105   CHECK_EQ(a[0], 1);
 1106   CHECK_EQ(a[1], 2);
 1107   CHECK_EQ(a[2], 3);
 1108   CHECK_EQ(a[3], 4);
 1109   CHECK_EQ(a[4], 5);
 1110   CHECK_EQ(a[5], 6);
 1111 
 1112   memset(a, 0, sizeof(0));
 1113   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
 1114                                                             &a[0],  &a[1],  &a[2],  &a[3],
 1115                                                             &a[4],  &a[5],  &a[6]));
 1116   CHECK_EQ(a[0], 1);
 1117   CHECK_EQ(a[1], 2);
 1118   CHECK_EQ(a[2], 3);
 1119   CHECK_EQ(a[3], 4);
 1120   CHECK_EQ(a[4], 5);
 1121   CHECK_EQ(a[5], 6);
 1122   CHECK_EQ(a[6], 7);
 1123 
 1124   memset(a, 0, sizeof(0));
 1125   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
 1126            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
 1127                "1234567890123456",
 1128                &a[0],  &a[1],  &a[2],  &a[3],
 1129                &a[4],  &a[5],  &a[6],  &a[7],
 1130                &a[8],  &a[9],  &a[10], &a[11],
 1131                &a[12], &a[13], &a[14], &a[15]));
 1132   CHECK_EQ(a[0], 1);
 1133   CHECK_EQ(a[1], 2);
 1134   CHECK_EQ(a[2], 3);
 1135   CHECK_EQ(a[3], 4);
 1136   CHECK_EQ(a[4], 5);
 1137   CHECK_EQ(a[5], 6);
 1138   CHECK_EQ(a[6], 7);
 1139   CHECK_EQ(a[7], 8);
 1140   CHECK_EQ(a[8], 9);
 1141   CHECK_EQ(a[9], 0);
 1142   CHECK_EQ(a[10], 1);
 1143   CHECK_EQ(a[11], 2);
 1144   CHECK_EQ(a[12], 3);
 1145   CHECK_EQ(a[13], 4);
 1146   CHECK_EQ(a[14], 5);
 1147   CHECK_EQ(a[15], 6);
 1148 
 1149   /***** PartialMatch *****/
 1150 
 1151   printf("Testing PartialMatch\n");
 1152 
 1153   CHECK(RE("h.*o").PartialMatch("hello"));
 1154   CHECK(RE("h.*o").PartialMatch("othello"));
 1155   CHECK(RE("h.*o").PartialMatch("hello!"));
 1156   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
 1157 
 1158   /***** other tests *****/
 1159 
 1160   RadixTests();
 1161   TestReplace();
 1162   TestExtract();
 1163   TestConsume();
 1164   TestFindAndConsume();
 1165   TestQuoteMetaAll();
 1166   TestMatchNumberPeculiarity();
 1167 
 1168   // Check the pattern() accessor
 1169   {
 1170     const string kPattern = "http://([^/]+)/.*";
 1171     const RE re(kPattern);
 1172     CHECK_EQ(kPattern, re.pattern());
 1173   }
 1174 
 1175   // Check RE error field.
 1176   {
 1177     RE re("foo");
 1178     CHECK(re.error().empty());  // Must have no error
 1179   }
 1180 
 1181 #ifdef SUPPORT_UTF
 1182   // Check UTF-8 handling
 1183   {
 1184     printf("Testing UTF-8 handling\n");
 1185 
 1186     // Three Japanese characters (nihongo)
 1187     const unsigned char utf8_string[] = {
 1188          0xe6, 0x97, 0xa5, // 65e5
 1189          0xe6, 0x9c, 0xac, // 672c
 1190          0xe8, 0xaa, 0x9e, // 8a9e
 1191          0
 1192     };
 1193     const unsigned char utf8_pattern[] = {
 1194          '.',
 1195          0xe6, 0x9c, 0xac, // 672c
 1196          '.',
 1197          0
 1198     };
 1199 
 1200     // Both should match in either mode, bytes or UTF-8
 1201     RE re_test1(".........");
 1202     CHECK(re_test1.FullMatch(utf8_string));
 1203     RE re_test2("...", pcrecpp::UTF8());
 1204     CHECK(re_test2.FullMatch(utf8_string));
 1205 
 1206     // PH added these tests for leading option settings
 1207 
 1208     RE re_testZ0("(*CR)(*NO_START_OPT).........");
 1209     CHECK(re_testZ0.FullMatch(utf8_string));
 1210 
 1211 #ifdef SUPPORT_UTF
 1212     RE re_testZ1("(*UTF8)...");
 1213     CHECK(re_testZ1.FullMatch(utf8_string));
 1214 
 1215     RE re_testZ2("(*UTF)...");
 1216     CHECK(re_testZ2.FullMatch(utf8_string));
 1217 
 1218 #ifdef SUPPORT_UCP
 1219     RE re_testZ3("(*UCP)(*UTF)...");
 1220     CHECK(re_testZ3.FullMatch(utf8_string));
 1221 
 1222     RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)...");
 1223     CHECK(re_testZ4.FullMatch(utf8_string));
 1224 
 1225     RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)...");
 1226     CHECK(re_testZ5.FullMatch(utf8_string));
 1227 #endif
 1228 #endif
 1229 
 1230     // Check that '.' matches one byte or UTF-8 character
 1231     // according to the mode.
 1232     string ss;
 1233     RE re_test3("(.)");
 1234     CHECK(re_test3.PartialMatch(utf8_string, &ss));
 1235     CHECK_EQ(ss, string("\xe6"));
 1236     RE re_test4("(.)", pcrecpp::UTF8());
 1237     CHECK(re_test4.PartialMatch(utf8_string, &ss));
 1238     CHECK_EQ(ss, string("\xe6\x97\xa5"));
 1239 
 1240     // Check that string matches itself in either mode
 1241     RE re_test5(utf8_string);
 1242     CHECK(re_test5.FullMatch(utf8_string));
 1243     RE re_test6(utf8_string, pcrecpp::UTF8());
 1244     CHECK(re_test6.FullMatch(utf8_string));
 1245 
 1246     // Check that pattern matches string only in UTF8 mode
 1247     RE re_test7(utf8_pattern);
 1248     CHECK(!re_test7.FullMatch(utf8_string));
 1249     RE re_test8(utf8_pattern, pcrecpp::UTF8());
 1250     CHECK(re_test8.FullMatch(utf8_string));
 1251   }
 1252 
 1253   // Check that ungreedy, UTF8 regular expressions don't match when they
 1254   // oughtn't -- see bug 82246.
 1255   {
 1256     // This code always worked.
 1257     const char* pattern = "\\w+X";
 1258     const string target = "a aX";
 1259     RE match_sentence(pattern);
 1260     RE match_sentence_re(pattern, pcrecpp::UTF8());
 1261 
 1262     CHECK(!match_sentence.FullMatch(target));
 1263     CHECK(!match_sentence_re.FullMatch(target));
 1264   }
 1265 
 1266   {
 1267     const char* pattern = "(?U)\\w+X";
 1268     const string target = "a aX";
 1269     RE match_sentence(pattern);
 1270     RE match_sentence_re(pattern, pcrecpp::UTF8());
 1271 
 1272     CHECK(!match_sentence.FullMatch(target));
 1273     CHECK(!match_sentence_re.FullMatch(target));
 1274   }
 1275 #endif  /* def SUPPORT_UTF */
 1276 
 1277   printf("Testing error reporting\n");
 1278 
 1279   { RE re("a\\1"); CHECK(!re.error().empty()); }
 1280   {
 1281     RE re("a[x");
 1282     CHECK(!re.error().empty());
 1283   }
 1284   {
 1285     RE re("a[z-a]");
 1286     CHECK(!re.error().empty());
 1287   }
 1288   {
 1289     RE re("a[[:foobar:]]");
 1290     CHECK(!re.error().empty());
 1291   }
 1292   {
 1293     RE re("a(b");
 1294     CHECK(!re.error().empty());
 1295   }
 1296   {
 1297     RE re("a\\");
 1298     CHECK(!re.error().empty());
 1299   }
 1300 
 1301   // Test that recursion is stopped
 1302   TestRecursion();
 1303 
 1304   // Test Options
 1305   if (getenv("VERBOSE_TEST") != NULL)
 1306     VERBOSE_TEST  = true;
 1307   TestOptions();
 1308 
 1309   // Test the constructors
 1310   TestConstructors();
 1311 
 1312   // Done
 1313   printf("OK\n");
 1314 
 1315   return 0;
 1316 }