diff options
Diffstat (limited to 'lib/win32/pcre/pcrecpp_unittest.cc')
-rw-r--r-- | lib/win32/pcre/pcrecpp_unittest.cc | 1276 |
1 files changed, 0 insertions, 1276 deletions
diff --git a/lib/win32/pcre/pcrecpp_unittest.cc b/lib/win32/pcre/pcrecpp_unittest.cc deleted file mode 100644 index 44e0cc9558..0000000000 --- a/lib/win32/pcre/pcrecpp_unittest.cc +++ /dev/null @@ -1,1276 +0,0 @@ -// -*- coding: utf-8 -*- -// -// Copyright (c) 2005 - 2006, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Sanjay Ghemawat -// -// TODO: Test extractions for PartialMatch/Consume - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdio.h> -#include <cassert> -#include <vector> -#include "pcrecpp.h" - -using pcrecpp::StringPiece; -using pcrecpp::RE; -using pcrecpp::RE_Options; -using pcrecpp::Hex; -using pcrecpp::Octal; -using pcrecpp::CRadix; - -static bool VERBOSE_TEST = false; - -// CHECK dies with a fatal error if condition is not true. It is *not* -// controlled by NDEBUG, so the check will be executed regardless of -// compilation mode. Therefore, it is safe to do things like: -// CHECK_EQ(fp->Write(x), 4) -#define CHECK(condition) do { \ - if (!(condition)) { \ - fprintf(stderr, "%s:%d: Check failed: %s\n", \ - __FILE__, __LINE__, #condition); \ - exit(1); \ - } \ -} while (0) - -#define CHECK_EQ(a, b) CHECK(a == b) - -static void Timing1(int num_iters) { - // Same pattern lots of times - RE pattern("ruby:\\d+"); - StringPiece p("ruby:1234"); - for (int j = num_iters; j > 0; j--) { - CHECK(pattern.FullMatch(p)); - } -} - -static void Timing2(int num_iters) { - // Same pattern lots of times - RE pattern("ruby:(\\d+)"); - int i; - for (int j = num_iters; j > 0; j--) { - CHECK(pattern.FullMatch("ruby:1234", &i)); - CHECK_EQ(i, 1234); - } -} - -static void Timing3(int num_iters) { - string text_string; - for (int j = num_iters; j > 0; j--) { - text_string += "this is another line\n"; - } - - RE line_matcher(".*\n"); - string line; - StringPiece text(text_string); - int counter = 0; - while (line_matcher.Consume(&text)) { - counter++; - } - printf("Matched %d lines\n", counter); -} - -#if 0 // uncomment this if you have a way of defining VirtualProcessSize() - -static void LeakTest() { - // Check for memory leaks - unsigned long long initial_size = 0; - for (int i = 0; i < 100000; i++) { - if (i == 50000) { - initial_size = VirtualProcessSize(); - printf("Size after 50000: %llu\n", initial_size); - } - char buf[100]; // definitely big enough - sprintf(buf, "pat%09d", i); - RE newre(buf); - } - uint64 final_size = VirtualProcessSize(); - printf("Size after 100000: %llu\n", final_size); - const double growth = double(final_size - initial_size) / final_size; - printf("Growth: %0.2f%%", growth * 100); - CHECK(growth < 0.02); // Allow < 2% growth -} - -#endif - -static void RadixTests() { - printf("Testing hex\n"); - -#define CHECK_HEX(type, value) \ - do { \ - type v; \ - CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ - CHECK_EQ(v, 0x ## value); \ - CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ - CHECK_EQ(v, 0x ## value); \ - } while(0) - - CHECK_HEX(short, 2bad); - CHECK_HEX(unsigned short, 2badU); - CHECK_HEX(int, dead); - CHECK_HEX(unsigned int, deadU); - CHECK_HEX(long, 7eadbeefL); - CHECK_HEX(unsigned long, deadbeefUL); -#ifdef HAVE_LONG_LONG - CHECK_HEX(long long, 12345678deadbeefLL); -#endif -#ifdef HAVE_UNSIGNED_LONG_LONG - CHECK_HEX(unsigned long long, cafebabedeadbeefULL); -#endif - -#undef CHECK_HEX - - printf("Testing octal\n"); - -#define CHECK_OCTAL(type, value) \ - do { \ - type v; \ - CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ - CHECK_EQ(v, 0 ## value); \ - CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ - CHECK_EQ(v, 0 ## value); \ - } while(0) - - CHECK_OCTAL(short, 77777); - CHECK_OCTAL(unsigned short, 177777U); - CHECK_OCTAL(int, 17777777777); - CHECK_OCTAL(unsigned int, 37777777777U); - CHECK_OCTAL(long, 17777777777L); - CHECK_OCTAL(unsigned long, 37777777777UL); -#ifdef HAVE_LONG_LONG - CHECK_OCTAL(long long, 777777777777777777777LL); -#endif -#ifdef HAVE_UNSIGNED_LONG_LONG - CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); -#endif - -#undef CHECK_OCTAL - - printf("Testing decimal\n"); - -#define CHECK_DECIMAL(type, value) \ - do { \ - type v; \ - CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ - CHECK_EQ(v, value); \ - CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ - CHECK_EQ(v, value); \ - } while(0) - - CHECK_DECIMAL(short, -1); - CHECK_DECIMAL(unsigned short, 9999); - CHECK_DECIMAL(int, -1000); - CHECK_DECIMAL(unsigned int, 12345U); - CHECK_DECIMAL(long, -10000000L); - CHECK_DECIMAL(unsigned long, 3083324652U); -#ifdef HAVE_LONG_LONG - CHECK_DECIMAL(long long, -100000000000000LL); -#endif -#ifdef HAVE_UNSIGNED_LONG_LONG - CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); -#endif - -#undef CHECK_DECIMAL - -} - -static void TestReplace() { - printf("Testing Replace\n"); - - struct ReplaceTest { - const char *regexp; - const char *rewrite; - const char *original; - const char *single; - const char *global; - int global_count; // the expected return value from ReplaceAll - }; - static const ReplaceTest tests[] = { - { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", - "\\2\\1ay", - "the quick brown fox jumps over the lazy dogs.", - "ethay quick brown fox jumps over the lazy dogs.", - "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", - 9 }, - { "\\w+", - "\\0-NOSPAM", - "paul.haahr@google.com", - "paul-NOSPAM.haahr@google.com", - "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", - 4 }, - { "^", - "(START)", - "foo", - "(START)foo", - "(START)foo", - 1 }, - { "^", - "(START)", - "", - "(START)", - "(START)", - 1 }, - { "$", - "(END)", - "", - "(END)", - "(END)", - 1 }, - { "b", - "bb", - "ababababab", - "abbabababab", - "abbabbabbabbabb", - 5 }, - { "b", - "bb", - "bbbbbb", - "bbbbbbb", - "bbbbbbbbbbbb", - 6 }, - { "b+", - "bb", - "bbbbbb", - "bb", - "bb", - 1 }, - { "b*", - "bb", - "bbbbbb", - "bb", - "bb", - 1 }, - { "b*", - "bb", - "aaaaa", - "bbaaaaa", - "bbabbabbabbabbabb", - 6 }, - { "b*", - "bb", - "aa\naa\n", - "bbaa\naa\n", - "bbabbabb\nbbabbabb\nbb", - 7 }, - { "b*", - "bb", - "aa\raa\r", - "bbaa\raa\r", - "bbabbabb\rbbabbabb\rbb", - 7 }, - { "b*", - "bb", - "aa\r\naa\r\n", - "bbaa\r\naa\r\n", - "bbabbabb\r\nbbabbabb\r\nbb", - 7 }, -#ifdef SUPPORT_UTF8 - { "b*", - "bb", - "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 - "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", - "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", - 5 }, - { "b*", - "bb", - "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 - "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", - ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" - "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), - 9 }, -#endif - { "", NULL, NULL, NULL, NULL, 0 } - }; - -#ifdef SUPPORT_UTF8 - const bool support_utf8 = true; -#else - const bool support_utf8 = false; -#endif - - for (const ReplaceTest *t = tests; t->original != NULL; ++t) { - RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); - assert(re.error().empty()); - string one(t->original); - CHECK(re.Replace(t->rewrite, &one)); - CHECK_EQ(one, t->single); - string all(t->original); - const int replace_count = re.GlobalReplace(t->rewrite, &all); - CHECK_EQ(all, t->global); - CHECK_EQ(replace_count, t->global_count); - } - - // One final test: test \r\n replacement when we're not in CRLF mode - { - RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); - assert(re.error().empty()); - string all("aa\r\naa\r\n"); - CHECK_EQ(re.GlobalReplace("bb", &all), 9); - CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); - } - { - RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); - assert(re.error().empty()); - string all("aa\r\naa\r\n"); - CHECK_EQ(re.GlobalReplace("bb", &all), 9); - CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); - } - // TODO: test what happens when no PCRE_NEWLINE_* flag is set. - // Alas, the answer depends on how pcre was compiled. -} - -static void TestExtract() { - printf("Testing Extract\n"); - - string s; - - CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s)); - CHECK_EQ(s, "kremvax!boris"); - - // check the RE interface as well - CHECK(RE(".*").Extract("'\\0'", "foo", &s)); - CHECK_EQ(s, "'foo'"); - CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); - CHECK_EQ(s, "'foo'"); -} - -static void TestConsume() { - printf("Testing Consume\n"); - - string word; - - string s(" aaa b!@#$@#$cccc"); - StringPiece input(s); - - RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace - CHECK(r.Consume(&input, &word)); - CHECK_EQ(word, "aaa"); - CHECK(r.Consume(&input, &word)); - CHECK_EQ(word, "b"); - CHECK(! r.Consume(&input, &word)); -} - -static void TestFindAndConsume() { - printf("Testing FindAndConsume\n"); - - string word; - - string s(" aaa b!@#$@#$cccc"); - StringPiece input(s); - - RE r("(\\w+)"); // matches a word - CHECK(r.FindAndConsume(&input, &word)); - CHECK_EQ(word, "aaa"); - CHECK(r.FindAndConsume(&input, &word)); - CHECK_EQ(word, "b"); - CHECK(r.FindAndConsume(&input, &word)); - CHECK_EQ(word, "cccc"); - CHECK(! r.FindAndConsume(&input, &word)); -} - -static void TestMatchNumberPeculiarity() { - printf("Testing match-number peculiaraity\n"); - - string word1; - string word2; - string word3; - - RE r("(foo)|(bar)|(baz)"); - CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); - CHECK_EQ(word1, "foo"); - CHECK_EQ(word2, ""); - CHECK_EQ(word3, ""); - CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); - CHECK_EQ(word1, ""); - CHECK_EQ(word2, "bar"); - CHECK_EQ(word3, ""); - CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); - CHECK_EQ(word1, ""); - CHECK_EQ(word2, ""); - CHECK_EQ(word3, "baz"); - CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); - - string a; - CHECK(RE("(foo)|hello").FullMatch("hello", &a)); - CHECK_EQ(a, ""); -} - -static void TestRecursion() { - printf("Testing recursion\n"); - - // Get one string that passes (sometimes), one that never does. - string text_good("abcdefghijk"); - string text_bad("acdefghijkl"); - - // According to pcretest, matching text_good against (\w+)*b - // requires match_limit of at least 8192, and match_recursion_limit - // of at least 37. - - RE_Options options_ml; - options_ml.set_match_limit(8192); - RE re("(\\w+)*b", options_ml); - CHECK(re.PartialMatch(text_good) == true); - CHECK(re.PartialMatch(text_bad) == false); - CHECK(re.FullMatch(text_good) == false); - CHECK(re.FullMatch(text_bad) == false); - - options_ml.set_match_limit(1024); - RE re2("(\\w+)*b", options_ml); - CHECK(re2.PartialMatch(text_good) == false); // because of match_limit - CHECK(re2.PartialMatch(text_bad) == false); - CHECK(re2.FullMatch(text_good) == false); - CHECK(re2.FullMatch(text_bad) == false); - - RE_Options options_mlr; - options_mlr.set_match_limit_recursion(50); - RE re3("(\\w+)*b", options_mlr); - CHECK(re3.PartialMatch(text_good) == true); - CHECK(re3.PartialMatch(text_bad) == false); - CHECK(re3.FullMatch(text_good) == false); - CHECK(re3.FullMatch(text_bad) == false); - - options_mlr.set_match_limit_recursion(10); - RE re4("(\\w+)*b", options_mlr); - CHECK(re4.PartialMatch(text_good) == false); - CHECK(re4.PartialMatch(text_bad) == false); - CHECK(re4.FullMatch(text_good) == false); - CHECK(re4.FullMatch(text_bad) == false); -} - -// A meta-quoted string, interpreted as a pattern, should always match -// the original unquoted string. -static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { - string quoted = RE::QuoteMeta(unquoted); - RE re(quoted, options); - CHECK(re.FullMatch(unquoted)); -} - -// A string containing meaningful regexp characters, which is then meta- -// quoted, should not generally match a string the unquoted string does. -static void NegativeTestQuoteMeta(string unquoted, string should_not_match, - RE_Options options = RE_Options()) { - string quoted = RE::QuoteMeta(unquoted); - RE re(quoted, options); - CHECK(!re.FullMatch(should_not_match)); -} - -// Tests that quoted meta characters match their original strings, -// and that a few things that shouldn't match indeed do not. -static void TestQuotaMetaSimple() { - TestQuoteMeta("foo"); - TestQuoteMeta("foo.bar"); - TestQuoteMeta("foo\\.bar"); - TestQuoteMeta("[1-9]"); - TestQuoteMeta("1.5-2.0?"); - TestQuoteMeta("\\d"); - TestQuoteMeta("Who doesn't like ice cream?"); - TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); - TestQuoteMeta("((?!)xxx).*yyy"); - TestQuoteMeta("(["); - TestQuoteMeta(string("foo\0bar", 7)); -} - -static void TestQuoteMetaSimpleNegative() { - NegativeTestQuoteMeta("foo", "bar"); - NegativeTestQuoteMeta("...", "bar"); - NegativeTestQuoteMeta("\\.", "."); - NegativeTestQuoteMeta("\\.", ".."); - NegativeTestQuoteMeta("(a)", "a"); - NegativeTestQuoteMeta("(a|b)", "a"); - NegativeTestQuoteMeta("(a|b)", "(a)"); - NegativeTestQuoteMeta("(a|b)", "a|b"); - NegativeTestQuoteMeta("[0-9]", "0"); - NegativeTestQuoteMeta("[0-9]", "0-9"); - NegativeTestQuoteMeta("[0-9]", "[9]"); - NegativeTestQuoteMeta("((?!)xxx)", "xxx"); -} - -static void TestQuoteMetaLatin1() { - TestQuoteMeta("3\xb2 = 9"); -} - -static void TestQuoteMetaUtf8() { -#ifdef SUPPORT_UTF8 - TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); - TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 - TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) - TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character - TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) - TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) - TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work - NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) - "27\\\xc2\\\xb0", - pcrecpp::UTF8()); -#endif -} - -static void TestQuoteMetaAll() { - printf("Testing QuoteMeta\n"); - TestQuotaMetaSimple(); - TestQuoteMetaSimpleNegative(); - TestQuoteMetaLatin1(); - TestQuoteMetaUtf8(); -} - -// -// Options tests contributed by -// Giuseppe Maxia, CTO, Stardata s.r.l. -// July 2005 -// -static void GetOneOptionResult( - const char *option_name, - const char *regex, - const char *str, - RE_Options options, - bool full, - string expected) { - - printf("Testing Option <%s>\n", option_name); - if(VERBOSE_TEST) - printf("/%s/ finds \"%s\" within \"%s\" \n", - regex, - expected.c_str(), - str); - string captured(""); - if (full) - RE(regex,options).FullMatch(str, &captured); - else - RE(regex,options).PartialMatch(str, &captured); - CHECK_EQ(captured, expected); -} - -static void TestOneOption( - const char *option_name, - const char *regex, - const char *str, - RE_Options options, - bool full, - bool assertive = true) { - - printf("Testing Option <%s>\n", option_name); - if (VERBOSE_TEST) - printf("'%s' %s /%s/ \n", - str, - (assertive? "matches" : "doesn't match"), - regex); - if (assertive) { - if (full) - CHECK(RE(regex,options).FullMatch(str)); - else - CHECK(RE(regex,options).PartialMatch(str)); - } else { - if (full) - CHECK(!RE(regex,options).FullMatch(str)); - else - CHECK(!RE(regex,options).PartialMatch(str)); - } -} - -static void Test_CASELESS() { - RE_Options options; - RE_Options options2; - - options.set_caseless(true); - TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); - TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); - TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); - - TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); - TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); - options.set_caseless(false); - TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); -} - -static void Test_MULTILINE() { - RE_Options options; - RE_Options options2; - const char *str = "HELLO\n" "cruel\n" "world\n"; - - options.set_multiline(true); - TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); - TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); - TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); - options.set_multiline(false); - TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); -} - -static void Test_DOTALL() { - RE_Options options; - RE_Options options2; - const char *str = "HELLO\n" "cruel\n" "world"; - - options.set_dotall(true); - TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); - TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); - TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); - options.set_dotall(false); - TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); -} - -static void Test_DOLLAR_ENDONLY() { - RE_Options options; - RE_Options options2; - const char *str = "HELLO world\n"; - - TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); - options.set_dollar_endonly(true); - TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); - TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); -} - -static void Test_EXTRA() { - RE_Options options; - const char *str = "HELLO"; - - options.set_extra(true); - TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); - TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); - options.set_extra(false); - TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); -} - -static void Test_EXTENDED() { - RE_Options options; - RE_Options options2; - const char *str = "HELLO world"; - - options.set_extended(true); - TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); - TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); - TestOneOption("EXTENDED (class)", - "^ HE L{2} O " - "\\s+ " - "\\w+ $ ", - str, - options, - false); - - TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); - TestOneOption("EXTENDED (function)", - "^ HE L{2} O " - "\\s+ " - "\\w+ $ ", - str, - pcrecpp::EXTENDED(), - false); - - options.set_extended(false); - TestOneOption("no EXTENDED", "HELLO world", str, options, false); -} - -static void Test_NO_AUTO_CAPTURE() { - RE_Options options; - const char *str = "HELLO world"; - string captured; - - printf("Testing Option <no NO_AUTO_CAPTURE>\n"); - if (VERBOSE_TEST) - printf("parentheses capture text\n"); - RE re("(world|universe)$", options); - CHECK(re.Extract("\\1", str , &captured)); - CHECK_EQ(captured, "world"); - options.set_no_auto_capture(true); - printf("testing Option <NO_AUTO_CAPTURE>\n"); - if (VERBOSE_TEST) - printf("parentheses do not capture text\n"); - re.Extract("\\1",str, &captured ); - CHECK_EQ(captured, "world"); -} - -static void Test_UNGREEDY() { - RE_Options options; - const char *str = "HELLO, 'this' is the 'world'"; - - options.set_ungreedy(true); - GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); - GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); - GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); - - options.set_ungreedy(false); - GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); - GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); -} - -static void Test_all_options() { - const char *str = "HELLO\n" "cruel\n" "world"; - RE_Options options; - options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); - - TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); - options.set_all_options(0); - TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); - options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); - - TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); - TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", - " ^ c r u e l $ ", - str, - RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), - false); - - TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", - " ^ c r u e l $ ", - str, - RE_Options() - .set_multiline(true) - .set_extended(true), - false); - - options.set_all_options(0); - TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); - -} - -static void TestOptions() { - printf("Testing Options\n"); - Test_CASELESS(); - Test_MULTILINE(); - Test_DOTALL(); - Test_DOLLAR_ENDONLY(); - Test_EXTENDED(); - Test_NO_AUTO_CAPTURE(); - Test_UNGREEDY(); - Test_EXTRA(); - Test_all_options(); -} - -static void TestConstructors() { - printf("Testing constructors\n"); - - RE_Options options; - options.set_dotall(true); - const char *str = "HELLO\n" "cruel\n" "world"; - - RE orig("HELLO.*world", options); - CHECK(orig.FullMatch(str)); - - RE copy1(orig); - CHECK(copy1.FullMatch(str)); - - RE copy2("not a match"); - CHECK(!copy2.FullMatch(str)); - copy2 = copy1; - CHECK(copy2.FullMatch(str)); - copy2 = orig; - CHECK(copy2.FullMatch(str)); - - // Make sure when we assign to ourselves, nothing bad happens - orig = orig; - copy1 = copy1; - copy2 = copy2; - CHECK(orig.FullMatch(str)); - CHECK(copy1.FullMatch(str)); - CHECK(copy2.FullMatch(str)); -} - -int main(int argc, char** argv) { - // Treat any flag as --help - if (argc > 1 && argv[1][0] == '-') { - printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" - " If 'timingX ###' is specified, run the given timing test\n" - " with the given number of iterations, rather than running\n" - " the default corectness test.\n", argv[0]); - return 0; - } - - if (argc > 1) { - if ( argc == 2 || atoi(argv[2]) == 0) { - printf("timing mode needs a num-iters argument\n"); - return 1; - } - if (!strcmp(argv[1], "timing1")) - Timing1(atoi(argv[2])); - else if (!strcmp(argv[1], "timing2")) - Timing2(atoi(argv[2])); - else if (!strcmp(argv[1], "timing3")) - Timing3(atoi(argv[2])); - else - printf("Unknown argument '%s'\n", argv[1]); - return 0; - } - - printf("Testing FullMatch\n"); - - int i; - string s; - - /***** FullMatch with no args *****/ - - CHECK(RE("h.*o").FullMatch("hello")); - CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front - CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end - CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op - CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op - CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops - - /***** FullMatch with args *****/ - - // Zero-arg - CHECK(RE("\\d+").FullMatch("1001")); - - // Single-arg - CHECK(RE("(\\d+)").FullMatch("1001", &i)); - CHECK_EQ(i, 1001); - CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); - CHECK_EQ(i, -123); - CHECK(!RE("()\\d+").FullMatch("10", &i)); - CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", - &i)); - - // Digits surrounding integer-arg - CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); - CHECK_EQ(i, 23); - CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); - CHECK_EQ(i, 1); - CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); - CHECK_EQ(i, -1); - CHECK(RE("(\\d)").PartialMatch("1234", &i)); - CHECK_EQ(i, 1); - CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); - CHECK_EQ(i, -1); - - // String-arg - CHECK(RE("h(.*)o").FullMatch("hello", &s)); - CHECK_EQ(s, string("ell")); - - // StringPiece-arg - StringPiece sp; - CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); - CHECK_EQ(sp.size(), 4); - CHECK(memcmp(sp.data(), "ruby", 4) == 0); - CHECK_EQ(i, 1234); - - // Multi-arg - CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); - CHECK_EQ(s, string("ruby")); - CHECK_EQ(i, 1234); - - // Ignore non-void* NULL arg - CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); - CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); - CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); - CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); -#ifdef HAVE_LONG_LONG - CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); -#endif - CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); - CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); - - // Fail on non-void* NULL arg if the match doesn't parse for the given type. - CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); - CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); - CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); - CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); - CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); - - // Ignored arg - CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); - CHECK_EQ(s, string("ruby")); - CHECK_EQ(i, 1234); - - // Type tests - { - char c; - CHECK(RE("(H)ello").FullMatch("Hello", &c)); - CHECK_EQ(c, 'H'); - } - { - unsigned char c; - CHECK(RE("(H)ello").FullMatch("Hello", &c)); - CHECK_EQ(c, static_cast<unsigned char>('H')); - } - { - short v; - CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); - CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); - CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); - CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); - CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); - CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); - } - { - unsigned short v; - CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); - CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); - CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); - CHECK(!RE("(\\d+)").FullMatch("65536", &v)); - } - { - int v; - static const int max_value = 0x7fffffff; - static const int min_value = -max_value - 1; - CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); - CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); - CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); - CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); - CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); - CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); - } - { - unsigned int v; - static const unsigned int max_value = 0xfffffffful; - CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); - CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); - CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); - } -#ifdef HAVE_LONG_LONG -# if defined(__MINGW__) || defined(__MINGW32__) -# define LLD "%I64d" -# define LLU "%I64u" -# else -# define LLD "%lld" -# define LLU "%llu" -# endif - { - long long v; - static const long long max_value = 0x7fffffffffffffffLL; - static const long long min_value = -max_value - 1; - char buf[32]; // definitely big enough for a long long - - CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); - CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); - - sprintf(buf, LLD, max_value); - CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); - - sprintf(buf, LLD, min_value); - CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); - - sprintf(buf, LLD, max_value); - assert(buf[strlen(buf)-1] != '9'); - buf[strlen(buf)-1]++; - CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); - - sprintf(buf, LLD, min_value); - assert(buf[strlen(buf)-1] != '9'); - buf[strlen(buf)-1]++; - CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); - } -#endif -#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG - { - unsigned long long v; - long long v2; - static const unsigned long long max_value = 0xffffffffffffffffULL; - char buf[32]; // definitely big enough for a unsigned long long - - CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); - CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); - - sprintf(buf, LLU, max_value); - CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); - - assert(buf[strlen(buf)-1] != '9'); - buf[strlen(buf)-1]++; - CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); - } -#endif - { - float v; - CHECK(RE("(.*)").FullMatch("100", &v)); - CHECK(RE("(.*)").FullMatch("-100.", &v)); - CHECK(RE("(.*)").FullMatch("1e23", &v)); - } - { - double v; - CHECK(RE("(.*)").FullMatch("100", &v)); - CHECK(RE("(.*)").FullMatch("-100.", &v)); - CHECK(RE("(.*)").FullMatch("1e23", &v)); - } - - // Check that matching is fully anchored - CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); - CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); - CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); - CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); - - // Braces - CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); - CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); - CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); - - // Complicated RE - CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); - CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); - CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); - CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); - - // Check full-match handling (needs '$' tacked on internally) - CHECK(RE("fo|foo").FullMatch("fo")); - CHECK(RE("fo|foo").FullMatch("foo")); - CHECK(RE("fo|foo$").FullMatch("fo")); - CHECK(RE("fo|foo$").FullMatch("foo")); - CHECK(RE("foo$").FullMatch("foo")); - CHECK(!RE("foo\\$").FullMatch("foo$bar")); - CHECK(!RE("fo|bar").FullMatch("fox")); - - // Uncomment the following if we change the handling of '$' to - // prevent it from matching a trailing newline - if (false) { - // Check that we don't get bitten by pcre's special handling of a - // '\n' at the end of the string matching '$' - CHECK(!RE("foo$").PartialMatch("foo\n")); - } - - // Number of args - int a[16]; - CHECK(RE("").FullMatch("")); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d){1}").FullMatch("1", - &a[0])); - CHECK_EQ(a[0], 1); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)").FullMatch("12", - &a[0], &a[1])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", - &a[0], &a[1], &a[2])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - CHECK_EQ(a[2], 3); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", - &a[0], &a[1], &a[2], &a[3])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - CHECK_EQ(a[2], 3); - CHECK_EQ(a[3], 4); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", - &a[0], &a[1], &a[2], - &a[3], &a[4])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - CHECK_EQ(a[2], 3); - CHECK_EQ(a[3], 4); - CHECK_EQ(a[4], 5); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", - &a[0], &a[1], &a[2], - &a[3], &a[4], &a[5])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - CHECK_EQ(a[2], 3); - CHECK_EQ(a[3], 4); - CHECK_EQ(a[4], 5); - CHECK_EQ(a[5], 6); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", - &a[0], &a[1], &a[2], &a[3], - &a[4], &a[5], &a[6])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - CHECK_EQ(a[2], 3); - CHECK_EQ(a[3], 4); - CHECK_EQ(a[4], 5); - CHECK_EQ(a[5], 6); - CHECK_EQ(a[6], 7); - - memset(a, 0, sizeof(0)); - CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" - "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( - "1234567890123456", - &a[0], &a[1], &a[2], &a[3], - &a[4], &a[5], &a[6], &a[7], - &a[8], &a[9], &a[10], &a[11], - &a[12], &a[13], &a[14], &a[15])); - CHECK_EQ(a[0], 1); - CHECK_EQ(a[1], 2); - CHECK_EQ(a[2], 3); - CHECK_EQ(a[3], 4); - CHECK_EQ(a[4], 5); - CHECK_EQ(a[5], 6); - CHECK_EQ(a[6], 7); - CHECK_EQ(a[7], 8); - CHECK_EQ(a[8], 9); - CHECK_EQ(a[9], 0); - CHECK_EQ(a[10], 1); - CHECK_EQ(a[11], 2); - CHECK_EQ(a[12], 3); - CHECK_EQ(a[13], 4); - CHECK_EQ(a[14], 5); - CHECK_EQ(a[15], 6); - - /***** PartialMatch *****/ - - printf("Testing PartialMatch\n"); - - CHECK(RE("h.*o").PartialMatch("hello")); - CHECK(RE("h.*o").PartialMatch("othello")); - CHECK(RE("h.*o").PartialMatch("hello!")); - CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); - - /***** other tests *****/ - - RadixTests(); - TestReplace(); - TestExtract(); - TestConsume(); - TestFindAndConsume(); - TestQuoteMetaAll(); - TestMatchNumberPeculiarity(); - - // Check the pattern() accessor - { - const string kPattern = "http://([^/]+)/.*"; - const RE re(kPattern); - CHECK_EQ(kPattern, re.pattern()); - } - - // Check RE error field. - { - RE re("foo"); - CHECK(re.error().empty()); // Must have no error - } - -#ifdef SUPPORT_UTF8 - // Check UTF-8 handling - { - printf("Testing UTF-8 handling\n"); - - // Three Japanese characters (nihongo) - const unsigned char utf8_string[] = { - 0xe6, 0x97, 0xa5, // 65e5 - 0xe6, 0x9c, 0xac, // 627c - 0xe8, 0xaa, 0x9e, // 8a9e - 0 - }; - const unsigned char utf8_pattern[] = { - '.', - 0xe6, 0x9c, 0xac, // 627c - '.', - 0 - }; - - // Both should match in either mode, bytes or UTF-8 - RE re_test1("........."); - CHECK(re_test1.FullMatch(utf8_string)); - RE re_test2("...", pcrecpp::UTF8()); - CHECK(re_test2.FullMatch(utf8_string)); - - // Check that '.' matches one byte or UTF-8 character - // according to the mode. - string ss; - RE re_test3("(.)"); - CHECK(re_test3.PartialMatch(utf8_string, &ss)); - CHECK_EQ(ss, string("\xe6")); - RE re_test4("(.)", pcrecpp::UTF8()); - CHECK(re_test4.PartialMatch(utf8_string, &ss)); - CHECK_EQ(ss, string("\xe6\x97\xa5")); - - // Check that string matches itself in either mode - RE re_test5(utf8_string); - CHECK(re_test5.FullMatch(utf8_string)); - RE re_test6(utf8_string, pcrecpp::UTF8()); - CHECK(re_test6.FullMatch(utf8_string)); - - // Check that pattern matches string only in UTF8 mode - RE re_test7(utf8_pattern); - CHECK(!re_test7.FullMatch(utf8_string)); - RE re_test8(utf8_pattern, pcrecpp::UTF8()); - CHECK(re_test8.FullMatch(utf8_string)); - } - - // Check that ungreedy, UTF8 regular expressions don't match when they - // oughtn't -- see bug 82246. - { - // This code always worked. - const char* pattern = "\\w+X"; - const string target = "a aX"; - RE match_sentence(pattern); - RE match_sentence_re(pattern, pcrecpp::UTF8()); - - CHECK(!match_sentence.FullMatch(target)); - CHECK(!match_sentence_re.FullMatch(target)); - } - - { - const char* pattern = "(?U)\\w+X"; - const string target = "a aX"; - RE match_sentence(pattern); - RE match_sentence_re(pattern, pcrecpp::UTF8()); - - CHECK(!match_sentence.FullMatch(target)); - CHECK(!match_sentence_re.FullMatch(target)); - } -#endif /* def SUPPORT_UTF8 */ - - printf("Testing error reporting\n"); - - { RE re("a\\1"); CHECK(!re.error().empty()); } - { - RE re("a[x"); - CHECK(!re.error().empty()); - } - { - RE re("a[z-a]"); - CHECK(!re.error().empty()); - } - { - RE re("a[[:foobar:]]"); - CHECK(!re.error().empty()); - } - { - RE re("a(b"); - CHECK(!re.error().empty()); - } - { - RE re("a\\"); - CHECK(!re.error().empty()); - } - - // Test that recursion is stopped - TestRecursion(); - - // Test Options - if (getenv("VERBOSE_TEST") != NULL) - VERBOSE_TEST = true; - TestOptions(); - - // Test the constructors - TestConstructors(); - - // Done - printf("OK\n"); - - return 0; -} |