encodingstest.cpp (rapidjson-1.0.2) | : | encodingstest.cpp (rapidjson-1.1.0) | ||
---|---|---|---|---|
skipping to change at line 243 | skipping to change at line 243 | |||
0xE0100, 0xE01EF, // Variation Selectors Supplement | 0xE0100, 0xE01EF, // Variation Selectors Supplement | |||
0xF0000, 0xFFFFF, // Supplementary Private Use Area-A | 0xF0000, 0xFFFFF, // Supplementary Private Use Area-A | |||
0x100000, 0x10FFFF, // Supplementary Private Use Area-B | 0x100000, 0x10FFFF, // Supplementary Private Use Area-B | |||
0xFFFFFFFF | 0xFFFFFFFF | |||
}; | }; | |||
// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> | // Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> | |||
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. | // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. | |||
#define UTF8_ACCEPT 0u | #define UTF8_ACCEPT 0u | |||
#define UTF8_REJECT 12u | ||||
static const unsigned char utf8d[] = { | static const unsigned char utf8d[] = { | |||
// The first part of the table maps bytes to character classes that | // The first part of the table maps bytes to character classes that | |||
// to reduce the size of the transition table and create bitmasks. | // to reduce the size of the transition table and create bitmasks. | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, | |||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | |||
skipping to change at line 301 | skipping to change at line 300 | |||
UTF8<>::Encode(os, codepoint); | UTF8<>::Encode(os, codepoint); | |||
const char* encodedStr = os.GetString(); | const char* encodedStr = os.GetString(); | |||
// Decode with Hoehrmann | // Decode with Hoehrmann | |||
{ | { | |||
unsigned decodedCodepoint = 0; | unsigned decodedCodepoint = 0; | |||
unsigned state = 0; | unsigned state = 0; | |||
unsigned decodedCount = 0; | unsigned decodedCount = 0; | |||
for (const char* s = encodedStr; *s; ++s) | for (const char* s = encodedStr; *s; ++s) | |||
if (!decode(&state, &decodedCodepoint, (unsigned char)*s)) { | if (!decode(&state, &decodedCodepoint, static_cast<unsigned char>(*s))) { | |||
EXPECT_EQ(codepoint, decodedCodepoint); | EXPECT_EQ(codepoint, decodedCodepoint); | |||
decodedCount++; | decodedCount++; | |||
} | } | |||
if (*encodedStr) // This decoder cannot handle U+0000 | if (*encodedStr) { // This decoder cannot handle U+0000 | |||
EXPECT_EQ(1u, decodedCount); // Should only contain one code point | EXPECT_EQ(1u, decodedCount); // Should only contain one code point | |||
} | ||||
EXPECT_EQ(UTF8_ACCEPT, state); | EXPECT_EQ(UTF8_ACCEPT, state); | |||
if (UTF8_ACCEPT != state) | if (UTF8_ACCEPT != state) | |||
std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl; | std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl; | |||
} | } | |||
// Decode | // Decode | |||
{ | { | |||
StringStream is(encodedStr); | StringStream is(encodedStr); | |||
unsigned decodedCodepoint; | unsigned decodedCodepoint; | |||
skipping to change at line 358 | skipping to change at line 358 | |||
{ | { | |||
// encode with UTF8<> first | // encode with UTF8<> first | |||
utf8os.Clear(); | utf8os.Clear(); | |||
UTF8<>::Encode(utf8os, codepoint); | UTF8<>::Encode(utf8os, codepoint); | |||
// transcode from UTF8 to UTF16 with Hoehrmann's code | // transcode from UTF8 to UTF16 with Hoehrmann's code | |||
unsigned decodedCodepoint = 0; | unsigned decodedCodepoint = 0; | |||
unsigned state = 0; | unsigned state = 0; | |||
UTF16<>::Ch buffer[3], *p = &buffer[0]; | UTF16<>::Ch buffer[3], *p = &buffer[0]; | |||
for (const char* s = utf8os.GetString(); *s; ++s) { | for (const char* s = utf8os.GetString(); *s; ++s) { | |||
if (!decode(&state, &decodedCodepoint, (unsigned char)*s)) | if (!decode(&state, &decodedCodepoint, static_cast<unsigned char>(*s))) | |||
break; | break; | |||
} | } | |||
if (codepoint <= 0xFFFF) | if (codepoint <= 0xFFFF) | |||
*p++ = static_cast<UTF16<>::Ch>(decodedCodepoint); | *p++ = static_cast<UTF16<>::Ch>(decodedCodepoint); | |||
else { | else { | |||
// Encode code points above U+FFFF as surrogate pair. | // Encode code points above U+FFFF as surrogate pair. | |||
*p++ = static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10)); | *p++ = static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10)); | |||
*p++ = static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF)); | *p++ = static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF)); | |||
} | } | |||
skipping to change at line 427 | skipping to change at line 427 | |||
{ | { | |||
GenericStringStream<UTF32<> > is(encodedStr); | GenericStringStream<UTF32<> > is(encodedStr); | |||
os2.Clear(); | os2.Clear(); | |||
bool result = UTF32<>::Validate(is, os2); | bool result = UTF32<>::Validate(is, os2); | |||
EXPECT_TRUE(result); | EXPECT_TRUE(result); | |||
EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString())); | EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString())); | |||
} | } | |||
} | } | |||
} | } | |||
} | } | |||
TEST(EncodingsTest, ASCII) { | ||||
StringBuffer os, os2; | ||||
for (unsigned codepoint = 0; codepoint < 128; codepoint++) { | ||||
os.Clear(); | ||||
ASCII<>::Encode(os, codepoint); | ||||
const ASCII<>::Ch* encodedStr = os.GetString(); | ||||
{ | ||||
StringStream is(encodedStr); | ||||
unsigned decodedCodepoint; | ||||
bool result = ASCII<>::Decode(is, &decodedCodepoint); | ||||
if (!result || codepoint != decodedCodepoint) | ||||
std::cout << std::hex << codepoint << " " << decodedCodepoint << | ||||
std::endl; | ||||
} | ||||
// Validate | ||||
{ | ||||
StringStream is(encodedStr); | ||||
os2.Clear(); | ||||
bool result = ASCII<>::Validate(is, os2); | ||||
EXPECT_TRUE(result); | ||||
EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString())); | ||||
} | ||||
} | ||||
} | ||||
End of changes. 6 change blocks. | ||||
4 lines changed or deleted | 4 lines changed or added |