"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "htmlparsetest.cc" between
xapian-omega-1.4.18.tar.xz and xapian-omega-1.4.19.tar.xz

About: Xapian Omega is an application built on Xapian, consisting of indexers and a CGI search frontend.

htmlparsetest.cc  (xapian-omega-1.4.18.tar.xz):htmlparsetest.cc  (xapian-omega-1.4.19.tar.xz)
skipping to change at line 64 skipping to change at line 64
// Check that whitespace is handled as intended. // Check that whitespace is handled as intended.
{ " <b>not </b>\n<b>\table\t</b>\r\n", "not able", "", "", "" }, { " <b>not </b>\n<b>\table\t</b>\r\n", "not able", "", "", "" },
{ "<html><head><title>\xc2\xae</title></head><body>\xc2\xa3</body></html>", "\xc3\x82\xc2\xa3", "\xc3\x82\xc2\xae", "", "" }, { "<html><head><title>\xc2\xae</title></head><body>\xc2\xa3</body></html>", "\xc3\x82\xc2\xa3", "\xc3\x82\xc2\xae", "", "" },
{ "<html><head><meta http-equiv=Content-Type content=\"text/html;charset=iso -8859-1\"><title>\xc2\xae</title></head><body>\xc2\xa3</body></html>", "\xc3\x82 \xc2\xa3", "\xc3\x82\xc2\xae", "", "" }, { "<html><head><meta http-equiv=Content-Type content=\"text/html;charset=iso -8859-1\"><title>\xc2\xae</title></head><body>\xc2\xa3</body></html>", "\xc3\x82 \xc2\xa3", "\xc3\x82\xc2\xae", "", "" },
{ "<html><head><meta http-equiv=Content-Type content=\"text/html;charset=utf -8\"><title>\xc2\xae</title></head><body>\xc2\xa3</body></html>", "\xc2\xa3", "\ xc2\xae", "", "" }, { "<html><head><meta http-equiv=Content-Type content=\"text/html;charset=utf -8\"><title>\xc2\xae</title></head><body>\xc2\xa3</body></html>", "\xc2\xa3", "\ xc2\xae", "", "" },
{ "<html><head><meta charset='utf-8'><title>\xc2\xae</title></head><body>\xc 2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<html><head><meta charset='utf-8'><title>\xc2\xae</title></head><body>\xc 2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
{ "<html><head><title>\xc2\xae</title><meta charset=\"utf-8\"></head><body>\ xc2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<html><head><title>\xc2\xae</title><meta charset=\"utf-8\"></head><body>\ xc2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
// The UTF-8 "BOM" should also set the charset to utf-8. // The UTF-8 "BOM" should also set the charset to utf-8.
{ "\xef\xbb\xbf<html><head><title>\xc2\xae</title></head><body>\xc2\xa3</bod y></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "\xef\xbb\xbf<html><head><title>\xc2\xae</title></head><body>\xc2\xa3</bod y></html>", "\xc2\xa3", "\xc2\xae", "", "" },
{ "<title>X</title>", "", "X", "", "" }, { "<title>X</title>", "", "X", "", "" },
{ WIDE("\xfe\xff<\0t\0i\0t\0l\0e\0>\0\x20\x26<\0/\0t\0i\0t\0l\0e\0>\0"), "", { WIDE("\xff\xfe<\0t\0i\0t\0l\0e\0>\0\x20\x26<\0/\0t\0i\0t\0l\0e\0>\0"), "",
"\xe2\x98\xa0", "", "" }, "\xe2\x98\xa0", "", "" },
{ WIDE("\xff\xfe\0<\0t\0i\0t\0l\0e\0>\x26\x20\0<\0/\0t\0i\0t\0l\0e\0>"), "", { WIDE("\xfe\xff\0<\0t\0i\0t\0l\0e\0>\x26\x20\0<\0/\0t\0i\0t\0l\0e\0>"), "",
"\xe2\x98\xa0", "", "" }, "\xe2\x98\xa0", "", "" },
{ "<html><body><p>This is \nthe text</p><p>This is \nthe tex</p></body></htm l>", "This is the text\rThis is the tex", "", "", "" }, { "<html><body><p>This is \nthe text</p><p>This is \nthe tex</p></body></htm l>", "This is the text\rThis is the tex", "", "", "" },
// Check we default to UTF-8 for HTML5. // Check we default to UTF-8 for HTML5.
{ "<!DOCTYPE html><html><head><title>\xc2\xae</title></head><body>\xc2\xa3</ body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<!DOCTYPE html><html><head><title>\xc2\xae</title></head><body>\xc2\xa3</ body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
{ "<!Doctype\tHTML ><html><head><title>\xc2\xae</title></head><body>\xc2\xa 3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<!Doctype\tHTML ><html><head><title>\xc2\xae</title></head><body>\xc2\xa 3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
{ "<!Doctype HTML\t><html><head><title>\xc2\xae</title></head><body>\xc2\xa 3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<!Doctype HTML\t><html><head><title>\xc2\xae</title></head><body>\xc2\xa 3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
{ "<!DOCTYPE system 'about:legacy-compat'><html><head><title>\xc2\xae</title ></head><body>\xc2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<!DOCTYPE system 'about:legacy-compat'><html><head><title>\xc2\xae</title ></head><body>\xc2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
{ "<!doctype SyStem \"about:legacy-compat\" ><html><head><title>\xc2\xae</ti tle></head><body>\xc2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<!doctype SyStem \"about:legacy-compat\" ><html><head><title>\xc2\xae</ti tle></head><body>\xc2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
// Check we default to UTF-8 for XML. // Check we default to UTF-8 for XML.
{ "<?xml version=\"1.0\"?><html><head><title>\xc2\xae</title></head><body>\x c2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" }, { "<?xml version=\"1.0\"?><html><head><title>\xc2\xae</title></head><body>\x c2\xa3</body></html>", "\xc2\xa3", "\xc2\xae", "", "" },
// Check we handle specifying a charset for XML. // Check we handle specifying a charset for XML.
skipping to change at line 101 skipping to change at line 101
// Test empty tags. // Test empty tags.
// //
// First two cases are a regression test - in Omega < 1.4.16 the title // First two cases are a regression test - in Omega < 1.4.16 the title
// wasn't closed and any body content was put into the title instead. // wasn't closed and any body content was put into the title instead.
{ "<head><title xml:lang=\"en-US\"/></head><body><p>Body</p></body>", "Body" , "", "", "" }, { "<head><title xml:lang=\"en-US\"/></head><body><p>Body</p></body>", "Body" , "", "", "" },
{ "<head><title xml:lang='en-US'/></head><body><p>Body</p></body>", "Body", "", "", "" }, { "<head><title xml:lang='en-US'/></head><body><p>Body</p></body>", "Body", "", "", "" },
{ "<head><title xml:lang=\"en-US\" /></head><body><p>Body</p></body>", "Body ", "", "", "" }, { "<head><title xml:lang=\"en-US\" /></head><body><p>Body</p></body>", "Body ", "", "", "" },
{ "<head><title xml:lang='en-US\" /></head><body><p>Body</p></body>", "Body" , "", "", "" }, { "<head><title xml:lang='en-US\" /></head><body><p>Body</p></body>", "Body" , "", "", "" },
{ "<head><title/></head><body><p>Body</p></body>", "Body", "", "", "" }, { "<head><title/></head><body><p>Body</p></body>", "Body", "", "", "" },
{ "<head><title /></head><body><p>Body</p></body>", "Body", "", "", "" }, { "<head><title /></head><body><p>Body</p></body>", "Body", "", "", "" },
// Test handling of PHP tags.
{ "T<?php $a=PHP_MAJOR_VERSION > 7 ?>\r\ne<? if ($a) new(); ?>\ns<?= $a ?>\r
ting<? ?>\n\nPHP<?php $a=0;", "Testing PHP", "", "", "" },
{ 0, 0, 0, 0, 0 } { 0, 0, 0, 0, 0 }
}; };
int int
main() main()
{ {
for (size_t i = 0; tests[i].html; ++i) { for (size_t i = 0; tests[i].html; ++i) {
MyHtmlParser p; MyHtmlParser p;
const char* html_begin = tests[i].html; const char* html_begin = tests[i].html;
size_t html_len = strlen(html_begin); size_t html_len = strlen(html_begin);
 End of changes. 2 change blocks. 
4 lines changed or deleted 7 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)