Rewrote numeric character reference parsing (fixes parsing of &#; and &#x;) and added more character reference tests

git-svn-id: http://pugixml.googlecode.com/svn/trunk@512 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine 2010-06-11 20:39:57 +00:00
parent 141d26d3af
commit 624b5702d7
2 changed files with 79 additions and 20 deletions

View File

@ -1374,37 +1374,48 @@ namespace
{ {
unsigned int ucsc = 0; unsigned int ucsc = 0;
++stre; if (stre[1] == 'x') // &#x... (hex code)
if (*stre == 'x') // &#x... (hex code)
{ {
++stre; stre += 2;
while (*stre) char_t ch = *stre;
if (ch == ';') return stre;
for (;;)
{ {
if (*stre >= '0' && *stre <= '9') if (static_cast<unsigned int>(ch - '0') <= 9)
ucsc = 16 * ucsc + (*stre++ - '0'); ucsc = 16 * ucsc + (ch - '0');
else if (*stre >= 'A' && *stre <= 'F') else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
ucsc = 16 * ucsc + (*stre++ - 'A' + 10); ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
else if (*stre >= 'a' && *stre <= 'f') else if (ch == ';')
ucsc = 16 * ucsc + (*stre++ - 'a' + 10);
else if (*stre == ';')
break; break;
else // cancel else // cancel
return stre; return stre;
}
if (*stre != ';') return stre; ch = *++stre;
}
++stre; ++stre;
} }
else // &#... (dec code) else // &#... (dec code)
{ {
while (*stre >= '0' && *stre <= '9') char_t ch = *++stre;
ucsc = 10 * ucsc + (*stre++ - '0');
if (*stre != ';') return stre; if (ch == ';') return stre;
for (;;)
{
if (static_cast<unsigned int>(ch - '0') <= 9)
ucsc = 10 * ucsc + (ch - '0');
else if (ch == ';')
break;
else // cancel
return stre;
ch = *++stre;
}
++stre; ++stre;
} }

View File

@ -281,6 +281,47 @@ TEST(parse_escapes_code)
CHECK_STRING(doc.child_value(STR("node")), STR("\01 ")); CHECK_STRING(doc.child_value(STR("node")), STR("\01 "));
} }
// Decimal numeric character references: probes the characters that may follow "&#".
// The input covers '/' and ':' (the ASCII neighbours of '0' and '9'), the digits
// (including a leading zero in "&#01;"), the letters 'a' / 'A', and an upper-case
// "X" prefix.
TEST(parse_escapes_code_exhaustive_dec)
{
xml_document doc;
// Load with parse_minimal | parse_escapes; the load itself is expected to succeed.
CHECK(doc.load(STR("<node>&#/;&#01;&#2;&#3;&#4;&#5;&#6;&#7;&#8;&#9;&#:;&#a;&#A;&#XA;</node>"), parse_minimal | parse_escapes));
// Expected: "&#01;".."&#9;" become the characters with codes 1..9; every other
// reference (non-digit body, and "&#XA;" with a capital X) is kept verbatim.
CHECK_STRING(doc.child_value(STR("node")), STR("&#/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#:;&#a;&#A;&#XA;"));
}
// Hexadecimal numeric character references: probes the characters that may follow "&#x".
// The input covers every hex digit in both letter cases plus the ASCII characters
// adjacent to each valid range: '/' and ':' around '0'-'9', '@' and 'G' around
// 'A'-'F', '`' and 'g' around 'a'-'f'.
TEST(parse_escapes_code_exhaustive_hex)
{
xml_document doc;
// Load with parse_minimal | parse_escapes; the load itself is expected to succeed.
CHECK(doc.load(STR("<node>&#x/;&#x01;&#x2;&#x3;&#x4;&#x5;&#x6;&#x7;&#x8;&#x9;&#x:;&#x@;&#xA;&#xB;&#xC;&#xD;&#xE;&#xF;&#xG;&#x`;&#xa;&#xb;&#xc;&#xd;&#xe;&#xf;&#xg;</node>"), parse_minimal | parse_escapes));
// Expected: references with a valid hex digit become the characters with codes
// 1..9 and 0xA..0xF (upper- and lower-case letters give the same result); the
// references built from the out-of-range neighbour characters are kept verbatim.
CHECK_STRING(doc.child_value(STR("node")), STR("&#x/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#x:;&#x@;\xa\xb\xc\xd\xe\xf&#xG;&#x`;\xa\xb\xc\xd\xe\xf&#xg;"));
}
// Numeric character references that are not terminated by ';'.
// Each malformed reference ("&#1", "&#x1", "&#1-", "&#x1-") is immediately
// followed by a well-formed "&#32;".
TEST(parse_escapes_code_restore)
{
xml_document doc;
// Load with parse_minimal | parse_escapes; the load itself is expected to succeed.
CHECK(doc.load(STR("<node>&#1&#32;&#x1&#32;&#1-&#32;&#x1-&#32;</node>"), parse_minimal | parse_escapes));
// Expected: the unterminated references are kept verbatim, while each following
// "&#32;" is still decoded to a space.
CHECK_STRING(doc.child_value(STR("node")), STR("&#1 &#x1 &#1- &#x1- "));
}
// Named entity references that are truncated or lack the terminating ';'.
// For each of quot, apos, amp, lt and gt, every prefix of the name (up to and
// including the full name without ';') is followed by a well-formed "&#32;".
// In every case the expected text keeps the incomplete reference verbatim and
// decodes the following "&#32;" to a space.
TEST(parse_escapes_char_restore)
{
xml_document doc;
// Prefixes of &quot;
CHECK(doc.load(STR("<node>&q&#32;&qu&#32;&quo&#32;&quot&#32;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo &quot "));
// Prefixes of &apos;
CHECK(doc.load(STR("<node>&a&#32;&ap&#32;&apo&#32;&apos&#32;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos "));
// Prefixes of &amp; (shares the leading "&a" with &apos;)
CHECK(doc.load(STR("<node>&a&#32;&am&#32;&amp&#32;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&a &am &amp "));
// Prefixes of &lt;
CHECK(doc.load(STR("<node>&l&#32;&lt&#32;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&l &lt "));
// Prefixes of &gt;
CHECK(doc.load(STR("<node>&g&#32;&gt&#32;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&g &gt "));
}
TEST(parse_escapes_unicode) TEST(parse_escapes_unicode)
{ {
xml_document doc; xml_document doc;
@ -314,6 +355,13 @@ TEST(parse_escapes_error)
CHECK(!doc.load(STR("<node id='&apos"))); CHECK(!doc.load(STR("<node id='&apos")));
} }
// References with an empty or non-numeric body: "&#;", "&#x;", "&;", "&#x-;", "&#-;".
TEST(parse_escapes_code_invalid)
{
xml_document doc;
// Load with parse_minimal | parse_escapes; the load itself is expected to succeed.
CHECK(doc.load(STR("<node>&#;&#x;&;&#x-;&#-;</node>"), parse_minimal | parse_escapes));
// Expected: the node text is identical to the input text, i.e. none of these
// sequences is decoded or removed.
CHECK_STRING(doc.child_value(STR("node")), STR("&#;&#x;&;&#x-;&#-;"));
}
TEST(parse_attribute_spaces) TEST(parse_attribute_spaces)
{ {
xml_document doc; xml_document doc;