Add special treatment for writing single quotes within attribute values

This commit is contained in:
Maximilian Naumann 2019-06-18 11:07:52 -07:00
parent 90d26bf9f3
commit bcfdfd59b1
2 changed files with 8 additions and 4 deletions

View File

@ -1861,7 +1861,7 @@ PUGI__NS_BEGIN
enum chartypex_t
{
ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, >, "
ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, >, ", '
ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
ctx_digit = 8, // 0-9
ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
@ -1871,7 +1871,7 @@ PUGI__NS_BEGIN
{
3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
@ -3933,6 +3933,10 @@ PUGI__NS_BEGIN
writer.write('&', 'q', 'u', 'o', 't', ';');
++s;
break;
case '\'':
writer.write('&', 'a', 'p', 'o', 's', ';');
++s;
break;
default: // s is not a usual symbol
{
unsigned int ch = static_cast<unsigned int>(*s++);

View File

@ -193,7 +193,7 @@ TEST_XML(write_escape, "<node attr=''>text</node>")
doc.child(STR("node")).attribute(STR("attr")) = STR("<>'\"&\x04\r\n\t");
doc.child(STR("node")).first_child().set_value(STR("<>'\"&\x04\r\n\t"));
CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\r\n\t</node>"));
CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;&apos;&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\r\n\t</node>"));
}
TEST_XML(write_escape_roundtrip, "<node attr=''>text</node>")
@ -207,7 +207,7 @@ TEST_XML(write_escape_roundtrip, "<node attr=''>text</node>")
// Note: this string is almost identical to the string from write_escape with the exception of \r
// \r in PCDATA doesn't roundtrip because it has to go through newline conversion (which could be disabled, but is active by default)
CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\n\t</node>"));
CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;&apos;&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\n\t</node>"));
}
TEST_XML(write_escape_unicode, "<node attr='&#x3c00;'/>")