Do not emit surrounding whitespace for text nodes
Previously we omitted extra whitespace for single PCDATA/CDATA children, but in mixed content there was extra indentation before/after text nodes. One of the problems with that is that the text that you saved is not exactly the same as the parsing result using default flags (parse_trim_pcdata helps). Another problem is that parse-format cycles do not have a fixed point for mixed content - the result expands indefinitely. Some XML libraries, like Python minidom, have the same issue, but this is definitely a problem. Pretty-printing mixed content is hard. It seems that the only other sensible choice is to switch mixed content nodes to raw formatting. In a way the code in this change is a weaker version of that - it removes indentation around text nodes but still keeps it around element siblings/children. Thus we can switch to mixed-raw formatting at some point later, which will be a superset of the current behavior. To do this we have to either switch at the first text node (.NET XmlDocument does that), or scan the children of each element for a possible text node and switch before we output the first child. The former behavior seems non-intuitive (and a bit broken); unfortunately, the latter behavior can cost up to 20% of the output time for trees *without* mixed content. Fixes #13.
This commit is contained in:
parent
51da129b50
commit
5f996eba6d
128
src/pugixml.cpp
128
src/pugixml.cpp
@ -3511,61 +3511,28 @@ PUGI__NS_BEGIN
|
|||||||
if (node->first_attribute)
|
if (node->first_attribute)
|
||||||
node_output_attributes(writer, node, flags);
|
node_output_attributes(writer, node, flags);
|
||||||
|
|
||||||
if (flags & format_raw)
|
if (!node->first_child)
|
||||||
{
|
{
|
||||||
if (!node->first_child)
|
writer.write(' ', '/', '>');
|
||||||
writer.write(' ', '/', '>');
|
|
||||||
else
|
|
||||||
{
|
|
||||||
writer.write('>');
|
|
||||||
|
|
||||||
return true;
|
return false;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xml_node_struct* first = node->first_child;
|
writer.write('>');
|
||||||
|
|
||||||
if (!first)
|
return true;
|
||||||
writer.write(' ', '/', '>', '\n');
|
|
||||||
else if (!first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata))
|
|
||||||
{
|
|
||||||
writer.write('>');
|
|
||||||
|
|
||||||
const char_t* value = first->value ? first->value : PUGIXML_TEXT("");
|
|
||||||
|
|
||||||
if (PUGI__NODETYPE(first) == node_pcdata)
|
|
||||||
text_output(writer, value, ctx_special_pcdata, flags);
|
|
||||||
else
|
|
||||||
text_output_cdata(writer, value);
|
|
||||||
|
|
||||||
writer.write('<', '/');
|
|
||||||
writer.write_string(name);
|
|
||||||
writer.write('>', '\n');
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
writer.write('>', '\n');
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
|
PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
|
||||||
{
|
{
|
||||||
const char_t* default_name = PUGIXML_TEXT(":anonymous");
|
const char_t* default_name = PUGIXML_TEXT(":anonymous");
|
||||||
const char_t* name = node->name ? node->name : default_name;
|
const char_t* name = node->name ? node->name : default_name;
|
||||||
|
|
||||||
writer.write('<', '/');
|
writer.write('<', '/');
|
||||||
writer.write_string(name);
|
writer.write_string(name);
|
||||||
|
writer.write('>');
|
||||||
if (flags & format_raw)
|
|
||||||
writer.write('>');
|
|
||||||
else
|
|
||||||
writer.write('>', '\n');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
|
PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
|
||||||
@ -3576,17 +3543,14 @@ PUGI__NS_BEGIN
|
|||||||
{
|
{
|
||||||
case node_pcdata:
|
case node_pcdata:
|
||||||
text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
|
text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case node_cdata:
|
case node_cdata:
|
||||||
text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT(""));
|
text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT(""));
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case node_comment:
|
case node_comment:
|
||||||
node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT(""));
|
node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT(""));
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case node_pi:
|
case node_pi:
|
||||||
@ -3600,7 +3564,6 @@ PUGI__NS_BEGIN
|
|||||||
}
|
}
|
||||||
|
|
||||||
writer.write('?', '>');
|
writer.write('?', '>');
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case node_declaration:
|
case node_declaration:
|
||||||
@ -3608,7 +3571,6 @@ PUGI__NS_BEGIN
|
|||||||
writer.write_string(node->name ? node->name : default_name);
|
writer.write_string(node->name ? node->name : default_name);
|
||||||
node_output_attributes(writer, node, flags);
|
node_output_attributes(writer, node, flags);
|
||||||
writer.write('?', '>');
|
writer.write('?', '>');
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case node_doctype:
|
case node_doctype:
|
||||||
@ -3622,7 +3584,6 @@ PUGI__NS_BEGIN
|
|||||||
}
|
}
|
||||||
|
|
||||||
writer.write('>');
|
writer.write('>');
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -3630,9 +3591,16 @@ PUGI__NS_BEGIN
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum indent_flags_t
|
||||||
|
{
|
||||||
|
indent_newline = 1,
|
||||||
|
indent_indent = 2
|
||||||
|
};
|
||||||
|
|
||||||
PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
|
PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
|
||||||
{
|
{
|
||||||
size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? strlength(indent) : 0;
|
size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? strlength(indent) : 0;
|
||||||
|
unsigned int indent_flags = indent_indent;
|
||||||
|
|
||||||
xml_node_struct* node = root;
|
xml_node_struct* node = root;
|
||||||
|
|
||||||
@ -3641,29 +3609,47 @@ PUGI__NS_BEGIN
|
|||||||
assert(node);
|
assert(node);
|
||||||
|
|
||||||
// begin writing current node
|
// begin writing current node
|
||||||
if (indent_length)
|
if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
|
||||||
text_output_indent(writer, indent, indent_length, depth);
|
{
|
||||||
|
node_output_simple(writer, node, flags);
|
||||||
|
|
||||||
if (PUGI__NODETYPE(node) == node_element)
|
indent_flags = 0;
|
||||||
{
|
|
||||||
if (node_output_start(writer, node, flags))
|
|
||||||
{
|
|
||||||
node = node->first_child;
|
|
||||||
depth++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (PUGI__NODETYPE(node) == node_document)
|
|
||||||
{
|
|
||||||
if (node->first_child)
|
|
||||||
{
|
|
||||||
node = node->first_child;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
node_output_simple(writer, node, flags);
|
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
|
||||||
|
writer.write('\n');
|
||||||
|
|
||||||
|
if ((indent_flags & indent_indent) && indent_length)
|
||||||
|
text_output_indent(writer, indent, indent_length, depth);
|
||||||
|
|
||||||
|
if (PUGI__NODETYPE(node) == node_element)
|
||||||
|
{
|
||||||
|
indent_flags = indent_newline | indent_indent;
|
||||||
|
|
||||||
|
if (node_output_start(writer, node, flags))
|
||||||
|
{
|
||||||
|
node = node->first_child;
|
||||||
|
depth++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (PUGI__NODETYPE(node) == node_document)
|
||||||
|
{
|
||||||
|
indent_flags = indent_indent;
|
||||||
|
|
||||||
|
if (node->first_child)
|
||||||
|
{
|
||||||
|
node = node->first_child;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
node_output_simple(writer, node, flags);
|
||||||
|
|
||||||
|
indent_flags = indent_newline | indent_indent;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// continue to the next node
|
// continue to the next node
|
||||||
@ -3682,14 +3668,22 @@ PUGI__NS_BEGIN
|
|||||||
{
|
{
|
||||||
depth--;
|
depth--;
|
||||||
|
|
||||||
if (indent_length)
|
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
|
||||||
|
writer.write('\n');
|
||||||
|
|
||||||
|
if ((indent_flags & indent_indent) && indent_length)
|
||||||
text_output_indent(writer, indent, indent_length, depth);
|
text_output_indent(writer, indent, indent_length, depth);
|
||||||
|
|
||||||
node_output_end(writer, node, flags);
|
node_output_end(writer, node);
|
||||||
|
|
||||||
|
indent_flags = indent_newline | indent_indent;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (node != root);
|
while (node != root);
|
||||||
|
|
||||||
|
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
|
||||||
|
writer.write('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
PUGI__FN bool has_declaration(xml_node_struct* node)
|
PUGI__FN bool has_declaration(xml_node_struct* node)
|
||||||
|
|||||||
@ -22,19 +22,19 @@ TEST_XML(write_indent, "<node attr='1'><child><sub>text</sub></child></node>")
|
|||||||
|
|
||||||
TEST_XML(write_pcdata, "<node attr='1'><child><sub/>text</child></node>")
|
TEST_XML(write_pcdata, "<node attr='1'><child><sub/>text</child></node>")
|
||||||
{
|
{
|
||||||
CHECK_NODE_EX(doc, STR("<node attr=\"1\">\n\t<child>\n\t\t<sub />\n\t\ttext\n\t</child>\n</node>\n"), STR("\t"), format_indent);
|
CHECK_NODE_EX(doc, STR("<node attr=\"1\">\n\t<child>\n\t\t<sub />text</child>\n</node>\n"), STR("\t"), format_indent);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_XML_FLAGS(write_cdata, "<![CDATA[value]]>", parse_cdata | parse_fragment)
|
TEST_XML_FLAGS(write_cdata, "<![CDATA[value]]>", parse_cdata | parse_fragment)
|
||||||
{
|
{
|
||||||
CHECK_NODE(doc, STR("<![CDATA[value]]>"));
|
CHECK_NODE(doc, STR("<![CDATA[value]]>"));
|
||||||
CHECK_NODE_EX(doc, STR("<![CDATA[value]]>\n"), STR(""), 0);
|
CHECK_NODE_EX(doc, STR("<![CDATA[value]]>"), STR(""), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_XML_FLAGS(write_cdata_empty, "<![CDATA[]]>", parse_cdata | parse_fragment)
|
TEST_XML_FLAGS(write_cdata_empty, "<![CDATA[]]>", parse_cdata | parse_fragment)
|
||||||
{
|
{
|
||||||
CHECK_NODE(doc, STR("<![CDATA[]]>"));
|
CHECK_NODE(doc, STR("<![CDATA[]]>"));
|
||||||
CHECK_NODE_EX(doc, STR("<![CDATA[]]>\n"), STR(""), 0);
|
CHECK_NODE_EX(doc, STR("<![CDATA[]]>"), STR(""), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_XML_FLAGS(write_cdata_escape, "<![CDATA[value]]>", parse_cdata | parse_fragment)
|
TEST_XML_FLAGS(write_cdata_escape, "<![CDATA[value]]>", parse_cdata | parse_fragment)
|
||||||
@ -527,5 +527,50 @@ TEST(write_pcdata_null)
|
|||||||
|
|
||||||
doc.first_child().append_child(node_pcdata);
|
doc.first_child().append_child(node_pcdata);
|
||||||
|
|
||||||
CHECK_NODE_EX(doc, STR("<node>\n\t\n\t\n</node>\n"), STR("\t"), format_indent);
|
CHECK_NODE_EX(doc, STR("<node></node>\n"), STR("\t"), format_indent);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(write_pcdata_whitespace_fixedpoint)
|
||||||
|
{
|
||||||
|
const char_t* data = STR("<node> test <child>\n <sub/>\n </child>\n</node>");
|
||||||
|
|
||||||
|
static const unsigned int flags_parse[] =
|
||||||
|
{
|
||||||
|
0,
|
||||||
|
parse_ws_pcdata,
|
||||||
|
parse_ws_pcdata_single,
|
||||||
|
parse_trim_pcdata
|
||||||
|
};
|
||||||
|
|
||||||
|
static const unsigned int flags_format[] =
|
||||||
|
{
|
||||||
|
0,
|
||||||
|
format_raw,
|
||||||
|
format_indent
|
||||||
|
};
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < sizeof(flags_parse) / sizeof(flags_parse[0]); ++i)
|
||||||
|
{
|
||||||
|
xml_document doc;
|
||||||
|
CHECK(doc.load_string(data, flags_parse[i]));
|
||||||
|
|
||||||
|
for (unsigned int j = 0; j < sizeof(flags_format) / sizeof(flags_format[0]); ++j)
|
||||||
|
{
|
||||||
|
std::string saved = write_narrow(doc, flags_format[j], encoding_auto);
|
||||||
|
|
||||||
|
xml_document rdoc;
|
||||||
|
CHECK(rdoc.load_buffer(&saved[0], saved.size(), flags_parse[i]));
|
||||||
|
|
||||||
|
std::string rsaved = write_narrow(rdoc, flags_format[j], encoding_auto);
|
||||||
|
|
||||||
|
CHECK(saved == rsaved);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_XML_FLAGS(write_mixed, "<node><child1/><child2>pre<![CDATA[data]]>mid<!--comment--><test/>post<?pi value?>fin</child2><child3/></node>", parse_full)
|
||||||
|
{
|
||||||
|
CHECK_NODE(doc, "<node><child1 /><child2>pre<![CDATA[data]]>mid<!--comment--><test />post<?pi value?>fin</child2><child3 /></node>");
|
||||||
|
CHECK_NODE_EX(doc, "<node>\n<child1 />\n<child2>pre<![CDATA[data]]>mid<!--comment-->\n<test />post<?pi value?>fin</child2>\n<child3 />\n</node>\n", STR("\t"), 0);
|
||||||
|
CHECK_NODE_EX(doc, "<node>\n\t<child1 />\n\t<child2>pre<![CDATA[data]]>mid<!--comment-->\n\t\t<test />post<?pi value?>fin</child2>\n\t<child3 />\n</node>\n", STR("\t"), format_indent);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -45,7 +45,7 @@ std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi:
|
|||||||
{
|
{
|
||||||
xml_writer_string writer;
|
xml_writer_string writer;
|
||||||
|
|
||||||
doc.save(writer, STR(""), flags, encoding);
|
doc.save(writer, STR("\t"), flags, encoding);
|
||||||
|
|
||||||
return writer.as_narrow();
|
return writer.as_narrow();
|
||||||
}
|
}
|
||||||
@ -59,7 +59,7 @@ std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_enco
|
|||||||
{
|
{
|
||||||
xml_writer_string writer;
|
xml_writer_string writer;
|
||||||
|
|
||||||
node.print(writer, STR(""), flags, encoding);
|
node.print(writer, STR("\t"), flags, encoding);
|
||||||
|
|
||||||
return writer.as_narrow();
|
return writer.as_narrow();
|
||||||
}
|
}
|
||||||
@ -73,7 +73,7 @@ std::basic_string<wchar_t> write_wide(pugi::xml_node node, unsigned int flags, p
|
|||||||
{
|
{
|
||||||
xml_writer_string writer;
|
xml_writer_string writer;
|
||||||
|
|
||||||
node.print(writer, STR(""), flags, encoding);
|
node.print(writer, STR("\t"), flags, encoding);
|
||||||
|
|
||||||
return writer.as_wide();
|
return writer.as_wide();
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user