Implement document fragment parsing.

Introduce a notable behavior change in default parsing mode: documents without a
document element node are now considered invalid. This is technically a breaking change;
however, the number of documents it affects is very small, all parsed data still persists,
and lack of this check results in very confusing behavior in a number of cases.

In order to be able to parse documents without an element node, a fragment parsing flag is
introduced.

Parsing a buffer in fragment mode treats the buffer as a fragment of a valid XML document.
As a consequence, top-level PCDATA is added to the tree; additionally, there are no
restrictions on the number of nodes -- so documents without a document element are considered
valid.

Due to the way parsing works internally, load_buffer_inplace occasionally cannot preserve
the document contents if the buffer is parsed in fragment mode. While unfortunate, this problem is
fundamental; since the use case is relatively obscure, hopefully documenting this shortcoming
will be enough.

git-svn-id: https://pugixml.googlecode.com/svn/trunk@980 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
Arseny Kapoulkine 2014-02-11 06:45:27 +00:00
parent 5fa25a878a
commit 47c15ad949
8 changed files with 365 additions and 104 deletions

View File

@ -2199,7 +2199,7 @@ PUGI__NS_BEGIN
char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
{ {
assert(s[0] == '<' && s[1] == '!'); assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
s++; s++;
while (*s) while (*s)
@ -2331,6 +2331,9 @@ PUGI__NS_BEGIN
s = parse_doctype_group(s, endch, true); s = parse_doctype_group(s, endch, true);
if (!s) return s; if (!s) return s;
assert((*s == 0 && endch == '>') || *s == '>');
if (*s) *s++ = 0;
if (PUGI__OPTSET(parse_doctype)) if (PUGI__OPTSET(parse_doctype))
{ {
while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
@ -2339,9 +2342,6 @@ PUGI__NS_BEGIN
cursor->value = mark; cursor->value = mark;
assert((*s == 0 && endch == '>') || *s == '>');
if (*s) *s++ = 0;
PUGI__POPNODE(); PUGI__POPNODE();
} }
} }
@ -2629,7 +2629,7 @@ PUGI__NS_BEGIN
PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here. PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
if (*s == '<') if (*s == '<' || !*s)
{ {
// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
assert(mark != s); assert(mark != s);
@ -2640,13 +2640,13 @@ PUGI__NS_BEGIN
} }
else if (PUGI__OPTSET(parse_ws_pcdata_single)) else if (PUGI__OPTSET(parse_ws_pcdata_single))
{ {
if (s[1] != '/' || cursor->first_child) continue; if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
} }
} }
s = mark; s = mark;
if (cursor->parent) if (cursor->parent || PUGI__OPTSET(parse_fragment))
{ {
PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
cursor->value = s; // Save the offset. cursor->value = s; // Save the offset.
@ -2676,14 +2676,43 @@ PUGI__NS_BEGIN
return s; return s;
} }
#ifdef PUGIXML_WCHAR_MODE
// Returns a pointer just past a leading U+FEFF code unit, or s itself when the
// first code unit is anything else.
static char_t* parse_skip_bom(char_t* s)
{
	if (s[0] != 0xfeff) return s;

	return s + 1;
}
#else
// Returns a pointer just past a leading EF BB BF byte sequence, or s itself when
// the buffer does not start with that sequence.
static char_t* parse_skip_bom(char_t* s)
{
	// the comparisons short-circuit, so no byte past the first mismatch is read
	bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';

	return starts_with_bom ? s + 3 : s;
}
#endif
// Returns true if node, or any node reachable from it through next_sibling,
// has type node_element; returns false for a null node.
static bool has_element_node_siblings(xml_node_struct* node)
{
	for (xml_node_struct* sibling = node; sibling; sibling = sibling->next_sibling)
	{
		// the node type is decoded from the type bits of the node header
		xml_node_type sibling_type = static_cast<xml_node_type>((sibling->header & impl::xml_memory_page_type_mask) + 1);

		if (sibling_type == node_element) return true;
	}

	return false;
}
static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
{ {
// allocator object is a part of document object // allocator object is a part of document object
xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc); xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc);
// early-out for empty documents // early-out for empty documents
if (length == 0) return make_parse_result(status_ok); if (length == 0)
return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
// get last child of the root before parsing
xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
// create parser on stack // create parser on stack
xml_parser parser(alloc); xml_parser parser(alloc);
@ -2691,24 +2720,35 @@ PUGI__NS_BEGIN
char_t endch = buffer[length - 1]; char_t endch = buffer[length - 1];
buffer[length - 1] = 0; buffer[length - 1] = 0;
// skip BOM to make sure it does not end up as part of parse output
char_t* buffer_data = parse_skip_bom(buffer);
// perform actual parsing // perform actual parsing
parser.parse_tree(buffer, root, optmsk, endch); parser.parse_tree(buffer_data, root, optmsk, endch);
xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
// roll back offset if it occurs on a null terminator in the source buffer
if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
result.offset--;
// update allocator state // update allocator state
alloc = parser.alloc; alloc = parser.alloc;
// since we removed last character, we have to handle the only possible false positive xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
if (result && endch == '<') assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
if (result)
{ {
// there's no possible well-formed document with < at the end // since we removed last character, we have to handle the only possible false positive (stray <)
return make_parse_result(status_unrecognized_tag, length - 1); if (endch == '<')
return make_parse_result(status_unrecognized_tag, length - 1);
// check if there are any element nodes parsed
xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
return make_parse_result(status_no_document_element, length - 1);
}
else
{
// roll back offset if it occurs on a null terminator in the source buffer
if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
result.offset--;
} }
return result; return result;
@ -5469,6 +5509,8 @@ namespace pugi
case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
case status_no_document_element: return "No document element found";
default: return "Unknown error"; default: return "Unknown error";
} }
} }

View File

@ -151,6 +151,10 @@ namespace pugi
// This flag is off by default; turning it on may result in slower parsing and more memory consumption. // This flag is off by default; turning it on may result in slower parsing and more memory consumption.
const unsigned int parse_ws_pcdata_single = 0x0400; const unsigned int parse_ws_pcdata_single = 0x0400;
// This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
// is a valid document. This flag is off by default.
const unsigned int parse_fragment = 0x0800;
// The default parsing mode. // The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
@ -880,7 +884,9 @@ namespace pugi
status_bad_end_element, // Parsing error occurred while parsing end element tag status_bad_end_element, // Parsing error occurred while parsing end element tag
status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
status_append_invalid_root // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer) status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
status_no_document_element // Parsing resulted in a document without element nodes
}; };
// Parsing result // Parsing result

View File

@ -249,7 +249,7 @@ TEST(document_load_file_empty)
{ {
pugi::xml_document doc; pugi::xml_document doc;
CHECK(doc.load_file("tests/data/empty.xml")); CHECK(doc.load_file("tests/data/empty.xml").status == status_no_document_element);
CHECK(!doc.first_child()); CHECK(!doc.first_child());
} }
@ -907,16 +907,52 @@ TEST(document_load_buffer_empty)
xml_encoding encoding = encodings[i]; xml_encoding encoding = encodings[i];
xml_document doc; xml_document doc;
CHECK(doc.load_buffer(buffer, 0, parse_default, encoding) && !doc.first_child()); CHECK(doc.load_buffer(buffer, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
CHECK(doc.load_buffer(0, 0, parse_default, encoding) && !doc.first_child()); CHECK(doc.load_buffer(0, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding) && !doc.first_child()); CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding) && !doc.first_child()); CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
void* own_buffer = pugi::get_memory_allocation_function()(1); void* own_buffer = pugi::get_memory_allocation_function()(1);
CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding) && !doc.first_child()); CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding) && !doc.first_child()); CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding).status == status_no_document_element && !doc.first_child());
}
}
TEST(document_load_buffer_empty_fragment)
{
xml_encoding encodings[] =
{
encoding_auto,
encoding_utf8,
encoding_utf16_le,
encoding_utf16_be,
encoding_utf16,
encoding_utf32_le,
encoding_utf32_be,
encoding_utf32,
encoding_wchar,
encoding_latin1
};
char buffer[1];
for (unsigned int i = 0; i < sizeof(encodings) / sizeof(encodings[0]); ++i)
{
xml_encoding encoding = encodings[i];
xml_document doc;
CHECK(doc.load_buffer(buffer, 0, parse_fragment, encoding) && !doc.first_child());
CHECK(doc.load_buffer(0, 0, parse_fragment, encoding) && !doc.first_child());
CHECK(doc.load_buffer_inplace(buffer, 0, parse_fragment, encoding) && !doc.first_child());
CHECK(doc.load_buffer_inplace(0, 0, parse_fragment, encoding) && !doc.first_child());
void* own_buffer = pugi::get_memory_allocation_function()(1);
CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_fragment, encoding) && !doc.first_child());
CHECK(doc.load_buffer_inplace_own(0, 0, parse_fragment, encoding) && !doc.first_child());
} }
} }
@ -933,13 +969,27 @@ TEST(document_progressive_truncation)
{ {
char* truncated_data = buffer + original_size - i; char* truncated_data = buffer + original_size - i;
memcpy(truncated_data, original_data, i); // default flags
{
memcpy(truncated_data, original_data, i);
xml_document doc; xml_document doc;
bool result = doc.load_buffer_inplace(truncated_data, i); bool result = doc.load_buffer_inplace(truncated_data, i);
// some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof // only eof is parseable
CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? result : !result); CHECK((i >= 3325) ? result : !result);
}
// fragment mode
{
memcpy(truncated_data, original_data, i);
xml_document doc;
bool result = doc.load_buffer_inplace(truncated_data, i, parse_default | parse_fragment);
// some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof
CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? result : !result);
}
} }
delete[] buffer; delete[] buffer;
@ -953,12 +1003,29 @@ TEST(document_load_buffer_short)
xml_document doc; xml_document doc;
CHECK(doc.load_buffer(data, 4)); CHECK(doc.load_buffer(data, 4).status == status_no_document_element);
CHECK(doc.load_buffer(data + 1, 3)); CHECK(doc.load_buffer(data + 1, 3).status == status_no_document_element);
CHECK(doc.load_buffer(data + 2, 2)); CHECK(doc.load_buffer(data + 2, 2).status == status_no_document_element);
CHECK(doc.load_buffer(data + 3, 1)); CHECK(doc.load_buffer(data + 3, 1).status == status_no_document_element);
CHECK(doc.load_buffer(data + 4, 0)); CHECK(doc.load_buffer(data + 4, 0).status == status_no_document_element);
CHECK(doc.load_buffer(0, 0)); CHECK(doc.load_buffer(0, 0).status == status_no_document_element);
delete[] data;
}
TEST(document_load_buffer_short_fragment)
{
char* data = new char[4];
memcpy(data, "abcd", 4);
xml_document doc;
CHECK(doc.load_buffer(data, 4, parse_fragment) && test_string_equal(doc.text().get(), STR("abcd")));
CHECK(doc.load_buffer(data + 1, 3, parse_fragment) && test_string_equal(doc.text().get(), STR("bcd")));
CHECK(doc.load_buffer(data + 2, 2, parse_fragment) && test_string_equal(doc.text().get(), STR("cd")));
CHECK(doc.load_buffer(data + 3, 1, parse_fragment) && test_string_equal(doc.text().get(), STR("d")));
CHECK(doc.load_buffer(data + 4, 0, parse_fragment) && !doc.first_child());
CHECK(doc.load_buffer(0, 0, parse_fragment) && !doc.first_child());
delete[] data; delete[] data;
} }
@ -970,12 +1037,12 @@ TEST(document_load_buffer_inplace_short)
xml_document doc; xml_document doc;
CHECK(doc.load_buffer_inplace(data, 4)); CHECK(doc.load_buffer_inplace(data, 4).status == status_no_document_element);
CHECK(doc.load_buffer_inplace(data + 1, 3)); CHECK(doc.load_buffer_inplace(data + 1, 3).status == status_no_document_element);
CHECK(doc.load_buffer_inplace(data + 2, 2)); CHECK(doc.load_buffer_inplace(data + 2, 2).status == status_no_document_element);
CHECK(doc.load_buffer_inplace(data + 3, 1)); CHECK(doc.load_buffer_inplace(data + 3, 1).status == status_no_document_element);
CHECK(doc.load_buffer_inplace(data + 4, 0)); CHECK(doc.load_buffer_inplace(data + 4, 0).status == status_no_document_element);
CHECK(doc.load_buffer_inplace(0, 0)); CHECK(doc.load_buffer_inplace(0, 0).status == status_no_document_element);
delete[] data; delete[] data;
} }
@ -1006,7 +1073,7 @@ TEST_XML_FLAGS(document_element, "<?xml version='1.0'?><node><child/></node><!--
CHECK(doc.document_element() == doc.child(STR("node"))); CHECK(doc.document_element() == doc.child(STR("node")));
} }
TEST_XML_FLAGS(document_element_absent, "<!---->", parse_comments) TEST_XML_FLAGS(document_element_absent, "<!---->", parse_comments | parse_fragment)
{ {
CHECK(doc.document_element() == xml_node()); CHECK(doc.document_element() == xml_node());
} }
@ -1070,16 +1137,6 @@ TEST_XML(document_reset_copy_self, "<node><child/></node>")
CHECK_NODE(doc, STR("")); CHECK_NODE(doc, STR(""));
} }
struct document_data_t
{
xml_encoding encoding;
const unsigned char* data;
size_t size;
};
#include <stdio.h>
TEST(document_load_buffer_utf_truncated) TEST(document_load_buffer_utf_truncated)
{ {
const unsigned char utf8[] = {'<', 0xe2, 0x82, 0xac, '/', '>'}; const unsigned char utf8[] = {'<', 0xe2, 0x82, 0xac, '/', '>'};
@ -1088,6 +1145,14 @@ TEST(document_load_buffer_utf_truncated)
const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'}; const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'};
const unsigned char utf32_le[] = {'<', 0, 0, 0, 0xac, 0x20, 0, 0, '/', 0, 0, 0, '>', 0, 0, 0}; const unsigned char utf32_le[] = {'<', 0, 0, 0, 0xac, 0x20, 0, 0, '/', 0, 0, 0, '>', 0, 0, 0};
struct document_data_t
{
xml_encoding encoding;
const unsigned char* data;
size_t size;
};
const document_data_t data[] = const document_data_t data[] =
{ {
{ encoding_utf8, utf8, sizeof(utf8) }, { encoding_utf8, utf8, sizeof(utf8) },

View File

@ -1057,3 +1057,20 @@ TEST(dom_node_append_buffer_out_of_memory_buffer)
CHECK(doc.append_buffer(data, sizeof(data)).status == status_out_of_memory); CHECK(doc.append_buffer(data, sizeof(data)).status == status_out_of_memory);
CHECK(!doc.first_child()); CHECK(!doc.first_child());
} }
// Appends bare text to an element, alternating between default flags and parse_fragment.
// The text ends up in the tree in both cases; only the reported status differs.
TEST_XML(dom_node_append_buffer_fragment, "<node />")
{
	xml_node target = doc.child(STR("node"));

	// default flags: status_no_document_element is reported, yet the text is appended
	CHECK(target.append_buffer("1", 1).status == status_no_document_element);
	CHECK_NODE(doc, STR("<node>1</node>"));

	// fragment mode: the same kind of input is accepted
	CHECK(target.append_buffer("2", 1, parse_fragment));
	CHECK_NODE(doc, STR("<node>12</node>"));

	// repeat both variants to check that text keeps accumulating after earlier appends
	CHECK(target.append_buffer("3", 1).status == status_no_document_element);
	CHECK_NODE(doc, STR("<node>123</node>"));

	CHECK(target.append_buffer("4", 1, parse_fragment));
	CHECK_NODE(doc, STR("<node>1234</node>"));
}

View File

@ -119,7 +119,7 @@ TEST(memory_large_allocations)
CHECK(allocate_count == deallocate_count + 1); // only one live page left (it waits for new allocations) CHECK(allocate_count == deallocate_count + 1); // only one live page left (it waits for new allocations)
char buffer; char buffer;
CHECK(doc.load_buffer_inplace(&buffer, 0, parse_default, get_native_encoding())); CHECK(doc.load_buffer_inplace(&buffer, 0, parse_fragment, get_native_encoding()));
CHECK(allocate_count == deallocate_count); // no live pages left CHECK(allocate_count == deallocate_count); // no live pages left
} }

View File

@ -1,10 +1,12 @@
#include "common.hpp" #include "common.hpp"
#include "writer_string.hpp"
TEST(parse_pi_skip) TEST(parse_pi_skip)
{ {
xml_document doc; xml_document doc;
unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_declaration}; unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{ {
@ -21,7 +23,7 @@ TEST(parse_pi_skip)
TEST(parse_pi_parse) TEST(parse_pi_parse)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<?pi1?><?pi2 value?>"), parse_minimal | parse_pi)); CHECK(doc.load(STR("<?pi1?><?pi2 value?>"), parse_fragment | parse_pi));
xml_node pi1 = doc.first_child(); xml_node pi1 = doc.first_child();
xml_node pi2 = doc.last_child(); xml_node pi2 = doc.last_child();
@ -38,7 +40,7 @@ TEST(parse_pi_parse)
TEST(parse_pi_parse_spaces) TEST(parse_pi_parse_spaces)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<?target \r\n\t value ?>"), parse_minimal | parse_pi)); CHECK(doc.load(STR("<?target \r\n\t value ?>"), parse_fragment | parse_pi));
xml_node pi = doc.first_child(); xml_node pi = doc.first_child();
@ -51,7 +53,7 @@ TEST(parse_pi_error)
{ {
xml_document doc; xml_document doc;
unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_pi}; unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{ {
@ -81,22 +83,22 @@ TEST(parse_pi_error)
CHECK(doc.load(STR("<?name&?"), flags).status == status_bad_pi); CHECK(doc.load(STR("<?name&?"), flags).status == status_bad_pi);
} }
CHECK(doc.load(STR("<?xx#?>"), parse_minimal | parse_pi).status == status_bad_pi); CHECK(doc.load(STR("<?xx#?>"), parse_fragment | parse_pi).status == status_bad_pi);
CHECK(doc.load(STR("<?name&?>"), parse_minimal | parse_pi).status == status_bad_pi); CHECK(doc.load(STR("<?name&?>"), parse_fragment | parse_pi).status == status_bad_pi);
CHECK(doc.load(STR("<?name& x?>"), parse_minimal | parse_pi).status == status_bad_pi); CHECK(doc.load(STR("<?name& x?>"), parse_fragment | parse_pi).status == status_bad_pi);
} }
TEST(parse_comments_skip) TEST(parse_comments_skip)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<!----><!--value-->"), parse_minimal)); CHECK(doc.load(STR("<!----><!--value-->"), parse_fragment));
CHECK(!doc.first_child()); CHECK(!doc.first_child());
} }
TEST(parse_comments_parse) TEST(parse_comments_parse)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<!----><!--value-->"), parse_minimal | parse_comments)); CHECK(doc.load(STR("<!----><!--value-->"), parse_fragment | parse_comments));
xml_node c1 = doc.first_child(); xml_node c1 = doc.first_child();
xml_node c2 = doc.last_child(); xml_node c2 = doc.last_child();
@ -113,7 +115,7 @@ TEST(parse_comments_parse)
TEST(parse_comments_parse_no_eol) TEST(parse_comments_parse_no_eol)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<!--\r\rval1\rval2\r\nval3\nval4\r\r-->"), parse_minimal | parse_comments)); CHECK(doc.load(STR("<!--\r\rval1\rval2\r\nval3\nval4\r\r-->"), parse_fragment | parse_comments));
xml_node c = doc.first_child(); xml_node c = doc.first_child();
CHECK(c.type() == node_comment); CHECK(c.type() == node_comment);
@ -123,7 +125,7 @@ TEST(parse_comments_parse_no_eol)
TEST(parse_comments_parse_eol) TEST(parse_comments_parse_eol)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<!--\r\rval1\rval2\r\nval3\nval4\r\r-->"), parse_minimal | parse_comments | parse_eol)); CHECK(doc.load(STR("<!--\r\rval1\rval2\r\nval3\nval4\r\r-->"), parse_fragment | parse_comments | parse_eol));
xml_node c = doc.first_child(); xml_node c = doc.first_child();
CHECK(c.type() == node_comment); CHECK(c.type() == node_comment);
@ -134,7 +136,7 @@ TEST(parse_comments_error)
{ {
xml_document doc; xml_document doc;
unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_comments, parse_minimal | parse_comments | parse_eol}; unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_comments, parse_fragment | parse_comments | parse_eol};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{ {
@ -152,21 +154,21 @@ TEST(parse_comments_error)
TEST(parse_cdata_skip) TEST(parse_cdata_skip)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<![CDATA[]]><![CDATA[value]]>"), parse_minimal)); CHECK(doc.load(STR("<![CDATA[]]><![CDATA[value]]>"), parse_fragment));
CHECK(!doc.first_child()); CHECK(!doc.first_child());
} }
TEST(parse_cdata_skip_contents) TEST(parse_cdata_skip_contents)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<node><![CDATA[]]>hello<![CDATA[value]]>, world!</node>"), parse_minimal)); CHECK(doc.load(STR("<node><![CDATA[]]>hello<![CDATA[value]]>, world!</node>"), parse_fragment));
CHECK_NODE(doc, STR("<node>hello, world!</node>")); CHECK_NODE(doc, STR("<node>hello, world!</node>"));
} }
TEST(parse_cdata_parse) TEST(parse_cdata_parse)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<![CDATA[]]><![CDATA[value]]>"), parse_minimal | parse_cdata)); CHECK(doc.load(STR("<![CDATA[]]><![CDATA[value]]>"), parse_fragment | parse_cdata));
xml_node c1 = doc.first_child(); xml_node c1 = doc.first_child();
xml_node c2 = doc.last_child(); xml_node c2 = doc.last_child();
@ -183,7 +185,7 @@ TEST(parse_cdata_parse)
TEST(parse_cdata_parse_no_eol) TEST(parse_cdata_parse_no_eol)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<![CDATA[\r\rval1\rval2\r\nval3\nval4\r\r]]>"), parse_minimal | parse_cdata)); CHECK(doc.load(STR("<![CDATA[\r\rval1\rval2\r\nval3\nval4\r\r]]>"), parse_fragment | parse_cdata));
xml_node c = doc.first_child(); xml_node c = doc.first_child();
CHECK(c.type() == node_cdata); CHECK(c.type() == node_cdata);
@ -193,7 +195,7 @@ TEST(parse_cdata_parse_no_eol)
TEST(parse_cdata_parse_eol) TEST(parse_cdata_parse_eol)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<![CDATA[\r\rval1\rval2\r\nval3\nval4\r\r]]>"), parse_minimal | parse_cdata | parse_eol)); CHECK(doc.load(STR("<![CDATA[\r\rval1\rval2\r\nval3\nval4\r\r]]>"), parse_fragment | parse_cdata | parse_eol));
xml_node c = doc.first_child(); xml_node c = doc.first_child();
CHECK(c.type() == node_cdata); CHECK(c.type() == node_cdata);
@ -204,7 +206,7 @@ TEST(parse_cdata_error)
{ {
xml_document doc; xml_document doc;
unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_cdata, parse_minimal | parse_cdata | parse_eol}; unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_cdata, parse_fragment | parse_cdata | parse_eol};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{ {
@ -229,7 +231,7 @@ TEST(parse_cdata_error)
TEST(parse_ws_pcdata_skip) TEST(parse_ws_pcdata_skip)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR(" "), parse_minimal)); CHECK(doc.load(STR(" "), parse_fragment));
CHECK(!doc.first_child()); CHECK(!doc.first_child());
CHECK(doc.load(STR("<root> <node> </node> </root>"), parse_minimal)); CHECK(doc.load(STR("<root> <node> </node> </root>"), parse_minimal));
@ -286,8 +288,6 @@ TEST(parse_ws_pcdata_permutations)
test_data_t test_data[] = test_data_t test_data[] =
{ {
// external pcdata should be discarded (whitespace or not) // external pcdata should be discarded (whitespace or not)
{7, STR("ext1"), STR(""), 1},
{7, STR(" "), STR(""), 1},
{7, STR("ext1<node/>"), STR("<node />"), 2}, {7, STR("ext1<node/>"), STR("<node />"), 2},
{7, STR("ext1<node/>ext2"), STR("<node />"), 2}, {7, STR("ext1<node/>ext2"), STR("<node />"), 2},
{7, STR(" <node/>"), STR("<node />"), 2}, {7, STR(" <node/>"), STR("<node />"), 2},
@ -314,11 +314,13 @@ TEST(parse_ws_pcdata_permutations)
{4, STR("<node>\t\t<!---->\n\n</node>"), STR("<node>\n\n</node>"), 3}, {4, STR("<node>\t\t<!---->\n\n</node>"), STR("<node>\n\n</node>"), 3},
// error case: terminate PCDATA in the middle // error case: terminate PCDATA in the middle
{7, STR("<node>abcdef"), STR("<node>abcdef</node>"), -3}, {7, STR("<node>abcdef"), STR("<node>abcdef</node>"), -3},
{7, STR("<node> "), STR("<node> </node>"), -3}, {5, STR("<node> "), STR("<node />"), -2},
{2, STR("<node> "), STR("<node> </node>"), -3},
// error case: terminate PCDATA as early as possible // error case: terminate PCDATA as early as possible
{7, STR("<node>"), STR("<node />"), -2}, {7, STR("<node>"), STR("<node />"), -2},
{7, STR("<node>a"), STR("<node>a</node>"), -3}, {7, STR("<node>a"), STR("<node>a</node>"), -3},
{7, STR("<node> "), STR("<node> </node>"), -3}, {5, STR("<node> "), STR("<node />"), -2},
{2, STR("<node> "), STR("<node> </node>"), -3},
}; };
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i) for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
@ -342,6 +344,57 @@ TEST(parse_ws_pcdata_permutations)
} }
} }
// Table-driven check of how top-level text and whitespace are handled when
// parse_fragment is combined with each whitespace-related flag variant.
TEST(parse_ws_pcdata_fragment_permutations)
{
	struct test_data_t
	{
		unsigned int mask; // variants to run: 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
		const pugi::char_t* source; // input handed to load()
		const pugi::char_t* result; // expected output for CHECK_NODE
		int nodes; // expected get_tree_node_count() value; negative if parsing should fail
	};

	test_data_t test_data[] =
	{
		// external pcdata should be preserved
		{7, STR("ext1"), STR("ext1"), 2},
		{5, STR(" "), STR(""), 1},
		{2, STR(" "), STR(" "), 2},
		{7, STR("ext1<node/>"), STR("ext1<node />"), 3},
		{7, STR("<node/>ext2"), STR("<node />ext2"), 3},
		{7, STR("ext1<node/>ext2"), STR("ext1<node />ext2"), 4},
		{7, STR("ext1<node1/>ext2<node2/>ext3"), STR("ext1<node1 />ext2<node2 />ext3"), 6},
		{5, STR(" <node/>"), STR("<node />"), 2},
		{2, STR(" <node/>"), STR(" <node />"), 3},
		{5, STR("<node/> "), STR("<node />"), 2},
		{2, STR("<node/> "), STR("<node /> "), 3},
		{5, STR(" <node/> "), STR("<node />"), 2},
		{2, STR(" <node/> "), STR(" <node /> "), 4},
		{5, STR(" <node1/> <node2/> "), STR("<node1 /><node2 />"), 3},
		{2, STR(" <node1/> <node2/> "), STR(" <node1 /> <node2 /> "), 6},
	};

	// flag variants, indexed by the bit position used in test_data_t::mask
	const unsigned int flag_variants[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};

	const size_t variant_count = sizeof(flag_variants) / sizeof(flag_variants[0]);
	const size_t case_count = sizeof(test_data) / sizeof(test_data[0]);

	for (size_t index = 0; index < case_count; ++index)
	{
		const test_data_t& entry = test_data[index];

		for (size_t variant = 0; variant < variant_count; ++variant)
		{
			// skip variants this entry does not apply to
			if ((entry.mask & (1u << variant)) == 0) continue;

			xml_document doc;
			bool loaded = doc.load(entry.source, flag_variants[variant] | parse_fragment);

			// the sign of the expected node count encodes whether parsing should succeed
			CHECK((entry.nodes > 0) == loaded);
			CHECK_NODE(doc, entry.result);

			int expected_nodes = entry.nodes < 0 ? -entry.nodes : entry.nodes;
			int nodes = get_tree_node_count(doc);
			CHECK(expected_nodes == nodes);
		}
	}
}
TEST(parse_pcdata_no_eol) TEST(parse_pcdata_no_eol)
{ {
xml_document doc; xml_document doc;
@ -685,14 +738,14 @@ TEST(parse_tag_error)
TEST(parse_declaration_cases) TEST(parse_declaration_cases)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<?xml?><?xmL?><?xMl?><?xML?><?Xml?><?XmL?><?XMl?><?XML?>"), parse_minimal | parse_pi)); CHECK(doc.load(STR("<?xml?><?xmL?><?xMl?><?xML?><?Xml?><?XmL?><?XMl?><?XML?>"), parse_fragment | parse_pi));
CHECK(!doc.first_child()); CHECK(!doc.first_child());
} }
TEST(parse_declaration_attr_cases) TEST(parse_declaration_attr_cases)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<?xml ?><?xmL ?><?xMl ?><?xML ?><?Xml ?><?XmL ?><?XMl ?><?XML ?>"), parse_minimal | parse_pi)); CHECK(doc.load(STR("<?xml ?><?xmL ?><?xMl ?><?xML ?><?Xml ?><?XmL ?><?XMl ?><?XML ?>"), parse_fragment | parse_pi));
CHECK(!doc.first_child()); CHECK(!doc.first_child());
} }
@ -700,7 +753,7 @@ TEST(parse_declaration_skip)
{ {
xml_document doc; xml_document doc;
unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_pi}; unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{ {
@ -717,7 +770,7 @@ TEST(parse_declaration_skip)
TEST(parse_declaration_parse) TEST(parse_declaration_parse)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<?xml?><?xml version='1.0'?>"), parse_minimal | parse_declaration)); CHECK(doc.load(STR("<?xml?><?xml version='1.0'?>"), parse_fragment | parse_declaration));
xml_node d1 = doc.first_child(); xml_node d1 = doc.first_child();
xml_node d2 = doc.last_child(); xml_node d2 = doc.last_child();
@ -734,7 +787,7 @@ TEST(parse_declaration_error)
{ {
xml_document doc; xml_document doc;
unsigned int flag_sets[] = {parse_minimal, parse_minimal | parse_declaration}; unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i) for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{ {
@ -746,14 +799,15 @@ TEST(parse_declaration_error)
CHECK(doc.load(STR("<?xml version='1>"), flags).status == status_bad_pi); CHECK(doc.load(STR("<?xml version='1>"), flags).status == status_bad_pi);
} }
CHECK(doc.load(STR("<?xml version='1?>"), parse_minimal | parse_declaration).status == status_bad_attribute); CHECK(doc.load(STR("<?xml version='1?>"), parse_fragment | parse_declaration).status == status_bad_attribute);
CHECK(doc.load(STR("<foo><?xml version='1'?></foo>"), parse_minimal | parse_declaration).status == status_bad_pi); CHECK(doc.load(STR("<foo><?xml version='1'?></foo>"), parse_fragment | parse_declaration).status == status_bad_pi);
} }
// Behaviour of loading an empty string. In every variant the tree must stay
// empty (no first child).
// The first load line carries two copies of the check: the first expects
// doc.load("") to succeed with default flags, the second expects the result
// status to be status_no_document_element. The line after it appears only once
// and expects the load to succeed when parse_fragment is passed.
TEST(parse_empty) TEST(parse_empty)
{ {
xml_document doc; xml_document doc;
CHECK(doc.load(STR("")) && !doc.first_child()); CHECK(doc.load(STR("")).status == status_no_document_element && !doc.first_child());
CHECK(doc.load(STR(""), parse_fragment) && !doc.first_child());
} }
TEST(parse_out_of_memory) TEST(parse_out_of_memory)
@ -843,3 +897,81 @@ TEST(parse_result_default)
CHECK(result.offset == 0); CHECK(result.offset == 0);
CHECK(result.encoding == encoding_auto); CHECK(result.encoding == encoding_auto);
} }
TEST(parse_bom_fragment)
{
	// One table row per input buffer: the encoding handed to load_buffer, the raw
	// bytes (byte order mark first), the byte count, and the text the document is
	// expected to expose once the mark has been consumed.
	struct bom_case
	{
		xml_encoding encoding;
		const char* data;
		size_t size;
		const char_t* text;
	};

	// Each encoding is covered twice: a buffer holding only the mark, and the mark
	// followed by four characters.
	// NOTE(review): the multi-byte rows presumably spell "tost" instead of "test"
	// because an 'e' placed right after "\x00" would be read as part of the hex escape.
	const bom_case data[] =
	{
		{ encoding_utf8, "\xef\xbb\xbf", 3, STR("") },
		{ encoding_utf8, "\xef\xbb\xbftest", 7, STR("test") },
		{ encoding_utf16_be, "\xfe\xff", 2, STR("") },
		{ encoding_utf16_be, "\xfe\xff\x00t\x00o\x00s\x00t", 10, STR("tost") },
		{ encoding_utf16_le, "\xff\xfe", 2, STR("") },
		{ encoding_utf16_le, "\xff\xfet\x00o\x00s\x00t\x00", 10, STR("tost") },
		{ encoding_utf32_be, "\x00\x00\xfe\xff", 4, STR("") },
		{ encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00t\x00\x00\x00o\x00\x00\x00s\x00\x00\x00t", 20, STR("tost") },
		{ encoding_utf32_le, "\xff\xfe\x00\x00", 4, STR("") },
		{ encoding_utf32_le, "\xff\xfe\x00\x00t\x00\x00\x00o\x00\x00\x00s\x00\x00\x00t\x00\x00\x00", 20, STR("tost") },
	};

	const size_t count = sizeof(data) / sizeof(data[0]);

	for (size_t i = 0; i != count; ++i)
	{
		// A fresh document per row keeps the cases independent of each other.
		xml_document doc;

		// The buffer must load in fragment mode and expose exactly the expected text.
		CHECK(doc.load_buffer(data[i].data, data[i].size, parse_fragment, data[i].encoding));
		CHECK_STRING(doc.text().get(), data[i].text);

		// Round trip: saving with a BOM in the same encoding must reproduce the input bytes.
		CHECK(save_narrow(doc, format_no_declaration | format_raw | format_write_bom, data[i].encoding) == std::string(data[i].data, data[i].size));
	}
}
TEST(parse_bom_fragment_invalid_utf8)
{
	xml_document doc;

	// EF BB BB differs from the UTF-8 byte order mark (EF BB BF) in its last byte,
	// so the three bytes are expected to come back as document text.
	CHECK(doc.load_buffer("\xef\xbb\xbb", 3, parse_fragment, encoding_utf8));

	const char_t* value = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// char build: the text is the original byte sequence.
	CHECK_STRING(value, "\xef\xbb\xbb");
#else
	// wchar_t build: the same bytes decode to the single code unit 0xFEFB.
	CHECK(value[0] == wchar_cast(0xfefb) && value[1] == 0);
#endif
}
TEST(parse_bom_fragment_invalid_utf16)
{
	xml_document doc;

	// FF FE is loaded as big-endian UTF-16, where it reads as the code unit 0xFFFE
	// instead of a byte order mark, so it is expected to come back as document text.
	CHECK(doc.load_buffer("\xff\xfe", 2, parse_fragment, encoding_utf16_be));

	const char_t* value = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// char build: U+FFFE in its three-byte UTF-8 form.
	CHECK_STRING(value, "\xef\xbf\xbe");
#else
	// wchar_t build: a single code unit 0xFFFE followed by the terminator.
	CHECK(value[0] == wchar_cast(0xfffe) && value[1] == 0);
#endif
}
TEST(parse_bom_fragment_invalid_utf32)
{
	xml_document doc;

	// FF FF 00 00 read as little-endian UTF-32 is the value 0xFFFF, which is not a
	// byte order mark, so it is expected to come back as document text.
	CHECK(doc.load_buffer("\xff\xff\x00\x00", 4, parse_fragment, encoding_utf32_le));

	const char_t* value = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// char build: U+FFFF in its three-byte UTF-8 form.
	CHECK_STRING(value, "\xef\xbf\xbf");
#else
	// wchar_t build: a single code unit 0xFFFF followed by the terminator.
	CHECK(value[0] == wchar_cast(0xffff) && value[1] == 0);
#endif
}

View File

@ -20,7 +20,7 @@ static xml_parse_result load_concat(xml_document& doc, const char_t* a, const ch
strcat(buffer, c); strcat(buffer, c);
#endif #endif
return doc.load(buffer); return doc.load(buffer, parse_fragment);
} }
static bool test_doctype_wf(const char_t* decl) static bool test_doctype_wf(const char_t* decl)
@ -31,9 +31,9 @@ static bool test_doctype_wf(const char_t* decl)
if (!load_concat(doc, decl) || !doc.first_child().empty()) return false; if (!load_concat(doc, decl) || !doc.first_child().empty()) return false;
// pcdata pre/postfix // pcdata pre/postfix
if (!load_concat(doc, STR("a"), decl) || !doc.first_child().empty()) return false; if (!load_concat(doc, STR("a"), decl) || !test_node(doc, STR("a"), STR(""), format_raw)) return false;
if (!load_concat(doc, decl, STR("b")) || !doc.first_child().empty()) return false; if (!load_concat(doc, decl, STR("b")) || !test_node(doc, STR("b"), STR(""), format_raw)) return false;
if (!load_concat(doc, STR("a"), decl, STR("b")) || !doc.first_child().empty()) return false; if (!load_concat(doc, STR("a"), decl, STR("b")) || !test_node(doc, STR("ab"), STR(""), format_raw)) return false;
// node pre/postfix // node pre/postfix
if (!load_concat(doc, STR("<nodea/>"), decl) || !test_node(doc, STR("<nodea />"), STR(""), format_raw)) return false; if (!load_concat(doc, STR("<nodea/>"), decl) || !test_node(doc, STR("<nodea />"), STR(""), format_raw)) return false;
@ -41,7 +41,7 @@ static bool test_doctype_wf(const char_t* decl)
if (!load_concat(doc, STR("<nodea/>"), decl, STR("<nodeb/>")) || !test_node(doc, STR("<nodea /><nodeb />"), STR(""), format_raw)) return false; if (!load_concat(doc, STR("<nodea/>"), decl, STR("<nodeb/>")) || !test_node(doc, STR("<nodea /><nodeb />"), STR(""), format_raw)) return false;
// check load-store contents preservation // check load-store contents preservation
CHECK(doc.load(decl, parse_doctype)); CHECK(doc.load(decl, parse_doctype | parse_fragment));
CHECK_NODE(doc, decl); CHECK_NODE(doc, decl);
return true; return true;
@ -281,8 +281,8 @@ TEST(parse_doctype_xmlconf_oasis_1)
// not actually a doctype :) // not actually a doctype :)
xml_document doc; xml_document doc;
CHECK(doc.load(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child()); CHECK(doc.load(STR("<!--a <!DOCTYPE <?- ]]>-<[ CDATA [ \"- -'- -<doc>--> <!---->"), parse_full | parse_fragment) && doc.first_child().type() == node_comment && doc.last_child().type() == node_comment && doc.first_child().next_sibling() == doc.last_child());
CHECK(doc.load(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child()); CHECK(doc.load(STR("<?xmla <!DOCTYPE <[ CDATA [</doc> &a%b&#c?>"), parse_full | parse_fragment) && doc.first_child().type() == node_pi && doc.first_child() == doc.last_child());
} }
TEST(parse_doctype_xmlconf_xmltest_1) TEST(parse_doctype_xmlconf_xmltest_1)
@ -299,7 +299,7 @@ TEST(parse_doctype_xmlconf_xmltest_1)
TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>"); TEST_DOCTYPE_WF("<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>");
} }
TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_minimal | parse_doctype) TEST_XML_FLAGS(parse_doctype_value, "<!DOCTYPE doc [ <!ELEMENT doc (#PCDATA)> <!ENTITY e \"<![CDATA[Tim & Michael]]>\"> ]>", parse_fragment | parse_doctype)
{ {
xml_node n = doc.first_child(); xml_node n = doc.first_child();

View File

@ -25,19 +25,19 @@ TEST_XML(write_pcdata, "<node attr='1'><child><sub/>text</child></node>")
CHECK_NODE_EX(doc, STR("<node attr=\"1\">\n\t<child>\n\t\t<sub />\n\t\ttext\n\t</child>\n</node>\n"), STR("\t"), format_indent); CHECK_NODE_EX(doc, STR("<node attr=\"1\">\n\t<child>\n\t\t<sub />\n\t\ttext\n\t</child>\n</node>\n"), STR("\t"), format_indent);
} }
// Output check for a document whose only content is a top-level CDATA section:
// CHECK_NODE expects the section text unchanged, and CHECK_NODE_EX with an empty
// third argument and flags 0 expects the same text followed by a newline.
// The header line carries two variants of the test declaration: first
// TEST_XML(...) with no explicit flags, then TEST_XML_FLAGS(...) with
// parse_cdata | parse_fragment. The remaining lines repeat the same statement twice.
TEST_XML(write_cdata, "<![CDATA[value]]>") TEST_XML_FLAGS(write_cdata, "<![CDATA[value]]>", parse_cdata | parse_fragment)
{ {
CHECK_NODE(doc, STR("<![CDATA[value]]>")); CHECK_NODE(doc, STR("<![CDATA[value]]>"));
CHECK_NODE_EX(doc, STR("<![CDATA[value]]>\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<![CDATA[value]]>\n"), STR(""), 0);
} }
// Output check for a document whose only content is an empty top-level CDATA
// section: CHECK_NODE expects "<![CDATA[]]>" unchanged, and CHECK_NODE_EX with an
// empty third argument and flags 0 expects the same text followed by a newline.
// The header line carries two variants of the test declaration: first
// TEST_XML(...) with no explicit flags, then TEST_XML_FLAGS(...) with
// parse_cdata | parse_fragment. The remaining lines repeat the same statement twice.
TEST_XML(write_cdata_empty, "<![CDATA[]]>") TEST_XML_FLAGS(write_cdata_empty, "<![CDATA[]]>", parse_cdata | parse_fragment)
{ {
CHECK_NODE(doc, STR("<![CDATA[]]>")); CHECK_NODE(doc, STR("<![CDATA[]]>"));
CHECK_NODE_EX(doc, STR("<![CDATA[]]>\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<![CDATA[]]>\n"), STR(""), 0);
} }
TEST_XML(write_cdata_escape, "<![CDATA[value]]>") TEST_XML_FLAGS(write_cdata_escape, "<![CDATA[value]]>", parse_cdata | parse_fragment)
{ {
CHECK_NODE(doc, STR("<![CDATA[value]]>")); CHECK_NODE(doc, STR("<![CDATA[value]]>"));
@ -51,26 +51,25 @@ TEST_XML(write_cdata_inner, "<node><![CDATA[value]]></node>")
CHECK_NODE_EX(doc, STR("<node><![CDATA[value]]></node>\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<node><![CDATA[value]]></node>\n"), STR(""), 0);
} }
// Output check for a document whose only content is a top-level comment:
// CHECK_NODE expects "<!--text-->" unchanged, and CHECK_NODE_EX with an empty
// third argument and flags 0 expects the same text followed by a newline.
// The two header lines are two variants of the same declaration: the first uses
// parse_comments | parse_fragment, the second parse_default | parse_comments.
// The remaining lines repeat the same statement twice.
TEST_XML_FLAGS(write_comment, "<!--text-->", parse_comments | parse_fragment)
TEST_XML_FLAGS(write_comment, "<!--text-->", parse_default | parse_comments)
{ {
CHECK_NODE(doc, STR("<!--text-->")); CHECK_NODE(doc, STR("<!--text-->"));
CHECK_NODE_EX(doc, STR("<!--text-->\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<!--text-->\n"), STR(""), 0);
} }
// Output check for a document whose only content is a top-level processing
// instruction: CHECK_NODE expects "<?name value?>" unchanged, and CHECK_NODE_EX
// with an empty third argument and flags 0 expects the same text plus a newline.
// The header line carries two variants of the declaration: the first uses
// parse_default | parse_pi, the second parse_pi | parse_fragment. The remaining
// lines repeat the same statement twice.
TEST_XML_FLAGS(write_pi, "<?name value?>", parse_default | parse_pi) TEST_XML_FLAGS(write_pi, "<?name value?>", parse_pi | parse_fragment)
{ {
CHECK_NODE(doc, STR("<?name value?>")); CHECK_NODE(doc, STR("<?name value?>"));
CHECK_NODE_EX(doc, STR("<?name value?>\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<?name value?>\n"), STR(""), 0);
} }
// Output check for a document whose only content is an XML declaration. The
// input quotes the version attribute with single quotes; both expected outputs
// use double quotes, and the CHECK_NODE_EX form (empty third argument, flags 0)
// additionally expects a trailing newline.
// The header line carries two variants of the declaration: the first uses
// parse_default | parse_declaration, the second parse_declaration | parse_fragment.
// The remaining lines repeat the same statement twice.
TEST_XML_FLAGS(write_declaration, "<?xml version='2.0'?>", parse_default | parse_declaration) TEST_XML_FLAGS(write_declaration, "<?xml version='2.0'?>", parse_declaration | parse_fragment)
{ {
CHECK_NODE(doc, STR("<?xml version=\"2.0\"?>")); CHECK_NODE(doc, STR("<?xml version=\"2.0\"?>"));
CHECK_NODE_EX(doc, STR("<?xml version=\"2.0\"?>\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<?xml version=\"2.0\"?>\n"), STR(""), 0);
} }
TEST_XML_FLAGS(write_doctype, "<!DOCTYPE id [ foo ]>", parse_default | parse_doctype) TEST_XML_FLAGS(write_doctype, "<!DOCTYPE id [ foo ]>", parse_doctype | parse_fragment)
{ {
CHECK_NODE(doc, STR("<!DOCTYPE id [ foo ]>")); CHECK_NODE(doc, STR("<!DOCTYPE id [ foo ]>"));
CHECK_NODE_EX(doc, STR("<!DOCTYPE id [ foo ]>\n"), STR(""), 0); CHECK_NODE_EX(doc, STR("<!DOCTYPE id [ foo ]>\n"), STR(""), 0);