Add initial support for parse_embed_pcdata
When this flag is set, the PCDATA value is saved in the parent element instead of allocating a new node. Because this loses information, some documents no longer round-trip, but it can provide a significant memory reduction and parsing speedup for some documents.
This commit is contained in:
parent
ad3b492c1a
commit
2874f6f21d
@ -3360,13 +3360,22 @@ PUGI__NS_BEGIN
|
|||||||
|
|
||||||
if (cursor->parent || PUGI__OPTSET(parse_fragment))
|
if (cursor->parent || PUGI__OPTSET(parse_fragment))
|
||||||
{
|
{
|
||||||
PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
|
if (!PUGI__OPTSET(parse_embed_pcdata))
|
||||||
cursor->value = s; // Save the offset.
|
{
|
||||||
|
PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
|
||||||
|
|
||||||
|
cursor->value = s; // Save the offset.
|
||||||
|
|
||||||
|
PUGI__POPNODE(); // Pop since this is a standalone.
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (cursor->parent && !cursor->value)
|
||||||
|
cursor->value = s; // Save the offset.
|
||||||
|
}
|
||||||
|
|
||||||
s = strconv_pcdata(s);
|
s = strconv_pcdata(s);
|
||||||
|
|
||||||
PUGI__POPNODE(); // Pop since this is a standalone.
|
|
||||||
|
|
||||||
if (!*s) break;
|
if (!*s) break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@ -158,6 +158,11 @@ namespace pugi
|
|||||||
// is a valid document. This flag is off by default.
|
// is a valid document. This flag is off by default.
|
||||||
const unsigned int parse_fragment = 0x1000;
|
const unsigned int parse_fragment = 0x1000;
|
||||||
|
|
||||||
|
// This flag determines if plain character data is stored in the parent element's value. This significantly changes the structure of
|
||||||
|
// the document and does not allow some documents to round-trip; this flag is only recommended for parsing documents with a lot of
|
||||||
|
// PCDATA nodes in a very memory-constrained environment. This flag is off by default.
|
||||||
|
const unsigned int parse_embed_pcdata = 0x2000;
|
||||||
|
|
||||||
// The default parsing mode.
|
// The default parsing mode.
|
||||||
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
|
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
|
||||||
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
|
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user