Parsing flags refactoring (removed trim flags, eol flags merged together, escapes flags merged together, removed wnorm_pcdata flag, changed wnorm_attribute flag (it's space normalization + trimming now), fixed default flags, changed documentation accordingly)
git-svn-id: http://pugixml.googlecode.com/svn/trunk@26 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
parent
784235c5e6
commit
2777da9faa
@ -275,30 +275,13 @@ So, these are the processing flags:
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li>If <b>parse_trim_pcdata</b> is on, then the trimming of leading/trailing space-like characters
|
<li>If <b>parse_escapes</b> is on, then the character reference expansion is done for PCDATA content
|
||||||
is performed for PCDATA content
|
and for attribute values (replacing &lt; with <, &#x4c; with L, etc.).
|
||||||
<br>Default value: on
|
|
||||||
<br>In W3C mode: off</li>
|
|
||||||
<li>If <b>parse_trim_attribute</b> is on, then the trimming of leading/trailing space-like characters
|
|
||||||
is performed for attribute values (this is non-standard behavior and is here only for compatibility
|
|
||||||
reasons (PugXML had this flag)).
|
|
||||||
<br>Default value: off
|
|
||||||
<br>In W3C mode: off</li>
|
|
||||||
<li>If <b>parse_escapes_pcdata</b> is on, then the character reference expansion is done for PCDATA
|
|
||||||
content (replacing &lt; with <, &#x4c; with L, etc.).
|
|
||||||
<br>Default value: on
|
<br>Default value: on
|
||||||
<br>In W3C mode: on</li>
|
<br>In W3C mode: on</li>
|
||||||
<li>If <b>parse_escapes_attribute</b> is on, then the character reference expansion is done for
|
|
||||||
attribute values (replacing &lt; with <, &#x4c; with L, etc.).
|
|
||||||
<br>Default value: on
|
|
||||||
<br>In W3C mode: on</li>
|
|
||||||
<li>If <b>parse_wnorm_pcdata</b> is on, then the whitespace normalisation is done for PCDATA content
|
|
||||||
(this includes replacing any space-like character by a space character and converting sequences of
|
|
||||||
spaces into a single space)
|
|
||||||
<br>Default value: on
|
|
||||||
<br>In W3C mode: off</li>
|
|
||||||
<li>If <b>parse_wnorm_attribute</b> is on, then the whitespace normalisation is done for attribute
|
<li>If <b>parse_wnorm_attribute</b> is on, then the whitespace normalisation is done for attribute
|
||||||
values
|
values (this includes replacing any space-like character by a space character, converting sequences of
|
||||||
|
spaces into a single space and trimming of leading/trailing spaces)
|
||||||
<br>Default value: on
|
<br>Default value: off
|
||||||
<br>In W3C mode: off</li>
|
<br>In W3C mode: off</li>
|
||||||
<li>If <b>parse_wconv_attribute</b> is on, then the whitespace conversion is done for attribute
|
<li>If <b>parse_wconv_attribute</b> is on, then the whitespace conversion is done for attribute
|
||||||
@ -306,15 +289,9 @@ values (this is a subset of whitespace normalization, and includes only replacin
|
|||||||
with spaces). If <b>parse_wnorm_attribute</b> is on, this flag has no effect.
|
with spaces). If <b>parse_wnorm_attribute</b> is on, this flag has no effect.
|
||||||
<br>Default value: on
|
<br>Default value: on
|
||||||
<br>In W3C mode: on</li>
|
<br>In W3C mode: on</li>
|
||||||
<li>If <b>parse_eol_pcdata</b> is on, then the end-of-line handling is done for PCDATA content (this
|
<li>If <b>parse_eol</b> is on, then the end-of-line handling is done for PCDATA/CDATA content and for
|
||||||
includes converting any pair of 0x0d 0x0a characters to a single 0x0a and converting any standalone
|
attribute values (this includes converting any pair of 0x0d 0x0a characters to a single 0x0a and
|
||||||
0x0d to 0x0a).
|
converting any standalone 0x0d to 0x0a).
|
||||||
<br>Default value: on
|
|
||||||
<br>In W3C mode: on</li>
|
|
||||||
<li>If <b>parse_eol_attribute</b> is on, then the end-of-line handling is done for attribute values.
|
|
||||||
<br>Default value: on
|
|
||||||
<br>In W3C mode: on</li>
|
|
||||||
<li>If <b>parse_eol_cdata</b> is on, then the end-of-line handling is done for CDATA content.
|
|
||||||
<br>Default value: on
|
<br>Default value: on
|
||||||
<br>In W3C mode: on</li>
|
<br>In W3C mode: on</li>
|
||||||
</ul>
|
</ul>
|
||||||
@ -329,7 +306,7 @@ correctly). This is controlled by <b>parse_match_end_tags</b>, which is on by de
|
|||||||
<li>just treat the tag as a closing tag for the node (so that <b><foo> ... </bar></b> will
|
<li>just treat the tag as a closing tag for the node (so that <b><foo> ... </bar></b> will
|
||||||
be parsed as <b><foo> ... </foo></b>). This is the fastest way, and this is what <i>pugxml</i>
|
be parsed as <b><foo> ... </foo></b>). This is the fastest way, and this is what <i>pugxml</i>
|
||||||
is doing, but it can corrupt your DOM tree. This way is chosen if both <b>parse_check_end_tags</b> and
|
is doing, but it can corrupt your DOM tree. This way is chosen if both <b>parse_check_end_tags</b> and
|
||||||
<b>parsse_match_end_tags</b> are off.
|
<b>parse_match_end_tags</b> are off.
|
||||||
</ul>
|
</ul>
|
||||||
Note, that these 2 flags are mutually exclusive.
|
Note, that these 2 flags are mutually exclusive.
|
||||||
</p>
|
</p>
|
||||||
|
|||||||
116
src/pugixml.cpp
116
src/pugixml.cpp
@ -252,14 +252,14 @@ namespace pugi
|
|||||||
static bool chartype_lbracket(char c) { return c == '['; }
|
static bool chartype_lbracket(char c) { return c == '['; }
|
||||||
static bool chartype_rbracket(char c) { return c == ']'; }
|
static bool chartype_rbracket(char c) { return c == ']'; }
|
||||||
|
|
||||||
template <bool opt_trim, bool opt_escape, bool opt_wnorm, bool opt_wconv, bool opt_eol> static void strconv_t(char** s)
|
template <bool opt_escape, bool opt_wnorm, bool opt_wconv, bool opt_eol> static void strconv_t(char** s)
|
||||||
{
|
{
|
||||||
if (!s || !*s) return;
|
if (!s || !*s) return;
|
||||||
|
|
||||||
if (!opt_trim && !opt_escape && !opt_wnorm && !opt_wconv && !opt_eol) return;
|
if (!opt_escape && !opt_wnorm && !opt_wconv && !opt_eol) return;
|
||||||
|
|
||||||
// Trim whitespaces
|
// Trim whitespaces
|
||||||
if (opt_trim) while (chartype_space(**s)) ++(*s);
|
if (opt_wnorm) while (chartype_space(**s)) ++(*s);
|
||||||
|
|
||||||
char* str = *s;
|
char* str = *s;
|
||||||
|
|
||||||
@ -270,6 +270,7 @@ namespace pugi
|
|||||||
{
|
{
|
||||||
if (opt_escape && *str == '&') break;
|
if (opt_escape && *str == '&') break;
|
||||||
if ((opt_wnorm || opt_wconv || opt_eol) && chartype_space(*str)) break;
|
if ((opt_wnorm || opt_wconv || opt_eol) && chartype_space(*str)) break;
|
||||||
|
|
||||||
++str;
|
++str;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -406,7 +407,7 @@ namespace pugi
|
|||||||
*lastpos++ = *str++;
|
*lastpos++ = *str++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt_trim)
|
if (opt_wnorm)
|
||||||
{
|
{
|
||||||
do *lastpos-- = 0;
|
do *lastpos-- = 0;
|
||||||
while (chartype_space(*lastpos));
|
while (chartype_space(*lastpos));
|
||||||
@ -414,131 +415,68 @@ namespace pugi
|
|||||||
else *lastpos = 0;
|
else *lastpos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv, unsigned int opt_eol)
|
static void strconv_setup(void (*&func)(char**), unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv, unsigned int opt_eol)
|
||||||
{
|
{
|
||||||
if (opt_eol)
|
if (opt_eol)
|
||||||
{
|
{
|
||||||
if (opt_wconv)
|
if (opt_wconv)
|
||||||
{
|
|
||||||
if (opt_trim)
|
|
||||||
{
|
{
|
||||||
if (opt_escape)
|
if (opt_escape)
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<true, true, true, true, true>;
|
if (opt_wnorm) func = &strconv_t<true, true, true, true>;
|
||||||
else func = &strconv_t<true, true, false, true, true>;
|
else func = &strconv_t<true, false, true, true>;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<true, false, true, true, true>;
|
if (opt_wnorm) func = &strconv_t<false, true, true, true>;
|
||||||
else func = &strconv_t<true, false, false, true, true>;
|
else func = &strconv_t<false, false, true, true>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_escape)
|
if (opt_escape)
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<false, true, true, true, true>;
|
if (opt_wnorm) func = &strconv_t<true, true, false, true>;
|
||||||
else func = &strconv_t<false, true, false, true, true>;
|
else func = &strconv_t<true, false, false, true>;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<false, false, true, true, true>;
|
if (opt_wnorm) func = &strconv_t<false, true, false, true>;
|
||||||
else func = &strconv_t<false, false, false, true, true>;
|
else func = &strconv_t<false, false, false, true>;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_trim)
|
|
||||||
{
|
|
||||||
if (opt_escape)
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<true, true, true, false, true>;
|
|
||||||
else func = &strconv_t<true, true, false, false, true>;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<true, false, true, false, true>;
|
|
||||||
else func = &strconv_t<true, false, false, false, true>;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_escape)
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<false, true, true, false, true>;
|
|
||||||
else func = &strconv_t<false, true, false, false, true>;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<false, false, true, false, true>;
|
|
||||||
else func = &strconv_t<false, false, false, false, true>;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_wconv)
|
if (opt_wconv)
|
||||||
{
|
|
||||||
if (opt_trim)
|
|
||||||
{
|
{
|
||||||
if (opt_escape)
|
if (opt_escape)
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<true, true, true, true, false>;
|
if (opt_wnorm) func = &strconv_t<true, true, true, false>;
|
||||||
else func = &strconv_t<true, true, false, true, false>;
|
else func = &strconv_t<true, false, true, false>;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<true, false, true, true, false>;
|
if (opt_wnorm) func = &strconv_t<false, true, true, false>;
|
||||||
else func = &strconv_t<true, false, false, true, false>;
|
else func = &strconv_t<false, false, true, false>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_escape)
|
if (opt_escape)
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<false, true, true, true, false>;
|
if (opt_wnorm) func = &strconv_t<true, true, false, false>;
|
||||||
else func = &strconv_t<false, true, false, true, false>;
|
else func = &strconv_t<true, false, false, false>;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (opt_wnorm) func = &strconv_t<false, false, true, true, false>;
|
if (opt_wnorm) func = &strconv_t<false, true, false, false>;
|
||||||
else func = &strconv_t<false, false, false, true, false>;
|
else func = &strconv_t<false, false, false, false>;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_trim)
|
|
||||||
{
|
|
||||||
if (opt_escape)
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<true, true, true, false, false>;
|
|
||||||
else func = &strconv_t<true, true, false, false, false>;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<true, false, true, false, false>;
|
|
||||||
else func = &strconv_t<true, false, false, false, false>;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_escape)
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<false, true, true, false, false>;
|
|
||||||
else func = &strconv_t<false, true, false, false, false>;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt_wnorm) func = &strconv_t<false, false, true, false, false>;
|
|
||||||
else func = &strconv_t<false, false, false, false, false>;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate & append a new xml_node_struct onto the given parent.
|
// Allocate & append a new xml_node_struct onto the given parent.
|
||||||
// \param parent - pointer to parent node.
|
// \param parent - pointer to parent node.
|
||||||
// \param type - desired node type.
|
// \param type - desired node type.
|
||||||
@ -608,8 +546,8 @@ namespace pugi
|
|||||||
void (*strconv_pcdata)(char**);
|
void (*strconv_pcdata)(char**);
|
||||||
void (*strconv_attribute)(char**);
|
void (*strconv_attribute)(char**);
|
||||||
|
|
||||||
strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute), OPTSET(parse_eol_attribute));
|
strconv_setup(strconv_attribute, OPTSET(parse_escapes), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute), OPTSET(parse_eol));
|
||||||
strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false, OPTSET(parse_eol_pcdata));
|
strconv_setup(strconv_pcdata, OPTSET(parse_escapes), false, false, OPTSET(parse_eol));
|
||||||
|
|
||||||
char ch = 0; // Current char, in cases where we must null-terminate before we test.
|
char ch = 0; // Current char, in cases where we must null-terminate before we test.
|
||||||
xml_node_struct* cursor = xmldoc; // Tree node cursor.
|
xml_node_struct* cursor = xmldoc; // Tree node cursor.
|
||||||
@ -702,9 +640,9 @@ namespace pugi
|
|||||||
SCANFOR(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)));
|
SCANFOR(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)));
|
||||||
ENDSEG(); // Zero-terminate this segment.
|
ENDSEG(); // Zero-terminate this segment.
|
||||||
|
|
||||||
if (OPTSET(parse_eol_cdata))
|
if (OPTSET(parse_eol))
|
||||||
{
|
{
|
||||||
strconv_t<false, false, false, false, true>(&cursor->value);
|
strconv_t<false, false, false, true>(&cursor->value);
|
||||||
}
|
}
|
||||||
|
|
||||||
POPNODE(); // Pop since this is a standalone.
|
POPNODE(); // Pop since this is a standalone.
|
||||||
|
|||||||
@ -48,28 +48,21 @@ namespace pugi
|
|||||||
const unsigned int parse_pi = 0x00000001; ///< Parse '<?...?>'
|
const unsigned int parse_pi = 0x00000001; ///< Parse '<?...?>'
|
||||||
const unsigned int parse_comments = 0x00000002; ///< Parse '<!--...-->'
|
const unsigned int parse_comments = 0x00000002; ///< Parse '<!--...-->'
|
||||||
const unsigned int parse_cdata = 0x00000004; ///< Parse '<![CDATA[...]]>'
|
const unsigned int parse_cdata = 0x00000004; ///< Parse '<![CDATA[...]]>'
|
||||||
const unsigned int parse_ws_pcdata = 0x00000008; ///< Skip PCDATA that consists only of whitespaces
|
const unsigned int parse_ws_pcdata = 0x00000008; ///< Do not skip PCDATA that consists only of whitespaces
|
||||||
const unsigned int parse_ext_pcdata = 0x00000010; ///< Skip PCDATA that is outside all tags (i.e. root)
|
const unsigned int parse_ext_pcdata = 0x00000010; ///< Do not skip PCDATA that is outside all tags (i.e. root)
|
||||||
const unsigned int parse_trim_pcdata = 0x00000020; ///< Trim '>...<'
|
const unsigned int parse_escapes = 0x00000020; ///< Parse &lt;, &gt;, &amp;, &quot;, &apos;, &#.. sequences
|
||||||
const unsigned int parse_trim_attribute = 0x00000040; ///< Trim 'foo="..."'.
|
const unsigned int parse_wnorm_attribute = 0x00000080; ///< Normalize spaces in attributes (convert space-like characters to spaces + merge adjacent spaces + trim leading/trailing spaces)
|
||||||
const unsigned int parse_escapes_pcdata = 0x00000080; ///< Parse &lt;, &gt;, &amp;, &quot;, &apos;, &#.. sequences
|
const unsigned int parse_wconv_attribute = 0x00000100; ///< Convert space-like characters to spaces in attributes (only if wnorm is not set)
|
||||||
const unsigned int parse_escapes_attribute = 0x00000100; ///< Parse &lt;, &gt;, &amp;, &quot;, &apos;, &#.. sequences
|
const unsigned int parse_eol = 0x00000200; ///< Perform EOL handling
|
||||||
const unsigned int parse_wnorm_pcdata = 0x00000200; ///< Normalize spaces in pcdata
|
const unsigned int parse_check_end_tags = 0x00000400; ///< Check start and end tag names and return error if names mismatch
|
||||||
const unsigned int parse_wnorm_attribute = 0x00000400; ///< Normalize spaces in attributes
|
const unsigned int parse_match_end_tags = 0x00000800; ///< Try to find corresponding start tag for an end tag
|
||||||
const unsigned int parse_wconv_attribute = 0x00000800; ///< Convert space-like characters to spaces in attributes (only if wnorm is not set)
|
|
||||||
const unsigned int parse_eol_pcdata = 0x00001000; ///< Perform EOL handling in pcdata
|
|
||||||
const unsigned int parse_eol_attribute = 0x00002000; ///< Perform EOL handling in attributes
|
|
||||||
const unsigned int parse_eol_cdata = 0x00004000; ///< Perform EOL handling in CDATA sections
|
|
||||||
const unsigned int parse_check_end_tags = 0x00010000; ///< Check start and end tag names and return error if names mismatch
|
|
||||||
const unsigned int parse_match_end_tags = 0x00020000; ///< Try to find corresponding start tag for an end tag
|
|
||||||
///< Set all flags, except parse_ws_pcdata, parse_trim_attribute, parse_pi and parse_comments
|
///< Default flags: parse_cdata, parse_ext_pcdata, parse_escapes, parse_wconv_attribute, parse_eol and parse_check_end_tags
|
||||||
const unsigned int parse_default = 0x00FFFFFF & ~parse_ws_pcdata & ~parse_trim_attribute & ~parse_pi & ~parse_comments;
|
const unsigned int parse_default = parse_cdata | parse_ext_pcdata | parse_escapes | parse_wconv_attribute | parse_eol | parse_check_end_tags;
|
||||||
const unsigned int parse_noset = 0x80000000; ///< Parse with flags in xml_parser
|
const unsigned int parse_noset = 0x80000000; ///< Parse with flags in xml_parser
|
||||||
|
|
||||||
const unsigned int parse_w3c = parse_pi | parse_comments | parse_cdata |
|
const unsigned int parse_w3c = parse_pi | parse_comments | parse_cdata |
|
||||||
parse_escapes_pcdata | parse_escapes_attribute |
|
parse_escapes | parse_wconv_attribute |
|
||||||
parse_wconv_attribute | parse_check_end_tags |
|
parse_check_end_tags | parse_ws_pcdata | parse_eol;
|
||||||
parse_ws_pcdata | parse_eol_cdata;
|
|
||||||
|
|
||||||
/// Forward declarations
|
/// Forward declarations
|
||||||
struct xml_attribute_struct;
|
struct xml_attribute_struct;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user