Add length field for name & value

This commit is contained in:
halx99 2021-12-23 15:13:59 +08:00
parent bd0fd02860
commit 909b3b65f1

View File

@ -1104,6 +1104,9 @@ namespace pugi
char_t* name; char_t* name;
char_t* value; char_t* value;
int name_len;
int value_len;
xml_attribute_struct* prev_attribute_c; xml_attribute_struct* prev_attribute_c;
xml_attribute_struct* next_attribute; xml_attribute_struct* next_attribute;
}; };
@ -1119,6 +1122,8 @@ namespace pugi
char_t* name; char_t* name;
char_t* value; char_t* value;
int name_len;
int value_len;
xml_node_struct* parent; xml_node_struct* parent;
@ -2593,14 +2598,16 @@ PUGI__NS_BEGIN
#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
#define PUGI__SCANWHILE(X) { while (X) ++s; } #define PUGI__SCANWHILE(X) { while (X) ++s; }
#define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
#define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } #define PUGI__ENDSEG(p,v) { ch = *s; p->v##_len = static_cast<int>(s - p->v); *s = '\0'; ++s; }
#define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) PUGI__FN char_t* strconv_comment(char_t* s, char_t endch, int& len)
{ {
gap g; gap g;
char_t* begin = s;
while (true) while (true)
{ {
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
@ -2613,7 +2620,9 @@ PUGI__NS_BEGIN
} }
else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
{ {
*g.flush(s) = 0; auto end = g.flush(s);
*end = '\0';
len = static_cast<int>(end - begin);
return s + (s[2] == '>' ? 3 : 2); return s + (s[2] == '>' ? 3 : 2);
} }
@ -2625,10 +2634,12 @@ PUGI__NS_BEGIN
} }
} }
PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch, int& len)
{ {
gap g; gap g;
char_t* begin = s;
while (true) while (true)
{ {
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
@ -2641,7 +2652,9 @@ PUGI__NS_BEGIN
} }
else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
{ {
*g.flush(s) = 0; auto end = g.flush(s);
*end = '\0';
len = static_cast<int>(end - begin);
return s + 1; return s + 1;
} }
@ -2653,11 +2666,11 @@ PUGI__NS_BEGIN
} }
} }
typedef char_t* (*strconv_pcdata_t)(char_t*); typedef char_t* (*strconv_pcdata_t)(char_t*, int& len);
template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
{ {
static char_t* parse(char_t* s) static char_t* parse(char_t* s, int& len)
{ {
gap g; gap g;
@ -2675,7 +2688,8 @@ PUGI__NS_BEGIN
while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
--end; --end;
*end = 0; *end = '\0';
len = static_cast<int>(end - begin);
return s + 1; return s + 1;
} }
@ -2697,7 +2711,8 @@ PUGI__NS_BEGIN
while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
--end; --end;
*end = 0; *end = '\0';
len = static_cast<int>(end - begin);
return s; return s;
} }
@ -2724,14 +2739,16 @@ PUGI__NS_BEGIN
} }
} }
typedef char_t* (*strconv_attribute_t)(char_t*, char_t); typedef char_t* (*strconv_attribute_t)(char_t*, char_t, int& len);
template <typename opt_escape> struct strconv_attribute_impl template <typename opt_escape> struct strconv_attribute_impl
{ {
static char_t* parse_wnorm(char_t* s, char_t end_quote) static char_t* parse_wnorm(char_t* s, char_t end_quote, int& len)
{ {
gap g; gap g;
char_t* begin = s;
// trim leading whitespaces // trim leading whitespaces
if (PUGI__IS_CHARTYPE(*s, ct_space)) if (PUGI__IS_CHARTYPE(*s, ct_space))
{ {
@ -2751,9 +2768,11 @@ PUGI__NS_BEGIN
{ {
char_t* str = g.flush(s); char_t* str = g.flush(s);
do *str-- = 0; do *str-- = '\0';
while (PUGI__IS_CHARTYPE(*str, ct_space)); while (PUGI__IS_CHARTYPE(*str, ct_space));
len = static_cast<int>(str + 1 - begin);
return s + 1; return s + 1;
} }
else if (PUGI__IS_CHARTYPE(*s, ct_space)) else if (PUGI__IS_CHARTYPE(*s, ct_space))
@ -2780,17 +2799,21 @@ PUGI__NS_BEGIN
} }
} }
static char_t* parse_wconv(char_t* s, char_t end_quote) static char_t* parse_wconv(char_t* s, char_t end_quote, int& len)
{ {
gap g; gap g;
char_t* begin = s;
while (true) while (true)
{ {
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
if (*s == end_quote) if (*s == end_quote)
{ {
*g.flush(s) = 0; auto end = g.flush(s);
*end = '\0';
len = static_cast<int>(end - begin);
return s + 1; return s + 1;
} }
@ -2816,17 +2839,21 @@ PUGI__NS_BEGIN
} }
} }
static char_t* parse_eol(char_t* s, char_t end_quote) static char_t* parse_eol(char_t* s, char_t end_quote, int& len)
{ {
gap g; gap g;
char_t* begin = s;
while (true) while (true)
{ {
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
if (*s == end_quote) if (*s == end_quote)
{ {
*g.flush(s) = 0; auto end = g.flush(s);
*end = '\0';
len = static_cast<int>(end - begin);
return s + 1; return s + 1;
} }
@ -2848,17 +2875,21 @@ PUGI__NS_BEGIN
} }
} }
static char_t* parse_simple(char_t* s, char_t end_quote) static char_t* parse_simple(char_t* s, char_t end_quote, int& len)
{ {
gap g; gap g;
char_t* begin = s;
while (true) while (true)
{ {
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
if (*s == end_quote) if (*s == end_quote)
{ {
*g.flush(s) = 0; auto end = g.flush(s);
*end = '\0';
len = static_cast<int>(end - begin);
return s + 1; return s + 1;
} }
@ -3058,7 +3089,7 @@ PUGI__NS_BEGIN
if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
{ {
s = strconv_comment(s, endch); s = strconv_comment(s, endch, cursor->value_len);
if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
} }
@ -3068,9 +3099,10 @@ PUGI__NS_BEGIN
PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
PUGI__CHECK_ERROR(status_bad_comment, s); PUGI__CHECK_ERROR(status_bad_comment, s);
if (PUGI__OPTSET(parse_comments)) if (PUGI__OPTSET(parse_comments)) {
*s = 0; // Zero-terminate this segment at the first terminating '-'. *s = '\0'; // Zero-terminate this segment at the first terminating '-'.
cursor->value_len = static_cast<int>(s - cursor->value);
}
s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
} }
} }
@ -3090,7 +3122,7 @@ PUGI__NS_BEGIN
if (PUGI__OPTSET(parse_eol)) if (PUGI__OPTSET(parse_eol))
{ {
s = strconv_cdata(s, endch); s = strconv_cdata(s, endch, cursor->value_len);
if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
} }
@ -3100,7 +3132,8 @@ PUGI__NS_BEGIN
PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
PUGI__CHECK_ERROR(status_bad_cdata, s); PUGI__CHECK_ERROR(status_bad_cdata, s);
*s++ = 0; // Zero-terminate this segment. cursor->value_len = s - cursor->value;
*s++ = '\0'; // Zero-terminate this segment.
} }
} }
else // Flagged for discard, but we still have to scan for the terminator. else // Flagged for discard, but we still have to scan for the terminator.
@ -3128,7 +3161,7 @@ PUGI__NS_BEGIN
if (!s) return s; if (!s) return s;
assert((*s == 0 && endch == '>') || *s == '>'); assert((*s == 0 && endch == '>') || *s == '>');
if (*s) *s++ = 0; if (*s) *s++ = '\0';
if (PUGI__OPTSET(parse_doctype)) if (PUGI__OPTSET(parse_doctype))
{ {
@ -3137,6 +3170,7 @@ PUGI__NS_BEGIN
PUGI__PUSHNODE(node_doctype); PUGI__PUSHNODE(node_doctype);
cursor->value = mark; cursor->value = mark;
cursor->value_len = static_cast<int>(s - mark - 1);
} }
} }
else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
@ -3182,7 +3216,7 @@ PUGI__NS_BEGIN
cursor->name = target; cursor->name = target;
PUGI__ENDSEG(); PUGI__ENDSEG(cursor, name);
// parse value/attributes // parse value/attributes
if (ch == '?') if (ch == '?')
@ -3215,11 +3249,9 @@ PUGI__NS_BEGIN
{ {
// store value and step over > // store value and step over >
cursor->value = value; cursor->value = value;
PUGI__ENDSEG(cursor, value);
PUGI__POPNODE(); PUGI__POPNODE();
PUGI__ENDSEG();
s += (*s == '>'); s += (*s == '>');
} }
} }
@ -3263,7 +3295,7 @@ PUGI__NS_BEGIN
cursor->name = s; cursor->name = s;
PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. PUGI__ENDSEG(cursor, name); // Save char in 'ch', terminate & step over.
if (ch == '>') if (ch == '>')
{ {
@ -3284,7 +3316,7 @@ PUGI__NS_BEGIN
a->name = s; // Save the offset. a->name = s; // Save the offset.
PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. PUGI__ENDSEG(a, name); // Save char in 'ch', terminate & step over.
if (PUGI__IS_CHARTYPE(ch, ct_space)) if (PUGI__IS_CHARTYPE(ch, ct_space))
{ {
@ -3304,7 +3336,7 @@ PUGI__NS_BEGIN
++s; // Step over the quote. ++s; // Step over the quote.
a->value = s; // Save the offset. a->value = s; // Save the offset.
s = strconv_attribute(s, ch); s = strconv_attribute(s, ch, a->value_len);
if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
@ -3442,20 +3474,22 @@ PUGI__NS_BEGIN
if (cursor->parent || PUGI__OPTSET(parse_fragment)) if (cursor->parent || PUGI__OPTSET(parse_fragment))
{ {
pugi::xml_node_struct* target;
if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
{ {
cursor->value = s; // Save the offset. target = cursor; // cursor->value = s; // Save the offset.
} }
else else
{ {
PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
cursor->value = s; // Save the offset. target = cursor; // cursor->value = s; // Save the offset.
PUGI__POPNODE(); // Pop since this is a standalone. PUGI__POPNODE(); // Pop since this is a standalone.
} }
s = strconv_pcdata(s); target->value = s;
s = strconv_pcdata(s, target->value_len);
if (!*s) break; if (!*s) break;
} }
@ -8376,7 +8410,7 @@ PUGI__NS_BEGIN
// zero-terminate // zero-terminate
assert(s < result + result_size); assert(s < result + result_size);
*s = 0; *s = '\0';
return xpath_string::from_heap_preallocated(result, s); return xpath_string::from_heap_preallocated(result, s);
} }