Optimized debug mode parsing/saving by an order of magnitude

git-svn-id: http://pugixml.googlecode.com/svn/trunk@440 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine 2010-05-20 22:15:23 +00:00
parent 4c7d82fa5b
commit e31d977c80
2 changed files with 58 additions and 73 deletions

View File

@ -922,16 +922,11 @@ namespace
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
}; };
inline bool is_chartype(char_t c, chartype_t ct)
{
#ifdef PUGIXML_WCHAR_MODE #ifdef PUGIXML_WCHAR_MODE
unsigned int ch = static_cast<unsigned int>(c); #define IS_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartype_table[static_cast<unsigned int>(c)] : chartype_table[128]) & (ct))
return !!((ch < 128 ? chartype_table[ch] : chartype_table[128]) & ct);
#else #else
return !!(chartype_table[static_cast<unsigned char>(c)] & ct); #define IS_CHARTYPE(c, ct) (chartype_table[static_cast<unsigned char>(c)] & (ct))
#endif #endif
}
enum output_chartype_t enum output_chartype_t
{ {
@ -961,16 +956,11 @@ namespace
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}; };
inline bool is_output_chartype(char_t c, output_chartype_t ct)
{
#ifdef PUGIXML_WCHAR_MODE #ifdef PUGIXML_WCHAR_MODE
unsigned int ch = static_cast<unsigned int>(c); #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? output_chartype_table[static_cast<unsigned int>(c)] : output_chartype_table[128]) & (ct))
return !!((ch < 128 ? output_chartype_table[ch] : output_chartype_table[128]) & ct);
#else #else
return !!(output_chartype_table[static_cast<unsigned char>(c)] & ct); #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast<unsigned char>(c)] & (ct))
#endif #endif
}
template <bool _1> struct opt1_to_type template <bool _1> struct opt1_to_type
{ {
@ -1488,7 +1478,7 @@ namespace
while (true) while (true)
{ {
while (!is_chartype(*s, ct_parse_comment)) ++s; while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s;
if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
{ {
@ -1518,7 +1508,7 @@ namespace
while (true) while (true)
{ {
while (!is_chartype(*s, ct_parse_cdata)) ++s; while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
{ {
@ -1553,7 +1543,7 @@ namespace
while (true) while (true)
{ {
while (!is_chartype(*s, ct_parse_pcdata)) ++s; while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
if (*s == '<') // PCDATA ends here if (*s == '<') // PCDATA ends here
{ {
@ -1608,19 +1598,19 @@ namespace
gap g; gap g;
// trim leading whitespaces // trim leading whitespaces
if (opt_wnorm && is_chartype(*s, ct_space)) if (opt_wnorm && IS_CHARTYPE(*s, ct_space))
{ {
char_t* str = s; char_t* str = s;
do ++str; do ++str;
while (is_chartype(*str, ct_space)); while (IS_CHARTYPE(*str, ct_space));
g.push(s, str - s); g.push(s, str - s);
} }
while (true) while (true)
{ {
while (!is_chartype(*s, (opt_wnorm || opt_wconv) ? ct_parse_attr_ws : ct_parse_attr)) ++s; while (!IS_CHARTYPE(*s, (opt_wnorm || opt_wconv) ? ct_parse_attr_ws : ct_parse_attr)) ++s;
if (*s == end_quote) if (*s == end_quote)
{ {
@ -1629,25 +1619,25 @@ namespace
if (opt_wnorm) if (opt_wnorm)
{ {
do *str-- = 0; do *str-- = 0;
while (is_chartype(*str, ct_space)); while (IS_CHARTYPE(*str, ct_space));
} }
else *str = 0; else *str = 0;
return s + 1; return s + 1;
} }
else if (opt_wnorm && is_chartype(*s, ct_space)) else if (opt_wnorm && IS_CHARTYPE(*s, ct_space))
{ {
*s++ = ' '; *s++ = ' ';
if (is_chartype(*s, ct_space)) if (IS_CHARTYPE(*s, ct_space))
{ {
char_t* str = s + 1; char_t* str = s + 1;
while (is_chartype(*str, ct_space)) ++str; while (IS_CHARTYPE(*str, ct_space)) ++str;
g.push(s, str - s); g.push(s, str - s);
} }
} }
else if (opt_wconv && is_chartype(*s, ct_space)) else if (opt_wconv && IS_CHARTYPE(*s, ct_space))
{ {
if (opt_eol) if (opt_eol)
{ {
@ -1719,7 +1709,7 @@ namespace
xml_allocator alloc; xml_allocator alloc;
// Parser utilities. // Parser utilities.
#define SKIPWS() { while (is_chartype(*s, ct_space)) ++s; } #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; }
#define OPTSET(OPT) ( optmsk & OPT ) #define OPTSET(OPT) ( optmsk & OPT )
#define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); } #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); }
#define POPNODE() { cursor = cursor->parent; } #define POPNODE() { cursor = cursor->parent; }
@ -1962,15 +1952,15 @@ namespace
// parse node contents, starting with question mark // parse node contents, starting with question mark
++s; ++s;
if (!is_chartype(*s, ct_start_symbol)) // bad PI if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI
THROW_ERROR(status_bad_pi, s); THROW_ERROR(status_bad_pi, s);
else if (OPTSET(parse_pi) || OPTSET(parse_declaration)) else if (OPTSET(parse_pi) || OPTSET(parse_declaration))
{ {
char_t* mark = s; char_t* mark = s;
SCANWHILE(is_chartype(*s, ct_symbol)); // Read PI target SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target
CHECK_ERROR(status_bad_pi, s); CHECK_ERROR(status_bad_pi, s);
if (!is_chartype(*s, ct_space) && *s != '?') // Target has to end with space or ? if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ?
THROW_ERROR(status_bad_pi, s); THROW_ERROR(status_bad_pi, s);
ENDSEG(); ENDSEG();
@ -2091,38 +2081,38 @@ namespace
++s; ++s;
LOC_TAG: LOC_TAG:
if (is_chartype(*s, ct_start_symbol)) // '<#...' if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
{ {
PUSHNODE(node_element); // Append a new node to the tree. PUSHNODE(node_element); // Append a new node to the tree.
cursor->name = s; cursor->name = s;
SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator. SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
ENDSEG(); // Save char in 'ch', terminate & step over. ENDSEG(); // Save char in 'ch', terminate & step over.
if (ch == '>') if (ch == '>')
{ {
// end of tag // end of tag
} }
else if (is_chartype(ch, ct_space)) else if (IS_CHARTYPE(ch, ct_space))
{ {
LOC_ATTRIBUTES: LOC_ATTRIBUTES:
while (true) while (true)
{ {
SKIPWS(); // Eat any whitespace. SKIPWS(); // Eat any whitespace.
if (is_chartype(*s, ct_start_symbol)) // <... #... if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
{ {
xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
a->name = s; // Save the offset. a->name = s; // Save the offset.
SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator. SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
CHECK_ERROR(status_bad_attribute, s); CHECK_ERROR(status_bad_attribute, s);
ENDSEG(); // Save char in 'ch', terminate & step over. ENDSEG(); // Save char in 'ch', terminate & step over.
CHECK_ERROR(status_bad_attribute, s); CHECK_ERROR(status_bad_attribute, s);
if (is_chartype(ch, ct_space)) if (IS_CHARTYPE(ch, ct_space))
{ {
SKIPWS(); // Eat any whitespace. SKIPWS(); // Eat any whitespace.
CHECK_ERROR(status_bad_attribute, s); CHECK_ERROR(status_bad_attribute, s);
@ -2148,7 +2138,7 @@ namespace
// After this line the loop continues from the start; // After this line the loop continues from the start;
// Whitespaces, / and > are ok, symbols and EOF are wrong, // Whitespaces, / and > are ok, symbols and EOF are wrong,
// everything else will be detected // everything else will be detected
if (is_chartype(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s);
} }
else THROW_ERROR(status_bad_attribute, s); else THROW_ERROR(status_bad_attribute, s);
} }
@ -2210,7 +2200,7 @@ namespace
char_t* name = cursor->name; char_t* name = cursor->name;
if (!name) THROW_ERROR(status_end_element_mismatch, s); if (!name) THROW_ERROR(status_end_element_mismatch, s);
while (is_chartype(*s, ct_symbol)) while (IS_CHARTYPE(*s, ct_symbol))
{ {
if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s);
} }
@ -2681,7 +2671,7 @@ namespace
const char_t* prev = s; const char_t* prev = s;
// While *s is a usual symbol // While *s is a usual symbol
while (!is_output_chartype(*s, type)) ++s; while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s;
writer.write(prev, static_cast<size_t>(s - prev)); writer.write(prev, static_cast<size_t>(s - prev));

View File

@ -82,16 +82,11 @@ namespace
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
}; };
inline bool is_chartypex(char_t c, chartypex ct)
{
#ifdef PUGIXML_WCHAR_MODE #ifdef PUGIXML_WCHAR_MODE
unsigned int ch = static_cast<unsigned int>(c); #define IS_CHARTYPEX(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartypex_table[static_cast<unsigned int>(c)] : chartypex_table[128]) & (ct))
return !!((ch < 128 ? chartypex_table[ch] : chartypex_table[128]) & ct);
#else #else
return !!(chartypex_table[static_cast<unsigned char>(c)] & ct); #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast<unsigned char>(c)] & (ct))
#endif #endif
}
bool starts_with(const char_t* string, const char_t* pattern) bool starts_with(const char_t* string, const char_t* pattern)
{ {
@ -401,7 +396,7 @@ namespace
bool check_string_to_number_format(const char_t* string) bool check_string_to_number_format(const char_t* string)
{ {
// parse leading whitespace // parse leading whitespace
while (is_chartypex(*string, ctx_space)) ++string; while (IS_CHARTYPEX(*string, ctx_space)) ++string;
// parse sign // parse sign
if (*string == '-') ++string; if (*string == '-') ++string;
@ -409,21 +404,21 @@ namespace
if (!*string) return false; if (!*string) return false;
// if there is no integer part, there should be a decimal part with at least one digit // if there is no integer part, there should be a decimal part with at least one digit
if (!is_chartypex(string[0], ctx_digit) && (string[0] != '.' || !is_chartypex(string[1], ctx_digit))) return false; if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false;
// parse integer part // parse integer part
while (is_chartypex(*string, ctx_digit)) ++string; while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
// parse decimal part // parse decimal part
if (*string == '.') if (*string == '.')
{ {
++string; ++string;
while (is_chartypex(*string, ctx_digit)) ++string; while (IS_CHARTYPEX(*string, ctx_digit)) ++string;
} }
// parse trailing whitespace // parse trailing whitespace
while (is_chartypex(*string, ctx_space)) ++string; while (IS_CHARTYPEX(*string, ctx_space)) ++string;
return *string == 0; return *string == 0;
} }
@ -919,7 +914,7 @@ namespace pugi
{ {
contents_clear(); contents_clear();
while (is_chartypex(*m_cur, ctx_space)) ++m_cur; while (IS_CHARTYPEX(*m_cur, ctx_space)) ++m_cur;
switch (*m_cur) switch (*m_cur)
{ {
@ -1050,13 +1045,13 @@ namespace pugi
m_cur += 2; m_cur += 2;
m_cur_lexeme = lex_double_dot; m_cur_lexeme = lex_double_dot;
} }
else if (is_chartypex(*(m_cur+1), ctx_digit)) else if (IS_CHARTYPEX(*(m_cur+1), ctx_digit))
{ {
m_cur_lexeme_contents.begin = m_cur; // . m_cur_lexeme_contents.begin = m_cur; // .
++m_cur; ++m_cur;
while (is_chartypex(*m_cur, ctx_digit)) m_cur++; while (IS_CHARTYPEX(*m_cur, ctx_digit)) m_cur++;
m_cur_lexeme_contents.end = m_cur; m_cur_lexeme_contents.end = m_cur;
@ -1110,28 +1105,28 @@ namespace pugi
break; break;
default: default:
if (is_chartypex(*m_cur, ctx_digit)) if (IS_CHARTYPEX(*m_cur, ctx_digit))
{ {
m_cur_lexeme_contents.begin = m_cur; m_cur_lexeme_contents.begin = m_cur;
while (is_chartypex(*m_cur, ctx_digit)) m_cur++; while (IS_CHARTYPEX(*m_cur, ctx_digit)) m_cur++;
if (*m_cur == '.' && is_chartypex(*(m_cur+1), ctx_digit)) if (*m_cur == '.' && IS_CHARTYPEX(*(m_cur+1), ctx_digit))
{ {
m_cur++; m_cur++;
while (is_chartypex(*m_cur, ctx_digit)) m_cur++; while (IS_CHARTYPEX(*m_cur, ctx_digit)) m_cur++;
} }
m_cur_lexeme_contents.end = m_cur; m_cur_lexeme_contents.end = m_cur;
m_cur_lexeme = lex_number; m_cur_lexeme = lex_number;
} }
else if (is_chartypex(*m_cur, ctx_start_symbol)) else if (IS_CHARTYPEX(*m_cur, ctx_start_symbol))
{ {
m_cur_lexeme_contents.begin = m_cur; m_cur_lexeme_contents.begin = m_cur;
while (is_chartypex(*m_cur, ctx_symbol)) m_cur++; while (IS_CHARTYPEX(*m_cur, ctx_symbol)) m_cur++;
if (m_cur[0] == ':') if (m_cur[0] == ':')
{ {
@ -1139,17 +1134,17 @@ namespace pugi
{ {
m_cur += 2; // :* m_cur += 2; // :*
} }
else if (is_chartypex(m_cur[1], ctx_symbol)) // namespace test qname else if (IS_CHARTYPEX(m_cur[1], ctx_symbol)) // namespace test qname
{ {
m_cur++; // : m_cur++; // :
while (is_chartypex(*m_cur, ctx_symbol)) m_cur++; while (IS_CHARTYPEX(*m_cur, ctx_symbol)) m_cur++;
} }
} }
m_cur_lexeme_contents.end = m_cur; m_cur_lexeme_contents.end = m_cur;
while (is_chartypex(*m_cur, ctx_space)) ++m_cur; while (IS_CHARTYPEX(*m_cur, ctx_space)) ++m_cur;
m_cur_lexeme = lex_string; m_cur_lexeme = lex_string;
} }
@ -2276,7 +2271,7 @@ namespace pugi
for (string_t::const_iterator it = s.begin(); it != s.end(); ++it) for (string_t::const_iterator it = s.begin(); it != s.end(); ++it)
{ {
if (is_chartypex(*it, ctx_space)) if (IS_CHARTYPEX(*it, ctx_space))
{ {
if (!r.empty() && r[r.size() - 1] != ' ') if (!r.empty() && r[r.size() - 1] != ' ')
r += ' '; r += ' ';
@ -3342,7 +3337,7 @@ namespace pugi
// This is either a function call, or not - if not, we shall proceed with location path // This is either a function call, or not - if not, we shall proceed with location path
const char_t* state = m_lexer.state(); const char_t* state = m_lexer.state();
while (is_chartypex(*state, ctx_space)) ++state; while (IS_CHARTYPEX(*state, ctx_space)) ++state;
if (*state != '(') return parse_location_path(); if (*state != '(') return parse_location_path();