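Refactoring: Merged two chartype tables (output_chartype_table is folded into chartypex_table; whitespace checks that used ctx_space now use ct_space from chartype_table)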

git-svn-id: http://pugixml.googlecode.com/svn/trunk@672 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine 2010-08-29 15:32:52 +00:00
parent 0e6d53c9e5
commit 86f9ea3c2c

View File

@ -909,61 +909,35 @@ namespace
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
}; };
enum chartypex enum chartypex_t
{ {
ctx_space = 1, // \r, \n, space, tab ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _ ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
ctx_digit = 4, // 0-9 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . ctx_digit = 8, // 0-9
ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
}; };
const unsigned char chartypex_table[256] = const unsigned char chartypex_table[256] =
{ {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
};
enum output_chartype_t
{
oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, "
};
const unsigned char output_chartype_table[256] = 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
{ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}; };
#ifdef PUGIXML_WCHAR_MODE #ifdef PUGIXML_WCHAR_MODE
@ -974,7 +948,6 @@ namespace
#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table) #define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table) #define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
#define IS_OUTPUT_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, output_chartype_table)
bool is_little_endian() bool is_little_endian()
{ {
@ -2708,14 +2681,14 @@ namespace
} }
} }
void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type) void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
{ {
while (*s) while (*s)
{ {
const char_t* prev = s; const char_t* prev = s;
// While *s is a usual symbol // While *s is a usual symbol
while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; while (!IS_CHARTYPEX(*s, type)) ++s;
writer.write(prev, static_cast<size_t>(s - prev)); writer.write(prev, static_cast<size_t>(s - prev));
@ -2781,7 +2754,7 @@ namespace
writer.write(a.name()[0] ? a.name() : default_name); writer.write(a.name()[0] ? a.name() : default_name);
writer.write('=', '"'); writer.write('=', '"');
text_output_escaped(writer, a.value(), oct_special_attr); text_output_escaped(writer, a.value(), ctx_special_attr);
writer.write('"'); writer.write('"');
} }
@ -2834,7 +2807,7 @@ namespace
{ {
writer.write('>'); writer.write('>');
text_output_escaped(writer, node.first_child().value(), oct_special_pcdata); text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata);
writer.write('<', '/'); writer.write('<', '/');
writer.write(name); writer.write(name);
@ -2859,7 +2832,7 @@ namespace
} }
case node_pcdata: case node_pcdata:
text_output_escaped(writer, node.value(), oct_special_pcdata); text_output_escaped(writer, node.value(), ctx_special_pcdata);
if ((flags & format_raw) == 0) writer.write('\n'); if ((flags & format_raw) == 0) writer.write('\n');
break; break;
@ -5153,7 +5126,7 @@ namespace
bool check_string_to_number_format(const char_t* string) bool check_string_to_number_format(const char_t* string)
{ {
// parse leading whitespace // parse leading whitespace
while (IS_CHARTYPEX(*string, ctx_space)) ++string; while (IS_CHARTYPE(*string, ct_space)) ++string;
// parse sign // parse sign
if (*string == '-') ++string; if (*string == '-') ++string;
@ -5175,7 +5148,7 @@ namespace
} }
// parse trailing whitespace // parse trailing whitespace
while (IS_CHARTYPEX(*string, ctx_space)) ++string; while (IS_CHARTYPE(*string, ct_space)) ++string;
return *string == 0; return *string == 0;
} }
@ -5319,10 +5292,10 @@ namespace
{ {
char_t ch = *it++; char_t ch = *it++;
if (IS_CHARTYPEX(ch, ctx_space)) if (IS_CHARTYPE(ch, ct_space))
{ {
// replace whitespace sequence with single space // replace whitespace sequence with single space
while (IS_CHARTYPEX(*it, ctx_space)) it++; while (IS_CHARTYPE(*it, ct_space)) it++;
// avoid leading spaces // avoid leading spaces
if (write != buffer) *write++ = ' '; if (write != buffer) *write++ = ' ';
@ -5331,7 +5304,7 @@ namespace
} }
// remove trailing space // remove trailing space
if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--; if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--;
// zero-terminate // zero-terminate
*write = 0; *write = 0;
@ -5760,7 +5733,7 @@ namespace pugi
{ {
const char_t* cur = _cur; const char_t* cur = _cur;
while (IS_CHARTYPEX(*cur, ctx_space)) ++cur; while (IS_CHARTYPE(*cur, ct_space)) ++cur;
// save lexeme position for error reporting // save lexeme position for error reporting
_cur_lexeme_pos = cur; _cur_lexeme_pos = cur;
@ -7915,7 +7888,7 @@ namespace pugi
// This is either a function call, or not - if not, we shall proceed with location path // This is either a function call, or not - if not, we shall proceed with location path
const char_t* state = _lexer.state(); const char_t* state = _lexer.state();
while (IS_CHARTYPEX(*state, ctx_space)) ++state; while (IS_CHARTYPE(*state, ct_space)) ++state;
if (*state != '(') return parse_location_path(); if (*state != '(') return parse_location_path();