Refactoring: Merged two chartype tables
git-svn-id: http://pugixml.googlecode.com/svn/trunk@672 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
parent
0e6d53c9e5
commit
86f9ea3c2c
@ -909,61 +909,35 @@ namespace
|
|||||||
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
|
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
|
||||||
};
|
};
|
||||||
|
|
||||||
enum chartypex
|
enum chartypex_t
|
||||||
{
|
{
|
||||||
ctx_space = 1, // \r, \n, space, tab
|
ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
|
||||||
ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _
|
ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
|
||||||
ctx_digit = 4, // 0-9
|
ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
|
||||||
ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
|
ctx_digit = 8, // 0-9
|
||||||
|
ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
|
||||||
};
|
};
|
||||||
|
|
||||||
const unsigned char chartypex_table[256] =
|
const unsigned char chartypex_table[256] =
|
||||||
{
|
{
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
|
||||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47
|
0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
|
||||||
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63
|
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
|
||||||
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
|
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
|
|
||||||
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
|
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
|
|
||||||
|
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+
|
0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
|
|
||||||
};
|
|
||||||
|
|
||||||
enum output_chartype_t
|
|
||||||
{
|
|
||||||
oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
|
|
||||||
oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, "
|
|
||||||
};
|
|
||||||
|
|
||||||
const unsigned char output_chartype_table[256] =
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
|
||||||
{
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||||
0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||||
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-128
|
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
@ -974,7 +948,6 @@ namespace
|
|||||||
|
|
||||||
#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
|
#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
|
||||||
#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
|
#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
|
||||||
#define IS_OUTPUT_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, output_chartype_table)
|
|
||||||
|
|
||||||
bool is_little_endian()
|
bool is_little_endian()
|
||||||
{
|
{
|
||||||
@ -2708,14 +2681,14 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type)
|
void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
|
||||||
{
|
{
|
||||||
while (*s)
|
while (*s)
|
||||||
{
|
{
|
||||||
const char_t* prev = s;
|
const char_t* prev = s;
|
||||||
|
|
||||||
// While *s is a usual symbol
|
// While *s is a usual symbol
|
||||||
while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s;
|
while (!IS_CHARTYPEX(*s, type)) ++s;
|
||||||
|
|
||||||
writer.write(prev, static_cast<size_t>(s - prev));
|
writer.write(prev, static_cast<size_t>(s - prev));
|
||||||
|
|
||||||
@ -2781,7 +2754,7 @@ namespace
|
|||||||
writer.write(a.name()[0] ? a.name() : default_name);
|
writer.write(a.name()[0] ? a.name() : default_name);
|
||||||
writer.write('=', '"');
|
writer.write('=', '"');
|
||||||
|
|
||||||
text_output_escaped(writer, a.value(), oct_special_attr);
|
text_output_escaped(writer, a.value(), ctx_special_attr);
|
||||||
|
|
||||||
writer.write('"');
|
writer.write('"');
|
||||||
}
|
}
|
||||||
@ -2834,7 +2807,7 @@ namespace
|
|||||||
{
|
{
|
||||||
writer.write('>');
|
writer.write('>');
|
||||||
|
|
||||||
text_output_escaped(writer, node.first_child().value(), oct_special_pcdata);
|
text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata);
|
||||||
|
|
||||||
writer.write('<', '/');
|
writer.write('<', '/');
|
||||||
writer.write(name);
|
writer.write(name);
|
||||||
@ -2859,7 +2832,7 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
case node_pcdata:
|
case node_pcdata:
|
||||||
text_output_escaped(writer, node.value(), oct_special_pcdata);
|
text_output_escaped(writer, node.value(), ctx_special_pcdata);
|
||||||
if ((flags & format_raw) == 0) writer.write('\n');
|
if ((flags & format_raw) == 0) writer.write('\n');
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -5153,7 +5126,7 @@ namespace
|
|||||||
bool check_string_to_number_format(const char_t* string)
|
bool check_string_to_number_format(const char_t* string)
|
||||||
{
|
{
|
||||||
// parse leading whitespace
|
// parse leading whitespace
|
||||||
while (IS_CHARTYPEX(*string, ctx_space)) ++string;
|
while (IS_CHARTYPE(*string, ct_space)) ++string;
|
||||||
|
|
||||||
// parse sign
|
// parse sign
|
||||||
if (*string == '-') ++string;
|
if (*string == '-') ++string;
|
||||||
@ -5175,7 +5148,7 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parse trailing whitespace
|
// parse trailing whitespace
|
||||||
while (IS_CHARTYPEX(*string, ctx_space)) ++string;
|
while (IS_CHARTYPE(*string, ct_space)) ++string;
|
||||||
|
|
||||||
return *string == 0;
|
return *string == 0;
|
||||||
}
|
}
|
||||||
@ -5319,10 +5292,10 @@ namespace
|
|||||||
{
|
{
|
||||||
char_t ch = *it++;
|
char_t ch = *it++;
|
||||||
|
|
||||||
if (IS_CHARTYPEX(ch, ctx_space))
|
if (IS_CHARTYPE(ch, ct_space))
|
||||||
{
|
{
|
||||||
// replace whitespace sequence with single space
|
// replace whitespace sequence with single space
|
||||||
while (IS_CHARTYPEX(*it, ctx_space)) it++;
|
while (IS_CHARTYPE(*it, ct_space)) it++;
|
||||||
|
|
||||||
// avoid leading spaces
|
// avoid leading spaces
|
||||||
if (write != buffer) *write++ = ' ';
|
if (write != buffer) *write++ = ' ';
|
||||||
@ -5331,7 +5304,7 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
// remove trailing space
|
// remove trailing space
|
||||||
if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--;
|
if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--;
|
||||||
|
|
||||||
// zero-terminate
|
// zero-terminate
|
||||||
*write = 0;
|
*write = 0;
|
||||||
@ -5760,7 +5733,7 @@ namespace pugi
|
|||||||
{
|
{
|
||||||
const char_t* cur = _cur;
|
const char_t* cur = _cur;
|
||||||
|
|
||||||
while (IS_CHARTYPEX(*cur, ctx_space)) ++cur;
|
while (IS_CHARTYPE(*cur, ct_space)) ++cur;
|
||||||
|
|
||||||
// save lexeme position for error reporting
|
// save lexeme position for error reporting
|
||||||
_cur_lexeme_pos = cur;
|
_cur_lexeme_pos = cur;
|
||||||
@ -7915,7 +7888,7 @@ namespace pugi
|
|||||||
// This is either a function call, or not - if not, we shall proceed with location path
|
// This is either a function call, or not - if not, we shall proceed with location path
|
||||||
const char_t* state = _lexer.state();
|
const char_t* state = _lexer.state();
|
||||||
|
|
||||||
while (IS_CHARTYPEX(*state, ctx_space)) ++state;
|
while (IS_CHARTYPE(*state, ct_space)) ++state;
|
||||||
|
|
||||||
if (*state != '(') return parse_location_path();
|
if (*state != '(') return parse_location_path();
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user