XPath: Implement optimized translate()

translate() with constant arguments now uses a 128-byte table and a table
lookup instead of searching characters in the source string. The table is
generated during query optimization.

git-svn-id: https://pugixml.googlecode.com/svn/trunk@1052 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
Arseny Kapoulkine 2014-10-05 07:57:58 +00:00
parent b17501c3fb
commit 5239fcbd00
2 changed files with 101 additions and 9 deletions

View File

@ -432,6 +432,7 @@ PUGI__NS_BEGIN
// get header
xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
assert(header);
// deallocate
size_t page_offset = sizeof(xml_memory_page) + header->page_offset;
@ -7345,10 +7346,8 @@ PUGI__NS_BEGIN
*write = 0;
}
PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
{
size_t to_length = strlength(to);
char_t* write = buffer;
while (*buffer)
@ -7367,6 +7366,65 @@ PUGI__NS_BEGIN
*write = 0;
}
PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
{
unsigned char table[128] = {};
while (*from)
{
unsigned int fc = static_cast<unsigned int>(*from);
unsigned int tc = static_cast<unsigned int>(*to);
if (fc >= 128 || tc >= 128)
return 0;
if (!table[fc])
table[fc] = tc ? static_cast<unsigned char>(tc) : 128;
from++;
if (tc) to++;
}
for (int i = 0; i < 128; ++i)
if (!table[i])
table[i] = static_cast<unsigned char>(i);
void* result = alloc->allocate_nothrow(sizeof(table));
if (result)
{
memcpy(result, table, sizeof(table));
}
return static_cast<unsigned char*>(result);
}
PUGI__FN void translate_table(char_t* buffer, const unsigned char* table)
{
char_t* write = buffer;
while (*buffer)
{
char_t ch = *buffer++;
unsigned int index = static_cast<unsigned int>(ch);
if (index < 128)
{
unsigned char code = table[index];
*write = static_cast<char_t>(code);
write += 1 - (code >> 7);
}
else
{
*write++ = ch;
}
}
// zero-terminate
*write = 0;
}
struct xpath_variable_boolean: xpath_variable
{
xpath_variable_boolean(): value(false)
@ -8111,6 +8169,7 @@ PUGI__NS_BEGIN
ast_func_normalize_space_0, // normalize-space()
ast_func_normalize_space_1, // normalize-space(left)
ast_func_translate, // translate(left, right, third)
ast_func_translate_table, // translate(left, right, third) where right/third are constants
ast_func_boolean, // boolean(left)
ast_func_not, // not(left)
ast_func_true, // true()
@ -8189,6 +8248,8 @@ PUGI__NS_BEGIN
xpath_variable* variable;
// node test for ast_step (node name/namespace/node type/pi target)
const char_t* nodetest;
// table for ast_func_translate_table
const unsigned char* table;
} _data;
xpath_ast_node(const xpath_ast_node&);
@ -9298,7 +9359,16 @@ PUGI__NS_BEGIN
xpath_string from = _right->eval_string(c, swapped_stack);
xpath_string to = _right->_next->eval_string(c, swapped_stack);
translate(s.data(stack.result), from.c_str(), to.c_str());
translate(s.data(stack.result), from.c_str(), to.c_str(), to.length());
return s;
}
case ast_func_translate_table:
{
xpath_string s = _left->eval_string(c, stack);
translate_table(s.data(stack.result), _data.table);
return s;
}
@ -9468,11 +9538,11 @@ PUGI__NS_BEGIN
}
}
void optimize()
void optimize(xpath_allocator* alloc)
{
if (_left) _left->optimize();
if (_right) _right->optimize();
if (_next) _next->optimize();
if (_left) _left->optimize(alloc);
if (_right) _right->optimize(alloc);
if (_next) _next->optimize(alloc);
// Replace descendant-or-self::node()/child::foo with descendant::foo
// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
@ -9484,6 +9554,18 @@ PUGI__NS_BEGIN
_axis = axis_descendant;
_left = _left->_left;
}
// Replace translate() with constant arguments with a table
if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
{
unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
if (table)
{
_type = ast_func_translate_table;
_data.table = table;
}
}
}
bool is_posinv_expr() const
@ -10838,7 +10920,7 @@ namespace pugi
if (qimpl->root)
{
qimpl->root->optimize();
qimpl->root->optimize(&qimpl->alloc);
_impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
_result.error = 0;

View File

@ -557,6 +557,16 @@ TEST(xpath_string_translate)
CHECK_XPATH_FAIL(STR("translate('a', 'b', 'c', 'd')"));
}
TEST(xpath_string_translate_table)
{
xml_node c;
CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc', 'ABC')"), STR("ABCd\xe9 "));
CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc\xe9', 'ABC!')"), STR("ABCd! "));
CHECK_XPATH_STRING(c, STR("translate('abcde', concat('abc', 'd'), 'ABCD')"), STR("ABCDe"));
CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe"));
}
TEST_XML(xpath_nodeset_last, "<node><c1/><c1/><c2/><c3/><c3/><c3/><c3/></node>")
{
xml_node n = doc.child(STR("node"));