XPath: Optimize attribute axis lookup

When looking for an attribute by name, finding the first attribute means
we can stop looking since attribute names are unique. This makes some
queries faster by 40%.

Another very common pattern in XPath queries is finding an attribute with
a specified value using a predicate (@name = 'value'). While we perform an
optimal amount of traversal in that case, there is a substantial overhead
with evaluating the nodes, saving and restoring the stack state, pushing
the attribute node into a set, etc. Detecting this pattern allows us to
use optimized code, resulting in up to 2x speedup for some queries.

git-svn-id: https://pugixml.googlecode.com/svn/trunk@1061 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
Arseny Kapoulkine 2014-10-16 03:46:42 +00:00
parent 883031fb45
commit 5da51dff27
2 changed files with 60 additions and 15 deletions

View File

@ -7473,6 +7473,11 @@ PUGI__NS_BEGIN
*write = 0;
}
inline bool is_xpath_attribute(const char_t* name)
{
return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
}
struct xpath_variable_boolean: xpath_variable
{
xpath_variable_boolean(): value(false)
@ -8217,7 +8222,6 @@ PUGI__NS_BEGIN
ast_func_normalize_space_0, // normalize-space()
ast_func_normalize_space_1, // normalize-space(left)
ast_func_translate, // translate(left, right, third)
ast_func_translate_table, // translate(left, right, third) where right/third are constants
ast_func_boolean, // boolean(left)
ast_func_not, // not(left)
ast_func_true, // true()
@ -8230,7 +8234,10 @@ PUGI__NS_BEGIN
ast_func_ceiling, // ceiling(left)
ast_func_round, // round(left)
ast_step, // process set left with step
ast_step_root // select root node
ast_step_root, // select root node
ast_opt_translate_table, // translate(left, right, third) where right/third are constants
ast_opt_compare_attribute // @name = 'string'
};
enum axis_t
@ -8296,7 +8303,7 @@ PUGI__NS_BEGIN
xpath_variable* variable;
// node test for ast_step (node name/namespace/node type/pi target)
const char_t* nodetest;
// table for ast_func_translate_table
// table for ast_opt_translate_table
const unsigned char* table;
} _data;
@ -8498,36 +8505,38 @@ PUGI__NS_BEGIN
}
}
void step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc)
bool step_push(xpath_node_set_raw& ns, const xml_attribute a, const xml_node parent, xpath_allocator* alloc)
{
if (!a) return;
if (!a) return false;
const char_t* name = a.name();
// There are no attribute nodes corresponding to attributes that declare namespaces
// That is, "xmlns:..." or "xmlns"
if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
switch (_test)
{
case nodetest_name:
if (strequal(name, _data.nodetest))
if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
{
ns.push_back(xpath_node(a, parent), alloc);
return true;
}
break;
case nodetest_type_node:
case nodetest_all:
ns.push_back(xpath_node(a, parent), alloc);
if (is_xpath_attribute(name))
ns.push_back(xpath_node(a, parent), alloc);
break;
case nodetest_all_in_namespace:
if (starts_with(name, _data.nodetest))
if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
ns.push_back(xpath_node(a, parent), alloc);
break;
default:
;
}
return false;
}
void step_push(xpath_node_set_raw& ns, const xml_node n, xpath_allocator* alloc)
@ -8589,7 +8598,8 @@ PUGI__NS_BEGIN
case axis_attribute:
{
for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
step_push(ns, a, n, alloc);
if (step_push(ns, a, n, alloc))
break; // step_push returns true if it found a unique attribute
break;
}
@ -9007,6 +9017,15 @@ PUGI__NS_BEGIN
return false;
}
case ast_opt_compare_attribute:
{
const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
}
case ast_variable:
{
assert(_rettype == _data.variable->type());
@ -9412,7 +9431,7 @@ PUGI__NS_BEGIN
return s;
}
case ast_func_translate_table:
case ast_opt_translate_table:
{
xpath_string s = _left->eval_string(c, stack);
@ -9610,10 +9629,18 @@ PUGI__NS_BEGIN
if (table)
{
_type = ast_func_translate_table;
_type = ast_opt_translate_table;
_data.table = table;
}
}
// Use optimized path for @attr = 'value' or @attr = $value
if (_type == ast_op_equal &&
_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->rettype() == xpath_type_string)))
{
_type = ast_opt_compare_attribute;
}
}
bool is_posinv_expr() const

View File

@ -561,4 +561,22 @@ TEST_XML(xpath_paths_unsorted_child, "<node><foo><bar/></foo><node><foo><bar/></
CHECK(ns[2] == nss[1]);
}
TEST_XML(xpath_paths_optimize_compare_attribute, "<node id='1' /><node id='2' /><node xmlns='3' />")
{
CHECK_XPATH_NODESET(doc, STR("node[@id = '1']")) % 2;
CHECK_XPATH_NODESET(doc, STR("node[@id = '2']")) % 4;
CHECK_XPATH_NODESET(doc, STR("node[@id = 2]")) % 4;
CHECK_XPATH_NODESET(doc, STR("node[@id[. > 3] = '2']"));
CHECK_XPATH_NODESET(doc, STR("node['1' = @id]")) % 2;
xpath_variable_set set;
set.set(STR("var1"), STR("2"));
set.set(STR("var2"), 2.0);
CHECK_XPATH_NODESET_VAR(doc, STR("node[@id = $var1]"), &set) % 4;
CHECK_XPATH_NODESET_VAR(doc, STR("node[@id = $var2]"), &set) % 4;
CHECK_XPATH_NODESET(doc, STR("node[@xmlns = '3']"));
}
#endif