XPath stack optimization: rewrite part of the recursive descent parser as a precedence-climbing parser to reduce stack usage

git-svn-id: http://pugixml.googlecode.com/svn/trunk@931 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine@gmail.com 2012-11-18 01:11:50 +00:00
parent cee7eca229
commit 4fe55906fa

View File

@ -7279,6 +7279,7 @@ PUGI__NS_BEGIN
enum ast_type_t
{
ast_unknown,
ast_op_or, // left or right
ast_op_and, // left and right
ast_op_equal, // left = right
@ -9338,7 +9339,9 @@ PUGI__NS_BEGIN
// | FilterExpr
// | FilterExpr '/' RelativeLocationPath
// | FilterExpr '//' RelativeLocationPath
xpath_ast_node* parse_path_expression()
// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
// UnaryExpr ::= UnionExpr | '-' UnaryExpr
xpath_ast_node* parse_path_or_unary_expression()
{
// Clarification.
// PathExpr begins with either LocationPath or FilterExpr.
@ -9384,170 +9387,136 @@ PUGI__NS_BEGIN
return n;
}
else return parse_location_path();
}
// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
xpath_ast_node* parse_union_expression()
{
xpath_ast_node* n = parse_path_expression();
while (_lexer.current() == lex_union)
else if (_lexer.current() == lex_minus)
{
_lexer.next();
xpath_ast_node* expr = parse_union_expression();
if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
throw_error("Union operator has to be applied to node sets");
n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
}
return n;
}
// UnaryExpr ::= UnionExpr | '-' UnaryExpr
xpath_ast_node* parse_unary_expression()
{
if (_lexer.current() == lex_minus)
{
_lexer.next();
xpath_ast_node* expr = parse_unary_expression();
// precedence 7+ - only parses union expressions
xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
}
else return parse_union_expression();
else
return parse_location_path();
}
// MultiplicativeExpr ::= UnaryExpr
// | MultiplicativeExpr '*' UnaryExpr
// | MultiplicativeExpr 'div' UnaryExpr
// | MultiplicativeExpr 'mod' UnaryExpr
xpath_ast_node* parse_multiplicative_expression()
struct binary_op_t
{
xpath_ast_node* n = parse_unary_expression();
ast_type_t asttype;
xpath_value_type rettype;
int precedence;
while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
(_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
// Default state: describes "no binary operator" (asttype is ast_unknown, precedence is 0).
binary_op_t()
    : asttype(ast_unknown)
    , rettype(xpath_type_none)
    , precedence(0)
{
}
// Describes a concrete binary operator: the AST node type to create, the value type
// the resulting node reports, and the operator's precedence level.
binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_)
    : asttype(asttype_)
    , rettype(rettype_)
    , precedence(precedence_)
{
}
// Classifies the lexer's current lexeme as a binary operator.
// Only lexer.current() and lexer.contents() are consulted here.
// Returns a default-constructed descriptor (asttype == ast_unknown) when the lexeme
// is not one of the recognized binary operators.
static binary_op_t parse(xpath_lexer& lexer)
{
    switch (lexer.current())
    {
    // precedence 7: union
    case lex_union:
        return binary_op_t(ast_op_union, xpath_type_node_set, 7);

    // precedence 6: multiplicative ('div' and 'mod' are handled with the string lexemes below)
    case lex_multiply:
        return binary_op_t(ast_op_multiply, xpath_type_number, 6);

    // precedence 5: additive
    case lex_plus:
        return binary_op_t(ast_op_add, xpath_type_number, 5);

    case lex_minus:
        return binary_op_t(ast_op_subtract, xpath_type_number, 5);

    // precedence 4: relational
    case lex_less:
        return binary_op_t(ast_op_less, xpath_type_boolean, 4);

    case lex_less_or_equal:
        return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);

    case lex_greater:
        return binary_op_t(ast_op_greater, xpath_type_boolean, 4);

    case lex_greater_or_equal:
        return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);

    // precedence 3: equality
    case lex_equal:
        return binary_op_t(ast_op_equal, xpath_type_boolean, 3);

    case lex_not_equal:
        return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);

    // word operators arrive as string lexemes; any other string is not an operator
    case lex_string:
    {
        if (lexer.contents() == PUGIXML_TEXT("or"))
            return binary_op_t(ast_op_or, xpath_type_boolean, 1);

        if (lexer.contents() == PUGIXML_TEXT("and"))
            return binary_op_t(ast_op_and, xpath_type_boolean, 2);

        if (lexer.contents() == PUGIXML_TEXT("div"))
            return binary_op_t(ast_op_divide, xpath_type_number, 6);

        if (lexer.contents() == PUGIXML_TEXT("mod"))
            return binary_op_t(ast_op_mod, xpath_type_number, 6);

        break;
    }

    default:
        break;
    }

    // not a binary operator
    return binary_op_t();
}
};
xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
{
binary_op_t op = binary_op_t::parse(_lexer);
while (op.asttype != ast_unknown && op.precedence >= limit)
{
ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
_lexer.next();
xpath_ast_node* expr = parse_unary_expression();
xpath_ast_node* rhs = parse_path_or_unary_expression();
n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
}
binary_op_t nextop = binary_op_t::parse(_lexer);
return n;
}
// AdditiveExpr ::= MultiplicativeExpr
// | AdditiveExpr '+' MultiplicativeExpr
// | AdditiveExpr '-' MultiplicativeExpr
xpath_ast_node* parse_additive_expression()
while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
{
xpath_ast_node* n = parse_multiplicative_expression();
rhs = parse_expression_rec(rhs, nextop.precedence);
while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
{
lexeme_t l = _lexer.current();
_lexer.next();
xpath_ast_node* expr = parse_multiplicative_expression();
n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
nextop = binary_op_t::parse(_lexer);
}
return n;
if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
throw_error("Union operator has to be applied to node sets");
lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
op = binary_op_t::parse(_lexer);
}
return lhs;
}
// Expr ::= OrExpr
// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
// EqualityExpr ::= RelationalExpr
// | EqualityExpr '=' RelationalExpr
// | EqualityExpr '!=' RelationalExpr
// RelationalExpr ::= AdditiveExpr
// | RelationalExpr '<' AdditiveExpr
// | RelationalExpr '>' AdditiveExpr
// | RelationalExpr '<=' AdditiveExpr
// | RelationalExpr '>=' AdditiveExpr
// Parses a left-associative chain of '<', '<=', '>' and '>=' comparisons whose
// operands are additive expressions; every comparison node is typed as boolean.
xpath_ast_node* parse_relational_expression()
{
    xpath_ast_node* result = parse_additive_expression();

    while (true)
    {
        const lexeme_t token = _lexer.current();

        // stop as soon as the current lexeme is not a relational operator
        if (token != lex_less && token != lex_less_or_equal &&
            token != lex_greater && token != lex_greater_or_equal)
            break;

        _lexer.next();

        xpath_ast_node* rhs = parse_additive_expression();

        result = new (alloc_node()) xpath_ast_node(
            token == lex_less ? ast_op_less :
            token == lex_greater ? ast_op_greater :
            token == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal,
            xpath_type_boolean, result, rhs);
    }

    return result;
}
// EqualityExpr ::= RelationalExpr
// | EqualityExpr '=' RelationalExpr
// | EqualityExpr '!=' RelationalExpr
// Parses a left-associative chain of '=' and '!=' comparisons whose operands are
// relational expressions; every comparison node is typed as boolean.
xpath_ast_node* parse_equality_expression()
{
    xpath_ast_node* result = parse_relational_expression();

    while (true)
    {
        const lexeme_t token = _lexer.current();

        // anything other than '=' or '!=' ends the chain
        if (token != lex_equal && token != lex_not_equal)
            break;

        _lexer.next();

        xpath_ast_node* rhs = parse_relational_expression();

        result = new (alloc_node()) xpath_ast_node(
            token == lex_equal ? ast_op_equal : ast_op_not_equal,
            xpath_type_boolean, result, rhs);
    }

    return result;
}
// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
// Parses a left-associative chain of 'and' operators whose operands are equality
// expressions; every resulting node is typed as boolean.
xpath_ast_node* parse_and_expression()
{
    xpath_ast_node* result = parse_equality_expression();

    while (true)
    {
        // 'and' is delivered as a string lexeme; contents() is only consulted for strings
        if (_lexer.current() != lex_string)
            break;

        if (!(_lexer.contents() == PUGIXML_TEXT("and")))
            break;

        _lexer.next();

        xpath_ast_node* rhs = parse_equality_expression();

        result = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, result, rhs);
    }

    return result;
}
// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
// Parses a left-associative chain of 'or' operators whose operands are 'and'
// expressions; every resulting node is typed as boolean.
xpath_ast_node* parse_or_expression()
{
    xpath_ast_node* result = parse_and_expression();

    while (true)
    {
        // 'or' is delivered as a string lexeme; contents() is only consulted for strings
        if (_lexer.current() != lex_string)
            break;

        if (!(_lexer.contents() == PUGIXML_TEXT("or")))
            break;

        _lexer.next();

        xpath_ast_node* rhs = parse_and_expression();

        result = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, result, rhs);
    }

    return result;
}
// Expr ::= OrExpr
// AdditiveExpr ::= MultiplicativeExpr
// | AdditiveExpr '+' MultiplicativeExpr
// | AdditiveExpr '-' MultiplicativeExpr
// MultiplicativeExpr ::= UnaryExpr
// | MultiplicativeExpr '*' UnaryExpr
// | MultiplicativeExpr 'div' UnaryExpr
// | MultiplicativeExpr 'mod' UnaryExpr
// Entry point for parsing a full expression (the Expr rule of the grammar).
xpath_ast_node* parse_expression()
{
// Delegates to the 'or' level, which is the top of the expression grammar (Expr ::= OrExpr).
return parse_or_expression();
// NOTE(review): unreachable as written - the return above always exits first.
// This looks like the precedence-climbing replacement for that line (a limit of 0
// admits every binary operator, whose precedences are 1..7) - confirm which single
// return is intended to remain.
return parse_expression_rec(parse_path_or_unary_expression(), 0);
}
xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)