XPath: Replaced std::string with xpath_string, refactored normalize_space, namespace_uri and translate

git-svn-id: http://pugixml.googlecode.com/svn/trunk@657 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine 2010-08-29 15:21:23 +00:00
parent ea0dbe7884
commit 1e57d99484

View File

@ -58,11 +58,177 @@ namespace pugi
{ {
namespace impl namespace impl
{ {
size_t strlen(const char_t* s);
bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count); bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count);
void widen_ascii(wchar_t* dest, const char* source); void widen_ascii(wchar_t* dest, const char* source);
} }
} }
namespace
{
using namespace pugi;
class xpath_string
{
const char_t* _buffer;
bool _uses_heap;
static char_t* duplicate_string(const char_t* string, size_t length)
{
char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
if (!result) return 0; // $$ out of memory
memcpy(result, string, length * sizeof(char_t));
result[length] = 0;
return result;
}
static char_t* duplicate_string(const char_t* string)
{
return duplicate_string(string, impl::strlen(string));
}
public:
xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
{
}
~xpath_string()
{
if (_uses_heap) get_memory_deallocation_function()(const_cast<char_t*>(_buffer));
}
explicit xpath_string(const char_t* str, bool use_heap = true)
{
if (*str)
{
_buffer = use_heap ? duplicate_string(str) : str;
_uses_heap = use_heap;
}
else
{
_buffer = PUGIXML_TEXT("");
_uses_heap = false;
}
}
xpath_string(const char_t* begin, const char_t* end)
{
assert(begin <= end);
if (begin != end)
{
_buffer = duplicate_string(begin, static_cast<size_t>(end - begin));
_uses_heap = true;
}
else
{
_buffer = PUGIXML_TEXT("");
_uses_heap = false;
}
}
xpath_string(const xpath_string& o)
{
_buffer = o._uses_heap ? duplicate_string(o._buffer) : o._buffer;
_uses_heap = o._uses_heap;
}
xpath_string& operator=(const xpath_string& o)
{
xpath_string temp(o);
swap(temp);
return *this;
}
void swap(xpath_string& o)
{
std::swap(_buffer, o._buffer);
std::swap(_uses_heap, o._uses_heap);
}
void append(const xpath_string& o)
{
// skip empty sources
if (!*o._buffer) return;
// fast append for empty target and constant source
if (!*_buffer && !o._uses_heap)
{
_buffer = o._buffer;
_uses_heap = false;
}
else
{
// need to make heap copy
size_t target_length = impl::strlen(_buffer);
size_t source_length = impl::strlen(o._buffer);
size_t length = target_length + source_length;
// allocate new buffer
char_t* result = static_cast<char_t*>(get_memory_allocation_function()((length + 1) * sizeof(char_t)));
if (!result) return; // $$ out of memory
// append both strings in the new buffer
memcpy(result, _buffer, target_length * sizeof(char_t));
memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
result[length] = 0;
// destroy old buffer
if (_uses_heap) get_memory_deallocation_function()(const_cast<char_t*>(_buffer));
// finalize
_buffer = result;
_uses_heap = true;
}
}
const char_t* c_str() const
{
return _buffer;
}
size_t length() const
{
return impl::strlen(_buffer);
}
char_t* data()
{
// make private heap copy
if (!_uses_heap)
{
_buffer = duplicate_string(_buffer);
_uses_heap = true;
}
return const_cast<char_t*>(_buffer);
}
bool empty() const
{
return *_buffer == 0;
}
bool operator==(const xpath_string& o) const
{
return impl::strequal(_buffer, o._buffer);
}
bool operator!=(const xpath_string& o) const
{
return !impl::strequal(_buffer, o._buffer);
}
};
xpath_string xpath_string_const(const char_t* str)
{
return xpath_string(str, false);
}
}
namespace namespace
{ {
using namespace pugi; using namespace pugi;
@ -118,14 +284,23 @@ namespace
#ifdef PUGIXML_WCHAR_MODE #ifdef PUGIXML_WCHAR_MODE
return wcschr(s, c); return wcschr(s, c);
#else #else
return ::strchr(s, c); return strchr(s, c);
#endif #endif
} }
string_t string_value(const xpath_node& na) const char_t* find_substring(const char_t* s, const char_t* p)
{
#ifdef PUGIXML_WCHAR_MODE
return wcsstr(s, p);
#else
return strstr(s, p);
#endif
}
xpath_string string_value(const xpath_node& na)
{ {
if (na.attribute()) if (na.attribute())
return na.attribute().value(); return xpath_string_const(na.attribute().value());
else else
{ {
const xml_node& n = na.node(); const xml_node& n = na.node();
@ -136,19 +311,19 @@ namespace
case node_cdata: case node_cdata:
case node_comment: case node_comment:
case node_pi: case node_pi:
return n.value(); return xpath_string_const(n.value());
case node_document: case node_document:
case node_element: case node_element:
{ {
string_t result; xpath_string result;
xml_node cur = n.first_child(); xml_node cur = n.first_child();
while (cur && cur != n) while (cur && cur != n)
{ {
if (cur.type() == node_pcdata || cur.type() == node_cdata) if (cur.type() == node_pcdata || cur.type() == node_cdata)
result += cur.value(); result.append(xpath_string_const(cur.value()));
if (cur.first_child()) if (cur.first_child())
cur = cur.first_child(); cur = cur.first_child();
@ -167,7 +342,7 @@ namespace
} }
default: default:
return string_t(); return xpath_string();
} }
} }
} }
@ -391,11 +566,11 @@ namespace
} }
#endif #endif
string_t convert_number_to_string(double value) xpath_string convert_number_to_string(double value)
{ {
// try special number conversion // try special number conversion
const char_t* special = convert_number_to_string_special(value); const char_t* special = convert_number_to_string_special(value);
if (special) return special; if (special) return xpath_string_const(special);
// get mantissa + exponent form // get mantissa + exponent form
char mantissa_buffer[64]; char mantissa_buffer[64];
@ -451,7 +626,7 @@ namespace
assert(s < result + sizeof(result) / sizeof(result[0])); assert(s < result + sizeof(result) / sizeof(result[0]));
*s = 0; *s = 0;
return string_t(result); return xpath_string(result);
} }
bool check_string_to_number_format(const char_t* string) bool check_string_to_number_format(const char_t* string)
@ -535,30 +710,51 @@ namespace
return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
} }
const char_t* local_name(const char_t* name) const char_t* qualified_name(const xpath_node& node)
{ {
return node.attribute() ? node.attribute().name() : node.node().name();
}
const char_t* local_name(const xpath_node& node)
{
const char_t* name = qualified_name(node);
const char_t* p = find_char(name, ':'); const char_t* p = find_char(name, ':');
return p ? p + 1 : name; return p ? p + 1 : name;
} }
struct namespace_uri_predicate
{
const char_t* prefix;
size_t prefix_length;
namespace_uri_predicate(const char_t* name)
{
const char_t* pos = find_char(name, ':');
prefix = pos ? name : 0;
prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
}
bool operator()(const xml_attribute& a) const
{
const char_t* name = a.name();
if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
return prefix ? name[5] == ':' && impl::strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
}
};
const char_t* namespace_uri(const xml_node& node) const char_t* namespace_uri(const xml_node& node)
{ {
const char_t* pos = find_char(node.name(), ':'); namespace_uri_predicate pred = node.name();
string_t ns = PUGIXML_TEXT("xmlns");
if (pos)
{
ns += ':';
ns.append(node.name(), pos);
}
xml_node p = node; xml_node p = node;
while (p) while (p)
{ {
xml_attribute a = p.attribute(ns.c_str()); xml_attribute a = p.find_attribute(pred);
if (a) return a.value(); if (a) return a.value();
@ -570,19 +766,16 @@ namespace
const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent) const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
{ {
const char_t* pos = find_char(attr.name(), ':'); namespace_uri_predicate pred = attr.name();
// Default namespace does not apply to attributes // Default namespace does not apply to attributes
if (!pos) return PUGIXML_TEXT(""); if (!pred.prefix) return PUGIXML_TEXT("");
string_t ns = PUGIXML_TEXT("xmlns:");
ns.append(attr.name(), pos);
xml_node p = parent; xml_node p = parent;
while (p) while (p)
{ {
xml_attribute a = p.attribute(ns.c_str()); xml_attribute a = p.find_attribute(pred);
if (a) return a.value(); if (a) return a.value();
@ -592,6 +785,62 @@ namespace
return PUGIXML_TEXT(""); return PUGIXML_TEXT("");
} }
const char_t* namespace_uri(const xpath_node& node)
{
return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
}
void normalize_space(char_t* buffer)
{
char_t* write = buffer;
for (char_t* it = buffer; *it; )
{
char_t ch = *it++;
if (IS_CHARTYPEX(ch, ctx_space))
{
// replace whitespace sequence with single space
while (IS_CHARTYPEX(*it, ctx_space)) it++;
// avoid leading spaces
if (write != buffer) *write++ = ' ';
}
else *write++ = ch;
}
// remove trailing space
if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--;
// zero-terminate
*write = 0;
}
void translate(char_t* buffer, const char_t* from, const char_t* to)
{
size_t to_length = impl::strlen(to);
char_t* write = buffer;
while (*buffer)
{
#ifdef __DMC__
volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from buffer otherwise)
#endif
char_t ch = *buffer++;
const char_t* pos = find_char(from, ch);
if (!pos)
*write++ = ch; // do not process
else if (static_cast<size_t>(pos - from) < to_length)
*write++ = to[pos - from]; // replace
}
// zero-terminate
*write = 0;
}
struct equal_to struct equal_to
{ {
template <typename T> bool operator()(const T& lhs, const T& rhs) const template <typename T> bool operator()(const T& lhs, const T& rhs) const
@ -1456,7 +1705,7 @@ namespace pugi
} }
else if (lt == xpath_type_string) else if (lt == xpath_type_string)
{ {
string_t l = lhs->eval_string(c); xpath_string l = lhs->eval_string(c);
xpath_node_set rs = rhs->eval_node_set(c); xpath_node_set rs = rhs->eval_node_set(c);
for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri) for (xpath_node_set::const_iterator ri = rs.begin(); ri != rs.end(); ++ri)
@ -2068,10 +2317,10 @@ namespace pugi
case ast_func_contains: case ast_func_contains:
{ {
string_t lr = _left->eval_string(c); xpath_string lr = _left->eval_string(c);
string_t rr = _right->eval_string(c); xpath_string rr = _right->eval_string(c);
return rr.empty() || lr.find(rr) != string_t::npos; return find_substring(lr.c_str(), rr.c_str()) != 0;
} }
case ast_func_boolean: case ast_func_boolean:
@ -2090,7 +2339,7 @@ namespace pugi
{ {
if (c.n.attribute()) return false; if (c.n.attribute()) return false;
string_t lang = _left->eval_string(c); xpath_string lang = _left->eval_string(c);
for (xml_node n = c.n.node(); n; n = n.parent()) for (xml_node n = c.n.node(); n; n = n.parent())
{ {
@ -2170,10 +2419,10 @@ namespace pugi
return (double)_left->eval_node_set(c).size(); return (double)_left->eval_node_set(c).size();
case ast_func_string_length_0: case ast_func_string_length_0:
return (double)string_value(c.n).size(); return (double)string_value(c.n).length();
case ast_func_string_length_1: case ast_func_string_length_1:
return (double)_left->eval_string(c).size(); return (double)_left->eval_string(c).length();
case ast_func_number_0: case ast_func_number_0:
return convert_string_to_number(string_value(c.n).c_str()); return convert_string_to_number(string_value(c.n).c_str());
@ -2232,19 +2481,18 @@ namespace pugi
} }
} }
string_t eval_string(const xpath_context& c) xpath_string eval_string(const xpath_context& c)
{ {
switch (_type) switch (_type)
{ {
case ast_string_constant: case ast_string_constant:
return _data.string; return xpath_string_const(_data.string);
case ast_func_local_name_0: case ast_func_local_name_0:
{ {
xpath_node na = c.n; xpath_node na = c.n;
if (na.attribute()) return local_name(na.attribute().name()); return xpath_string_const(local_name(na));
else return local_name(na.node().name());
} }
case ast_func_local_name_1: case ast_func_local_name_1:
@ -2252,16 +2500,14 @@ namespace pugi
xpath_node_set ns = _left->eval_node_set(c); xpath_node_set ns = _left->eval_node_set(c);
xpath_node na = ns.first(); xpath_node na = ns.first();
if (na.attribute()) return local_name(na.attribute().name()); return xpath_string_const(local_name(na));
else return local_name(na.node().name());
} }
case ast_func_name_0: case ast_func_name_0:
{ {
xpath_node na = c.n; xpath_node na = c.n;
if (na.attribute()) return na.attribute().name(); return xpath_string_const(qualified_name(na));
else return na.node().name();
} }
case ast_func_name_1: case ast_func_name_1:
@ -2269,16 +2515,14 @@ namespace pugi
xpath_node_set ns = _left->eval_node_set(c); xpath_node_set ns = _left->eval_node_set(c);
xpath_node na = ns.first(); xpath_node na = ns.first();
if (na.attribute()) return na.attribute().name(); return xpath_string_const(qualified_name(na));
else return na.node().name();
} }
case ast_func_namespace_uri_0: case ast_func_namespace_uri_0:
{ {
xpath_node na = c.n; xpath_node na = c.n;
if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); return xpath_string_const(namespace_uri(na));
else return namespace_uri(na.node());
} }
case ast_func_namespace_uri_1: case ast_func_namespace_uri_1:
@ -2286,8 +2530,7 @@ namespace pugi
xpath_node_set ns = _left->eval_node_set(c); xpath_node_set ns = _left->eval_node_set(c);
xpath_node na = ns.first(); xpath_node na = ns.first();
if (na.attribute()) return namespace_uri(na.attribute(), na.parent()); return xpath_string_const(namespace_uri(na));
else return namespace_uri(na.node());
} }
case ast_func_string_0: case ast_func_string_0:
@ -2298,122 +2541,96 @@ namespace pugi
case ast_func_concat: case ast_func_concat:
{ {
string_t r = _left->eval_string(c); xpath_string r = _left->eval_string(c);
for (xpath_ast_node* n = _right; n; n = n->_next) for (xpath_ast_node* n = _right; n; n = n->_next)
r += n->eval_string(c); r.append(n->eval_string(c));
return r; return r;
} }
case ast_func_substring_before: case ast_func_substring_before:
{ {
string_t s = _left->eval_string(c); xpath_string s = _left->eval_string(c);
string_t::size_type pos = s.find(_right->eval_string(c)); xpath_string p = _right->eval_string(c);
const char_t* pos = find_substring(s.c_str(), p.c_str());
if (pos == string_t::npos) return string_t(); return pos ? xpath_string(s.c_str(), pos) : xpath_string();
else return string_t(s.begin(), s.begin() + pos);
} }
case ast_func_substring_after: case ast_func_substring_after:
{ {
string_t s = _left->eval_string(c); xpath_string s = _left->eval_string(c);
string_t p = _right->eval_string(c); xpath_string p = _right->eval_string(c);
string_t::size_type pos = s.find(p); const char_t* pos = find_substring(s.c_str(), p.c_str());
if (pos == string_t::npos) return string_t(); return pos ? xpath_string(pos + p.length()) : xpath_string();
else return string_t(s.begin() + pos + p.length(), s.end());
} }
case ast_func_substring_2: case ast_func_substring_2:
{ {
string_t s = _left->eval_string(c); xpath_string s = _left->eval_string(c);
double first = round_nearest(_right->eval_number(c)); double first = round_nearest(_right->eval_number(c));
if (is_nan(first)) return string_t(); // NaN if (is_nan(first)) return xpath_string(); // NaN
else if (first >= s.length() + 1) return string_t(); else if (first >= s.length() + 1) return xpath_string();
size_t pos = first < 1 ? 1 : (size_t)first; size_t pos = first < 1 ? 1 : (size_t)first;
return s.substr(pos - 1); return xpath_string(s.c_str() + (pos - 1));
} }
case ast_func_substring_3: case ast_func_substring_3:
{ {
string_t s = _left->eval_string(c); xpath_string s = _left->eval_string(c);
size_t s_length = s.length();
double first = round_nearest(_right->eval_number(c)); double first = round_nearest(_right->eval_number(c));
double last = first + round_nearest(_right->_next->eval_number(c)); double last = first + round_nearest(_right->_next->eval_number(c));
if (is_nan(first) || is_nan(last)) return string_t(); if (is_nan(first) || is_nan(last)) return xpath_string();
else if (first >= s.length() + 1) return string_t(); else if (first >= s_length + 1) return xpath_string();
else if (first >= last) return string_t(); else if (first >= last) return xpath_string();
size_t pos = first < 1 ? 1 : (size_t)first; size_t pos = first < 1 ? 1 : (size_t)first;
size_t end = last >= s.length() + 1 ? s.length() + 1 : (size_t)last; size_t end = last >= s_length + 1 ? s_length + 1 : (size_t)last;
size_t size_requested = end - pos; size_t size_requested = end - pos;
size_t size_to_end = s.length() - pos + 1; size_t size_to_end = s_length - pos + 1;
size_t size = size_requested < size_to_end ? size_requested : size_to_end;
return s.substr(pos - 1, size_requested < size_to_end ? size_requested : size_to_end); return xpath_string(s.c_str() + pos - 1, s.c_str() + pos - 1 + size);
} }
case ast_func_normalize_space_0: case ast_func_normalize_space_0:
{
xpath_string s = string_value(c.n);
normalize_space(s.data());
return s;
}
case ast_func_normalize_space_1: case ast_func_normalize_space_1:
{ {
string_t s = _type == ast_func_normalize_space_0 ? string_value(c.n) : _left->eval_string(c); xpath_string s = _left->eval_string(c);
string_t r;
r.reserve(s.size());
for (string_t::const_iterator it = s.begin(); it != s.end(); ++it)
{
if (IS_CHARTYPEX(*it, ctx_space))
{
if (!r.empty() && r[r.size() - 1] != ' ')
r += ' ';
}
else
{
#ifdef __DMC__
volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from &*it otherwise)
#endif
char_t ch = *it;
r += ch; normalize_space(s.data());
}
}
string_t::size_type pos = r.find_last_not_of(' ');
if (pos == string_t::npos) r = string_t();
else r.erase(r.begin() + pos + 1, r.end());
return r; return s;
} }
case ast_func_translate: case ast_func_translate:
{ {
string_t s = _left->eval_string(c); xpath_string s = _left->eval_string(c);
string_t from = _right->eval_string(c); xpath_string from = _right->eval_string(c);
string_t to = _right->_next->eval_string(c); xpath_string to = _right->_next->eval_string(c);
for (string_t::iterator it = s.begin(); it != s.end(); ) translate(s.data(), from.c_str(), to.c_str());
{
#ifdef __DMC__
volatile // explicitly store to local to work around DMC bug (it loads 4 bytes from &*it otherwise)
#endif
char_t ch = *it;
string_t::size_type pos = from.find(ch);
if (pos == string_t::npos)
++it;
else if (pos >= to.length())
it = s.erase(it);
else
*it++ = to[pos];
}
return s; return s;
} }
@ -2422,7 +2639,7 @@ namespace pugi
switch (_rettype) switch (_rettype)
{ {
case xpath_type_boolean: case xpath_type_boolean:
return eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"); return xpath_string_const(eval_boolean(c) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
case xpath_type_number: case xpath_type_number:
return convert_number_to_string(eval_number(c)); return convert_number_to_string(eval_number(c));
@ -2430,12 +2647,12 @@ namespace pugi
case xpath_type_node_set: case xpath_type_node_set:
{ {
xpath_node_set ns = eval_node_set(c); xpath_node_set ns = eval_node_set(c);
return ns.empty() ? string_t() : string_value(ns.first()); return ns.empty() ? xpath_string() : string_value(ns.first());
} }
default: default:
assert(!"Wrong expression for return type string"); assert(!"Wrong expression for return type string");
return string_t(); return xpath_string();
} }
} }
} }
@ -3083,9 +3300,9 @@ namespace pugi
// QName or NCName:* // QName or NCName:*
else else
{ {
const char_t* colon_pos = std::char_traits<char_t>::find(nt_name.begin, static_cast<size_t>(nt_name.end - nt_name.begin), ':'); const char_t* colon_pos = std::find(nt_name.begin, nt_name.end, ':');
if (colon_pos && colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:* if (colon_pos + 2 == nt_name.end && colon_pos[1] == '*') // NCName:*
{ {
nt_name.end--; // erase * nt_name.end--; // erase *
@ -3490,7 +3707,7 @@ namespace pugi
xpath_context c(n, 1, 1); xpath_context c(n, 1, 1);
return _root->eval_string(c); return _root->eval_string(c).c_str();
} }
xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const xpath_node_set xpath_query::evaluate_node_set(const xml_node& n) const