Merge b2de8c92c2 into 8a679cfe78
This commit is contained in:
commit
7f06e4a62d
@ -36,6 +36,7 @@ cmake_dependent_option(PUGIXML_BUILD_SHARED_AND_STATIC_LIBS
|
|||||||
|
|
||||||
# Expose options from the pugiconfig.hpp
|
# Expose options from the pugiconfig.hpp
|
||||||
option(PUGIXML_WCHAR_MODE "Enable wchar_t mode" OFF)
|
option(PUGIXML_WCHAR_MODE "Enable wchar_t mode" OFF)
|
||||||
|
option(PUGIXML_CHAR8_MODE "Enable char8_t mode" OFF)
|
||||||
option(PUGIXML_COMPACT "Enable compact mode" OFF)
|
option(PUGIXML_COMPACT "Enable compact mode" OFF)
|
||||||
|
|
||||||
# Advanced options from pugiconfig.hpp
|
# Advanced options from pugiconfig.hpp
|
||||||
@ -51,6 +52,7 @@ endif()
|
|||||||
|
|
||||||
set(PUGIXML_PUBLIC_DEFINITIONS
|
set(PUGIXML_PUBLIC_DEFINITIONS
|
||||||
$<$<BOOL:${PUGIXML_WCHAR_MODE}>:PUGIXML_WCHAR_MODE>
|
$<$<BOOL:${PUGIXML_WCHAR_MODE}>:PUGIXML_WCHAR_MODE>
|
||||||
|
$<$<BOOL:${PUGIXML_CHAR8_MODE}>:PUGIXML_CHAR8_MODE>
|
||||||
$<$<BOOL:${PUGIXML_COMPACT}>:PUGIXML_COMPACT>
|
$<$<BOOL:${PUGIXML_COMPACT}>:PUGIXML_COMPACT>
|
||||||
$<$<BOOL:${PUGIXML_NO_XPATH}>:PUGIXML_NO_XPATH>
|
$<$<BOOL:${PUGIXML_NO_XPATH}>:PUGIXML_NO_XPATH>
|
||||||
$<$<BOOL:${PUGIXML_NO_STL}>:PUGIXML_NO_STL>
|
$<$<BOOL:${PUGIXML_NO_STL}>:PUGIXML_NO_STL>
|
||||||
|
|||||||
@ -228,6 +228,8 @@ pugixml uses several defines to control the compilation process. There are two w
|
|||||||
|
|
||||||
[[PUGIXML_WCHAR_MODE]]`PUGIXML_WCHAR_MODE` define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See <<dom.unicode>> for more details.
|
[[PUGIXML_WCHAR_MODE]]`PUGIXML_WCHAR_MODE` define toggles between UTF-8 style interface (the in-memory text encoding is assumed to be UTF-8, most functions use `char` as character type) and UTF-16/32 style interface (the in-memory text encoding is assumed to be UTF-16/32, depending on `wchar_t` size, most functions use `wchar_t` as character type). See <<dom.unicode>> for more details.
|
||||||
|
|
||||||
|
[[PUGIXML_CHAR8_MODE]]`PUGIXML_CHAR8_MODE` define makes the UTF-8 style interface use `char8_t` instead of `char`.
|
||||||
|
|
||||||
[[PUGIXML_COMPACT]]`PUGIXML_COMPACT` define activates a different internal representation of document storage that is much more memory efficient for documents with a lot of markup (i.e. nodes and attributes), but is slightly slower to parse and access. For details see <<dom.memory.compact>>.
|
[[PUGIXML_COMPACT]]`PUGIXML_COMPACT` define activates a different internal representation of document storage that is much more memory efficient for documents with a lot of markup (i.e. nodes and attributes), but is slightly slower to parse and access. For details see <<dom.memory.compact>>.
|
||||||
|
|
||||||
[[PUGIXML_NO_XPATH]]`PUGIXML_NO_XPATH` define disables XPath. Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space.
|
[[PUGIXML_NO_XPATH]]`PUGIXML_NO_XPATH` define disables XPath. Both XPath interfaces and XPath implementation are excluded from compilation. This option is provided in case you do not need XPath functionality and need to save code space.
|
||||||
@ -399,7 +401,7 @@ Nodes and attributes do not exist without a document tree, so you can't create t
|
|||||||
[[dom.unicode]]
|
[[dom.unicode]]
|
||||||
=== Unicode interface
|
=== Unicode interface
|
||||||
|
|
||||||
There are two choices of interface and internal representation when configuring pugixml: you can either choose the UTF-8 (also called char) interface or UTF-16/32 (also called wchar_t) one. The choice is controlled via <<PUGIXML_WCHAR_MODE,PUGIXML_WCHAR_MODE>> define; you can set it via `pugiconfig.hpp` or via preprocessor options, as discussed in <<install.building.config>>. If this define is set, the wchar_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of `wchar_t` type.
|
There are three choices of interface and internal representation when configuring pugixml: the UTF-8 interface using `char` (also called char interface; this is the default), the UTF-8 interface using `char8_t` (also called char8_t interface), and the UTF-16/32 interface (also called wchar_t interface). The choice is controlled via the <<PUGIXML_WCHAR_MODE,PUGIXML_WCHAR_MODE>> and <<PUGIXML_CHAR8_MODE,PUGIXML_CHAR8_MODE>> defines; you can set them via `pugiconfig.hpp` or via preprocessor options, as discussed in <<install.building.config>>. If `PUGIXML_WCHAR_MODE` is set, the wchar_t interface is used; otherwise, if `PUGIXML_CHAR8_MODE` is set, the char8_t interface is used; otherwise (by default) the char interface is used. The exact wide character encoding is assumed to be either UTF-16 or UTF-32 and is determined based on the size of `wchar_t` type.
|
||||||
|
|
||||||
NOTE: If the size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code points.
|
NOTE: If the size of `wchar_t` is 2, pugixml assumes UTF-16 encoding instead of UCS-2, which means that some characters are represented as two code points.
|
||||||
|
|
||||||
@ -411,6 +413,14 @@ const char* xml_node::name() const;
|
|||||||
bool xml_node::set_name(const char* value);
|
bool xml_node::set_name(const char* value);
|
||||||
----
|
----
|
||||||
|
|
||||||
|
like this in char8_t mode:
|
||||||
|
|
||||||
|
[source]
|
||||||
|
----
|
||||||
|
const char8_t* xml_node::name() const;
|
||||||
|
bool xml_node::set_name(const char8_t* value);
|
||||||
|
----
|
||||||
|
|
||||||
and like this in wchar_t mode:
|
and like this in wchar_t mode:
|
||||||
|
|
||||||
[source]
|
[source]
|
||||||
@ -420,7 +430,7 @@ bool xml_node::set_name(const wchar_t* value);
|
|||||||
----
|
----
|
||||||
|
|
||||||
[[char_t]][[string_t]]
|
[[char_t]][[string_t]]
|
||||||
There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode and to `std::wstring` in wchar_t mode.
|
There is a special type, `pugi::char_t`, that is defined as the character type and depends on the library configuration; it will be also used in the documentation hereafter. There is also a type `pugi::string_t`, which is defined as the STL string of the character type; it corresponds to `std::string` in char mode, `std::u8string` in char8_t mode, and to `std::wstring` in wchar_t mode.
|
||||||
|
|
||||||
In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics - generally UTF-8 mode is more memory and performance efficient, especially if `sizeof(wchar_t)` is 4. The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is non-ASCII, wchar_t mode is probably a better choice.
|
In addition to the interface, the internal implementation changes to store XML data as `pugi::char_t`; this means that these two modes have different memory usage characteristics - generally UTF-8 mode is more memory and performance efficient, especially if `sizeof(wchar_t)` is 4. The conversion to `pugi::char_t` upon document loading and from `pugi::char_t` upon document saving happen automatically, which also carries minor performance penalty. The general advice however is to select the character mode based on usage scenario, i.e. if UTF-8 is inconvenient to process and most of your XML data is non-ASCII, wchar_t mode is probably a better choice.
|
||||||
|
|
||||||
@ -443,13 +453,15 @@ std::wstring as_wide(const std::string& str);
|
|||||||
|
|
||||||
[NOTE]
|
[NOTE]
|
||||||
====
|
====
|
||||||
Most examples in this documentation assume char interface and therefore will not compile with <<PUGIXML_WCHAR_MODE,PUGIXML_WCHAR_MODE>>. This is done to simplify the documentation; usually the only changes you'll have to make is to pass `wchar_t` string literals, i.e. instead of
|
Most examples in this documentation assume char interface and therefore will not compile with <<PUGIXML_WCHAR_MODE,PUGIXML_WCHAR_MODE>> or <<PUGIXML_CHAR8_MODE,PUGIXML_CHAR8_MODE>>. This is done to simplify the documentation; usually the only change you'll have to make is to pass the appropriate string literals, i.e. instead of
|
||||||
|
|
||||||
`xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");`
|
`xml_node node = doc.child("bookstore").find_child_by_attribute("book", "id", "12345");`
|
||||||
|
|
||||||
you'll have to use
|
you'll have to use
|
||||||
|
|
||||||
`xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");`
|
`xml_node node = doc.child(L"bookstore").find_child_by_attribute(L"book", L"id", L"12345");`
|
||||||
|
|
||||||
|
in wchar_t mode, or the corresponding `u8"..."` string literals in char8_t mode.
|
||||||
====
|
====
|
||||||
|
|
||||||
[[dom.thread]]
|
[[dom.thread]]
|
||||||
|
|||||||
@ -17,6 +17,9 @@
|
|||||||
// Uncomment this to enable wchar_t mode
|
// Uncomment this to enable wchar_t mode
|
||||||
// #define PUGIXML_WCHAR_MODE
|
// #define PUGIXML_WCHAR_MODE
|
||||||
|
|
||||||
|
// Uncomment this to enable char8_t mode
|
||||||
|
// #define PUGIXML_CHAR8_MODE
|
||||||
|
|
||||||
// Uncomment this to enable compact mode
|
// Uncomment this to enable compact mode
|
||||||
// #define PUGIXML_COMPACT
|
// #define PUGIXML_COMPACT
|
||||||
|
|
||||||
|
|||||||
@ -219,6 +219,8 @@ PUGI__NS_BEGIN
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
return wcslen(s);
|
return wcslen(s);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return strlen(reinterpret_cast<const char*>(s));
|
||||||
#else
|
#else
|
||||||
return strlen(s);
|
return strlen(s);
|
||||||
#endif
|
#endif
|
||||||
@ -231,6 +233,8 @@ PUGI__NS_BEGIN
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
return wcscmp(src, dst) == 0;
|
return wcscmp(src, dst) == 0;
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return strcmp(reinterpret_cast<const char*>(src), reinterpret_cast<const char*>(dst)) == 0;
|
||||||
#else
|
#else
|
||||||
return strcmp(src, dst) == 0;
|
return strcmp(src, dst) == 0;
|
||||||
#endif
|
#endif
|
||||||
@ -2302,7 +2306,7 @@ PUGI__NS_BEGIN
|
|||||||
return wchar_decoder::process(str, length, 0, utf8_counter());
|
return wchar_decoder::process(str, length, 0, utf8_counter());
|
||||||
}
|
}
|
||||||
|
|
||||||
PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
|
PUGI__FN void as_utf8_end(u8char_t* buffer, size_t size, const wchar_t* str, size_t length)
|
||||||
{
|
{
|
||||||
// convert to utf8
|
// convert to utf8
|
||||||
uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
|
uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
|
||||||
@ -2314,13 +2318,13 @@ PUGI__NS_BEGIN
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PUGIXML_NO_STL
|
#ifndef PUGIXML_NO_STL
|
||||||
PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
|
PUGI__FN std::basic_string<u8char_t> as_utf8_impl(const wchar_t* str, size_t length)
|
||||||
{
|
{
|
||||||
// first pass: get length in utf8 characters
|
// first pass: get length in utf8 characters
|
||||||
size_t size = as_utf8_begin(str, length);
|
size_t size = as_utf8_begin(str, length);
|
||||||
|
|
||||||
// allocate resulting string
|
// allocate resulting string
|
||||||
std::string result;
|
std::basic_string<u8char_t> result;
|
||||||
result.resize(size);
|
result.resize(size);
|
||||||
|
|
||||||
// second pass: convert to utf8
|
// second pass: convert to utf8
|
||||||
@ -3505,7 +3509,7 @@ PUGI__NS_BEGIN
|
|||||||
#else
|
#else
|
||||||
static char_t* parse_skip_bom(char_t* s)
|
static char_t* parse_skip_bom(char_t* s)
|
||||||
{
|
{
|
||||||
return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
|
return (s[0] == char_t('\xef') && s[1] == char_t('\xbb') && s[2] == char_t('\xbf')) ? s + 3 : s;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -4609,6 +4613,8 @@ PUGI__NS_BEGIN
|
|||||||
{
|
{
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
return wcstod(value, 0);
|
return wcstod(value, 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return strtod(reinterpret_cast<const char*>(value), 0);
|
||||||
#else
|
#else
|
||||||
return strtod(value, 0);
|
return strtod(value, 0);
|
||||||
#endif
|
#endif
|
||||||
@ -4618,6 +4624,8 @@ PUGI__NS_BEGIN
|
|||||||
{
|
{
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
return static_cast<float>(wcstod(value, 0));
|
return static_cast<float>(wcstod(value, 0));
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return static_cast<float>(strtod(reinterpret_cast<const char*>(value), 0));
|
||||||
#else
|
#else
|
||||||
return static_cast<float>(strtod(value, 0));
|
return static_cast<float>(strtod(value, 0));
|
||||||
#endif
|
#endif
|
||||||
@ -4676,6 +4684,8 @@ PUGI__NS_BEGIN
|
|||||||
for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
|
for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
|
||||||
|
|
||||||
return strcpy_insitu(dest, header, header_mask, wbuf, offset);
|
return strcpy_insitu(dest, header, header_mask, wbuf, offset);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return strcpy_insitu(dest, header, header_mask, reinterpret_cast<const char8_t*>(buf), strlen(reinterpret_cast<const char*>(buf)));
|
||||||
#else
|
#else
|
||||||
return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
|
return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
|
||||||
#endif
|
#endif
|
||||||
@ -5106,13 +5116,25 @@ namespace pugi
|
|||||||
|
|
||||||
#ifndef PUGIXML_NO_STL
|
#ifndef PUGIXML_NO_STL
|
||||||
PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
|
PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
, utf8_stream(0)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
|
PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
, utf8_stream(0)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
// Construct writer from a char8_t output stream.
// narrow_stream and wide_stream are set to null so that utf8_stream is the only non-null target.
PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char8_t, std::char_traits<char8_t> >& stream): narrow_stream(0), wide_stream(0), utf8_stream(&stream)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
|
PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
|
||||||
{
|
{
|
||||||
if (narrow_stream)
|
if (narrow_stream)
|
||||||
@ -5120,6 +5142,13 @@ namespace pugi
|
|||||||
assert(!wide_stream);
|
assert(!wide_stream);
|
||||||
narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
|
narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
|
||||||
}
|
}
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
else if (utf8_stream)
|
||||||
|
{
|
||||||
|
assert(!wide_stream);
|
||||||
|
utf8_stream->write(reinterpret_cast<const char8_t*>(data), static_cast<std::streamsize>(size));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
assert(wide_stream);
|
assert(wide_stream);
|
||||||
@ -6494,6 +6523,15 @@ namespace pugi
|
|||||||
|
|
||||||
print(writer, indent, flags, encoding_wchar, depth);
|
print(writer, indent, flags, encoding_wchar, depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
// Print subtree to a char8_t stream; the output is always encoded as UTF-8.
PUGI__FN void xml_node::print(std::basic_ostream<char8_t, std::char_traits<char8_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
|
||||||
|
{
|
||||||
|
xml_writer_stream writer(stream);
|
||||||
|
|
||||||
|
// xml_writer_stream::write passes the bytes through to the char8_t stream unchanged,
// so the data must be produced as UTF-8 (encoding_wchar would emit UTF-16/32 bytes here)
print(writer, indent, flags, encoding_utf8, depth);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PUGI__FN ptrdiff_t xml_node::offset_debug() const
|
PUGI__FN ptrdiff_t xml_node::offset_debug() const
|
||||||
@ -7316,6 +7354,15 @@ namespace pugi
|
|||||||
|
|
||||||
return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
|
return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
// Load document from a char8_t stream; the stream contents are parsed as UTF-8 (encoding_utf8).
PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char8_t, std::char_traits<char8_t> >& stream, unsigned int options)
|
||||||
|
{
|
||||||
|
// reset() is defined outside this view - presumably it discards any previously loaded document; confirm
reset();
|
||||||
|
|
||||||
|
return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_utf8, &_buffer);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
|
PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
|
||||||
@ -7418,6 +7465,15 @@ namespace pugi
|
|||||||
|
|
||||||
save(writer, indent, flags, encoding_wchar);
|
save(writer, indent, flags, encoding_wchar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
// Save XML document to a char8_t stream; the output is always encoded as UTF-8.
PUGI__FN void xml_document::save(std::basic_ostream<char8_t, std::char_traits<char8_t> >& stream, const char_t* indent, unsigned int flags) const
|
||||||
|
{
|
||||||
|
xml_writer_stream writer(stream);
|
||||||
|
|
||||||
|
// xml_writer_stream::write passes the bytes through to the char8_t stream unchanged,
// so the data must be produced as UTF-8 (encoding_wchar would emit UTF-16/32 bytes here)
save(writer, indent, flags, encoding_utf8);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
|
PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
|
||||||
@ -7448,14 +7504,14 @@ namespace pugi
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PUGIXML_NO_STL
|
#ifndef PUGIXML_NO_STL
|
||||||
PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
|
PUGI__FN std::basic_string<u8char_t> PUGIXML_FUNCTION as_utf8(const wchar_t* str)
|
||||||
{
|
{
|
||||||
assert(str);
|
assert(str);
|
||||||
|
|
||||||
return impl::as_utf8_impl(str, impl::strlength_wide(str));
|
return impl::as_utf8_impl(str, impl::strlength_wide(str));
|
||||||
}
|
}
|
||||||
|
|
||||||
PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
|
PUGI__FN std::basic_string<u8char_t> PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
|
||||||
{
|
{
|
||||||
return impl::as_utf8_impl(str.c_str(), str.size());
|
return impl::as_utf8_impl(str.c_str(), str.size());
|
||||||
}
|
}
|
||||||
@ -8096,6 +8152,9 @@ PUGI__NS_BEGIN
|
|||||||
{
|
{
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
return wcschr(s, c);
|
return wcschr(s, c);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return reinterpret_cast<const char8_t*>(
|
||||||
|
strchr(reinterpret_cast<const char*>(s), static_cast<char>(c)));
|
||||||
#else
|
#else
|
||||||
return strchr(s, c);
|
return strchr(s, c);
|
||||||
#endif
|
#endif
|
||||||
@ -8106,6 +8165,9 @@ PUGI__NS_BEGIN
|
|||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
|
// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
|
||||||
return (*p == 0) ? s : wcsstr(s, p);
|
return (*p == 0) ? s : wcsstr(s, p);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return reinterpret_cast<const char8_t*>(
|
||||||
|
strstr(reinterpret_cast<const char*>(s), reinterpret_cast<const char*>(p)));
|
||||||
#else
|
#else
|
||||||
return strstr(s, p);
|
return strstr(s, p);
|
||||||
#endif
|
#endif
|
||||||
@ -8552,6 +8614,8 @@ PUGI__NS_BEGIN
|
|||||||
// parse string
|
// parse string
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
return wcstod(string, 0);
|
return wcstod(string, 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return strtod(reinterpret_cast<const char*>(string), 0);
|
||||||
#else
|
#else
|
||||||
return strtod(string, 0);
|
return strtod(string, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -122,10 +122,17 @@
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(PUGIXML_CHAR8_MODE) && !defined(__cpp_char8_t)
|
||||||
|
# error "char8_t mode requires C++20 or later"
|
||||||
|
#endif
|
||||||
|
|
||||||
// Character interface macros
|
// Character interface macros
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
# define PUGIXML_TEXT(t) L ## t
|
# define PUGIXML_TEXT(t) L ## t
|
||||||
# define PUGIXML_CHAR wchar_t
|
# define PUGIXML_CHAR wchar_t
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
# define PUGIXML_TEXT(t) u8 ## t
|
||||||
|
# define PUGIXML_CHAR char8_t
|
||||||
#else
|
#else
|
||||||
# define PUGIXML_TEXT(t) t
|
# define PUGIXML_TEXT(t) t
|
||||||
# define PUGIXML_CHAR char
|
# define PUGIXML_CHAR char
|
||||||
@ -136,6 +143,13 @@ namespace pugi
|
|||||||
// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
|
// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE and PUGIXML_CHAR8_MODE
|
||||||
typedef PUGIXML_CHAR char_t;
|
typedef PUGIXML_CHAR char_t;
|
||||||
|
|
||||||
|
// Character type used for UTF-8; depends on PUGIXML_CHAR8_MODE
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
typedef char8_t u8char_t;
|
||||||
|
#else
|
||||||
|
typedef char u8char_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef PUGIXML_NO_STL
|
#ifndef PUGIXML_NO_STL
|
||||||
// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
|
// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE and PUGIXML_CHAR8_MODE
|
||||||
typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
|
typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
|
||||||
@ -351,12 +365,18 @@ namespace pugi
|
|||||||
// Construct writer from an output stream object
|
// Construct writer from an output stream object
|
||||||
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
|
xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
|
||||||
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
|
xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
xml_writer_stream(std::basic_ostream<char8_t, std::char_traits<char8_t> >& stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
virtual void write(const void* data, size_t size) PUGIXML_OVERRIDE;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
|
std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
|
||||||
std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
|
std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
std::basic_ostream<char8_t, std::char_traits<char8_t> >* utf8_stream;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -696,6 +716,9 @@ namespace pugi
|
|||||||
// Print subtree to stream
|
// Print subtree to stream
|
||||||
void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
|
void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
|
||||||
void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
|
void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
void print(std::basic_ostream<char8_t, std::char_traits<char8_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Child nodes iterators
|
// Child nodes iterators
|
||||||
@ -1068,6 +1091,9 @@ namespace pugi
|
|||||||
// Load document from stream.
|
// Load document from stream.
|
||||||
xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
|
xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
|
||||||
xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
|
xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
xml_parse_result load(std::basic_istream<char8_t, std::char_traits<char8_t> >& stream, unsigned int options = parse_default);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
|
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
|
||||||
@ -1098,6 +1124,9 @@ namespace pugi
|
|||||||
// Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
|
// Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
|
||||||
void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
|
void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
|
||||||
void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
|
void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
void save(std::basic_ostream<char8_t, std::char_traits<char8_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Save XML to file
|
// Save XML to file
|
||||||
@ -1433,8 +1462,8 @@ namespace pugi
|
|||||||
|
|
||||||
#ifndef PUGIXML_NO_STL
|
#ifndef PUGIXML_NO_STL
|
||||||
// Convert wide string to UTF8
|
// Convert wide string to UTF8
|
||||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
|
std::basic_string<u8char_t, std::char_traits<u8char_t>, std::allocator<u8char_t> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
|
||||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
|
std::basic_string<u8char_t, std::char_traits<u8char_t>, std::allocator<u8char_t> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
|
||||||
|
|
||||||
// Convert UTF8 to wide string
|
// Convert UTF8 to wide string
|
||||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
|
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
|
||||||
|
|||||||
@ -44,12 +44,11 @@ static void build_document_order(std::vector<pugi::xpath_node>& result, pugi::xm
|
|||||||
|
|
||||||
bool test_string_equal(const pugi::char_t* lhs, const pugi::char_t* rhs)
|
bool test_string_equal(const pugi::char_t* lhs, const pugi::char_t* rhs)
|
||||||
{
|
{
|
||||||
return (!lhs || !rhs) ? lhs == rhs :
|
if (!lhs || !rhs) return lhs == rhs;
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
typedef std::char_traits<pugi::char_t> traits;
|
||||||
wcscmp(lhs, rhs) == 0;
|
const size_t lhs_len = traits::length(lhs);
|
||||||
#else
|
const size_t rhs_len = traits::length(rhs);
|
||||||
strcmp(lhs, rhs) == 0;
|
return lhs_len == rhs_len && traits::compare(lhs, rhs, lhs_len) == 0;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool test_node(const pugi::xml_node& node, const pugi::char_t* contents, const pugi::char_t* indent, unsigned int flags)
|
bool test_node(const pugi::xml_node& node, const pugi::char_t* contents, const pugi::char_t* indent, unsigned int flags)
|
||||||
@ -73,11 +72,7 @@ bool test_double_nan(double value)
|
|||||||
#ifndef PUGIXML_NO_XPATH
|
#ifndef PUGIXML_NO_XPATH
|
||||||
static size_t strlength(const pugi::char_t* s)
|
static size_t strlength(const pugi::char_t* s)
|
||||||
{
|
{
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
return std::char_traits<pugi::char_t>::length(s);
|
||||||
return wcslen(s);
|
|
||||||
#else
|
|
||||||
return strlen(s);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool test_xpath_string(const pugi::xpath_node& node, const pugi::char_t* query, pugi::xpath_variable_set* variables, const pugi::char_t* expected)
|
bool test_xpath_string(const pugi::xpath_node& node, const pugi::char_t* query, pugi::xpath_variable_set* variables, const pugi::char_t* expected)
|
||||||
|
|||||||
@ -9,6 +9,8 @@
|
|||||||
#include <new>
|
#include <new>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct test_runner
|
struct test_runner
|
||||||
{
|
{
|
||||||
test_runner(const char* name)
|
test_runner(const char* name)
|
||||||
@ -154,6 +156,39 @@ struct dummy_fixture {};
|
|||||||
|
|
||||||
#define STR(text) PUGIXML_TEXT(text)
|
#define STR(text) PUGIXML_TEXT(text)
|
||||||
|
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
# if defined(__clang__) || defined(__GNUC__)
|
||||||
|
# define ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
|
||||||
|
# else
|
||||||
|
# define ALIASING_BARRIER(ptr)
|
||||||
|
# endif
|
||||||
|
// Test helper: reinterprets a narrow (char) byte string as a char8_t string without copying.
// The returned pointer refers to the same memory as `bytes`.
inline const char8_t* char_cast(const char* bytes)
|
||||||
|
{
|
||||||
|
// ALIASING_BARRIER is an empty asm statement with a "memory" clobber on Clang/GCC and expands to nothing elsewhere;
// NOTE(review): presumably it stops the optimizer from reasoning across the char -> char8_t reinterpretation - confirm
ALIASING_BARRIER(bytes);
|
||||||
|
return reinterpret_cast<const char8_t*>(bytes);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
|
#define RAW(text) L ## text
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
#define RAW(text) char_cast(text)
|
||||||
|
#else
|
||||||
|
#define RAW(text) text
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(PUGIXML_CHAR8_MODE)
|
||||||
|
#define U8RAW(text) char_cast(text)
|
||||||
|
#else
|
||||||
|
#define U8RAW(text) text
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef PUGIXML_CHAR8_MODE
|
||||||
|
#define U8STR(text) u8 ## text
|
||||||
|
#else
|
||||||
|
#define U8STR(text) text
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(__DMC__) || defined(__BORLANDC__)
|
#if defined(__DMC__) || defined(__BORLANDC__)
|
||||||
#define U_LITERALS // DMC does not understand \x01234 (it parses first three digits), but understands \u01234
|
#define U_LITERALS // DMC does not understand \x01234 (it parses first three digits), but understands \u01234
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -919,6 +919,8 @@ inline void check_utftest_document(const xml_document& doc)
|
|||||||
size_t wcharsize = sizeof(wchar_t);
|
size_t wcharsize = sizeof(wchar_t);
|
||||||
|
|
||||||
CHECK(wcharsize == 2 ? (v[7] == wchar_cast(0xd852) && v[8] == wchar_cast(0xdf62)) : (v[7] == wchar_cast(0x24b62)));
|
CHECK(wcharsize == 2 ? (v[7] == wchar_cast(0xd852) && v[8] == wchar_cast(0xdf62)) : (v[7] == wchar_cast(0x24b62)));
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK_STRING(v, u8"\u4E16\u754C\u6709\u5F88\u591A\u8BED\u8A00\U00024B62");
|
||||||
#else
|
#else
|
||||||
// unicode string
|
// unicode string
|
||||||
CHECK_STRING(v, "\xe4\xb8\x96\xe7\x95\x8c\xe6\x9c\x89\xe5\xbe\x88\xe5\xa4\x9a\xe8\xaf\xad\xe8\xa8\x80\xf0\xa4\xad\xa2");
|
CHECK_STRING(v, "\xe4\xb8\x96\xe7\x95\x8c\xe6\x9c\x89\xe5\xbe\x88\xe5\xa4\x9a\xe8\xaf\xad\xe8\xa8\x80\xf0\xa4\xad\xa2");
|
||||||
@ -1524,6 +1526,8 @@ TEST(document_load_buffer_utf_truncated)
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
CHECK(name[0] == 0x20ac && name[1] == 0);
|
CHECK(name[0] == 0x20ac && name[1] == 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK_STRING(name, u8"\u20AC");
|
||||||
#else
|
#else
|
||||||
CHECK_STRING(name, "\xe2\x82\xac");
|
CHECK_STRING(name, "\xe2\x82\xac");
|
||||||
#endif
|
#endif
|
||||||
@ -1569,6 +1573,8 @@ TEST(document_load_stream_truncated)
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
CHECK(name[0] == 0x20ac && name[1] == 0);
|
CHECK(name[0] == 0x20ac && name[1] == 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK_STRING(name, u8"\u20AC");
|
||||||
#else
|
#else
|
||||||
CHECK_STRING(name, "\xe2\x82\xac");
|
CHECK_STRING(name, "\xe2\x82\xac");
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -682,6 +682,8 @@ struct find_predicate_prefix
|
|||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
// can't use wcsncmp here because of a bug in DMC
|
// can't use wcsncmp here because of a bug in DMC
|
||||||
return std::basic_string<char_t>(obj.name()).compare(0, wcslen(prefix), prefix) == 0;
|
return std::basic_string<char_t>(obj.name()).compare(0, wcslen(prefix), prefix) == 0;
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return strncmp(reinterpret_cast<const char*>(obj.name()), reinterpret_cast<const char*>(prefix), strlen(reinterpret_cast<const char*>(prefix))) == 0;
|
||||||
#else
|
#else
|
||||||
return strncmp(obj.name(), prefix, strlen(prefix)) == 0;
|
return strncmp(obj.name(), prefix, strlen(prefix)) == 0;
|
||||||
#endif
|
#endif
|
||||||
@ -807,6 +809,8 @@ struct test_walker: xml_tree_walker
|
|||||||
std::copy(buf, buf + strlen(buf) + 1, &wbuf[0]);
|
std::copy(buf, buf + strlen(buf) + 1, &wbuf[0]);
|
||||||
|
|
||||||
return std::basic_string<char_t>(wbuf);
|
return std::basic_string<char_t>(wbuf);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
return std::basic_string<char_t>(char_cast(buf));
|
||||||
#else
|
#else
|
||||||
return std::basic_string<char_t>(buf);
|
return std::basic_string<char_t>(buf);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -568,6 +568,8 @@ TEST(parse_escapes_unicode)
|
|||||||
size_t wcharsize = sizeof(wchar_t);
|
size_t wcharsize = sizeof(wchar_t);
|
||||||
|
|
||||||
CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? v[2] == wchar_cast(0xd852) && v[3] == wchar_cast(0xdf62) : v[2] == wchar_cast(0x24b62)));
|
CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? v[2] == wchar_cast(0xd852) && v[3] == wchar_cast(0xdf62) : v[2] == wchar_cast(0x24b62)));
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK_STRING(doc.child_value(STR("node")), u8"\u03B3\u03B3\U00024B62");
|
||||||
#else
|
#else
|
||||||
CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2");
|
CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2");
|
||||||
#endif
|
#endif
|
||||||
@ -1104,6 +1106,8 @@ TEST(parse_bom_fragment_invalid_utf8)
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
CHECK(value[0] == wchar_cast(0xfefb) && value[1] == 0);
|
CHECK(value[0] == wchar_cast(0xfefb) && value[1] == 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK(value[0] == 0xef && value[1] == 0xbb && value[2] == 0xbb);
|
||||||
#else
|
#else
|
||||||
CHECK_STRING(value, "\xef\xbb\xbb");
|
CHECK_STRING(value, "\xef\xbb\xbb");
|
||||||
#endif
|
#endif
|
||||||
@ -1119,6 +1123,8 @@ TEST(parse_bom_fragment_invalid_utf16)
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
CHECK(value[0] == wchar_cast(0xfffe) && value[1] == 0);
|
CHECK(value[0] == wchar_cast(0xfffe) && value[1] == 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK(value[0] == 0xef && value[1] == 0xbf && value[2] == 0xbe);
|
||||||
#else
|
#else
|
||||||
CHECK_STRING(value, "\xef\xbf\xbe");
|
CHECK_STRING(value, "\xef\xbf\xbe");
|
||||||
#endif
|
#endif
|
||||||
@ -1134,6 +1140,8 @@ TEST(parse_bom_fragment_invalid_utf32)
|
|||||||
|
|
||||||
#ifdef PUGIXML_WCHAR_MODE
|
#ifdef PUGIXML_WCHAR_MODE
|
||||||
CHECK(value[0] == wchar_cast(0xffff) && value[1] == 0);
|
CHECK(value[0] == wchar_cast(0xffff) && value[1] == 0);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK(value[0] == 0xef && value[1] == 0xbf && value[2] == 0xbf);
|
||||||
#else
|
#else
|
||||||
CHECK_STRING(value, "\xef\xbf\xbf");
|
CHECK_STRING(value, "\xef\xbf\xbf");
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -16,6 +16,10 @@ static xml_parse_result load_concat(xml_document& doc, const char_t* a, const ch
|
|||||||
wcscpy(buffer, a);
|
wcscpy(buffer, a);
|
||||||
wcscat(buffer, b);
|
wcscat(buffer, b);
|
||||||
wcscat(buffer, c);
|
wcscat(buffer, c);
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
strcpy(reinterpret_cast<char*>(buffer), reinterpret_cast<const char*>(a));
|
||||||
|
strcat(reinterpret_cast<char*>(buffer), reinterpret_cast<const char*>(b));
|
||||||
|
strcat(reinterpret_cast<char*>(buffer), reinterpret_cast<const char*>(c));
|
||||||
#else
|
#else
|
||||||
strcpy(buffer, a);
|
strcpy(buffer, a);
|
||||||
strcat(buffer, b);
|
strcat(buffer, b);
|
||||||
|
|||||||
@ -80,16 +80,16 @@ TEST(as_wide_string)
|
|||||||
|
|
||||||
TEST(as_utf8_empty)
|
TEST(as_utf8_empty)
|
||||||
{
|
{
|
||||||
CHECK(as_utf8(L"") == "");
|
CHECK(as_utf8(L"") == U8STR(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(as_utf8_valid_basic)
|
TEST(as_utf8_valid_basic)
|
||||||
{
|
{
|
||||||
// valid 1-byte, 2-byte and 3-byte outputs
|
// valid 1-byte, 2-byte and 3-byte outputs
|
||||||
#ifdef U_LITERALS
|
#ifdef U_LITERALS
|
||||||
CHECK(as_utf8(L"?\u0400\u203D") == "?\xd0\x80\xe2\x80\xbd");
|
CHECK(as_utf8(L"?\u0400\u203D") == U8RAW("?\xd0\x80\xe2\x80\xbd"));
|
||||||
#else
|
#else
|
||||||
CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd");
|
CHECK(as_utf8(L"?\x0400\x203D") == U8RAW("?\xd0\x80\xe2\x80\xbd"));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,14 +106,14 @@ TEST(as_utf8_valid_astral)
|
|||||||
s[1] = ' ';
|
s[1] = ' ';
|
||||||
s[2] = wchar_cast(0x1003ff);
|
s[2] = wchar_cast(0x1003ff);
|
||||||
|
|
||||||
CHECK(as_utf8(s.c_str()) == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
|
CHECK(as_utf8(s.c_str()) == U8RAW("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#ifdef U_LITERALS
|
#ifdef U_LITERALS
|
||||||
CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
|
CHECK(as_utf8(L"\uda1d\ude24 \udbc0\udfff") == U8RAW("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"));
|
||||||
#else
|
#else
|
||||||
CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
|
CHECK(as_utf8(L"\xda1d\xde24 \xdbc0\xdfff") == U8RAW("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf"));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -129,17 +129,17 @@ TEST(as_utf8_invalid)
|
|||||||
CHECK(as_utf8(L"a\uda1d") == "a");
|
CHECK(as_utf8(L"a\uda1d") == "a");
|
||||||
CHECK(as_utf8(L"a\uda1d_") == "a_");
|
CHECK(as_utf8(L"a\uda1d_") == "a_");
|
||||||
#else
|
#else
|
||||||
CHECK(as_utf8(L"a\xda1d") == "a");
|
CHECK(as_utf8(L"a\xda1d") == U8STR("a"));
|
||||||
CHECK(as_utf8(L"a\xda1d_") == "a_");
|
CHECK(as_utf8(L"a\xda1d_") == U8STR("a_"));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// check incorrect leading code
|
// check incorrect leading code
|
||||||
#ifdef U_LITERALS
|
#ifdef U_LITERALS
|
||||||
CHECK(as_utf8(L"a\ude24") == "a");
|
CHECK(as_utf8(L"a\ude24") == STR("a"));
|
||||||
CHECK(as_utf8(L"a\ude24_") == "a_");
|
CHECK(as_utf8(L"a\ude24_") == STR("a_"));
|
||||||
#else
|
#else
|
||||||
CHECK(as_utf8(L"a\xde24") == "a");
|
CHECK(as_utf8(L"a\xde24") == U8STR("a"));
|
||||||
CHECK(as_utf8(L"a\xde24_") == "a_");
|
CHECK(as_utf8(L"a\xde24_") == U8STR("a_"));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -148,6 +148,6 @@ TEST(as_utf8_string)
|
|||||||
{
|
{
|
||||||
std::basic_string<wchar_t> s = L"abcd";
|
std::basic_string<wchar_t> s = L"abcd";
|
||||||
|
|
||||||
CHECK(as_utf8(s) == "abcd");
|
CHECK(as_utf8(s) == U8STR("abcd"));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -220,6 +220,8 @@ TEST_XML(write_escape_unicode, "<node attr='㰀'/>")
|
|||||||
#else
|
#else
|
||||||
CHECK_NODE(doc, STR("<node attr=\"\x3c00\"/>"));
|
CHECK_NODE(doc, STR("<node attr=\"\x3c00\"/>"));
|
||||||
#endif
|
#endif
|
||||||
|
#elif defined(PUGIXML_CHAR8_MODE)
|
||||||
|
CHECK_NODE(doc, STR("<node attr=\"\u3c00\"/>"));
|
||||||
#else
|
#else
|
||||||
CHECK_NODE(doc, STR("<node attr=\"\xe3\xb0\x80\"/>"));
|
CHECK_NODE(doc, STR("<node attr=\"\xe3\xb0\x80\"/>"));
|
||||||
#endif
|
#endif
|
||||||
@ -370,11 +372,11 @@ TEST(write_encoding_huge)
|
|||||||
const unsigned int N = 16000;
|
const unsigned int N = 16000;
|
||||||
|
|
||||||
// make a large utf8 name consisting of 3-byte chars (3 does not divide internal buffer size, so will need split correction)
|
// make a large utf8 name consisting of 3-byte chars (3 does not divide internal buffer size, so will need split correction)
|
||||||
std::string s_utf8 = "<";
|
std::basic_string<pugi::char_t> s_utf8 = STR("<");
|
||||||
|
|
||||||
for (unsigned int i = 0; i < N; ++i) s_utf8 += "\xE2\x82\xAC";
|
for (unsigned int i = 0; i < N; ++i) s_utf8 += RAW("\xE2\x82\xAC");
|
||||||
|
|
||||||
s_utf8 += "/>";
|
s_utf8 += STR("/>");
|
||||||
|
|
||||||
xml_document doc;
|
xml_document doc;
|
||||||
CHECK(doc.load_buffer(&s_utf8[0], s_utf8.length(), parse_default, encoding_utf8));
|
CHECK(doc.load_buffer(&s_utf8[0], s_utf8.length(), parse_default, encoding_utf8));
|
||||||
@ -393,9 +395,9 @@ TEST(write_encoding_huge_invalid)
|
|||||||
const unsigned int N = 16000;
|
const unsigned int N = 16000;
|
||||||
|
|
||||||
// make a large utf8 name consisting of non-leading chars
|
// make a large utf8 name consisting of non-leading chars
|
||||||
std::string s_utf8;
|
std::basic_string<pugi::char_t> s_utf8;
|
||||||
|
|
||||||
for (unsigned int i = 0; i < N; ++i) s_utf8 += "\x82";
|
for (unsigned int i = 0; i < N; ++i) s_utf8 += RAW("\x82");
|
||||||
|
|
||||||
xml_document doc;
|
xml_document doc;
|
||||||
doc.append_child().set_name(s_utf8.c_str());
|
doc.append_child().set_name(s_utf8.c_str());
|
||||||
@ -451,7 +453,7 @@ TEST(write_unicode_invalid_utf16)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static bool test_write_unicode_invalid(const char* name, const wchar_t* expected)
|
static bool test_write_unicode_invalid(const char_t* name, const wchar_t* expected)
|
||||||
{
|
{
|
||||||
xml_document doc;
|
xml_document doc;
|
||||||
doc.append_child(node_pcdata).set_value(name);
|
doc.append_child(node_pcdata).set_value(name);
|
||||||
@ -462,31 +464,31 @@ static bool test_write_unicode_invalid(const char* name, const wchar_t* expected
|
|||||||
TEST(write_unicode_invalid_utf8)
|
TEST(write_unicode_invalid_utf8)
|
||||||
{
|
{
|
||||||
// invalid 1-byte input
|
// invalid 1-byte input
|
||||||
CHECK(test_write_unicode_invalid("a\xb0", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xb0"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xb0_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xb0_"), L"a_"));
|
||||||
|
|
||||||
// invalid 2-byte input
|
// invalid 2-byte input
|
||||||
CHECK(test_write_unicode_invalid("a\xc0", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xc0"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xd0", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xd0"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xc0_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xc0_"), L"a_"));
|
||||||
CHECK(test_write_unicode_invalid("a\xd0_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xd0_"), L"a_"));
|
||||||
|
|
||||||
// invalid 3-byte input
|
// invalid 3-byte input
|
||||||
CHECK(test_write_unicode_invalid("a\xe2\x80", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xe2\x80"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xe2", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xe2"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xe2\x80_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xe2\x80_"), L"a_"));
|
||||||
CHECK(test_write_unicode_invalid("a\xe2_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xe2_"), L"a_"));
|
||||||
|
|
||||||
// invalid 4-byte input
|
// invalid 4-byte input
|
||||||
CHECK(test_write_unicode_invalid("a\xf2\x97\x98", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf2\x97\x98"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xf2\x97", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf2\x97"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xf2", L"a"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf2"), L"a"));
|
||||||
CHECK(test_write_unicode_invalid("a\xf2\x97\x98_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf2\x97\x98_"), L"a_"));
|
||||||
CHECK(test_write_unicode_invalid("a\xf2\x97_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf2\x97_"), L"a_"));
|
||||||
CHECK(test_write_unicode_invalid("a\xf2_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf2_"), L"a_"));
|
||||||
|
|
||||||
// invalid 5-byte input
|
// invalid 5-byte input
|
||||||
CHECK(test_write_unicode_invalid("a\xf8_", L"a_"));
|
CHECK(test_write_unicode_invalid(RAW("a\xf8_"), L"a_"));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -570,7 +570,7 @@ TEST(xpath_string_translate_table)
|
|||||||
|
|
||||||
CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc', 'ABC')"), STR("ABCd\xe9 "));
|
CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc', 'ABC')"), STR("ABCd\xe9 "));
|
||||||
CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc\xe9', 'ABC!')"), STR("ABCd! "));
|
CHECK_XPATH_STRING(c, STR("translate('abcd\xe9 ', 'abc\xe9', 'ABC!')"), STR("ABCd! "));
|
||||||
CHECK_XPATH_STRING(c, STR("translate('abcd! ', 'abc!', 'ABC\xe9')"), STR("ABCd\xe9 "));
|
CHECK_XPATH_STRING(c, RAW("translate('abcd! ', 'abc!', 'ABC\xe9')"), RAW("ABCd\xe9 "));
|
||||||
CHECK_XPATH_STRING(c, STR("translate('abcde', concat('abc', 'd'), 'ABCD')"), STR("ABCDe"));
|
CHECK_XPATH_STRING(c, STR("translate('abcde', concat('abc', 'd'), 'ABCD')"), STR("ABCDe"));
|
||||||
CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe"));
|
CHECK_XPATH_STRING(c, STR("translate('abcde', 'abcd', concat('ABC', 'D'))"), STR("ABCDe"));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -224,7 +224,7 @@ TEST(xpath_parse_paths_valid_unicode)
|
|||||||
#if defined(PUGIXML_WCHAR_MODE)
|
#if defined(PUGIXML_WCHAR_MODE)
|
||||||
xpath_query q(paths[i]);
|
xpath_query q(paths[i]);
|
||||||
#elif !defined(PUGIXML_NO_STL)
|
#elif !defined(PUGIXML_NO_STL)
|
||||||
std::basic_string<char> path_utf8 = as_utf8(paths[i]);
|
std::basic_string<char_t> path_utf8 = as_utf8(paths[i]);
|
||||||
xpath_query q(path_utf8.c_str());
|
xpath_query q(path_utf8.c_str());
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@ -410,7 +410,7 @@ TEST(xpath_variables_name_unicode)
|
|||||||
const char_t* name = L"\x0400\x203D";
|
const char_t* name = L"\x0400\x203D";
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
const char_t* name = "\xd0\x80\xe2\x80\xbd";
|
const char_t* name = STR("\xd0\x80\xe2\x80\xbd");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
xpath_variable_set set;
|
xpath_variable_set set;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user