Parsing used to work on a non null-terminated buffer, inserting a fake null terminator to increase performance. This makes it impossible to implement fragment parsing that preserves PCDATA contents (as witnessed by some tests for boundary conditions that actually depended on this behavior). Since almost all uses result in us allocating an internal buffer anyway, the new policy is to make sure all buffers that are allocated by pugixml are null-terminated - the only exception now is external calls to load_buffer_inplace that don't trigger encoding conversion. git-svn-id: https://pugixml.googlecode.com/svn/trunk@977 99668b35-9821-0410-8761-19e4c4f06640
1172 lines
29 KiB
C++
1172 lines
29 KiB
C++
#define _CRT_SECURE_NO_WARNINGS
|
|
#define _SCL_SECURE_NO_WARNINGS
|
|
#define _SCL_SECURE_NO_DEPRECATE
|
|
#define _CRT_NONSTDC_NO_DEPRECATE 0
|
|
|
|
#include <string.h> // because Borland's STL is braindead, we have to include <string.h> _before_ <string> in order to get memcpy
|
|
|
|
#include "common.hpp"
|
|
|
|
#include "writer_string.hpp"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <fstream>
|
|
#include <sstream>
|
|
|
|
#include <string>
|
|
#include <algorithm>
|
|
|
|
#ifdef __MINGW32__
|
|
# include <io.h> // for unlink in C++0x mode
|
|
#endif
|
|
|
|
#if defined(__CELLOS_LV2__) || defined(ANDROID) || defined(_GLIBCXX_HAVE_UNISTD_H)
|
|
# include <unistd.h> // for unlink
|
|
#endif
|
|
|
|
static bool load_file_in_memory(const char* path, char*& data, size_t& size)
|
|
{
|
|
FILE* file = fopen(path, "rb");
|
|
if (!file) return false;
|
|
|
|
fseek(file, 0, SEEK_END);
|
|
size = static_cast<size_t>(ftell(file));
|
|
fseek(file, 0, SEEK_SET);
|
|
|
|
data = new char[size];
|
|
|
|
CHECK(fread(data, 1, size, file) == size);
|
|
fclose(file);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool test_file_contents(const char* path, const char* data, size_t size)
|
|
{
|
|
char* fdata;
|
|
size_t fsize;
|
|
if (!load_file_in_memory(path, fdata, fsize)) return false;
|
|
|
|
bool result = (size == fsize && memcmp(data, fdata, size) == 0);
|
|
|
|
delete[] fdata;
|
|
|
|
return result;
|
|
}
|
|
|
|
TEST(document_create_empty)
|
|
{
|
|
pugi::xml_document doc;
|
|
CHECK_NODE(doc, STR(""));
|
|
}
|
|
|
|
TEST(document_create)
|
|
{
|
|
pugi::xml_document doc;
|
|
doc.append_child().set_name(STR("node"));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
#ifndef PUGIXML_NO_STL
|
|
TEST(document_load_stream)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
std::istringstream iss("<node/>");
|
|
CHECK(doc.load(iss));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_stream_offset)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
std::istringstream iss("<foobar> <node/>");
|
|
|
|
std::string s;
|
|
iss >> s;
|
|
|
|
CHECK(doc.load(iss));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_stream_text)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
std::ifstream iss("tests/data/multiline.xml");
|
|
CHECK(doc.load(iss));
|
|
CHECK_NODE(doc, STR("<node1 /><node2 /><node3 />"));
|
|
}
|
|
|
|
TEST(document_load_stream_error)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
std::ifstream fs("filedoesnotexist");
|
|
CHECK(doc.load(fs).status == status_io_error);
|
|
|
|
std::istringstream iss("<node/>");
|
|
test_runner::_memory_fail_threshold = 1;
|
|
CHECK(doc.load(iss).status == status_out_of_memory);
|
|
}
|
|
|
|
TEST(document_load_stream_empty)
|
|
{
|
|
std::istringstream iss;
|
|
|
|
pugi::xml_document doc;
|
|
doc.load(iss); // parse result depends on STL implementation
|
|
CHECK(!doc.first_child());
|
|
}
|
|
|
|
TEST(document_load_stream_wide)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
std::basic_istringstream<wchar_t> iss(L"<node/>");
|
|
CHECK(doc.load(iss));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
#ifndef PUGIXML_NO_EXCEPTIONS
|
|
TEST(document_load_stream_exceptions)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
// Windows has newline translation for text-mode files, so reading from this stream reaches eof and sets fail|eof bits.
|
|
// This test does not cause stream to throw an exception on Linux - I have no idea how to get read() to fail except
|
|
// newline translation.
|
|
std::ifstream iss("tests/data/multiline.xml");
|
|
iss.exceptions(std::ios::eofbit | std::ios::badbit | std::ios::failbit);
|
|
|
|
try
|
|
{
|
|
doc.load(iss);
|
|
|
|
CHECK(iss.good()); // if the exception was not thrown, stream reading should succeed without errors
|
|
}
|
|
catch (const std::ios_base::failure&)
|
|
{
|
|
CHECK(!doc.first_child());
|
|
}
|
|
}
|
|
#endif
|
|
|
|
TEST(document_load_stream_error_previous)
|
|
{
|
|
pugi::xml_document doc;
|
|
CHECK(doc.load(STR("<node/>")));
|
|
CHECK(doc.first_child());
|
|
|
|
std::ifstream fs1("filedoesnotexist");
|
|
CHECK(doc.load(fs1).status == status_io_error);
|
|
CHECK(!doc.first_child());
|
|
}
|
|
|
|
TEST(document_load_stream_wide_error_previous)
|
|
{
|
|
pugi::xml_document doc;
|
|
CHECK(doc.load(STR("<node/>")));
|
|
CHECK(doc.first_child());
|
|
|
|
std::basic_ifstream<wchar_t> fs1("filedoesnotexist");
|
|
CHECK(doc.load(fs1).status == status_io_error);
|
|
CHECK(!doc.first_child());
|
|
}
|
|
|
|
template <typename T> class char_array_buffer: public std::basic_streambuf<T>
|
|
{
|
|
public:
|
|
char_array_buffer(T* begin, T* end)
|
|
{
|
|
this->setg(begin, begin, end);
|
|
}
|
|
|
|
typename std::basic_streambuf<T>::int_type underflow()
|
|
{
|
|
return this->gptr() == this->egptr() ? std::basic_streambuf<T>::traits_type::eof() : std::basic_streambuf<T>::traits_type::to_int_type(*this->gptr());
|
|
}
|
|
};
|
|
|
|
TEST(document_load_stream_nonseekable)
|
|
{
|
|
char contents[] = "<node />";
|
|
char_array_buffer<char> buffer(contents, contents + sizeof(contents) / sizeof(contents[0]));
|
|
std::istream in(&buffer);
|
|
|
|
pugi::xml_document doc;
|
|
CHECK(doc.load(in));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_stream_wide_nonseekable)
|
|
{
|
|
wchar_t contents[] = L"<node />";
|
|
char_array_buffer<wchar_t> buffer(contents, contents + sizeof(contents) / sizeof(contents[0]));
|
|
std::basic_istream<wchar_t> in(&buffer);
|
|
|
|
pugi::xml_document doc;
|
|
CHECK(doc.load(in));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_stream_nonseekable_large)
|
|
{
|
|
std::basic_string<pugi::char_t> str;
|
|
str += STR("<node>");
|
|
for (int i = 0; i < 10000; ++i) str += STR("<node />");
|
|
str += STR("</node>");
|
|
|
|
char_array_buffer<pugi::char_t> buffer(&str[0], &str[0] + str.length());
|
|
std::basic_istream<pugi::char_t> in(&buffer);
|
|
|
|
pugi::xml_document doc;
|
|
CHECK(doc.load(in));
|
|
CHECK_NODE(doc, str.c_str());
|
|
}
|
|
#endif
|
|
|
|
TEST(document_load_string)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load(STR("<node/>")));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_file)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_file("tests/data/small.xml"));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_file_empty)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_file("tests/data/empty.xml"));
|
|
CHECK(!doc.first_child());
|
|
}
|
|
|
|
TEST(document_load_file_large)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_file("tests/data/large.xml"));
|
|
|
|
std::basic_string<pugi::char_t> str;
|
|
str += STR("<node>");
|
|
for (int i = 0; i < 10000; ++i) str += STR("<node />");
|
|
str += STR("</node>");
|
|
|
|
CHECK_NODE(doc, str.c_str());
|
|
}
|
|
|
|
TEST(document_load_file_error)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_file("filedoesnotexist").status == status_file_not_found);
|
|
|
|
test_runner::_memory_fail_threshold = 1;
|
|
CHECK(doc.load_file("tests/data/small.xml").status == status_out_of_memory);
|
|
}
|
|
|
|
TEST(document_load_file_error_previous)
|
|
{
|
|
pugi::xml_document doc;
|
|
CHECK(doc.load(STR("<node/>")));
|
|
CHECK(doc.first_child());
|
|
|
|
CHECK(doc.load_file("filedoesnotexist").status == status_file_not_found);
|
|
CHECK(!doc.first_child());
|
|
}
|
|
|
|
TEST(document_load_file_wide_ascii)
|
|
{
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_file(L"tests/data/small.xml"));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST_XML(document_save, "<node/>")
|
|
{
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_no_declaration | pugi::format_raw, get_native_encoding());
|
|
|
|
CHECK(writer.as_string() == STR("<node />"));
|
|
}
|
|
|
|
#ifndef PUGIXML_NO_STL
|
|
TEST_XML(document_save_stream, "<node/>")
|
|
{
|
|
std::ostringstream oss;
|
|
|
|
doc.save(oss, STR(""), pugi::format_no_declaration | pugi::format_raw);
|
|
|
|
CHECK(oss.str() == "<node />");
|
|
}
|
|
|
|
TEST_XML(document_save_stream_wide, "<node/>")
|
|
{
|
|
std::basic_ostringstream<wchar_t> oss;
|
|
|
|
doc.save(oss, STR(""), pugi::format_no_declaration | pugi::format_raw);
|
|
|
|
CHECK(oss.str() == L"<node />");
|
|
}
|
|
#endif
|
|
|
|
TEST_XML(document_save_bom, "<n/>")
|
|
{
|
|
unsigned int flags = format_no_declaration | format_raw | format_write_bom;
|
|
|
|
// specific encodings
|
|
CHECK(test_save_narrow(doc, flags, encoding_utf8, "\xef\xbb\xbf<n />", 8));
|
|
CHECK(test_save_narrow(doc, flags, encoding_utf16_be, "\xfe\xff\x00<\x00n\x00 \x00/\x00>", 12));
|
|
CHECK(test_save_narrow(doc, flags, encoding_utf16_le, "\xff\xfe<\x00n\x00 \x00/\x00>\x00", 12));
|
|
CHECK(test_save_narrow(doc, flags, encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>", 24));
|
|
CHECK(test_save_narrow(doc, flags, encoding_utf32_le, "\xff\xfe\x00\x00<\x00\x00\x00n\x00\x00\x00 \x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 24));
|
|
CHECK(test_save_narrow(doc, flags, encoding_latin1, "<n />", 5));
|
|
|
|
// encodings synonyms
|
|
CHECK(save_narrow(doc, flags, encoding_utf16) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf16_le : encoding_utf16_be)));
|
|
CHECK(save_narrow(doc, flags, encoding_utf32) == save_narrow(doc, flags, (is_little_endian() ? encoding_utf32_le : encoding_utf32_be)));
|
|
|
|
size_t wcharsize = sizeof(wchar_t);
|
|
CHECK(save_narrow(doc, flags, encoding_wchar) == save_narrow(doc, flags, (wcharsize == 2 ? encoding_utf16 : encoding_utf32)));
|
|
}
|
|
|
|
TEST_XML(document_save_declaration, "<node/>")
|
|
{
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_default, get_native_encoding());
|
|
|
|
CHECK(writer.as_string() == STR("<?xml version=\"1.0\"?>\n<node />\n"));
|
|
}
|
|
|
|
TEST(document_save_declaration_empty)
|
|
{
|
|
xml_document doc;
|
|
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_default, get_native_encoding());
|
|
|
|
CHECK(writer.as_string() == STR("<?xml version=\"1.0\"?>\n"));
|
|
}
|
|
|
|
TEST_XML(document_save_declaration_present_first, "<node/>")
|
|
{
|
|
doc.insert_child_before(node_declaration, doc.first_child()).append_attribute(STR("encoding")) = STR("utf8");
|
|
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_default, get_native_encoding());
|
|
|
|
CHECK(writer.as_string() == STR("<?xml encoding=\"utf8\"?>\n<node />\n"));
|
|
}
|
|
|
|
TEST_XML(document_save_declaration_present_second, "<node/>")
|
|
{
|
|
doc.insert_child_before(node_declaration, doc.first_child()).append_attribute(STR("encoding")) = STR("utf8");
|
|
doc.insert_child_before(node_comment, doc.first_child()).set_value(STR("text"));
|
|
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_default, get_native_encoding());
|
|
|
|
CHECK(writer.as_string() == STR("<!--text-->\n<?xml encoding=\"utf8\"?>\n<node />\n"));
|
|
}
|
|
|
|
TEST_XML(document_save_declaration_present_last, "<node/>")
|
|
{
|
|
doc.append_child(node_declaration).append_attribute(STR("encoding")) = STR("utf8");
|
|
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_default, get_native_encoding());
|
|
|
|
// node writer only looks for declaration before the first element child
|
|
CHECK(writer.as_string() == STR("<?xml version=\"1.0\"?>\n<node />\n<?xml encoding=\"utf8\"?>\n"));
|
|
}
|
|
|
|
TEST_XML(document_save_declaration_latin1, "<node/>")
|
|
{
|
|
xml_writer_string writer;
|
|
|
|
doc.save(writer, STR(""), pugi::format_default, encoding_latin1);
|
|
|
|
CHECK(writer.as_narrow() == "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<node />\n");
|
|
}
|
|
|
|
#define USE_MKSTEMP defined(__unix) || defined(__QNX__) || defined(ANDROID)
|
|
|
|
struct temp_file
|
|
{
|
|
char path[512];
|
|
int fd;
|
|
|
|
temp_file(): fd(0)
|
|
{
|
|
#if USE_MKSTEMP
|
|
strcpy(path, "/tmp/pugiXXXXXX");
|
|
|
|
fd = mkstemp(path);
|
|
CHECK(fd != -1);
|
|
#elif defined(__CELLOS_LV2__) || defined(_WIN32_WCE)
|
|
path[0] = 0; // no temporary file support
|
|
#else
|
|
tmpnam(path);
|
|
#endif
|
|
}
|
|
|
|
~temp_file()
|
|
{
|
|
#ifndef _WIN32_WCE
|
|
CHECK(unlink(path) == 0);
|
|
#endif
|
|
|
|
#if USE_MKSTEMP
|
|
CHECK(close(fd) == 0);
|
|
#endif
|
|
}
|
|
};
|
|
|
|
TEST_XML(document_save_file, "<node/>")
|
|
{
|
|
temp_file f;
|
|
|
|
CHECK(doc.save_file(f.path));
|
|
|
|
CHECK(doc.load_file(f.path, pugi::parse_default | pugi::parse_declaration));
|
|
CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><node />"));
|
|
}
|
|
|
|
TEST_XML(document_save_file_wide, "<node/>")
|
|
{
|
|
temp_file f;
|
|
|
|
// widen the path
|
|
wchar_t wpath[32];
|
|
std::copy(f.path, f.path + strlen(f.path) + 1, wpath + 0);
|
|
|
|
CHECK(doc.save_file(wpath));
|
|
|
|
CHECK(doc.load_file(f.path, pugi::parse_default | pugi::parse_declaration));
|
|
CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><node />"));
|
|
}
|
|
|
|
TEST_XML(document_save_file_error, "<node/>")
|
|
{
|
|
CHECK(!doc.save_file("tests/data/unknown/output.xml"));
|
|
}
|
|
|
|
TEST_XML(document_save_file_text, "<node/>")
|
|
{
|
|
temp_file f;
|
|
|
|
CHECK(doc.save_file(f.path, STR(""), pugi::format_no_declaration | pugi::format_save_file_text));
|
|
CHECK(test_file_contents(f.path, "<node />\n", 9) || test_file_contents(f.path, "<node />\r\n", 10));
|
|
|
|
CHECK(doc.save_file(f.path, STR(""), pugi::format_no_declaration));
|
|
CHECK(test_file_contents(f.path, "<node />\n", 9));
|
|
}
|
|
|
|
TEST_XML(document_save_file_wide_text, "<node/>")
|
|
{
|
|
temp_file f;
|
|
|
|
// widen the path
|
|
wchar_t wpath[32];
|
|
std::copy(f.path, f.path + strlen(f.path) + 1, wpath + 0);
|
|
|
|
CHECK(doc.save_file(wpath, STR(""), pugi::format_no_declaration | pugi::format_save_file_text));
|
|
CHECK(test_file_contents(f.path, "<node />\n", 9) || test_file_contents(f.path, "<node />\r\n", 10));
|
|
|
|
CHECK(doc.save_file(wpath, STR(""), pugi::format_no_declaration));
|
|
CHECK(test_file_contents(f.path, "<node />\n", 9));
|
|
}
|
|
|
|
TEST(document_load_buffer)
|
|
{
|
|
const pugi::char_t text[] = STR("<?xml?><node/>");
|
|
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_buffer(text, sizeof(text)));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_buffer_inplace)
|
|
{
|
|
pugi::char_t text[] = STR("<?xml?><node/>");
|
|
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_buffer_inplace(text, sizeof(text)));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_load_buffer_inplace_own)
|
|
{
|
|
allocation_function alloc = get_memory_allocation_function();
|
|
|
|
size_t size = strlen("<?xml?><node/>") * sizeof(pugi::char_t);
|
|
|
|
pugi::char_t* text = static_cast<pugi::char_t*>(alloc(size));
|
|
CHECK(text);
|
|
|
|
memcpy(text, STR("<?xml?><node/>"), size);
|
|
|
|
pugi::xml_document doc;
|
|
|
|
CHECK(doc.load_buffer_inplace_own(text, size));
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
}
|
|
|
|
TEST(document_parse_result_bool)
|
|
{
|
|
xml_parse_result result;
|
|
|
|
result.status = status_ok;
|
|
CHECK(result);
|
|
CHECK(!!result);
|
|
CHECK(result == true);
|
|
|
|
for (int i = 1; i < 20; ++i)
|
|
{
|
|
result.status = static_cast<xml_parse_status>(i);
|
|
CHECK(!result);
|
|
CHECK(result == false);
|
|
}
|
|
}
|
|
|
|
TEST(document_parse_result_description)
|
|
{
|
|
xml_parse_result result;
|
|
|
|
for (int i = 0; i < 20; ++i)
|
|
{
|
|
result.status = static_cast<xml_parse_status>(i);
|
|
|
|
CHECK(result.description() != 0);
|
|
CHECK(result.description()[0] != 0);
|
|
}
|
|
}
|
|
|
|
TEST(document_load_fail)
|
|
{
|
|
xml_document doc;
|
|
CHECK(!doc.load(STR("<foo><bar/>")));
|
|
CHECK(doc.child(STR("foo")).child(STR("bar")));
|
|
}
|
|
|
|
inline void check_utftest_document(const xml_document& doc)
|
|
{
|
|
// ascii text
|
|
CHECK_STRING(doc.last_child().first_child().name(), STR("English"));
|
|
|
|
// check that we have parsed some non-ascii text
|
|
CHECK(static_cast<unsigned int>(doc.last_child().last_child().name()[0]) >= 0x80);
|
|
|
|
// check magic string
|
|
const pugi::char_t* v = doc.last_child().child(STR("Heavy")).previous_sibling().child_value();
|
|
|
|
#ifdef PUGIXML_WCHAR_MODE
|
|
CHECK(v[0] == 0x4e16 && v[1] == 0x754c && v[2] == 0x6709 && v[3] == 0x5f88 && v[4] == 0x591a && v[5] == wchar_cast(0x8bed) && v[6] == wchar_cast(0x8a00));
|
|
|
|
// last character is a surrogate pair
|
|
size_t wcharsize = sizeof(wchar_t);
|
|
|
|
CHECK(wcharsize == 2 ? (v[7] == wchar_cast(0xd852) && v[8] == wchar_cast(0xdf62)) : (v[7] == wchar_cast(0x24b62)));
|
|
#else
|
|
// unicode string
|
|
CHECK_STRING(v, "\xe4\xb8\x96\xe7\x95\x8c\xe6\x9c\x89\xe5\xbe\x88\xe5\xa4\x9a\xe8\xaf\xad\xe8\xa8\x80\xf0\xa4\xad\xa2");
|
|
#endif
|
|
}
|
|
|
|
TEST(document_load_file_convert_auto)
|
|
{
|
|
const char* files[] =
|
|
{
|
|
"tests/data/utftest_utf16_be.xml",
|
|
"tests/data/utftest_utf16_be_bom.xml",
|
|
"tests/data/utftest_utf16_be_nodecl.xml",
|
|
"tests/data/utftest_utf16_le.xml",
|
|
"tests/data/utftest_utf16_le_bom.xml",
|
|
"tests/data/utftest_utf16_le_nodecl.xml",
|
|
"tests/data/utftest_utf32_be.xml",
|
|
"tests/data/utftest_utf32_be_bom.xml",
|
|
"tests/data/utftest_utf32_be_nodecl.xml",
|
|
"tests/data/utftest_utf32_le.xml",
|
|
"tests/data/utftest_utf32_le_bom.xml",
|
|
"tests/data/utftest_utf32_le_nodecl.xml",
|
|
"tests/data/utftest_utf8.xml",
|
|
"tests/data/utftest_utf8_bom.xml",
|
|
"tests/data/utftest_utf8_nodecl.xml"
|
|
};
|
|
|
|
xml_encoding encodings[] =
|
|
{
|
|
encoding_utf16_be, encoding_utf16_be, encoding_utf16_be,
|
|
encoding_utf16_le, encoding_utf16_le, encoding_utf16_le,
|
|
encoding_utf32_be, encoding_utf32_be, encoding_utf32_be,
|
|
encoding_utf32_le, encoding_utf32_le, encoding_utf32_le,
|
|
encoding_utf8, encoding_utf8, encoding_utf8
|
|
};
|
|
|
|
for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i)
|
|
{
|
|
xml_document doc;
|
|
xml_parse_result res = doc.load_file(files[i]);
|
|
|
|
CHECK(res);
|
|
CHECK(res.encoding == encodings[i]);
|
|
check_utftest_document(doc);
|
|
}
|
|
}
|
|
|
|
TEST(document_load_file_convert_specific)
|
|
{
|
|
const char* files[] =
|
|
{
|
|
"tests/data/utftest_utf16_be.xml",
|
|
"tests/data/utftest_utf16_be_bom.xml",
|
|
"tests/data/utftest_utf16_be_nodecl.xml",
|
|
"tests/data/utftest_utf16_le.xml",
|
|
"tests/data/utftest_utf16_le_bom.xml",
|
|
"tests/data/utftest_utf16_le_nodecl.xml",
|
|
"tests/data/utftest_utf32_be.xml",
|
|
"tests/data/utftest_utf32_be_bom.xml",
|
|
"tests/data/utftest_utf32_be_nodecl.xml",
|
|
"tests/data/utftest_utf32_le.xml",
|
|
"tests/data/utftest_utf32_le_bom.xml",
|
|
"tests/data/utftest_utf32_le_nodecl.xml",
|
|
"tests/data/utftest_utf8.xml",
|
|
"tests/data/utftest_utf8_bom.xml",
|
|
"tests/data/utftest_utf8_nodecl.xml"
|
|
};
|
|
|
|
xml_encoding encodings[] =
|
|
{
|
|
encoding_utf16_be, encoding_utf16_be, encoding_utf16_be,
|
|
encoding_utf16_le, encoding_utf16_le, encoding_utf16_le,
|
|
encoding_utf32_be, encoding_utf32_be, encoding_utf32_be,
|
|
encoding_utf32_le, encoding_utf32_le, encoding_utf32_le,
|
|
encoding_utf8, encoding_utf8, encoding_utf8
|
|
};
|
|
|
|
for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i)
|
|
{
|
|
for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j)
|
|
{
|
|
xml_encoding encoding = encodings[j];
|
|
|
|
xml_document doc;
|
|
xml_parse_result res = doc.load_file(files[i], parse_default, encoding);
|
|
|
|
if (encoding == encodings[i])
|
|
{
|
|
CHECK(res);
|
|
CHECK(res.encoding == encoding);
|
|
check_utftest_document(doc);
|
|
}
|
|
else
|
|
{
|
|
// should not get past first tag
|
|
CHECK(!doc.first_child());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(document_load_file_convert_native_endianness)
|
|
{
|
|
const char* files[2][6] =
|
|
{
|
|
{
|
|
"tests/data/utftest_utf16_be.xml",
|
|
"tests/data/utftest_utf16_be_bom.xml",
|
|
"tests/data/utftest_utf16_be_nodecl.xml",
|
|
"tests/data/utftest_utf32_be.xml",
|
|
"tests/data/utftest_utf32_be_bom.xml",
|
|
"tests/data/utftest_utf32_be_nodecl.xml",
|
|
},
|
|
{
|
|
"tests/data/utftest_utf16_le.xml",
|
|
"tests/data/utftest_utf16_le_bom.xml",
|
|
"tests/data/utftest_utf16_le_nodecl.xml",
|
|
"tests/data/utftest_utf32_le.xml",
|
|
"tests/data/utftest_utf32_le_bom.xml",
|
|
"tests/data/utftest_utf32_le_nodecl.xml",
|
|
}
|
|
};
|
|
|
|
xml_encoding encodings[] =
|
|
{
|
|
encoding_utf16, encoding_utf16, encoding_utf16,
|
|
encoding_utf32, encoding_utf32, encoding_utf32
|
|
};
|
|
|
|
for (unsigned int i = 0; i < sizeof(files[0]) / sizeof(files[0][0]); ++i)
|
|
{
|
|
const char* right_file = files[is_little_endian()][i];
|
|
const char* wrong_file = files[!is_little_endian()][i];
|
|
|
|
for (unsigned int j = 0; j < sizeof(encodings) / sizeof(encodings[0]); ++j)
|
|
{
|
|
xml_encoding encoding = encodings[j];
|
|
|
|
// check file with right endianness
|
|
{
|
|
xml_document doc;
|
|
xml_parse_result res = doc.load_file(right_file, parse_default, encoding);
|
|
|
|
if (encoding == encodings[i])
|
|
{
|
|
CHECK(res);
|
|
check_utftest_document(doc);
|
|
}
|
|
else
|
|
{
|
|
// should not get past first tag
|
|
CHECK(!doc.first_child());
|
|
}
|
|
}
|
|
|
|
// check file with wrong endianness
|
|
{
|
|
xml_document doc;
|
|
doc.load_file(wrong_file, parse_default, encoding);
|
|
CHECK(!doc.first_child());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
struct file_data_t
|
|
{
|
|
const char* path;
|
|
xml_encoding encoding;
|
|
|
|
char* data;
|
|
size_t size;
|
|
};
|
|
|
|
|
|
TEST(document_contents_preserve)
|
|
{
|
|
file_data_t files[] =
|
|
{
|
|
{"tests/data/utftest_utf16_be_clean.xml", encoding_utf16_be, 0, 0},
|
|
{"tests/data/utftest_utf16_le_clean.xml", encoding_utf16_le, 0, 0},
|
|
{"tests/data/utftest_utf32_be_clean.xml", encoding_utf32_be, 0, 0},
|
|
{"tests/data/utftest_utf32_le_clean.xml", encoding_utf32_le, 0, 0},
|
|
{"tests/data/utftest_utf8_clean.xml", encoding_utf8, 0, 0}
|
|
};
|
|
|
|
// load files in memory
|
|
for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i)
|
|
{
|
|
CHECK(load_file_in_memory(files[i].path, files[i].data, files[i].size));
|
|
}
|
|
|
|
// convert each file to each format and compare bitwise
|
|
for (unsigned int src = 0; src < sizeof(files) / sizeof(files[0]); ++src)
|
|
{
|
|
for (unsigned int dst = 0; dst < sizeof(files) / sizeof(files[0]); ++dst)
|
|
{
|
|
// parse into document (preserve comments, declaration and whitespace pcdata)
|
|
xml_document doc;
|
|
CHECK(doc.load_buffer(files[src].data, files[src].size, parse_default | parse_ws_pcdata | parse_declaration | parse_comments));
|
|
|
|
// compare saved document with the original (raw formatting, without extra declaration, write bom if it was in original file)
|
|
CHECK(test_save_narrow(doc, format_raw | format_no_declaration | format_write_bom, files[dst].encoding, files[dst].data, files[dst].size));
|
|
}
|
|
}
|
|
|
|
// cleanup
|
|
for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j)
|
|
{
|
|
delete[] files[j].data;
|
|
}
|
|
}
|
|
|
|
TEST(document_contents_preserve_latin1)
|
|
{
|
|
file_data_t files[] =
|
|
{
|
|
{"tests/data/latintest_utf8.xml", encoding_utf8, 0, 0},
|
|
{"tests/data/latintest_latin1.xml", encoding_latin1, 0, 0}
|
|
};
|
|
|
|
// load files in memory
|
|
for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); ++i)
|
|
{
|
|
CHECK(load_file_in_memory(files[i].path, files[i].data, files[i].size));
|
|
}
|
|
|
|
// convert each file to each format and compare bitwise
|
|
for (unsigned int src = 0; src < sizeof(files) / sizeof(files[0]); ++src)
|
|
{
|
|
for (unsigned int dst = 0; dst < sizeof(files) / sizeof(files[0]); ++dst)
|
|
{
|
|
// parse into document (preserve comments, declaration and whitespace pcdata)
|
|
xml_document doc;
|
|
CHECK(doc.load_buffer(files[src].data, files[src].size, parse_default | parse_ws_pcdata | parse_declaration | parse_comments, files[src].encoding));
|
|
|
|
// compare saved document with the original (raw formatting, without extra declaration, write bom if it was in original file)
|
|
CHECK(test_save_narrow(doc, format_raw | format_no_declaration | format_write_bom, files[dst].encoding, files[dst].data, files[dst].size));
|
|
}
|
|
}
|
|
|
|
// cleanup
|
|
for (unsigned int j = 0; j < sizeof(files) / sizeof(files[0]); ++j)
|
|
{
|
|
delete[] files[j].data;
|
|
}
|
|
}
|
|
|
|
static bool test_parse_fail(const void* buffer, size_t size, xml_encoding encoding = encoding_utf8)
|
|
{
|
|
// copy buffer to heap (to enable out-of-bounds checks)
|
|
void* temp = malloc(size);
|
|
memcpy(temp, buffer, size);
|
|
|
|
// check that this parses without buffer overflows (yielding an error)
|
|
xml_document doc;
|
|
bool result = doc.load_buffer_inplace(temp, size, parse_default, encoding);
|
|
|
|
free(temp);
|
|
|
|
return !result;
|
|
}
|
|
|
|
TEST(document_convert_invalid_utf8)
|
|
{
|
|
// invalid 1-byte input
|
|
CHECK(test_parse_fail("<\xb0", 2));
|
|
|
|
// invalid 2-byte input
|
|
CHECK(test_parse_fail("<\xc0", 2));
|
|
CHECK(test_parse_fail("<\xd0", 2));
|
|
|
|
// invalid 3-byte input
|
|
CHECK(test_parse_fail("<\xe2\x80", 3));
|
|
CHECK(test_parse_fail("<\xe2", 2));
|
|
|
|
// invalid 4-byte input
|
|
CHECK(test_parse_fail("<\xf2\x97\x98", 4));
|
|
CHECK(test_parse_fail("<\xf2\x97", 3));
|
|
CHECK(test_parse_fail("<\xf2", 2));
|
|
|
|
// invalid 5-byte input
|
|
CHECK(test_parse_fail("<\xf8", 2));
|
|
}
|
|
|
|
TEST(document_convert_invalid_utf16)
|
|
{
|
|
// check non-terminated degenerate handling
|
|
CHECK(test_parse_fail("\x00<\xda\x1d", 4, encoding_utf16_be));
|
|
CHECK(test_parse_fail("<\x00\x1d\xda", 4, encoding_utf16_le));
|
|
|
|
// check incorrect leading code
|
|
CHECK(test_parse_fail("\x00<\xde\x24", 4, encoding_utf16_be));
|
|
CHECK(test_parse_fail("<\x00\x24\xde", 4, encoding_utf16_le));
|
|
}
|
|
|
|
TEST(document_load_buffer_empty)
|
|
{
|
|
xml_encoding encodings[] =
|
|
{
|
|
encoding_auto,
|
|
encoding_utf8,
|
|
encoding_utf16_le,
|
|
encoding_utf16_be,
|
|
encoding_utf16,
|
|
encoding_utf32_le,
|
|
encoding_utf32_be,
|
|
encoding_utf32,
|
|
encoding_wchar,
|
|
encoding_latin1
|
|
};
|
|
|
|
char buffer[1];
|
|
|
|
for (unsigned int i = 0; i < sizeof(encodings) / sizeof(encodings[0]); ++i)
|
|
{
|
|
xml_encoding encoding = encodings[i];
|
|
|
|
xml_document doc;
|
|
CHECK(doc.load_buffer(buffer, 0, parse_default, encoding) && !doc.first_child());
|
|
CHECK(doc.load_buffer(0, 0, parse_default, encoding) && !doc.first_child());
|
|
|
|
CHECK(doc.load_buffer_inplace(buffer, 0, parse_default, encoding) && !doc.first_child());
|
|
CHECK(doc.load_buffer_inplace(0, 0, parse_default, encoding) && !doc.first_child());
|
|
|
|
void* own_buffer = pugi::get_memory_allocation_function()(1);
|
|
|
|
CHECK(doc.load_buffer_inplace_own(own_buffer, 0, parse_default, encoding) && !doc.first_child());
|
|
CHECK(doc.load_buffer_inplace_own(0, 0, parse_default, encoding) && !doc.first_child());
|
|
}
|
|
}
|
|
|
|
TEST(document_progressive_truncation)
|
|
{
|
|
char* original_data;
|
|
size_t original_size;
|
|
|
|
CHECK(load_file_in_memory("tests/data/truncation.xml", original_data, original_size));
|
|
|
|
char* buffer = new char[original_size];
|
|
|
|
for (size_t i = 1; i < original_size; ++i)
|
|
{
|
|
char* truncated_data = buffer + original_size - i;
|
|
|
|
memcpy(truncated_data, original_data, i);
|
|
|
|
xml_document doc;
|
|
bool result = doc.load_buffer_inplace(truncated_data, i);
|
|
|
|
// some truncate locations are parseable - those that come after declaration, declaration + doctype, declaration + doctype + comment and eof
|
|
CHECK(((i - 21) < 3 || (i - 66) < 3 || (i - 95) < 3 || i >= 3325) ? result : !result);
|
|
}
|
|
|
|
delete[] buffer;
|
|
delete[] original_data;
|
|
}
|
|
|
|
TEST(document_load_buffer_short)
|
|
{
|
|
char* data = new char[4];
|
|
memcpy(data, "abcd", 4);
|
|
|
|
xml_document doc;
|
|
|
|
CHECK(doc.load_buffer(data, 4));
|
|
CHECK(doc.load_buffer(data + 1, 3));
|
|
CHECK(doc.load_buffer(data + 2, 2));
|
|
CHECK(doc.load_buffer(data + 3, 1));
|
|
CHECK(doc.load_buffer(data + 4, 0));
|
|
CHECK(doc.load_buffer(0, 0));
|
|
|
|
delete[] data;
|
|
}
|
|
|
|
TEST(document_load_buffer_inplace_short)
|
|
{
|
|
char* data = new char[4];
|
|
memcpy(data, "abcd", 4);
|
|
|
|
xml_document doc;
|
|
|
|
CHECK(doc.load_buffer_inplace(data, 4));
|
|
CHECK(doc.load_buffer_inplace(data + 1, 3));
|
|
CHECK(doc.load_buffer_inplace(data + 2, 2));
|
|
CHECK(doc.load_buffer_inplace(data + 3, 1));
|
|
CHECK(doc.load_buffer_inplace(data + 4, 0));
|
|
CHECK(doc.load_buffer_inplace(0, 0));
|
|
|
|
delete[] data;
|
|
}
|
|
|
|
#ifndef PUGIXML_NO_EXCEPTIONS
|
|
TEST(document_load_exceptions)
|
|
{
|
|
bool thrown = false;
|
|
|
|
try
|
|
{
|
|
pugi::xml_document doc;
|
|
if (!doc.load(STR("<node attribute='value"))) throw std::bad_alloc();
|
|
|
|
CHECK_FORCE_FAIL("Expected parsing failure");
|
|
}
|
|
catch (const std::bad_alloc&)
|
|
{
|
|
thrown = true;
|
|
}
|
|
|
|
CHECK(thrown);
|
|
}
|
|
#endif
|
|
|
|
TEST_XML_FLAGS(document_element, "<?xml version='1.0'?><node><child/></node><!---->", parse_default | parse_declaration | parse_comments)
|
|
{
|
|
CHECK(doc.document_element() == doc.child(STR("node")));
|
|
}
|
|
|
|
TEST_XML_FLAGS(document_element_absent, "<!---->", parse_comments)
|
|
{
|
|
CHECK(doc.document_element() == xml_node());
|
|
}
|
|
|
|
TEST_XML(document_reset, "<node><child/></node>")
|
|
{
|
|
CHECK(doc.first_child());
|
|
|
|
doc.reset();
|
|
CHECK(!doc.first_child());
|
|
CHECK_NODE(doc, STR(""));
|
|
|
|
doc.reset();
|
|
CHECK(!doc.first_child());
|
|
CHECK_NODE(doc, STR(""));
|
|
|
|
CHECK(doc.load(STR("<node/>")));
|
|
CHECK(doc.first_child());
|
|
CHECK_NODE(doc, STR("<node />"));
|
|
|
|
doc.reset();
|
|
CHECK(!doc.first_child());
|
|
CHECK_NODE(doc, STR(""));
|
|
}
|
|
|
|
TEST(document_reset_empty)
|
|
{
|
|
xml_document doc;
|
|
|
|
doc.reset();
|
|
CHECK(!doc.first_child());
|
|
CHECK_NODE(doc, STR(""));
|
|
}
|
|
|
|
TEST_XML(document_reset_copy, "<node><child/></node>")
|
|
{
|
|
xml_document doc2;
|
|
|
|
CHECK_NODE(doc2, STR(""));
|
|
|
|
doc2.reset(doc);
|
|
|
|
CHECK_NODE(doc2, STR("<node><child /></node>"));
|
|
CHECK(doc.first_child() != doc2.first_child());
|
|
|
|
doc.reset(doc2);
|
|
|
|
CHECK_NODE(doc, STR("<node><child /></node>"));
|
|
CHECK(doc.first_child() != doc2.first_child());
|
|
|
|
CHECK(doc.first_child().offset_debug() == -1);
|
|
}
|
|
|
|
TEST_XML(document_reset_copy_self, "<node><child/></node>")
|
|
{
|
|
CHECK_NODE(doc, STR("<node><child /></node>"));
|
|
|
|
doc.reset(doc);
|
|
|
|
CHECK(!doc.first_child());
|
|
CHECK_NODE(doc, STR(""));
|
|
}
|
|
|
|
struct document_data_t
|
|
{
|
|
xml_encoding encoding;
|
|
|
|
const unsigned char* data;
|
|
size_t size;
|
|
};
|
|
|
|
#include <stdio.h>
|
|
|
|
TEST(document_load_buffer_utf_truncated)
|
|
{
|
|
const unsigned char utf8[] = {'<', 0xe2, 0x82, 0xac, '/', '>'};
|
|
const unsigned char utf16_be[] = {0, '<', 0x20, 0xac, 0, '/', 0, '>'};
|
|
const unsigned char utf16_le[] = {'<', 0, 0xac, 0x20, '/', 0, '>', 0};
|
|
const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'};
|
|
const unsigned char utf32_le[] = {'<', 0, 0, 0, 0xac, 0x20, 0, 0, '/', 0, 0, 0, '>', 0, 0, 0};
|
|
|
|
const document_data_t data[] =
|
|
{
|
|
{ encoding_utf8, utf8, sizeof(utf8) },
|
|
{ encoding_utf16_be, utf16_be, sizeof(utf16_be) },
|
|
{ encoding_utf16_le, utf16_le, sizeof(utf16_le) },
|
|
{ encoding_utf32_be, utf32_be, sizeof(utf32_be) },
|
|
{ encoding_utf32_le, utf32_le, sizeof(utf32_le) },
|
|
};
|
|
|
|
for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
|
|
{
|
|
const document_data_t& d = data[i];
|
|
|
|
for (size_t j = 0; j <= d.size; ++j)
|
|
{
|
|
char* buffer = new char[j];
|
|
memcpy(buffer, d.data, j);
|
|
|
|
xml_document doc;
|
|
xml_parse_result res = doc.load_buffer(buffer, j, parse_default, d.encoding);
|
|
|
|
if (j == d.size)
|
|
{
|
|
CHECK(res);
|
|
|
|
const char_t* name = doc.first_child().name();
|
|
|
|
#ifdef PUGIXML_WCHAR_MODE
|
|
CHECK(name[0] == 0x20ac && name[1] == 0);
|
|
#else
|
|
CHECK_STRING(name, "\xe2\x82\xac");
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
CHECK(!res || !doc.first_child());
|
|
}
|
|
|
|
delete[] buffer;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef PUGIXML_NO_STL
|
|
TEST(document_load_stream_truncated)
|
|
{
|
|
const unsigned char utf32_be[] = {0, 0, 0, '<', 0, 0, 0x20, 0xac, 0, 0, 0, '/', 0, 0, 0, '>'};
|
|
|
|
for (size_t i = 0; i <= sizeof(utf32_be); ++i)
|
|
{
|
|
std::string prefix(reinterpret_cast<const char*>(utf32_be), i);
|
|
std::istringstream iss(prefix);
|
|
|
|
xml_document doc;
|
|
xml_parse_result res = doc.load(iss);
|
|
|
|
if (i == sizeof(utf32_be))
|
|
{
|
|
CHECK(res);
|
|
}
|
|
else
|
|
{
|
|
CHECK(!res || !doc.first_child());
|
|
|
|
if (i < 8)
|
|
{
|
|
CHECK(!doc.first_child());
|
|
}
|
|
else
|
|
{
|
|
const char_t* name = doc.first_child().name();
|
|
|
|
#ifdef PUGIXML_WCHAR_MODE
|
|
CHECK(name[0] == 0x20ac && name[1] == 0);
|
|
#else
|
|
CHECK_STRING(name, "\xe2\x82\xac");
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif |