From c258fba6f107da8e99f113446340a59e767fed67 Mon Sep 17 00:00:00 2001 From: Lior Lahav Date: Sun, 19 Jul 2020 17:21:36 +0300 Subject: [PATCH 01/46] Replaced fopen and _wfopen deprecated functions with the safer fopen_s and _wfopen_s --- src/pugixml.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 4d0c2c9..ff04421 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -4981,7 +4981,20 @@ PUGI__NS_BEGIN #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) { +#if defined(_MSC_VER) && _MSC_VER >= 1900 + FILE* file = nullptr; + if (_wfopen_s(&file, path, mode) == 0) + { + return file; + } + else + { + return nullptr; + } +#else return _wfopen(path, mode); +#endif + } #else PUGI__FN char* convert_path_heap(const wchar_t* str) @@ -7187,7 +7200,22 @@ namespace pugi reset(); using impl::auto_deleter; // MSVC7 workaround + + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + FILE* filePtr = nullptr; + auto success = fopen_s(&filePtr, path_, "rb") == 0; + auto_deleter file(filePtr, impl::close_file); + + if (success == false) + { + xml_parse_result res{}; + res.status = status_file_not_found; + return res; + } +#else //_MSC_VER >= 1900 auto_deleter file(fopen(path_, "rb"), impl::close_file); +#endif return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); } @@ -7270,7 +7298,21 @@ namespace pugi PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const { using impl::auto_deleter; // MSVC7 workaround + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + FILE* filePtr = nullptr; + auto success = fopen_s(&filePtr, path_, (flags & format_save_file_text) ? 
"w" : "wb") == 0; + + auto_deleter file(filePtr, impl::close_file); + + if (success == false) + { + return false; + } +#else //_MSC_VER >= 1900 auto_deleter file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); +#endif + return impl::save_file_impl(*this, file.data, indent, flags, encoding); } From 24e454066d47b61147eb1abec27114bc52c36f4c Mon Sep 17 00:00:00 2001 From: James McCollum Date: Mon, 26 Oct 2020 08:17:46 -0400 Subject: [PATCH 02/46] Added fix to pugixml_dll.rc so shared lib can be built with MinGW --- scripts/pugixml_dll.rc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pugixml_dll.rc b/scripts/pugixml_dll.rc index d71a65d..1c7c5a7 100644 --- a/scripts/pugixml_dll.rc +++ b/scripts/pugixml_dll.rc @@ -5,7 +5,7 @@ #define PUGIXML_VERSION_PATCH 0 #define PUGIXML_VERSION_NUMBER "1.10.0\0" -#ifdef GCC_WINDRES +#if defined(GCC_WINDRES) || defined(__MINGW32__) VS_VERSION_INFO VERSIONINFO #else VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE From df42668e183b368ad2a7128427bfa3681e45db93 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 25 Nov 2020 08:38:22 -0800 Subject: [PATCH 03/46] Cleanup code and feature detection We now use open_file similarly to open_file_wide, and activate the workaround for MSVC 2005+ since that's when the _s versions were added in the first place. 
--- src/pugixml.cpp | 57 +++++++++++++------------------------------------ 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index ff04421..20da23b 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -4981,20 +4981,12 @@ PUGI__NS_BEGIN #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) { -#if defined(_MSC_VER) && _MSC_VER >= 1900 - FILE* file = nullptr; - if (_wfopen_s(&file, path, mode) == 0) - { - return file; - } - else - { - return nullptr; - } +#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 + FILE* file = 0; + return _wfopen_s(&file, path, mode) == 0 ? file : 0; #else return _wfopen(path, mode); #endif - } #else PUGI__FN char* convert_path_heap(const wchar_t* str) @@ -5038,6 +5030,16 @@ PUGI__NS_BEGIN } #endif + PUGI__FN FILE* open_file(const char* path, const char* mode) + { +#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 + FILE* file = 0; + return fopen_s(&file, path, mode) == 0 ? 
file : 0; +#else + return fopen(path, mode); +#endif + } + PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) { if (!file) return false; @@ -7200,22 +7202,7 @@ namespace pugi reset(); using impl::auto_deleter; // MSVC7 workaround - - -#if defined(_MSC_VER) && _MSC_VER >= 1900 - FILE* filePtr = nullptr; - auto success = fopen_s(&filePtr, path_, "rb") == 0; - auto_deleter file(filePtr, impl::close_file); - - if (success == false) - { - xml_parse_result res{}; - res.status = status_file_not_found; - return res; - } -#else //_MSC_VER >= 1900 - auto_deleter file(fopen(path_, "rb"), impl::close_file); -#endif + auto_deleter file(impl::open_file(path_, "rb"), impl::close_file); return impl::load_file_impl(static_cast(_root), file.data, options, encoding, &_buffer); } @@ -7298,21 +7285,7 @@ namespace pugi PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const { using impl::auto_deleter; // MSVC7 workaround - -#if defined(_MSC_VER) && _MSC_VER >= 1900 - FILE* filePtr = nullptr; - auto success = fopen_s(&filePtr, path_, (flags & format_save_file_text) ? "w" : "wb") == 0; - - auto_deleter file(filePtr, impl::close_file); - - if (success == false) - { - return false; - } -#else //_MSC_VER >= 1900 - auto_deleter file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); -#endif - + auto_deleter file(impl::open_file(path_, (flags & format_save_file_text) ? 
"w" : "wb"), impl::close_file); return impl::save_file_impl(*this, file.data, indent, flags, encoding); } From 5f97d5d66f65197417c80de8d10487c6ad96aff1 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 25 Nov 2020 09:28:26 -0800 Subject: [PATCH 04/46] Fix -Wshadow in remove_children() child variable was shadowing xml_node::child --- src/pugixml.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index a74b118..e9a6944 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -6142,13 +6142,13 @@ namespace pugi impl::xml_allocator& alloc = impl::get_allocator(_root); if (!alloc.reserve()) return false; - for (xml_node_struct* child = _root->first_child; child; ) + for (xml_node_struct* cur = _root->first_child; cur; ) { - xml_node_struct* next = child->next_sibling; + xml_node_struct* next = cur->next_sibling; - impl::destroy_node(child, alloc); + impl::destroy_node(cur, alloc); - child = next; + cur = next; } _root->first_child = 0; From 8afc1239a3f506bcb2ff79f909108751cf443120 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 25 Nov 2020 09:48:19 -0800 Subject: [PATCH 05/46] tests: Fix test fallout for MSVC6 One more XPath test falls prey to MSVC6 NaN comparison codegen issues. 
--- tests/test_xpath_variables.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_xpath_variables.cpp b/tests/test_xpath_variables.cpp index adc4464..1a1fc19 100644 --- a/tests/test_xpath_variables.cpp +++ b/tests/test_xpath_variables.cpp @@ -661,7 +661,9 @@ TEST_XML(xpath_variables_type_conversion, "15") CHECK_XPATH_BOOLEAN_VAR(xml_node(), STR("string($b) = '42'"), &set, true); CHECK_XPATH_BOOLEAN_VAR(xml_node(), STR("boolean($c) = true()"), &set, true); +#ifndef MSVC6_NAN_BUG CHECK_XPATH_BOOLEAN_VAR(xml_node(), STR("number($c) = 0"), &set, false); +#endif CHECK_XPATH_BOOLEAN_VAR(xml_node(), STR("string($c) = 'test'"), &set, true); CHECK_XPATH_BOOLEAN_VAR(xml_node(), STR("boolean($d) = true()"), &set, true); From 70bd6a6b0ade8d3c55bf74dc9d27c67baeaa8ee3 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 25 Nov 2020 10:18:42 -0800 Subject: [PATCH 06/46] Update version to 1.11 and update documentation --- CMakeLists.txt | 2 +- docs/manual.adoc | 18 ++ docs/manual.html | 374 +++++++++++++++++++++-------------- docs/quickstart.html | 280 +++++++++++++------------- readme.txt | 2 +- scripts/nuget/pugixml.nuspec | 2 +- scripts/pugixml.podspec | 2 +- scripts/pugixml_dll.rc | 4 +- src/pugiconfig.hpp | 2 +- src/pugixml.cpp | 2 +- src/pugixml.hpp | 4 +- tests/test_version.cpp | 2 +- 12 files changed, 394 insertions(+), 300 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fda789c..96c0160 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.4) -project(pugixml VERSION 1.10 LANGUAGES CXX) +project(pugixml VERSION 1.11 LANGUAGES CXX) include(CMakePackageConfigHelpers) include(CMakeDependentOption) diff --git a/docs/manual.adoc b/docs/manual.adoc index c566673..f82b82f 100644 --- a/docs/manual.adoc +++ b/docs/manual.adoc @@ -2138,6 +2138,24 @@ Because of the differences in document object models, performance considerations :!numbered: +[[v1.11]] +=== v1.11 ^2020-11-26^ + +Maintenance release. 
Changes: + + * New features: + . Add xml_node::remove_attributes and xml_node::remove_children + . Add a way to customize floating point precision via xml_attribute::set_value and xml_text::set overloads + +* XPath improvements: + . XPath parser now limits recursion depth which prevents stack overflow on malicious queries + +* Compatibility improvements: + . Fix Visual Studio warnings when built using clang-cl compiler + . Fix Wconversion warnings in gcc + . Fix Wzero-as-null-pointer-constant warnings in pugixml.hpp + . Work around several static analysis false positives + [[v1.10]] === v1.10 ^2019-09-15^ diff --git a/docs/manual.html b/docs/manual.html index 4a1d501..91bbbda 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -2,22 +2,21 @@ - + - + -pugixml 1.10 manual +pugixml 1.11 manual - @@ -838,7 +774,7 @@ git checkout v1.10
-
pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?
+
pugixml.cpp(3477) : fatal error C1010: unexpected end of file while looking for precompiled header. Did you forget to add '#include "stdafx.h"' to your source?
@@ -1057,13 +993,13 @@ In that example PUGIXML_API is inconsistent between several source
  • -

    Document node (node_document) - this is the root of the tree, which consists of several child nodes. This node corresponds to xml_document class; note that xml_document is a sub-class of xml_node, so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation.

    +

    Document node (node_document) - this is the root of the tree, which consists of several child nodes. This node corresponds to xml_document class; note that xml_document is a sub-class of xml_node, so the entire node interface is also available. However, document node is special in several ways, which are covered below. There can be only one document node in the tree; document node does not have any XML representation. Document generally has one child element node (see document_element()), although documents parsed from XML fragments (see parse_fragment) can have more than one.

  • Element/tag node (node_element) - this is the most common type of node, which represents XML elements. Element nodes have a name, a collection of attributes and a collection of child nodes (both of which may be empty). The attribute is a simple name/value pair. The example XML representation of element nodes is as follows:

    -
    <node attr="value"><child/></node>
    +
    <node attr="value"><child/></node>
    @@ -1074,7 +1010,7 @@ In that example PUGIXML_API is inconsistent between several source

    Plain character data nodes (node_pcdata) represent plain text in XML. PCDATA nodes have a value, but do not have a name or children/attributes. Note that plain character data is not a part of the element node but instead has its own node; an element node can have several child PCDATA nodes. The example XML representation of text nodes is as follows:

    -
    <node> text1 <child/> text2 </node>
    +
    <node> text1 <child/> text2 </node>
    @@ -1085,7 +1021,7 @@ In that example PUGIXML_API is inconsistent between several source

    Character data nodes (node_cdata) represent text in XML that is quoted in a special way. CDATA nodes do not differ from PCDATA nodes except in XML representation - the above text example looks like this with CDATA:

    -
    <node> <![CDATA[text1]]> <child/> <![CDATA[text2]]> </node>
    +
    <node> <![CDATA[text1]]> <child/> <![CDATA[text2]]> </node>
    @@ -1096,7 +1032,7 @@ In that example PUGIXML_API is inconsistent between several source

    Comment nodes (node_comment) represent comments in XML. Comment nodes have a value, but do not have a name or children/attributes. The example XML representation of a comment node is as follows:

    -
    <!-- comment text -->
    +
    <!-- comment text -->
    @@ -1107,7 +1043,7 @@ In that example PUGIXML_API is inconsistent between several source

    Processing instruction node (node_pi) represent processing instructions (PI) in XML. PI nodes have a name and an optional value, but do not have children/attributes. The example XML representation of a PI node is as follows:

    -
    <?name value?>
    +
    <?name value?>
    @@ -1118,7 +1054,7 @@ In that example PUGIXML_API is inconsistent between several source

    Declaration node (node_declaration) represents document declarations in XML. Declaration nodes have a name ("xml") and an optional collection of attributes, but do not have value or children. There can be only one declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a declaration node is as follows:

    -
    <?xml version="1.0"?>
    +
    <?xml version="1.0"?>
    @@ -1129,7 +1065,7 @@ In that example PUGIXML_API is inconsistent between several source

    Document type declaration node (node_doctype) represents document type declarations in XML. Document type declaration nodes have a value, which corresponds to the entire document type contents; no additional nodes are created for inner elements like <!ENTITY>. There can be only one document type declaration node in a document; moreover, it should be the topmost node (its parent should be the document). The example XML representation of a document type declaration node is as follows:

    -
    <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]>
    +
    <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]>
    @@ -1792,7 +1728,7 @@ You should use the usual bitwise arithmetics to manipulate the bitmask: to enabl Since this flag significantly changes the DOM structure it is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments. This flag is off by default.

  • -

    parse_fragment determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid. This flag is off by default.

    +

    parse_fragment determines if document should be treated as a fragment of a valid XML. Parsing document as a fragment leads to top-level PCDATA content (i.e. text that is not located inside a node) to be added to a tree, and additionally treats documents without element nodes as valid and permits multiple top-level element nodes. This flag is off by default.

@@ -1972,6 +1908,9 @@ The current behavior for Unicode conversion is to skip all invalid UTF sequences
  • Unicode validation is not performed so invalid UTF sequences are not rejected.

  • +
  • +

    Document can contain multiple top-level element nodes.

    +
  • @@ -2661,7 +2600,9 @@ All attributes have name and value, both of which are strings (value may be empt bool xml_attribute::set_value(long rhs); bool xml_attribute::set_value(unsigned long rhs); bool xml_attribute::set_value(double rhs); +bool xml_attribute::set_value(double rhs, int precision); bool xml_attribute::set_value(float rhs); +bool xml_attribute::set_value(float rhs, int precision); bool xml_attribute::set_value(bool rhs); bool xml_attribute::set_value(long long rhs); bool xml_attribute::set_value(unsigned long long rhs); @@ -2833,17 +2774,19 @@ Nodes and attributes do not exist without a document tree, so you can’t cr

    6.3. Removing nodes/attributes

    -

    +

    If you do not want your document to contain some node or attribute, you can remove it with one of the following functions:

    bool xml_node::remove_attribute(const xml_attribute& a);
    -bool xml_node::remove_child(const xml_node& n);
    +bool xml_node::remove_attributes(); +bool xml_node::remove_child(const xml_node& n); +bool xml_node::remove_children();
    -

    remove_attribute removes the attribute from the attribute list of the node, and returns the operation result. remove_child removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. Removing fails if one of the following is true:

    +

    remove_attribute removes the attribute from the attribute list of the node, and returns the operation result. remove_child removes the child node with the entire subtree (including all descendant nodes and attributes) from the document, and returns the operation result. remove_attributes removes all the attributes of the node, and returns the operation result. remove_children removes all the child nodes of the node, and returns the operation result. Removing fails if one of the following is true:

      @@ -2918,7 +2861,9 @@ If you do not want your document to contain some node or attribute, you can remo bool xml_text::set(long rhs); bool xml_text::set(unsigned long rhs); bool xml_text::set(double rhs); +bool xml_text::set(double rhs, int precision); bool xml_text::set(float rhs); +bool xml_text::set(float rhs, int precision); bool xml_text::set(bool rhs); bool xml_text::set(long long rhs); bool xml_text::set(unsigned long long rhs); @@ -4041,6 +3986,58 @@ If exceptions are disabled, then in the event of parsing failure the query is in

      9. Changelog

      +

      v1.11 2020-11-26

      +
      +

      Maintenance release. Changes:

      +
      +
      +
        +
      • +

        New features:

        +
        +
          +
        1. +

          Add xml_node::remove_attributes and xml_node::remove_children

          +
        2. +
        3. +

          Add a way to customize floating point precision via xml_attribute::set_value and xml_text::set overloads

          +
        4. +
        +
        +
      • +
      • +

        XPath improvements:

        +
        +
          +
        1. +

          XPath parser now limits recursion depth which prevents stack overflow on malicious queries

          +
        2. +
        +
        +
      • +
      • +

        Compatibility improvements:

        +
        +
          +
        1. +

          Fix Visual Studio warnings when built using clang-cl compiler

          +
        2. +
        3. +

          Fix Wconversion warnings in gcc

          +
        4. +
        5. +

          Fix Wzero-as-null-pointer-constant warnings in pugixml.hpp

          +
        6. +
        7. +

          Work around several static analysis false positives

          +
        8. +
        +
        +
      • +
      +
      +
      +

      v1.10 2019-09-15

      Maintenance release. Changes:

      @@ -5641,8 +5638,10 @@ If exceptions are disabled, then in the event of parsing failure the query is in bool remove_attribute(const xml_attribute& a); bool remove_attribute(const char_t* name); + bool remove_attributes(); bool remove_child(const xml_node& n); bool remove_child(const char_t* name); + bool remove_children(); xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); @@ -5862,8 +5861,79 @@ If exceptions are disabled, then in the event of parsing failure the query is in
      + \ No newline at end of file diff --git a/docs/quickstart.html b/docs/quickstart.html index 26173aa..cdf5763 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -2,22 +2,21 @@ - + - + -pugixml 1.10 quick start guide +pugixml 1.11 quick start guide -
      -
      Copyright (c) 2006-2019 Arseny Kapoulkine
      +
      Copyright (c) 2006-2020 Arseny Kapoulkine
       
       Permission is hereby granted, free of charge, to any person
       obtaining a copy of this software and associated documentation
      @@ -666,7 +666,7 @@ OTHER DEALINGS IN THE SOFTWARE.
      This software is based on pugixml library (https://pugixml.org).
      -pugixml is Copyright (C) 2006-2019 Arseny Kapoulkine.
      +pugixml is Copyright (C) 2006-2020 Arseny Kapoulkine.
      @@ -5861,7 +5861,7 @@ If exceptions are disabled, then in the event of parsing failure the query is in