From d3199a0c392a2eca90379170b2676e6c7430dddc Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 15 Apr 2023 12:41:44 -0700 Subject: [PATCH 1/2] Fix get_file_size behavior inconsistency for folders Different OSes have different behavior when trying to fopen/fseek/ftell a folder. On Linux, some systems return 0 size, some systems return an error, and some systems return LONG_MAX. LONG_MAX is particularly problematic because that causes spurious OOMs under address sanitizer. Using fstat manually cleans this up, however it introduces a new dependency on platform specific headers that we didn't have before, and also has unclear behavior on 64-bit systems wrt 32-bit sizes which will need to be tested further as I'm not certain if the behavior needs to be special-cased only for MSVC/MinGW, which are currently not handled by this path (unless MinGW defines __unix__...) --- src/pugixml.cpp | 17 ++++++++++++++++- tests/test_document.cpp | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 2f15073..9e6f887 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -40,6 +40,11 @@ // For placement new #include +// For load_file +#if defined(__unix__) || defined(__APPLE__) +#include +#endif + #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable: 4127) // conditional expression is constant @@ -4759,7 +4764,17 @@ PUGI_IMPL_NS_BEGIN // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick PUGI_IMPL_FN xml_parse_status get_file_size(FILE* file, size_t& out_result) { - #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 + #if defined(__unix__) || defined(__APPLE__) + // this simultaneously retrieves the file size and file mode (to guard against loading non-files) + struct stat st; + if (fstat(fileno(file), &st) != 0) return status_io_error; + + // anything that's not a regular file doesn't have a coherent length + if (!S_ISREG(st.st_mode)) return status_io_error; + + typedef off_t length_type; + length_type length = st.st_size; + #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 // there are 64-bit versions of fseek/ftell, let's use them typedef __int64 length_type; diff --git a/tests/test_document.cpp b/tests/test_document.cpp index 2e2904d..a8100c8 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -589,7 +589,7 @@ TEST(document_load_file_wide_out_of_memory) CHECK(result.status == status_out_of_memory || result.status == status_file_not_found); } -#if defined(__APPLE__) +#if defined(__unix__) || defined(__APPLE__) TEST(document_load_file_special_folder) { xml_document doc; From a13b5cc08dab4763f8e780558bb3f5a0b6187a37 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 15 Apr 2023 13:41:15 -0700 Subject: [PATCH 2/2] Use stricter subset for now to avoid compat issues with Unix-like platforms --- src/pugixml.cpp | 4 ++-- tests/test_document.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 9e6f887..3d2f582 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -41,7 +41,7 @@ #include // For load_file -#if defined(__unix__) || defined(__APPLE__) +#if defined(__linux__) || defined(__APPLE__) #include #endif @@ -4764,7 +4764,7 @@ PUGI_IMPL_NS_BEGIN // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick PUGI_IMPL_FN xml_parse_status get_file_size(FILE* file, size_t& out_result) { - #if defined(__unix__) || defined(__APPLE__) + #if defined(__linux__) || defined(__APPLE__) // this simultaneously retrieves the file size and file mode (to guard against loading non-files) struct stat st; if (fstat(fileno(file), &st) != 0) return status_io_error; diff --git a/tests/test_document.cpp b/tests/test_document.cpp index a8100c8..2226177 100644 --- a/tests/test_document.cpp +++ b/tests/test_document.cpp @@ -589,7 +589,7 @@ TEST(document_load_file_wide_out_of_memory) CHECK(result.status == status_out_of_memory || result.status == status_file_not_found); } -#if defined(__unix__) || defined(__APPLE__) +#if defined(__linux__) || defined(__APPLE__) TEST(document_load_file_special_folder) { xml_document doc;