diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 75a20ed7b..a58e4eb6f 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -1938,6 +1938,7 @@ class binary_reader { std::pair size_and_type; size_t dimlen = 0; + bool isndarray = false; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -1952,7 +1953,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, size_and_type.second))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, size_and_type.second))) { return false; } @@ -1964,7 +1965,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray))) { return false; } @@ -1976,7 +1977,7 @@ class binary_reader { while (current != ']') { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, current))) { return false; } @@ -1991,8 +1992,9 @@ class binary_reader @param[out] result determined size @return whether size determination completed */ - bool get_ubjson_size_value(std::size_t& result, char_int_type prefix = 0) + bool get_ubjson_size_value(std::size_t& result, bool& isndarray, char_int_type prefix = 0) { + isndarray = false; if (prefix == 0) { prefix = get_ignore_noop(); @@ -2132,7 +2134,7 @@ class binary_reader return false; } } - result |= (1ull << (sizeof(result) * 8 - 1)); // low 63 bit of result stores the total element count, sign-bit indicates ndarray + isndarray = true; return sax->end_array(); } result = 0; @@ -2168,6 +2170,7 @@ class binary_reader */ bool get_ubjson_size_type(std::pair& result) { + bool isndarray = false; result.first = string_t::npos; // size result.second = 0; // type @@ -2185,7 +2188,7 @@ class binary_reader exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr)); } - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type") || (input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() ))) + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type"))) { return false; } @@ -2202,12 +2205,22 @@ class binary_reader exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); } - return get_ubjson_size_value(result.first); + bool iserr = get_ubjson_size_value(result.first, isndarray); + if (input_format == input_format_t::bjdata && isndarray) + { + result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return iserr; } if (current == '#') { - return get_ubjson_size_value(result.first); + bool iserr = get_ubjson_size_value(result.first, isndarray); + if (input_format == input_format_t::bjdata && isndarray) + { + result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return iserr; } return true; @@ -2411,12 +2424,14 @@ class binary_reader // detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) { std::map bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} }; + size_and_type.second &= ~(256); + string_t key = "_ArrayType_"; if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) { @@ -2428,7 +2443,6 @@ class binary_reader size_and_type.second = 'U'; } - size_and_type.first &= ~(1ull << (sizeof(std::size_t) * 8 - 1)); key = "_ArrayData_"; if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) { @@ -2508,7 +2522,7 @@ class binary_reader return false; } - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) { return false; } @@ -2584,7 +2598,8 @@ class binary_reader { // get size of following number string std::size_t size{}; - auto res = get_ubjson_size_value(size); + bool isndarray = false; + auto res = get_ubjson_size_value(size, isndarray); if (JSON_HEDLEY_UNLIKELY(!res)) { return res; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index bbd84d2be..c9abe3f90 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -10412,6 +10412,7 @@ class binary_reader { std::pair size_and_type; size_t dimlen = 0; + bool isndarray = false; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -10426,7 +10427,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, size_and_type.second))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, size_and_type.second))) { return false; } @@ -10438,7 +10439,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray))) { return false; } @@ -10450,7 +10451,7 @@ class binary_reader { while (current != ']') { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, current))) { return false; } @@ -10465,8 +10466,9 @@ class binary_reader @param[out] result determined size @return whether size determination completed */ - bool get_ubjson_size_value(std::size_t& result, char_int_type prefix = 0) + bool get_ubjson_size_value(std::size_t& result, bool& isndarray, char_int_type prefix = 0) { + isndarray = false; if (prefix == 0) { prefix = get_ignore_noop(); @@ -10606,7 +10608,7 @@ class binary_reader return false; } } - result |= (1ull << (sizeof(result) * 8 - 1)); // low 63 bit of result stores the total element count, sign-bit indicates ndarray + isndarray = true; return sax->end_array(); } result = 0; @@ -10642,6 +10644,7 @@ class binary_reader */ bool get_ubjson_size_type(std::pair& result) { + bool isndarray = false; result.first = string_t::npos; // size result.second = 0; // type @@ -10659,7 +10662,7 @@ class binary_reader exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr)); } - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type") || (input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() ))) + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type"))) { return false; } @@ -10676,12 +10679,22 @@ class binary_reader exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); } - return get_ubjson_size_value(result.first); + bool iserr = get_ubjson_size_value(result.first, isndarray); + if (input_format == input_format_t::bjdata && isndarray) + { + result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return iserr; } if (current == '#') { - return get_ubjson_size_value(result.first); + bool iserr = get_ubjson_size_value(result.first, isndarray); + if (input_format == input_format_t::bjdata && isndarray) + { + result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return iserr; } return true; @@ -10885,12 +10898,14 @@ class binary_reader // detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) { std::map bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} }; + size_and_type.second &= ~(256); + string_t key = "_ArrayType_"; if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) { @@ -10902,7 +10917,6 @@ class binary_reader size_and_type.second = 'U'; } - size_and_type.first &= ~(1ull << (sizeof(std::size_t) * 8 - 1)); key = "_ArrayData_"; if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) { @@ -10982,7 +10996,7 @@ class binary_reader return false; } - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) { return false; } @@ -11058,7 +11072,8 @@ class binary_reader { // get size of following number string std::size_t size{}; - auto res = get_ubjson_size_value(size); + bool isndarray = false; + auto res = get_ubjson_size_value(size, isndarray); if (JSON_HEDLEY_UNLIKELY(!res)) { return res; diff --git a/tests/src/unit-bjdata.cpp b/tests/src/unit-bjdata.cpp index dc2c63122..166717972 100644 --- a/tests/src/unit-bjdata.cpp +++ b/tests/src/unit-bjdata.cpp @@ -2488,6 +2488,34 @@ TEST_CASE("BJData") CHECK_THROWS_WITH(_ = json::from_bjdata(v), "[json.exception.parse_error.112] parse error at byte 4: syntax error while parsing BJData size: expected '#' after type information; last byte: 0x02"); } + SECTION("optimized array: negative size") + { + std::vector v1 = {'[', '#', 'i', 0xF1}; + std::vector v2 = {'[', '$', 'I', '#', 'i', 0xF2}; + std::vector v3 = {'[', '#', '[', 'i', 0xF3, 'i', 0x02, ']'}; + std::vector v4 = {'[', '$', 'I', '#', '[', 'i', 0xF4, 'i', 0x02, ']'}; + std::vector v5 = {'[', '$', 'I', '#', '[', 'i', 0xF5, 'i', 0xF1, ']'}; + std::vector v6 = {'[', '$', 0xF6, '#', 'i', 0xF7}; + + json _; + CHECK_THROWS_AS(_ = json::from_bjdata(v1), json::out_of_range&); + CHECK_THROWS_WITH(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 18446744073709551601"); + + CHECK_THROWS_AS(_ = json::from_bjdata(v2), json::out_of_range&); + CHECK_THROWS_WITH(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 18446744073709551602"); + + CHECK(json::from_bjdata(v3, true, false).is_discarded()); + + CHECK_THROWS_AS(_ = json::from_bjdata(v4), json::out_of_range&); + CHECK_THROWS_WITH(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 18446744073709551592"); + + CHECK_THROWS_AS(_ = json::from_bjdata(v5), json::parse_error&); + CHECK_THROWS_WITH(_ = json::from_bjdata(v5), "[json.exception.parse_error.110] parse error at byte 11: syntax error while parsing BJData number: unexpected end of input"); + + CHECK_THROWS_AS(_ = json::from_bjdata(v6), json::out_of_range&); + CHECK_THROWS_WITH(_ = json::from_bjdata(v6), "[json.exception.out_of_range.408] excessive array size: 18446744073709551607"); + } + SECTION("do not accept NTFZ markers in ndarray optimized type") { json _;