change bjdata ndarray flag to detect negative size, as part of #3475 (#3479)

* change bjdata ndarray flag to detect negative size, fix https://github.com/nlohmann/json/issues/3475

* fix CI error

* fix CI on 32-bit Windows

* remove platform specific out_of_range error messages

* Incorporate suggestions from @nlohmann and @falbrechtskirchinger

* fix CI errors

* add coverage

* fix sax event order

* fix coverage
This commit is contained in:
Qianqian Fang 2022-05-10 15:13:24 -04:00 committed by GitHub
parent d6efe672b5
commit a8a547d7a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 144 additions and 42 deletions

View File

@ -1938,6 +1938,7 @@ class binary_reader
{
std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0;
bool is_ndarray = false;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{
@ -1952,7 +1953,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, size_and_type.second)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second)))
{
return false;
}
@ -1964,7 +1965,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray)))
{
return false;
}
@ -1976,7 +1977,7 @@ class binary_reader
{
while (current != ']')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current)))
{
return false;
}
@ -1991,8 +1992,9 @@ class binary_reader
@param[out] result determined size
@return whether size determination completed
*/
bool get_ubjson_size_value(std::size_t& result, char_int_type prefix = 0)
bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
{
is_ndarray = false;
if (prefix == 0)
{
prefix = get_ignore_noop();
@ -2132,7 +2134,7 @@ class binary_reader
return false;
}
}
result |= (1ull << (sizeof(result) * 8 - 1)); // low 63 bit of result stores the total element count, sign-bit indicates ndarray
is_ndarray = true;
return sax->end_array();
}
result = 0;
@ -2168,6 +2170,7 @@ class binary_reader
*/
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
{
bool is_ndarray = false;
result.first = string_t::npos; // size
result.second = 0; // type
@ -2185,7 +2188,7 @@ class binary_reader
exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type") || (input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() )))
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type")))
{
return false;
}
@ -2202,12 +2205,22 @@ class binary_reader
exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
}
return get_ubjson_size_value(result.first);
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error;
}
if (current == '#')
{
return get_ubjson_size_value(result.first);
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error;
}
return true;
@ -2408,17 +2421,26 @@ class binary_reader
return false;
}
// detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1)))
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
{
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
};
size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
{
return false;
}
@ -2428,7 +2450,6 @@ class binary_reader
size_and_type.second = 'U';
}
size_and_type.first &= ~(1ull << (sizeof(std::size_t) * 8 - 1));
key = "_ArrayData_";
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) ))
{
@ -2508,9 +2529,12 @@ class binary_reader
return false;
}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1)))
// do not accept ND-array size in objects in BJData
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
{
return false;
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
}
string_t key;
@ -2584,7 +2608,8 @@ class binary_reader
{
// get size of following number string
std::size_t size{};
auto res = get_ubjson_size_value(size);
bool is_ndarray = false;
auto res = get_ubjson_size_value(size, is_ndarray);
if (JSON_HEDLEY_UNLIKELY(!res))
{
return res;

View File

@ -10412,6 +10412,7 @@ class binary_reader
{
std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0;
bool is_ndarray = false;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{
@ -10426,7 +10427,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, size_and_type.second)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second)))
{
return false;
}
@ -10438,7 +10439,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray)))
{
return false;
}
@ -10450,7 +10451,7 @@ class binary_reader
{
while (current != ']')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current)))
{
return false;
}
@ -10465,8 +10466,9 @@ class binary_reader
@param[out] result determined size
@return whether size determination completed
*/
bool get_ubjson_size_value(std::size_t& result, char_int_type prefix = 0)
bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
{
is_ndarray = false;
if (prefix == 0)
{
prefix = get_ignore_noop();
@ -10606,7 +10608,7 @@ class binary_reader
return false;
}
}
result |= (1ull << (sizeof(result) * 8 - 1)); // low 63 bit of result stores the total element count, sign-bit indicates ndarray
is_ndarray = true;
return sax->end_array();
}
result = 0;
@ -10642,6 +10644,7 @@ class binary_reader
*/
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
{
bool is_ndarray = false;
result.first = string_t::npos; // size
result.second = 0; // type
@ -10659,7 +10662,7 @@ class binary_reader
exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type") || (input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() )))
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type")))
{
return false;
}
@ -10676,12 +10679,22 @@ class binary_reader
exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
}
return get_ubjson_size_value(result.first);
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error;
}
if (current == '#')
{
return get_ubjson_size_value(result.first);
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error;
}
return true;
@ -10882,17 +10895,26 @@ class binary_reader
return false;
}
// detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1)))
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
{
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
};
size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
{
return false;
}
@ -10902,7 +10924,6 @@ class binary_reader
size_and_type.second = 'U';
}
size_and_type.first &= ~(1ull << (sizeof(std::size_t) * 8 - 1));
key = "_ArrayData_";
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) ))
{
@ -10982,9 +11003,12 @@ class binary_reader
return false;
}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1)))
// do not accept ND-array size in objects in BJData
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
{
return false;
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
}
string_t key;
@ -11058,7 +11082,8 @@ class binary_reader
{
// get size of following number string
std::size_t size{};
auto res = get_ubjson_size_value(size);
bool is_ndarray = false;
auto res = get_ubjson_size_value(size, is_ndarray);
if (JSON_HEDLEY_UNLIKELY(!res))
{
return res;

View File

@ -1130,7 +1130,7 @@ TEST_CASE("BJData")
{
json j = json::from_bjdata(std::vector<uint8_t>({'h', 0x00, 0x7c}));
json::number_float_t d{j};
CHECK(!std::isfinite(d));
CHECK_FALSE(std::isfinite(d));
CHECK(j.dump() == "null");
}
@ -2035,77 +2035,98 @@ TEST_CASE("BJData")
{
std::vector<uint8_t> v = {'[', 'T', 'F', ']'};
SaxCountdown scp(0);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("start_object()")
{
std::vector<uint8_t> v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'};
SaxCountdown scp(0);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("key() in object")
{
std::vector<uint8_t> v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'};
SaxCountdown scp(1);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("start_array(len)")
{
std::vector<uint8_t> v = {'[', '#', 'i', '2', 'T', 'F'};
SaxCountdown scp(0);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("start_object(len)")
{
std::vector<uint8_t> v = {'{', '#', 'i', '1', 3, 'f', 'o', 'o', 'F'};
SaxCountdown scp(0);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("key() in object with length")
{
std::vector<uint8_t> v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'};
SaxCountdown scp(1);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("start_array() in ndarray _ArraySize_")
{
std::vector<uint8_t> v = {'[', '$', 'i', '#', '[', '$', 'i', '#', 'i', 2, 2, 1, 1, 2};
SaxCountdown scp(2);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("number_integer() in ndarray _ArraySize_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', '$', 'i', '#', 'i', 2, 2, 1, 1, 2};
SaxCountdown scp(3);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("key() in ndarray _ArrayType_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4};
SaxCountdown scp(6);
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("string() in ndarray _ArrayType_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4};
SaxCountdown scp(7);
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("key() in ndarray _ArrayData_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4};
SaxCountdown scp(8);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("string() in ndarray _ArrayData_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4};
SaxCountdown scp(9);
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("string() in ndarray _ArrayType_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', '$', 'i', '#', 'i', 2, 3, 2, 6, 5, 4, 3, 2, 1};
SaxCountdown scp(11);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
SECTION("start_array() in ndarray _ArrayData_")
{
std::vector<uint8_t> v = {'[', '$', 'U', '#', '[', 'i', 2, 'i', 3, ']', 6, 5, 4, 3, 2, 1};
SaxCountdown scp(13);
CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata));
CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata));
}
}
@ -2488,6 +2509,37 @@ TEST_CASE("BJData")
CHECK_THROWS_WITH(_ = json::from_bjdata(v), "[json.exception.parse_error.112] parse error at byte 4: syntax error while parsing BJData size: expected '#' after type information; last byte: 0x02");
}
SECTION("optimized array: negative size")
{
std::vector<uint8_t> v1 = {'[', '#', 'i', 0xF1};
std::vector<uint8_t> v2 = {'[', '$', 'I', '#', 'i', 0xF2};
std::vector<uint8_t> v3 = {'[', '$', 'I', '#', '[', 'i', 0xF4, 'i', 0x02, ']'};
std::vector<uint8_t> v4 = {'[', '$', 0xF6, '#', 'i', 0xF7};
std::vector<uint8_t> v5 = {'[', '$', 'I', '#', '[', 'i', 0xF5, 'i', 0xF1, ']'};
std::vector<uint8_t> v6 = {'[', '#', '[', 'i', 0xF3, 'i', 0x02, ']'};
json _;
static bool is_64bit = (sizeof(size_t) == 8);
if (is_64bit)
{
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 18446744073709551601", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 18446744073709551602", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v3), "[json.exception.out_of_range.408] excessive array size: 18446744073709551592", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 18446744073709551607", json::out_of_range&);
}
else
{
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 4294967281", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 4294967282", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v3), "[json.exception.out_of_range.408] excessive array size: 4294967272", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 4294967287", json::out_of_range&);
}
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v5), "[json.exception.parse_error.110] parse error at byte 11: syntax error while parsing BJData number: unexpected end of input", json::parse_error&);
CHECK(json::from_bjdata(v6, true, false).is_discarded());
}
SECTION("do not accept NTFZ markers in ndarray optimized type")
{
json _;