Incorporate suggestions from @nlohmann and @falbrechtskirchinger

This commit is contained in:
Qianqian Fang 2022-05-09 10:41:59 -04:00
parent ffcf148667
commit cfb5da6ebd
3 changed files with 80 additions and 52 deletions

View File

@ -1938,7 +1938,7 @@ class binary_reader
{ {
std::pair<std::size_t, char_int_type> size_and_type; std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0; size_t dimlen = 0;
bool isndarray = false; bool is_ndarray = false;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{ {
@ -1953,7 +1953,7 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, size_and_type.second))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second)))
{ {
return false; return false;
} }
@ -1965,7 +1965,7 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray)))
{ {
return false; return false;
} }
@ -1977,7 +1977,7 @@ class binary_reader
{ {
while (current != ']') while (current != ']')
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, current))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current)))
{ {
return false; return false;
} }
@ -1992,9 +1992,9 @@ class binary_reader
@param[out] result determined size @param[out] result determined size
@return whether size determination completed @return whether size determination completed
*/ */
bool get_ubjson_size_value(std::size_t& result, bool& isndarray, char_int_type prefix = 0) bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
{ {
isndarray = false; is_ndarray = false;
if (prefix == 0) if (prefix == 0)
{ {
prefix = get_ignore_noop(); prefix = get_ignore_noop();
@ -2134,7 +2134,7 @@ class binary_reader
return false; return false;
} }
} }
isndarray = true; is_ndarray = true;
return sax->end_array(); return sax->end_array();
} }
result = 0; result = 0;
@ -2170,7 +2170,7 @@ class binary_reader
*/ */
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result) bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
{ {
bool isndarray = false; bool is_ndarray = false;
result.first = string_t::npos; // size result.first = string_t::npos; // size
result.second = 0; // type result.second = 0; // type
@ -2205,22 +2205,22 @@ class binary_reader
exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
} }
bool iserr = get_ubjson_size_value(result.first, isndarray); bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && isndarray) if (input_format == input_format_t::bjdata && is_ndarray)
{ {
result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
} }
return iserr; return is_error;
} }
if (current == '#') if (current == '#')
{ {
bool iserr = get_ubjson_size_value(result.first, isndarray); bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && isndarray) if (input_format == input_format_t::bjdata && is_ndarray)
{ {
result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
} }
return iserr; return is_error;
} }
return true; return true;
@ -2424,16 +2424,23 @@ class binary_reader
// detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= (1 << 8))
{ {
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
}; };
size_and_type.second -= 256; size_and_type.second &= ~(1 << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
string_t key = "_ArrayType_"; string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
{ {
return false; return false;
} }
@ -2522,9 +2529,11 @@ class binary_reader
return false; return false;
} }
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= (1 << 8))
{ {
return false; auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
} }
string_t key; string_t key;
@ -2598,8 +2607,8 @@ class binary_reader
{ {
// get size of following number string // get size of following number string
std::size_t size{}; std::size_t size{};
bool isndarray = false; bool is_ndarray = false;
auto res = get_ubjson_size_value(size, isndarray); auto res = get_ubjson_size_value(size, is_ndarray);
if (JSON_HEDLEY_UNLIKELY(!res)) if (JSON_HEDLEY_UNLIKELY(!res))
{ {
return res; return res;

View File

@ -10412,7 +10412,7 @@ class binary_reader
{ {
std::pair<std::size_t, char_int_type> size_and_type; std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0; size_t dimlen = 0;
bool isndarray = false; bool is_ndarray = false;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{ {
@ -10427,7 +10427,7 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, size_and_type.second))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second)))
{ {
return false; return false;
} }
@ -10439,7 +10439,7 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray)))
{ {
return false; return false;
} }
@ -10451,7 +10451,7 @@ class binary_reader
{ {
while (current != ']') while (current != ']')
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, isndarray, current))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current)))
{ {
return false; return false;
} }
@ -10466,9 +10466,9 @@ class binary_reader
@param[out] result determined size @param[out] result determined size
@return whether size determination completed @return whether size determination completed
*/ */
bool get_ubjson_size_value(std::size_t& result, bool& isndarray, char_int_type prefix = 0) bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
{ {
isndarray = false; is_ndarray = false;
if (prefix == 0) if (prefix == 0)
{ {
prefix = get_ignore_noop(); prefix = get_ignore_noop();
@ -10608,7 +10608,7 @@ class binary_reader
return false; return false;
} }
} }
isndarray = true; is_ndarray = true;
return sax->end_array(); return sax->end_array();
} }
result = 0; result = 0;
@ -10644,7 +10644,7 @@ class binary_reader
*/ */
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result) bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
{ {
bool isndarray = false; bool is_ndarray = false;
result.first = string_t::npos; // size result.first = string_t::npos; // size
result.second = 0; // type result.second = 0; // type
@ -10679,22 +10679,22 @@ class binary_reader
exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
} }
bool iserr = get_ubjson_size_value(result.first, isndarray); bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && isndarray) if (input_format == input_format_t::bjdata && is_ndarray)
{ {
result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
} }
return iserr; return is_error;
} }
if (current == '#') if (current == '#')
{ {
bool iserr = get_ubjson_size_value(result.first, isndarray); bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && isndarray) if (input_format == input_format_t::bjdata && is_ndarray)
{ {
result.second |= 256; // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
} }
return iserr; return is_error;
} }
return true; return true;
@ -10898,16 +10898,23 @@ class binary_reader
// detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= (1 << 8))
{ {
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
}; };
size_and_type.second -= 256; size_and_type.second &= ~(1 << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
string_t key = "_ArrayType_"; string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
{ {
return false; return false;
} }
@ -10996,9 +11003,11 @@ class binary_reader
return false; return false;
} }
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= 256) if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.second >= (1 << 8))
{ {
return false; auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
} }
string_t key; string_t key;
@ -11072,8 +11081,8 @@ class binary_reader
{ {
// get size of following number string // get size of following number string
std::size_t size{}; std::size_t size{};
bool isndarray = false; bool is_ndarray = false;
auto res = get_ubjson_size_value(size, isndarray); auto res = get_ubjson_size_value(size, is_ndarray);
if (JSON_HEDLEY_UNLIKELY(!res)) if (JSON_HEDLEY_UNLIKELY(!res))
{ {
return res; return res;

View File

@ -2498,13 +2498,23 @@ TEST_CASE("BJData")
std::vector<uint8_t> v6 = {'[', '#', '[', 'i', 0xF3, 'i', 0x02, ']'}; std::vector<uint8_t> v6 = {'[', '#', '[', 'i', 0xF3, 'i', 0x02, ']'};
json _; json _;
CHECK_THROWS_AS(_ = json::from_bjdata(v1), json::out_of_range&); static bool is_64bit = (sizeof(size_t) == 8);
CHECK_THROWS_AS(_ = json::from_bjdata(v2), json::out_of_range&);
CHECK_THROWS_AS(_ = json::from_bjdata(v3), json::out_of_range&);
CHECK_THROWS_AS(_ = json::from_bjdata(v4), json::out_of_range&);
CHECK_THROWS_AS(_ = json::from_bjdata(v5), json::parse_error&); if (is_64bit)
CHECK_THROWS_WITH(_ = json::from_bjdata(v5), "[json.exception.parse_error.110] parse error at byte 11: syntax error while parsing BJData number: unexpected end of input"); {
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 18446744073709551601", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 18446744073709551602", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v3), "[json.exception.out_of_range.408] excessive array size: 18446744073709551592", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 18446744073709551607", json::out_of_range&);
}
else
{
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 4294967281", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 4294967282", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v3), "[json.exception.out_of_range.408] excessive array size: 4294967272", json::out_of_range&);
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 4294967287", json::out_of_range&);
}
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v5), "[json.exception.parse_error.110] parse error at byte 11: syntax error while parsing BJData number: unexpected end of input", json::parse_error&);
CHECK(json::from_bjdata(v6, true, false).is_discarded()); CHECK(json::from_bjdata(v6, true, false).is_discarded());
} }