add tests for optimized ndarray, improve coverage, fix clang/gcc warnings

This commit is contained in:
Qianqian Fang 2022-02-18 15:22:19 -05:00
parent 55682ffb5a
commit c8752b349f
3 changed files with 170 additions and 33 deletions

View File

@ -1903,6 +1903,9 @@ class binary_reader
uint64_t len{};
return get_number(input_format, len) && get_string(input_format, len, result);
}
default:
{
}
}
}
auto last_token = get_token_string();
@ -1920,9 +1923,12 @@ class binary_reader
std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0;
bool is_optimized = get_ubjson_size_type(size_and_type);
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{
return false;
}
if (is_optimized && size_and_type.first != string_t::npos)
if (size_and_type.first != string_t::npos)
{
if (size_and_type.second != 0)
{
@ -1954,7 +1960,7 @@ class binary_reader
{
while (current != ']')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current)))
{
return false;
}
@ -2066,7 +2072,7 @@ class binary_reader
{
return false;
}
result = static_cast<std::size_t>(number);
result = number;
return true;
}
case '[':
@ -2083,12 +2089,14 @@ class binary_reader
}
return true;
}
default:
{
}
}
}
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
exception_message(input_format, concat("expected length type specification (U, i, I, l, L) after '#'; last byte: 0x", last_token), "size"), nullptr));
}
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), nullptr));
}
}
@ -2243,27 +2251,27 @@ class binary_reader
{
case 'u':
{
uint16_t number;
uint16_t number{};
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'm':
{
uint32_t number;
uint32_t number{};
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'M':
{
uint64_t number;
uint64_t number{};
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'h':
{
const int byte2 = get();
unsigned int byte2 = get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
{
return false;
}
const int byte1 = get();
unsigned int byte1 = get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
{
return false;
@ -2277,11 +2285,11 @@ class binary_reader
// without such support. An example of a small decoder for
// half-precision floating-point numbers in the C language
// is shown in Fig. 3.
const int half = (byte1 << 8) + byte2;
unsigned int half = (byte1 << 8) + byte2;
const double val = [&half]
{
const int exp = (half >> 10) & 0x1F;
const int mant = half & 0x3FF;
unsigned int exp = (half >> 10) & 0x1F;
unsigned int mant = half & 0x3FF;
JSON_ASSERT(0 <= exp&& exp <= 32);
JSON_ASSERT(0 <= mant&& mant <= 1024);
switch (exp)
@ -2300,6 +2308,9 @@ class binary_reader
? static_cast<number_float_t>(-val)
: static_cast<number_float_t>(val), "");
}
default:
{
}
}
}
auto last_token = get_token_string();
@ -2738,7 +2749,7 @@ class binary_reader
const bool is_little_endian = little_endianness();
/// input format
input_format_t input_format;
input_format_t input_format = input_format_t::json;
/// the SAX parser
json_sax_t* sax = nullptr;

View File

@ -10279,6 +10279,9 @@ class binary_reader
uint64_t len{};
return get_number(input_format, len) && get_string(input_format, len, result);
}
default:
{
}
}
}
auto last_token = get_token_string();
@ -10296,9 +10299,12 @@ class binary_reader
std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0;
bool is_optimized = get_ubjson_size_type(size_and_type);
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{
return false;
}
if (is_optimized && size_and_type.first != string_t::npos)
if (size_and_type.first != string_t::npos)
{
if (size_and_type.second != 0)
{
@ -10330,7 +10336,7 @@ class binary_reader
{
while (current != ']')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current)))
{
return false;
}
@ -10442,7 +10448,7 @@ class binary_reader
{
return false;
}
result = static_cast<std::size_t>(number);
result = number;
return true;
}
case '[':
@ -10459,12 +10465,14 @@ class binary_reader
}
return true;
}
default:
{
}
}
}
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
exception_message(input_format, concat("expected length type specification (U, i, I, l, L) after '#'; last byte: 0x", last_token), "size"), nullptr));
}
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), nullptr));
}
}
@ -10619,27 +10627,27 @@ class binary_reader
{
case 'u':
{
uint16_t number;
uint16_t number{};
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'm':
{
uint32_t number;
uint32_t number{};
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'M':
{
uint64_t number;
uint64_t number{};
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'h':
{
const int byte2 = get();
unsigned int byte2 = get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
{
return false;
}
const int byte1 = get();
unsigned int byte1 = get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
{
return false;
@ -10653,11 +10661,11 @@ class binary_reader
// without such support. An example of a small decoder for
// half-precision floating-point numbers in the C language
// is shown in Fig. 3.
const int half = (byte1 << 8) + byte2;
unsigned int half = (byte1 << 8) + byte2;
const double val = [&half]
{
const int exp = (half >> 10) & 0x1F;
const int mant = half & 0x3FF;
unsigned int exp = (half >> 10) & 0x1F;
unsigned int mant = half & 0x3FF;
JSON_ASSERT(0 <= exp&& exp <= 32);
JSON_ASSERT(0 <= mant&& mant <= 1024);
switch (exp)
@ -10676,6 +10684,9 @@ class binary_reader
? static_cast<number_float_t>(-val)
: static_cast<number_float_t>(val), "");
}
default:
{
}
}
}
auto last_token = get_token_string();
@ -11114,7 +11125,7 @@ class binary_reader
const bool is_little_endian = little_endianness();
/// input format
input_format_t input_format;
input_format_t input_format = input_format_t::json;
/// the SAX parser
json_sax_t* sax = nullptr;

View File

@ -2248,7 +2248,7 @@ TEST_CASE("BJData")
CHECK(json::to_bjdata(json::from_bjdata(v_C), true, true) == v_S); // char is serialized to string
}
SECTION("optimized ndarray (type and vector-size)")
SECTION("optimized ndarray (type and vector-size as optimized 1D array)")
{
// create vector with two elements of the same type
std::vector<uint8_t> v_N = {'[', '$', 'N', '#', '[', '$', 'i', '#', 'i', 2, 1, 2};
@ -2284,6 +2284,121 @@ TEST_CASE("BJData")
CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
#ifdef BJDATA_TEST_ROUNDTRIP // round-trip to vectorized size (ndarray) is not yet supported
// roundtrip: output should be optimized
std::vector<uint8_t> v_empty = {'[', '#', 'i', 0};
CHECK(json::to_bjdata(json::from_bjdata(v_N), true, true) == v_empty);
CHECK(json::to_bjdata(json::from_bjdata(v_T), true, true) == v_T);
CHECK(json::to_bjdata(json::from_bjdata(v_F), true, true) == v_F);
CHECK(json::to_bjdata(json::from_bjdata(v_Z), true, true) == v_Z);
CHECK(json::to_bjdata(json::from_bjdata(v_i), true, true) == v_i);
CHECK(json::to_bjdata(json::from_bjdata(v_U), true, true) == v_U);
CHECK(json::to_bjdata(json::from_bjdata(v_I), true, true) == v_I);
CHECK(json::to_bjdata(json::from_bjdata(v_u), true, true) == v_u);
CHECK(json::to_bjdata(json::from_bjdata(v_l), true, true) == v_l);
CHECK(json::to_bjdata(json::from_bjdata(v_m), true, true) == v_m);
CHECK(json::to_bjdata(json::from_bjdata(v_L), true, true) == v_L);
CHECK(json::to_bjdata(json::from_bjdata(v_M), true, true) == v_M);
CHECK(json::to_bjdata(json::from_bjdata(v_D), true, true) == v_D);
CHECK(json::to_bjdata(json::from_bjdata(v_S), true, true) == v_S);
CHECK(json::to_bjdata(json::from_bjdata(v_C), true, true) == v_S); // char is serialized to string
#endif
}
// Parsing test for type-optimized arrays whose '#' size field is itself a
// plain, ']'-terminated array of dimension lengths rather than a single count.
SECTION("optimized ndarray (type and vector-size as 1D array)")
{
// Every input below has the same header:
//   '[' '$' <type marker> '#' '[' 'i' 1 'i' 2 ']'
// followed by the raw payload for that type (if any).
// NOTE(review): the size array {1, 2} presumably describes a 1x2 ndarray,
// i.e. two elements in total - confirm against the BJData specification.
// N, T, F and Z carry no payload bytes after the header.
std::vector<uint8_t> v_N = {'[', '$', 'N', '#', '[', 'i', 1, 'i', 2, ']'};
std::vector<uint8_t> v_T = {'[', '$', 'T', '#', '[', 'i', 1, 'i', 2, ']'};
std::vector<uint8_t> v_F = {'[', '$', 'F', '#', '[', 'i', 1, 'i', 2, ']'};
std::vector<uint8_t> v_Z = {'[', '$', 'Z', '#', '[', 'i', 1, 'i', 2, ']'};
// Numeric types: the payload holds the same value twice; multi-byte values
// are written least-significant byte first (e.g. 0xFF, 0x7F is expected to
// decode to 32767 in the checks below).
std::vector<uint8_t> v_i = {'[', '$', 'i', '#', '[', 'i', 1, 'i', 2, ']', 0x7F, 0x7F};
std::vector<uint8_t> v_U = {'[', '$', 'U', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF};
std::vector<uint8_t> v_I = {'[', '$', 'I', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0x7F, 0xFF, 0x7F};
std::vector<uint8_t> v_u = {'[', '$', 'u', '#', '[', 'i', 1, 'i', 2, ']', 0x0F, 0xA7, 0x0F, 0xA7};
std::vector<uint8_t> v_l = {'[', '$', 'l', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F};
std::vector<uint8_t> v_m = {'[', '$', 'm', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xC9, 0x9A, 0xBB, 0xFF, 0xC9, 0x9A, 0xBB};
std::vector<uint8_t> v_L = {'[', '$', 'L', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F};
std::vector<uint8_t> v_M = {'[', '$', 'M', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D, 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D};
std::vector<uint8_t> v_D = {'[', '$', 'D', '#', '[', 'i', 1, 'i', 2, ']', 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40};
// Strings: each element is length-prefixed ('i', 1) followed by the byte 'a';
// chars are stored as single bytes without a prefix.
std::vector<uint8_t> v_S = {'[', '$', 'S', '#', '[', 'i', 1, 'i', 2, ']', 'i', 1, 'a', 'i', 1, 'a'};
std::vector<uint8_t> v_C = {'[', '$', 'C', '#', '[', 'i', 1, 'i', 2, ']', 'a', 'a'};
// Each input must parse to a two-element JSON array of the corresponding
// value; the no-op type 'N' is expected to yield an empty array instead.
CHECK(json::from_bjdata(v_N) == json::array());
CHECK(json::from_bjdata(v_T) == json({true, true}));
CHECK(json::from_bjdata(v_F) == json({false, false}));
CHECK(json::from_bjdata(v_Z) == json({nullptr, nullptr}));
CHECK(json::from_bjdata(v_i) == json({127, 127}));
CHECK(json::from_bjdata(v_U) == json({255, 255}));
CHECK(json::from_bjdata(v_I) == json({32767, 32767}));
CHECK(json::from_bjdata(v_u) == json({42767, 42767}));
CHECK(json::from_bjdata(v_l) == json({2147483647, 2147483647}));
CHECK(json::from_bjdata(v_m) == json({3147483647, 3147483647}));
CHECK(json::from_bjdata(v_L) == json({9223372036854775807, 9223372036854775807}));
CHECK(json::from_bjdata(v_M) == json({10223372036854775807ull, 10223372036854775807ull}));
CHECK(json::from_bjdata(v_D) == json({3.1415926, 3.1415926}));
CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
// The checks below are compiled only when BJDATA_TEST_ROUNDTRIP is defined.
#ifdef BJDATA_TEST_ROUNDTRIP // round-trip to vectorized size (ndarray) is not yet supported
// roundtrip: re-serializing the parsed value (both boolean arguments of
// to_bjdata set to true) should reproduce the original bytes, except for
// 'N' (expected to become an empty counted array) and 'C' (see below)
std::vector<uint8_t> v_empty = {'[', '#', 'i', 0};
CHECK(json::to_bjdata(json::from_bjdata(v_N), true, true) == v_empty);
CHECK(json::to_bjdata(json::from_bjdata(v_T), true, true) == v_T);
CHECK(json::to_bjdata(json::from_bjdata(v_F), true, true) == v_F);
CHECK(json::to_bjdata(json::from_bjdata(v_Z), true, true) == v_Z);
CHECK(json::to_bjdata(json::from_bjdata(v_i), true, true) == v_i);
CHECK(json::to_bjdata(json::from_bjdata(v_U), true, true) == v_U);
CHECK(json::to_bjdata(json::from_bjdata(v_I), true, true) == v_I);
CHECK(json::to_bjdata(json::from_bjdata(v_u), true, true) == v_u);
CHECK(json::to_bjdata(json::from_bjdata(v_l), true, true) == v_l);
CHECK(json::to_bjdata(json::from_bjdata(v_m), true, true) == v_m);
CHECK(json::to_bjdata(json::from_bjdata(v_L), true, true) == v_L);
CHECK(json::to_bjdata(json::from_bjdata(v_M), true, true) == v_M);
CHECK(json::to_bjdata(json::from_bjdata(v_D), true, true) == v_D);
CHECK(json::to_bjdata(json::from_bjdata(v_S), true, true) == v_S);
CHECK(json::to_bjdata(json::from_bjdata(v_C), true, true) == v_S); // char is serialized to string
#endif
}
SECTION("optimized ndarray (type and vector-size as size-optimized array)")
{
// create vector with two elements of the same type
std::vector<uint8_t> v_N = {'[', '$', 'N', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
std::vector<uint8_t> v_T = {'[', '$', 'T', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
std::vector<uint8_t> v_F = {'[', '$', 'F', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
std::vector<uint8_t> v_Z = {'[', '$', 'Z', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
std::vector<uint8_t> v_i = {'[', '$', 'i', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0x7F, 0x7F};
std::vector<uint8_t> v_U = {'[', '$', 'U', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF};
std::vector<uint8_t> v_I = {'[', '$', 'I', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0x7F, 0xFF, 0x7F};
std::vector<uint8_t> v_u = {'[', '$', 'u', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0x0F, 0xA7, 0x0F, 0xA7};
std::vector<uint8_t> v_l = {'[', '$', 'l', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F};
std::vector<uint8_t> v_m = {'[', '$', 'm', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xC9, 0x9A, 0xBB, 0xFF, 0xC9, 0x9A, 0xBB};
std::vector<uint8_t> v_L = {'[', '$', 'L', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F};
std::vector<uint8_t> v_M = {'[', '$', 'M', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D, 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D};
std::vector<uint8_t> v_D = {'[', '$', 'D', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40};
std::vector<uint8_t> v_S = {'[', '$', 'S', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 'i', 1, 'a', 'i', 1, 'a'};
std::vector<uint8_t> v_C = {'[', '$', 'C', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 'a', 'a'};
// check if vector is parsed correctly
CHECK(json::from_bjdata(v_N) == json::array());
CHECK(json::from_bjdata(v_T) == json({true, true}));
CHECK(json::from_bjdata(v_F) == json({false, false}));
CHECK(json::from_bjdata(v_Z) == json({nullptr, nullptr}));
CHECK(json::from_bjdata(v_i) == json({127, 127}));
CHECK(json::from_bjdata(v_U) == json({255, 255}));
CHECK(json::from_bjdata(v_I) == json({32767, 32767}));
CHECK(json::from_bjdata(v_u) == json({42767, 42767}));
CHECK(json::from_bjdata(v_l) == json({2147483647, 2147483647}));
CHECK(json::from_bjdata(v_m) == json({3147483647, 3147483647}));
CHECK(json::from_bjdata(v_L) == json({9223372036854775807, 9223372036854775807}));
CHECK(json::from_bjdata(v_M) == json({10223372036854775807ull, 10223372036854775807ull}));
CHECK(json::from_bjdata(v_D) == json({3.1415926, 3.1415926}));
CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
#ifdef BJDATA_TEST_ROUNDTRIP // round-trip to vectorized size (ndarray) is not yet supported
// roundtrip: output should be optimized
std::vector<uint8_t> v_empty = {'[', '#', 'i', 0};