From c8752b349f464fbf1399ffda7aac24b430930104 Mon Sep 17 00:00:00 2001
From: Qianqian Fang <fangqq@gmail.com>
Date: Fri, 18 Feb 2022 15:22:19 -0500
Subject: [PATCH] add tests for optimized ndarray, improve coverage, fix
 clang/gcc warnings

---
 .../nlohmann/detail/input/binary_reader.hpp   |  43 ++++---
 single_include/nlohmann/json.hpp              |  43 ++++---
 test/src/unit-bjdata.cpp                      | 117 +++++++++++++++++-
 3 files changed, 170 insertions(+), 33 deletions(-)

diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp
index 25bdb95fd..c070859c6 100644
--- a/include/nlohmann/detail/input/binary_reader.hpp
+++ b/include/nlohmann/detail/input/binary_reader.hpp
@@ -1903,6 +1903,9 @@ class binary_reader
                             uint64_t len{};
                             return get_number(input_format, len) && get_string(input_format, len, result);
                         }
+                        default:
+                        {
+                        }
                     }
                 }
                 auto last_token = get_token_string();
@@ -1920,9 +1923,12 @@ class binary_reader
         std::pair<std::size_t, char_int_type> size_and_type;
         size_t dimlen = 0;
 
-        bool is_optimized = get_ubjson_size_type(size_and_type);
+        if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
+        {
+            return false;
+        }
 
-        if (is_optimized && size_and_type.first != string_t::npos)
+        if (size_and_type.first != string_t::npos)
         {
             if (size_and_type.second != 0)
             {
@@ -1954,7 +1960,7 @@ class binary_reader
         {
             while (current != ']')
             {
-                if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen)))
+                if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current)))
                 {
                     return false;
                 }
@@ -2066,7 +2072,7 @@ class binary_reader
                             {
                                 return false;
                             }
-                            result = static_cast<std::size_t>(number);
+                            result = number;
                             return true;
                         }
                         case '[':
@@ -2083,12 +2089,14 @@ class binary_reader
                             }
                             return true;
                         }
+                        default:
+                        {
+                        }
                     }
                 }
-                auto last_token = get_token_string();
-                return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
-                                        exception_message(input_format, concat("expected length type specification (U, i, I, l, L) after '#'; last byte: 0x", last_token), "size"), nullptr));
             }
+            auto last_token = get_token_string();
+            return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), nullptr));
         }
     }
 
@@ -2243,27 +2251,27 @@ class binary_reader
                     {
                         case 'u':
                         {
-                            uint16_t number;
+                            uint16_t number{};
                             return get_number(input_format, number) && sax->number_unsigned(number);
                         }
                         case 'm':
                         {
-                            uint32_t number;
+                            uint32_t number{};
                             return get_number(input_format, number) && sax->number_unsigned(number);
                         }
                         case 'M':
                         {
-                            uint64_t number;
+                            uint64_t number{};
                             return get_number(input_format, number) && sax->number_unsigned(number);
                         }
                         case 'h':
                         {
-                            const int byte2 = get();
+                            unsigned int byte2 = get();
                             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
                             {
                                 return false;
                             }
-                            const int byte1 = get();
+                            unsigned int byte1 = get();
                             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
                             {
                                 return false;
@@ -2277,11 +2285,11 @@ class binary_reader
                             // without such support. An example of a small decoder for
                             // half-precision floating-point numbers in the C language
                             // is shown in Fig. 3.
-                            const int half = (byte1 << 8) + byte2;
+                            unsigned int half = (byte1 << 8) + byte2;
                             const double val = [&half]
                             {
-                                const int exp = (half >> 10) & 0x1F;
-                                const int mant = half & 0x3FF;
+                                unsigned int exp = (half >> 10) & 0x1F;
+                                unsigned int mant = half & 0x3FF;
                                 JSON_ASSERT(0 <= exp&& exp <= 32);
                                 JSON_ASSERT(0 <= mant&& mant <= 1024);
                                 switch (exp)
@@ -2300,6 +2308,9 @@ class binary_reader
                                                      ? static_cast<number_float_t>(-val)
                                                      : static_cast<number_float_t>(val), "");
                         }
+                        default:
+                        {
+                        }
                     }
                 }
                 auto last_token = get_token_string();
@@ -2738,7 +2749,7 @@ class binary_reader
     const bool is_little_endian = little_endianness();
 
     /// input format
-    input_format_t input_format;
+    input_format_t input_format = input_format_t::json;
 
     /// the SAX parser
     json_sax_t* sax = nullptr;
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp
index cd7e83f9e..697f26745 100644
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -10279,6 +10279,9 @@ class binary_reader
                             uint64_t len{};
                             return get_number(input_format, len) && get_string(input_format, len, result);
                         }
+                        default:
+                        {
+                        }
                     }
                 }
                 auto last_token = get_token_string();
@@ -10296,9 +10299,12 @@ class binary_reader
         std::pair<std::size_t, char_int_type> size_and_type;
         size_t dimlen = 0;
 
-        bool is_optimized = get_ubjson_size_type(size_and_type);
+        if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
+        {
+            return false;
+        }
 
-        if (is_optimized && size_and_type.first != string_t::npos)
+        if (size_and_type.first != string_t::npos)
         {
             if (size_and_type.second != 0)
             {
@@ -10330,7 +10336,7 @@ class binary_reader
         {
             while (current != ']')
             {
-                if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen)))
+                if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current)))
                 {
                     return false;
                 }
@@ -10442,7 +10448,7 @@ class binary_reader
                             {
                                 return false;
                             }
-                            result = static_cast<std::size_t>(number);
+                            result = number;
                             return true;
                         }
                         case '[':
@@ -10459,12 +10465,14 @@ class binary_reader
                             }
                             return true;
                         }
+                        default:
+                        {
+                        }
                     }
                 }
-                auto last_token = get_token_string();
-                return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
-                                        exception_message(input_format, concat("expected length type specification (U, i, I, l, L) after '#'; last byte: 0x", last_token), "size"), nullptr));
             }
+            auto last_token = get_token_string();
+            return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), nullptr));
         }
     }
 
@@ -10619,27 +10627,27 @@ class binary_reader
                     {
                         case 'u':
                         {
-                            uint16_t number;
+                            uint16_t number{};
                             return get_number(input_format, number) && sax->number_unsigned(number);
                         }
                         case 'm':
                         {
-                            uint32_t number;
+                            uint32_t number{};
                             return get_number(input_format, number) && sax->number_unsigned(number);
                         }
                         case 'M':
                         {
-                            uint64_t number;
+                            uint64_t number{};
                             return get_number(input_format, number) && sax->number_unsigned(number);
                         }
                         case 'h':
                         {
-                            const int byte2 = get();
+                            unsigned int byte2 = get();
                             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
                             {
                                 return false;
                             }
-                            const int byte1 = get();
+                            unsigned int byte1 = get();
                             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "half")))
                             {
                                 return false;
@@ -10653,11 +10661,11 @@ class binary_reader
                             // without such support. An example of a small decoder for
                             // half-precision floating-point numbers in the C language
                             // is shown in Fig. 3.
-                            const int half = (byte1 << 8) + byte2;
+                            unsigned int half = (byte1 << 8) + byte2;
                             const double val = [&half]
                             {
-                                const int exp = (half >> 10) & 0x1F;
-                                const int mant = half & 0x3FF;
+                                unsigned int exp = (half >> 10) & 0x1F;
+                                unsigned int mant = half & 0x3FF;
                                 JSON_ASSERT(0 <= exp&& exp <= 32);
                                 JSON_ASSERT(0 <= mant&& mant <= 1024);
                                 switch (exp)
@@ -10676,6 +10684,9 @@ class binary_reader
                                                      ? static_cast<number_float_t>(-val)
                                                      : static_cast<number_float_t>(val), "");
                         }
+                        default:
+                        {
+                        }
                     }
                 }
                 auto last_token = get_token_string();
@@ -11114,7 +11125,7 @@ class binary_reader
     const bool is_little_endian = little_endianness();
 
     /// input format
-    input_format_t input_format;
+    input_format_t input_format = input_format_t::json;
 
     /// the SAX parser
     json_sax_t* sax = nullptr;
diff --git a/test/src/unit-bjdata.cpp b/test/src/unit-bjdata.cpp
index a5b80a2e1..a8a1b75c3 100644
--- a/test/src/unit-bjdata.cpp
+++ b/test/src/unit-bjdata.cpp
@@ -2248,7 +2248,7 @@ TEST_CASE("BJData")
                 CHECK(json::to_bjdata(json::from_bjdata(v_C), true, true) == v_S); // char is serialized to string
             }
 
-            SECTION("optimized ndarray (type and vector-size)")
+            SECTION("optimized ndarray (type and vector-size as optimized 1D array)")
             {
                 // create vector with two elements of the same type
                 std::vector<uint8_t> v_N = {'[', '$', 'N', '#', '[', '$', 'i', '#', 'i', 2, 1, 2};
@@ -2284,6 +2284,121 @@ TEST_CASE("BJData")
                 CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
                 CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
 
+#ifdef BJDATA_TEST_ROUNDTRIP  // round-trip to vectorized size (ndarray) is not yet supported
+                // roundtrip: output should be optimized
+                std::vector<uint8_t> v_empty = {'[', '#', 'i', 0};
+                CHECK(json::to_bjdata(json::from_bjdata(v_N), true, true) == v_empty);
+                CHECK(json::to_bjdata(json::from_bjdata(v_T), true, true) == v_T);
+                CHECK(json::to_bjdata(json::from_bjdata(v_F), true, true) == v_F);
+                CHECK(json::to_bjdata(json::from_bjdata(v_Z), true, true) == v_Z);
+                CHECK(json::to_bjdata(json::from_bjdata(v_i), true, true) == v_i);
+                CHECK(json::to_bjdata(json::from_bjdata(v_U), true, true) == v_U);
+                CHECK(json::to_bjdata(json::from_bjdata(v_I), true, true) == v_I);
+                CHECK(json::to_bjdata(json::from_bjdata(v_u), true, true) == v_u);
+                CHECK(json::to_bjdata(json::from_bjdata(v_l), true, true) == v_l);
+                CHECK(json::to_bjdata(json::from_bjdata(v_m), true, true) == v_m);
+                CHECK(json::to_bjdata(json::from_bjdata(v_L), true, true) == v_L);
+                CHECK(json::to_bjdata(json::from_bjdata(v_M), true, true) == v_M);
+                CHECK(json::to_bjdata(json::from_bjdata(v_D), true, true) == v_D);
+                CHECK(json::to_bjdata(json::from_bjdata(v_S), true, true) == v_S);
+                CHECK(json::to_bjdata(json::from_bjdata(v_C), true, true) == v_S); // char is serialized to string
+#endif
+            }
+
+            SECTION("optimized ndarray (type and vector-size as 1D array)")
+            {
+                // typed container whose size after '#' is a plain array of dimensions, '[' i 1 i 2 ']', i.e. 1 x 2 = two elements
+                std::vector<uint8_t> v_N = {'[', '$', 'N', '#', '[', 'i', 1, 'i', 2, ']'};
+                std::vector<uint8_t> v_T = {'[', '$', 'T', '#', '[', 'i', 1, 'i', 2, ']'};
+                std::vector<uint8_t> v_F = {'[', '$', 'F', '#', '[', 'i', 1, 'i', 2, ']'};
+                std::vector<uint8_t> v_Z = {'[', '$', 'Z', '#', '[', 'i', 1, 'i', 2, ']'};
+                std::vector<uint8_t> v_i = {'[', '$', 'i', '#', '[', 'i', 1, 'i', 2, ']', 0x7F, 0x7F};
+                std::vector<uint8_t> v_U = {'[', '$', 'U', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF};
+                std::vector<uint8_t> v_I = {'[', '$', 'I', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0x7F, 0xFF, 0x7F};
+                std::vector<uint8_t> v_u = {'[', '$', 'u', '#', '[', 'i', 1, 'i', 2, ']', 0x0F, 0xA7, 0x0F, 0xA7}; // least-significant byte first: 0x0F 0xA7 -> 0xA70F = 42767
+                std::vector<uint8_t> v_l = {'[', '$', 'l', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F};
+                std::vector<uint8_t> v_m = {'[', '$', 'm', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xC9, 0x9A, 0xBB, 0xFF, 0xC9, 0x9A, 0xBB};
+                std::vector<uint8_t> v_L = {'[', '$', 'L', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F};
+                std::vector<uint8_t> v_M = {'[', '$', 'M', '#', '[', 'i', 1, 'i', 2, ']', 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D, 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D};
+                std::vector<uint8_t> v_D = {'[', '$', 'D', '#', '[', 'i', 1, 'i', 2, ']', 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40};
+                std::vector<uint8_t> v_S = {'[', '$', 'S', '#', '[', 'i', 1, 'i', 2, ']', 'i', 1, 'a', 'i', 1, 'a'};
+                std::vector<uint8_t> v_C = {'[', '$', 'C', '#', '[', 'i', 1, 'i', 2, ']', 'a', 'a'};
+
+                // each input must parse to a flat two-element array; the no-op type 'N' is expected to give an empty array
+                CHECK(json::from_bjdata(v_N) == json::array());
+                CHECK(json::from_bjdata(v_T) == json({true, true}));
+                CHECK(json::from_bjdata(v_F) == json({false, false}));
+                CHECK(json::from_bjdata(v_Z) == json({nullptr, nullptr}));
+                CHECK(json::from_bjdata(v_i) == json({127, 127}));
+                CHECK(json::from_bjdata(v_U) == json({255, 255}));
+                CHECK(json::from_bjdata(v_I) == json({32767, 32767}));
+                CHECK(json::from_bjdata(v_u) == json({42767, 42767}));
+                CHECK(json::from_bjdata(v_l) == json({2147483647, 2147483647}));
+                CHECK(json::from_bjdata(v_m) == json({3147483647, 3147483647}));
+                CHECK(json::from_bjdata(v_L) == json({9223372036854775807, 9223372036854775807}));
+                CHECK(json::from_bjdata(v_M) == json({10223372036854775807ull, 10223372036854775807ull}));
+                CHECK(json::from_bjdata(v_D) == json({3.1415926, 3.1415926}));
+                CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
+                CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
+
+#ifdef BJDATA_TEST_ROUNDTRIP  // round-trip to vectorized size (ndarray) is not yet supported
+                // roundtrip (compiled only when BJDATA_TEST_ROUNDTRIP is defined): re-encoding with size and type optimization should reproduce the input bytes
+                std::vector<uint8_t> v_empty = {'[', '#', 'i', 0};
+                CHECK(json::to_bjdata(json::from_bjdata(v_N), true, true) == v_empty);
+                CHECK(json::to_bjdata(json::from_bjdata(v_T), true, true) == v_T);
+                CHECK(json::to_bjdata(json::from_bjdata(v_F), true, true) == v_F);
+                CHECK(json::to_bjdata(json::from_bjdata(v_Z), true, true) == v_Z);
+                CHECK(json::to_bjdata(json::from_bjdata(v_i), true, true) == v_i);
+                CHECK(json::to_bjdata(json::from_bjdata(v_U), true, true) == v_U);
+                CHECK(json::to_bjdata(json::from_bjdata(v_I), true, true) == v_I);
+                CHECK(json::to_bjdata(json::from_bjdata(v_u), true, true) == v_u);
+                CHECK(json::to_bjdata(json::from_bjdata(v_l), true, true) == v_l);
+                CHECK(json::to_bjdata(json::from_bjdata(v_m), true, true) == v_m);
+                CHECK(json::to_bjdata(json::from_bjdata(v_L), true, true) == v_L);
+                CHECK(json::to_bjdata(json::from_bjdata(v_M), true, true) == v_M);
+                CHECK(json::to_bjdata(json::from_bjdata(v_D), true, true) == v_D);
+                CHECK(json::to_bjdata(json::from_bjdata(v_S), true, true) == v_S);
+                CHECK(json::to_bjdata(json::from_bjdata(v_C), true, true) == v_S); // char is serialized to string
+#endif
+
+            }
+
+            SECTION("optimized ndarray (type and vector-size as size-optimized array)")
+            {
+                // create vector with two elements of the same type
+                std::vector<uint8_t> v_N = {'[', '$', 'N', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
+                std::vector<uint8_t> v_T = {'[', '$', 'T', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
+                std::vector<uint8_t> v_F = {'[', '$', 'F', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
+                std::vector<uint8_t> v_Z = {'[', '$', 'Z', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2};
+                std::vector<uint8_t> v_i = {'[', '$', 'i', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0x7F, 0x7F};
+                std::vector<uint8_t> v_U = {'[', '$', 'U', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF};
+                std::vector<uint8_t> v_I = {'[', '$', 'I', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0x7F, 0xFF, 0x7F};
+                std::vector<uint8_t> v_u = {'[', '$', 'u', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0x0F, 0xA7, 0x0F, 0xA7};
+                std::vector<uint8_t> v_l = {'[', '$', 'l', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F};
+                std::vector<uint8_t> v_m = {'[', '$', 'm', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xC9, 0x9A, 0xBB, 0xFF, 0xC9, 0x9A, 0xBB};
+                std::vector<uint8_t> v_L = {'[', '$', 'L', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F};
+                std::vector<uint8_t> v_M = {'[', '$', 'M', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D, 0xFF, 0xFF, 0x63, 0xA7, 0xB3, 0xB6, 0xE0, 0x8D};
+                std::vector<uint8_t> v_D = {'[', '$', 'D', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40};
+                std::vector<uint8_t> v_S = {'[', '$', 'S', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 'i', 1, 'a', 'i', 1, 'a'};
+                std::vector<uint8_t> v_C = {'[', '$', 'C', '#', '[', '#', 'i', 2, 'i', 1, 'i', 2, 'a', 'a'};
+
+                // check if vector is parsed correctly
+                CHECK(json::from_bjdata(v_N) == json::array());
+                CHECK(json::from_bjdata(v_T) == json({true, true}));
+                CHECK(json::from_bjdata(v_F) == json({false, false}));
+                CHECK(json::from_bjdata(v_Z) == json({nullptr, nullptr}));
+                CHECK(json::from_bjdata(v_i) == json({127, 127}));
+                CHECK(json::from_bjdata(v_U) == json({255, 255}));
+                CHECK(json::from_bjdata(v_I) == json({32767, 32767}));
+                CHECK(json::from_bjdata(v_u) == json({42767, 42767}));
+                CHECK(json::from_bjdata(v_l) == json({2147483647, 2147483647}));
+                CHECK(json::from_bjdata(v_m) == json({3147483647, 3147483647}));
+                CHECK(json::from_bjdata(v_L) == json({9223372036854775807, 9223372036854775807}));
+                CHECK(json::from_bjdata(v_M) == json({10223372036854775807ull, 10223372036854775807ull}));
+                CHECK(json::from_bjdata(v_D) == json({3.1415926, 3.1415926}));
+                CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
+                CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
+
 #ifdef BJDATA_TEST_ROUNDTRIP  // round-trip to vectorized size (ndarray) is not yet supported
                 // roundtrip: output should be optimized
                 std::vector<uint8_t> v_empty = {'[', '#', 'i', 0};