diff --git a/docs/examples/to_bon8.cpp b/docs/examples/to_bon8.cpp new file mode 100644 index 000000000..2f2634253 --- /dev/null +++ b/docs/examples/to_bon8.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +using json = nlohmann::json; +using namespace nlohmann::literals; + +int main() +{ + // create a JSON value + json j = R"({"compact": true, "schema": 0})"_json; + + // serialize it to BON8 + std::vector v = json::to_bon8(j); + + // print the vector content + for (auto& byte : v) + { + std::cout << "0x" << std::hex << std::setw(2) << std::setfill('0') << (int)byte << " "; + } + std::cout << std::endl; +} diff --git a/docs/examples/to_bon8.output b/docs/examples/to_bon8.output new file mode 100644 index 000000000..8cce605dd --- /dev/null +++ b/docs/examples/to_bon8.output @@ -0,0 +1 @@ +0x88 0x63 0x6f 0x6d 0x70 0x61 0x63 0x74 0xf9 0x73 0x63 0x68 0x65 0x6d 0x61 0x90 diff --git a/docs/mkdocs/docs/api/basic_json/index.md b/docs/mkdocs/docs/api/basic_json/index.md index 648670144..3514ce914 100644 --- a/docs/mkdocs/docs/api/basic_json/index.md +++ b/docs/mkdocs/docs/api/basic_json/index.md @@ -277,6 +277,7 @@ Access to the JSON value - [**from_cbor**](from_cbor.md) (_static_) - create a JSON value from an input in CBOR format - [**from_msgpack**](from_msgpack.md) (_static_) - create a JSON value from an input in MessagePack format - [**from_ubjson**](from_ubjson.md) (_static_) - create a JSON value from an input in UBJSON format +- [**to_bon8**](to_bon8.md) (_static_) - create a BON8 serialization of a given JSON value - [**to_bjdata**](to_bjdata.md) (_static_) - create a BJData serialization of a given JSON value - [**to_bson**](to_bson.md) (_static_) - create a BSON serialization of a given JSON value - [**to_cbor**](to_cbor.md) (_static_) - create a CBOR serialization of a given JSON value diff --git a/docs/mkdocs/docs/api/basic_json/to_bon8.md b/docs/mkdocs/docs/api/basic_json/to_bon8.md new file mode 100644 index 000000000..277fd3151 --- /dev/null +++ 
b/docs/mkdocs/docs/api/basic_json/to_bon8.md @@ -0,0 +1,57 @@ +# basic_json::to_bon8 + +```cpp +// (1) +static std::vector<std::uint8_t> to_bon8(const basic_json& j); + +// (2) +static void to_bon8(const basic_json& j, detail::output_adapter<std::uint8_t> o); +static void to_bon8(const basic_json& j, detail::output_adapter<char> o); +``` + +Serializes a given JSON value `j` to a byte vector using the BON8 serialization format. BON8 is a binary serialization +format which aims to be more compact than JSON itself, yet more efficient to parse. + +1. Returns a byte vector containing the BON8 serialization. +2. Writes the BON8 serialization to an output adapter. + +## Parameters + +`j` (in) +: JSON value to serialize + +`o` (in) +: output adapter to write serialization to + +## Return value + +1. BON8 serialization as byte vector +2. / + +## Exception safety + +Strong guarantee: if an exception is thrown, there are no changes in the JSON value. + +## Complexity + +Linear in the size of the JSON value `j`. + +## Example + +??? example + + The example shows the serialization of a JSON value to a byte vector in BON8 format. + + ```cpp + --8<-- "examples/to_bon8.cpp" + ``` + + Output: + + ```json + --8<-- "examples/to_bon8.output" + ``` + +## Version history + +- Added in version 3.11.0. diff --git a/docs/mkdocs/docs/features/binary_formats/index.md b/docs/mkdocs/docs/features/binary_formats/index.md index e74290b09..41cb72c9f 100644 --- a/docs/mkdocs/docs/features/binary_formats/index.md +++ b/docs/mkdocs/docs/features/binary_formats/index.md @@ -8,6 +8,7 @@ a network. Hence, the library supports - [CBOR](cbor.md) (Concise Binary Object Representation), - [MessagePack](messagepack.md), and - [UBJSON](ubjson.md) (Universal Binary JSON) +- BON8 to efficiently encode JSON values to byte vectors and to decode such vectors. @@ -22,6 +23,7 @@ to efficiently encode JSON values to byte vectors and to decode such vectors. 
| CBOR | complete | incomplete, but all JSON types are supported | | MessagePack | complete | complete | | UBJSON | complete | complete | +| BON8 | complete | not yet implemented | ### Binary values @@ -32,21 +34,30 @@ to efficiently encode JSON values to byte vectors and to decode such vectors. | CBOR | supported | supported | | MessagePack | supported | supported | | UBJSON | not supported | not supported | +| BON8 | not supported | not supported | See [binary values](../binary_values.md) for more information. ### Sizes -| Format | canada.json | twitter.json | citm_catalog.json | jeopardy.json | -|--------------------|-------------|--------------|-------------------|---------------| -| BJData | 53.2 % | 91.1 % | 78.1 % | 96.6 % | -| BJData (size) | 58.6 % | 92.1 % | 86.7 % | 97.4 % | -| BJData (size+tyoe) | 58.6 % | 92.1 % | 86.5 % | 97.4 % | -| BSON | 85.8 % | 95.2 % | 95.8 % | 106.7 % | -| CBOR | 50.5 % | 86.3 % | 68.4 % | 88.0 % | -| MessagePack | 50.5 % | 86.0 % | 68.5 % | 87.9 % | -| UBJSON | 53.2 % | 91.3 % | 78.2 % | 96.6 % | -| UBJSON (size) | 58.6 % | 92.3 % | 86.8 % | 97.4 % | -| UBJSON (size+type) | 55.9 % | 92.3 % | 85.0 % | 95.0 % | +| Format | [canada.json](https://github.com/nlohmann/json_test_data/blob/master/nativejson-benchmark/canada.json) | [twitter.json](https://github.com/nlohmann/json_test_data/blob/master/nativejson-benchmark/twitter.json) | [citm_catalog.json](https://github.com/nlohmann/json_test_data/blob/master/nativejson-benchmark/citm_catalog.json) | [jeopardy.json](https://github.com/nlohmann/json_test_data/blob/master/jeopardy/jeopardy.json) | [sample.json](https://github.com/nlohmann/json_test_data/blob/master/json_testsuite/sample.json) | 
+|--------------------|--------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------| +| BJData | 53.2 % | 91.1 % | 78.1 % | 96.6 % | +| BJData (size) | 58.6 % | 92.1 % | 86.7 % | 97.4 % | +| BJData (size+type) | 58.6 % | 92.1 % | 86.5 % | 97.4 % | +| BSON | 85.8 % | 95.2 % | 95.8 % | 106.7 % (1) | N/A (2) | +| CBOR | 50.5 % | 86.3 % | 68.4 % | 88.0 % | 87.2 % | +| MessagePack | 50.5 % | 86.0 % | 68.5 % | 87.9 % | 87.2 % | +| UBJSON | 53.2 % | 91.3 % | 78.2 % | 96.6 % | 88.2 % | +| UBJSON (size) | 58.6 % | 92.3 % | 86.8 % | 97.4 % | 89.3 % | +| UBJSON (size+type) | 55.9 % | 92.3 % | 85.0 % | 95.0 % | 89.5 % | +| BON8 | 50.5 % | 83.8 % | 63.5 % | 87.5 % | 85.6 % | Sizes compared to minified JSON value. + +Notes: + +- (1) The JSON value is an array that needed to be wrapped in an object to be processed by BSON. We used an empty object key for minimal overhead. +- (2) The JSON value contained a string with code point `U+0000` which cannot be represented by BSON. + +The JSON files are part of the [nlohmann/json_test_data](https://github.com/nlohmann/json_test_data) repository. 
diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index 5e66db596..6f9bbf718 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -196,6 +196,7 @@ nav: - 'swap': api/basic_json/swap.md - 'std::swap<basic_json>': api/basic_json/std_swap.md - 'to_bjdata': api/basic_json/to_bjdata.md + - 'to_bon8': api/basic_json/to_bon8.md - 'to_bson': api/basic_json/to_bson.md - 'to_cbor': api/basic_json/to_cbor.md - 'to_msgpack': api/basic_json/to_msgpack.md diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index a6e100e76..c22fa4a08 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -128,6 +128,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bon8: + result = parse_bon8_internal(true); + break; + case input_format_t::json: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE @@ -2735,6 +2739,301 @@ class binary_reader } } + ////////// + // BON8 // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true) or whether the last read character should + be considered instead (false) + + @return whether a valid BON8 value was passed to the SAX parser + */ + bool parse_bon8_internal(const bool get_char) + { + switch (get_char ? 
get() : current) + { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + return get_bon8_array(static_cast(current - 0x80)); + + case 0x85: + return get_bon8_array(static_cast(-1)); + + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + return get_bon8_object(static_cast(current - 0x86)); + + case 0x8B: + return get_bon8_object(static_cast(-1)); + + case 0x8C: + { + std::int32_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8D: + { + std::int64_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8E: + { + float number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x8F: + { + double number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return sax->number_unsigned(static_cast(current) - static_cast(0x90)); + + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + case 0xC0: + case 0xC1: + return sax->number_integer(static_cast(0xB7) - static_cast(current)); + + case 0xF8: + return sax->boolean(false); + + case 0xF9: + return sax->boolean(true); + + case 0xFA: + return sax->null(); + + case 0xFB: + return sax->number_float(-1.0, ""); + + case 0xFC: + return sax->number_float(0.0, ""); + + case 0xFD: + return sax->number_float(1.0, ""); + + 
case 0xFE: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bon8, "invalid byte: 0x" + last_token, "value"), nullptr)); + } + + default: + { + string_t s; + return get_bon8_string(s) && sax->string(s); + } + } + } + + bool get_bon8_array(const std::size_t len) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) + { + return false; + } + + if (len != static_cast(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(true))) + { + return false; + } + } + } + else + { + while (get() != 0xFE) + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + bool get_bon8_object(const std::size_t len) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) + { + return false; + } + + if (len != 0) + { + string_t key; + if (len != static_cast(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!get_bon8_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFE) + { + if (JSON_HEDLEY_UNLIKELY(!get_bon8_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + key.clear(); + } + } + } + + return sax->end_object(); + } + + bool get_bon8_string(string_t& result) + { + while (true) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + + if ((current & 0x80) == 0x00) + { + result.push_back(static_cast(current)); + get(); + } + else if ((current & 0xE0) == 0xC0) + { + result.push_back(static_cast(current)); + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + 
get(); + } + else if ((current & 0xF0) == 0xE0) + { + result.push_back(static_cast(current)); + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + get(); + } + else if ((current & 0xF8) == 0xF0) + { + result.push_back(static_cast(current)); + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + get(); + } + else if (current == 0xFF) + { + get(); + return true; + } + else + { + return true; + } + } + } + /////////////////////// // Utility functions // /////////////////////// @@ -2934,6 +3233,10 @@ class binary_reader error_msg += "BSON"; break; + case input_format_t::bon8: + error_msg += "BON8"; + break; + case input_format_t::bjdata: error_msg += "BJData"; break; diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 33fca3e4b..e92f60e98 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -32,7 +32,7 @@ namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson, bson, bjdata }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson, bjdata, bon8 }; //////////////////// // input adapters // diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index f475d57be..05a7e7a0a 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ 
b/include/nlohmann/detail/output/binary_writer.hpp @@ -948,6 +948,18 @@ class binary_writer } } + /*! + @param[in] j JSON value to serialize + */ + void write_bon8(const BasicJsonType& j) + { + const bool last_written_value_is_string = write_bon8_internal(j); + if (last_written_value_is_string) + { + oa->write_character(to_char_type(0xFF)); + } + } + private: ////////// // BSON // @@ -1724,6 +1736,279 @@ class binary_writer return false; } + ////////// + // BON8 // + ////////// + + /*! + * @param j + * @return whether the last written value was a string + */ + bool write_bon8_internal(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: + { + oa->write_character(to_char_type(0xFA)); + return false; + } + + case value_t::boolean: + { + oa->write_character(j.m_value.boolean + ? to_char_type(0xF9) + : to_char_type(0xF8)); + return false; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned > static_cast((std::numeric_limits::max)())) + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(j.m_value.number_unsigned) + " cannot be represented by BON8 as it does not fit int64", &j)); + } + write_bon8_integer(static_cast(j.m_value.number_unsigned)); + return false; + } + + case value_t::number_integer: + { + write_bon8_integer(j.m_value.number_integer); + return false; + } + + case value_t::number_float: + { + // special values + if (j.m_value.number_float == -1.0) + { + oa->write_character(to_char_type(0xFB)); + } + else if (j.m_value.number_float == 0.0 && !std::signbit(j.m_value.number_float)) + { + oa->write_character(to_char_type(0xFC)); + } + else if (j.m_value.number_float == 1.0) + { + oa->write_character(to_char_type(0xFD)); + } + else if (std::isnan(j.m_value.number_float)) + { + oa->write_character(to_char_type(0x8E)); + oa->write_character(to_char_type(0x7F)); + oa->write_character(to_char_type(0x80)); + oa->write_character(to_char_type(0x00)); + oa->write_character(to_char_type(0x01)); + } + 
else + { + // write float with prefix + write_compact_float(j.m_value.number_float, detail::input_format_t::bon8); + } + return false; + } + + case value_t::string: + { + // empty string: use end-of-text symbol + if (j.m_value.string->empty()) + { + oa->write_character(to_char_type(0xFF)); + return false; // already wrote 0xFF byte + } + + // write strings as is + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + return true; + } + + case value_t::array: + { + bool last_written_value_is_string = false; + const auto N = j.m_value.array->size(); + if (N <= 4) + { + // start array with count (80..84) + oa->write_character(static_cast(0x80 + N)); + } + else + { + // start array + oa->write_character(to_char_type(0x85)); + } + + // write each element + for (std::size_t i = 0; i < N; ++i) + { + const auto& el = j.m_value.array->operator[](i); + + // check if 0xFF after nonempty string and string is required + if (i > 0) + { + const auto& prev = j.m_value.array->operator[](i - 1); + if (el.is_string() && prev.is_string() && !prev.m_value.string->empty()) + { + oa->write_character(to_char_type(0xFF)); + } + } + + last_written_value_is_string = write_bon8_internal(el); + } + + if (N > 4) + { + // end of container + oa->write_character(to_char_type(0xFE)); + last_written_value_is_string = false; // 0xFE is not a string byte + } + + return last_written_value_is_string; + } + + case value_t::object: + { + bool last_written_value_is_string = false; + const auto N = j.m_value.object->size(); + if (N <= 4) + { + // start object with count (86..8A) + oa->write_character(static_cast(0x86 + N)); + } + else + { + // start object + oa->write_character(to_char_type(0x8B)); + } + + // write each element + for (auto it = j.m_value.object->begin(); it != j.m_value.object->end(); ++it) + { + const auto& key = it->first; + const auto& value = it->second; + + write_bon8_internal(key); + + // check if we need a 0xFF separator between key and 
value + if (!key.empty() && value.is_string()) + { + oa->write_character(to_char_type(0xFF)); + } + + last_written_value_is_string = write_bon8_internal(value); + + // check if we need a 0xFF separator between the value and the next key + if (value.is_string() && !value.m_value.string->empty() && std::next(it) != j.m_value.object->end()) + { + oa->write_character(to_char_type(0xFF)); + } + } + + if (N > 4) + { + // end of container + oa->write_character(to_char_type(0xFE)); + last_written_value_is_string = false; // 0xFE is not a string byte + } + + return last_written_value_is_string; + } + + case value_t::binary: + case value_t::discarded: + default: + return false; + } + } + + void write_bon8_integer(typename BasicJsonType::number_integer_t value) + { + if (value < (std::numeric_limits::min)() || value > (std::numeric_limits::max)()) + { + // 64 bit integers + oa->write_character(to_char_type(0x8D)); + write_number(static_cast(value)); + } + else if (value < -33818506 || value > 67637031) + { + // 32 bit integers + oa->write_character(to_char_type(0x8C)); + write_number(static_cast(value)); + } + else if (value <= -264075) + { + JSON_ASSERT(value >= -33818506); + value = -(value + 264075); + oa->write_character(static_cast(0xF0 + (value >> 22 & 0x07))); + oa->write_character(static_cast(0xC0 + (value >> 16 & 0x3F))); + oa->write_character(static_cast(value >> 8)); + oa->write_character(static_cast(value)); + } + else if (value <= -1931) + { + JSON_ASSERT(value >= -264074); + value = -(value + 1931); + oa->write_character(static_cast(0xE0 + (value >> 14 & 0x0F))); + oa->write_character(static_cast(0xC0 + (value >> 8 & 0x3F))); + oa->write_character(static_cast(value)); + } + else if (value <= -11) + { + JSON_ASSERT(value >= -1930); + value = -(value + 11); + oa->write_character(static_cast(0xC2 + (value >> 6 & 0x1F))); + oa->write_character(static_cast(0xC0 + (value & 0x3F))); + } + else if (value <= -1) + { + JSON_ASSERT(value >= -10); + value = -(value + 1); + 
oa->write_character(static_cast(0xB8 + value)); + } + else if (value <= 39) + { + JSON_ASSERT(value >= 0); + oa->write_character(static_cast(0x90 + value)); + } + else if (value <= 3879) + { + JSON_ASSERT(value >= 40); + value -= 40; + oa->write_character(static_cast(0xC2 + (value >> 7 & 0x1F))); + oa->write_character(static_cast(value & 0x7F)); + } + else if (value <= 528167) + { + JSON_ASSERT(value >= 3880); + value -= 3880; + oa->write_character(static_cast(0xE0 + (value >> 15 & 0x0F))); + oa->write_character(static_cast(value >> 8 & 0x7F)); + oa->write_character(static_cast(value)); + } + else + { + JSON_ASSERT(value >= 528168); + JSON_ASSERT(value <= 67637031); + value -= 528168; + oa->write_character(static_cast(0xF0 + (value >> 23 & 0x17))); + oa->write_character(static_cast(value >> 16 & 0x7F)); + oa->write_character(static_cast(value >> 8)); + oa->write_character(static_cast(value)); + } + } + + static constexpr CharType get_bon8_float_prefix(float /*unused*/) + { + return to_char_type(0x8E); + } + + static constexpr CharType get_bon8_float_prefix(double /*unused*/) + { + return to_char_type(0x8F); + } + /////////////////////// // Utility functions // /////////////////////// @@ -1764,20 +2049,54 @@ class binary_writer #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif - if (static_cast(n) >= static_cast(std::numeric_limits::lowest()) && - static_cast(n) <= static_cast((std::numeric_limits::max)()) && - static_cast(static_cast(n)) == static_cast(n)) + if (std::isnan(n) || std::isinf(n) || (static_cast(n) >= static_cast(std::numeric_limits::lowest()) && + static_cast(n) <= static_cast((std::numeric_limits::max)()) && + static_cast(static_cast(n)) == static_cast(n))) { - oa->write_character(format == detail::input_format_t::cbor - ? 
get_cbor_float_prefix(static_cast(n)) - : get_msgpack_float_prefix(static_cast(n))); + switch (format) + { + case input_format_t::cbor: + oa->write_character(get_cbor_float_prefix(static_cast(n))); + break; + case input_format_t::msgpack: + oa->write_character(get_msgpack_float_prefix(static_cast(n))); + break; + case input_format_t::bon8: + oa->write_character(get_bon8_float_prefix(static_cast(n))); + break; + // LCOV_EXCL_START + case input_format_t::bson: + case input_format_t::json: + case input_format_t::ubjson: + case input_format_t::bjdata: + default: + break; + // LCOV_EXCL_STOP + } write_number(static_cast(n)); } else { - oa->write_character(format == detail::input_format_t::cbor - ? get_cbor_float_prefix(n) - : get_msgpack_float_prefix(n)); + switch (format) + { + case input_format_t::cbor: + oa->write_character(get_cbor_float_prefix(n)); + break; + case input_format_t::msgpack: + oa->write_character(get_msgpack_float_prefix(n)); + break; + case input_format_t::bon8: + oa->write_character(get_bon8_float_prefix(n)); + break; + // LCOV_EXCL_START + case input_format_t::bson: + case input_format_t::json: + case input_format_t::ubjson: + case input_format_t::bjdata: + default: + break; + // LCOV_EXCL_STOP + } write_number(n); } #ifdef __GNUC__ diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 31ca64539..dac29609e 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -4355,6 +4355,29 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec binary_writer(o).write_bson(j); } + /// @brief create a BON8 serialization of a given JSON value + /// @sa https://json.nlohmann.me/api/basic_json/to_bon8/ + static std::vector to_bon8(const basic_json& j) + { + std::vector result; + to_bon8(j, result); + return result; + } + + /// @brief create a BON8 serialization of a given JSON value + /// @sa https://json.nlohmann.me/api/basic_json/to_bon8/ + static void to_bon8(const basic_json& j, 
detail::output_adapter o) + { + binary_writer(o).write_bon8(j); + } + + /// @brief create a BON8 serialization of a given JSON value + /// @sa https://json.nlohmann.me/api/basic_json/to_bon8/ + static void to_bon8(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bon8(j); + } + /// @brief create a JSON value from an input in CBOR format /// @sa https://json.nlohmann.me/api/basic_json/from_cbor/ template @@ -4604,6 +4627,36 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } + + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(InputType&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::forward(i)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bon8(InputType&&, const bool, const bool) + */ + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(IteratorType first, IteratorType last, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? 
result : basic_json(value_t::discarded); + } + /// @} ////////////////////////// diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a858728c4..837fc7a14 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6174,7 +6174,7 @@ namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson, bson, bjdata }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson, bjdata, bon8 }; //////////////////// // input adapters // @@ -9274,6 +9274,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bon8: + result = parse_bon8_internal(true); + break; + case input_format_t::json: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE @@ -11881,6 +11885,301 @@ class binary_reader } } + ////////// + // BON8 // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true) or whether the last read character should + be considered instead (false) + + @return whether a valid BON8 value was passed to the SAX parser + */ + bool parse_bon8_internal(const bool get_char) + { + switch (get_char ? 
get() : current) + { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + return get_bon8_array(static_cast(current - 0x80)); + + case 0x85: + return get_bon8_array(static_cast(-1)); + + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + return get_bon8_object(static_cast(current - 0x86)); + + case 0x8B: + return get_bon8_object(static_cast(-1)); + + case 0x8C: + { + std::int32_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8D: + { + std::int64_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8E: + { + float number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x8F: + { + double number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return sax->number_unsigned(static_cast(current) - static_cast(0x90)); + + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + case 0xC0: + case 0xC1: + return sax->number_integer(static_cast(0xB7) - static_cast(current)); + + case 0xF8: + return sax->boolean(false); + + case 0xF9: + return sax->boolean(true); + + case 0xFA: + return sax->null(); + + case 0xFB: + return sax->number_float(-1.0, ""); + + case 0xFC: + return sax->number_float(0.0, ""); + + case 0xFD: + return sax->number_float(1.0, ""); + + 
case 0xFE: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bon8, "invalid byte: 0x" + last_token, "value"), nullptr)); + } + + default: + { + string_t s; + return get_bon8_string(s) && sax->string(s); + } + } + } + + bool get_bon8_array(const std::size_t len) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) + { + return false; + } + + if (len != static_cast(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(true))) + { + return false; + } + } + } + else + { + while (get() != 0xFE) + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + bool get_bon8_object(const std::size_t len) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) + { + return false; + } + + if (len != 0) + { + string_t key; + if (len != static_cast(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!get_bon8_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFE) + { + if (JSON_HEDLEY_UNLIKELY(!get_bon8_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + key.clear(); + } + } + } + + return sax->end_object(); + } + + bool get_bon8_string(string_t& result) + { + while (true) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + + if ((current & 0x80) == 0x00) + { + result.push_back(static_cast(current)); + get(); + } + else if ((current & 0xE0) == 0xC0) + { + result.push_back(static_cast(current)); + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + 
get(); + } + else if ((current & 0xF0) == 0xE0) + { + result.push_back(static_cast(current)); + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + get(); + } + else if ((current & 0xF8) == 0xF0) + { + result.push_back(static_cast(current)); + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + result.push_back(static_cast(get())); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bon8, "string"))) + { + return false; + } + get(); + } + else if (current == 0xFF) + { + get(); + return true; + } + else + { + return true; + } + } + } + /////////////////////// // Utility functions // /////////////////////// @@ -12080,6 +12379,10 @@ class binary_reader error_msg += "BSON"; break; + case input_format_t::bon8: + error_msg += "BON8"; + break; + case input_format_t::bjdata: error_msg += "BJData"; break; @@ -15988,6 +16291,18 @@ class binary_writer } } + /*! + @param[in] j JSON value to serialize + */ + void write_bon8(const BasicJsonType& j) + { + const bool last_written_value_is_string = write_bon8_internal(j); + if (last_written_value_is_string) + { + oa->write_character(to_char_type(0xFF)); + } + } + private: ////////// // BSON // @@ -16764,6 +17079,279 @@ class binary_writer return false; } + ////////// + // BON8 // + ////////// + + /*! 
+ * @param j + * @return whether the last written value was a string + */ + bool write_bon8_internal(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: + { + oa->write_character(to_char_type(0xFA)); + return false; + } + + case value_t::boolean: + { + oa->write_character(j.m_value.boolean + ? to_char_type(0xF9) + : to_char_type(0xF8)); + return false; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned > static_cast((std::numeric_limits::max)())) + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(j.m_value.number_unsigned) + " cannot be represented by BON8 as it does not fit int64", &j)); + } + write_bon8_integer(static_cast(j.m_value.number_unsigned)); + return false; + } + + case value_t::number_integer: + { + write_bon8_integer(j.m_value.number_integer); + return false; + } + + case value_t::number_float: + { + // special values + if (j.m_value.number_float == -1.0) + { + oa->write_character(to_char_type(0xFB)); + } + else if (j.m_value.number_float == 0.0 && !std::signbit(j.m_value.number_float)) + { + oa->write_character(to_char_type(0xFC)); + } + else if (j.m_value.number_float == 1.0) + { + oa->write_character(to_char_type(0xFD)); + } + else if (std::isnan(j.m_value.number_float)) + { + oa->write_character(to_char_type(0x8E)); + oa->write_character(to_char_type(0x7F)); + oa->write_character(to_char_type(0x80)); + oa->write_character(to_char_type(0x00)); + oa->write_character(to_char_type(0x01)); + } + else + { + // write float with prefix + write_compact_float(j.m_value.number_float, detail::input_format_t::bon8); + } + return false; + } + + case value_t::string: + { + // empty string: use end-of-text symbol + if (j.m_value.string->empty()) + { + oa->write_character(to_char_type(0xFF)); + return false; // already wrote 0xFF byte + } + + // write strings as is + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + return true; + } + + case 
value_t::array: + { + bool last_written_value_is_string = false; + const auto N = j.m_value.array->size(); + if (N <= 4) + { + // start array with count (80..84) + oa->write_character(static_cast(0x80 + N)); + } + else + { + // start array + oa->write_character(to_char_type(0x85)); + } + + // write each element + for (std::size_t i = 0; i < N; ++i) + { + const auto& el = j.m_value.array->operator[](i); + + // check if 0xFF after nonempty string and string is required + if (i > 0) + { + const auto& prev = j.m_value.array->operator[](i - 1); + if (el.is_string() && prev.is_string() && !prev.m_value.string->empty()) + { + oa->write_character(to_char_type(0xFF)); + } + } + + last_written_value_is_string = write_bon8_internal(el); + } + + if (N > 4) + { + // end of container + oa->write_character(to_char_type(0xFE)); + last_written_value_is_string = false; // 0xFE is not a string byte + } + + return last_written_value_is_string; + } + + case value_t::object: + { + bool last_written_value_is_string = false; + const auto N = j.m_value.object->size(); + if (N <= 4) + { + // start object with count (86..8A) + oa->write_character(static_cast(0x86 + N)); + } + else + { + // start object + oa->write_character(to_char_type(0x8B)); + } + + // write each element + for (auto it = j.m_value.object->begin(); it != j.m_value.object->end(); ++it) + { + const auto& key = it->first; + const auto& value = it->second; + + write_bon8_internal(key); + + // check if we need a 0xFF separator between key and value + if (!key.empty() && value.is_string()) + { + oa->write_character(to_char_type(0xFF)); + } + + last_written_value_is_string = write_bon8_internal(value); + + // check if we need a 0xFF separator between the value and the next key + if (value.is_string() && !value.m_value.string->empty() && std::next(it) != j.m_value.object->end()) + { + oa->write_character(to_char_type(0xFF)); + } + } + + if (N > 4) + { + // end of container + oa->write_character(to_char_type(0xFE)); + 
last_written_value_is_string = false; // 0xFE is not a string byte + } + + return last_written_value_is_string; + } + + case value_t::binary: + case value_t::discarded: + default: + return false; + } + } + + void write_bon8_integer(typename BasicJsonType::number_integer_t value) + { + if (value < (std::numeric_limits::min)() || value > (std::numeric_limits::max)()) + { + // 64 bit integers: prefix byte 0x8D, then the number itself + oa->write_character(to_char_type(0x8D)); + write_number(static_cast(value)); + } + else if (value < -33818506 || value > 67637031) + { + // 32 bit integers: outside the compact range -33818506..67637031, prefix byte 0x8C + oa->write_character(to_char_type(0x8C)); + write_number(static_cast(value)); + } + else if (value <= -264075) + { + JSON_ASSERT(value >= -33818506); + value = -(value + 264075); + oa->write_character(static_cast(0xF0 + (value >> 22 & 0x07))); + oa->write_character(static_cast(0xC0 + (value >> 16 & 0x3F))); + oa->write_character(static_cast(value >> 8)); + oa->write_character(static_cast(value)); + } + else if (value <= -1931) + { + JSON_ASSERT(value >= -264074); + value = -(value + 1931); + oa->write_character(static_cast(0xE0 + (value >> 14 & 0x0F))); + oa->write_character(static_cast(0xC0 + (value >> 8 & 0x3F))); + oa->write_character(static_cast(value)); + } + else if (value <= -11) + { + JSON_ASSERT(value >= -1930); + value = -(value + 11); + oa->write_character(static_cast(0xC2 + (value >> 6 & 0x1F))); + oa->write_character(static_cast(0xC0 + (value & 0x3F))); + } + else if (value <= -1) + { + JSON_ASSERT(value >= -10); + value = -(value + 1); + oa->write_character(static_cast(0xB8 + value)); + } + else if (value <= 39) + { + JSON_ASSERT(value >= 0); + oa->write_character(static_cast(0x90 + value)); + } + else if (value <= 3879) + { + JSON_ASSERT(value >= 40); + value -= 40; + oa->write_character(static_cast(0xC2 + (value >> 7 & 0x1F))); + oa->write_character(static_cast(value & 0x7F)); + } + else if (value <= 528167) + { + JSON_ASSERT(value >= 3880); + value -= 3880; + oa->write_character(static_cast(0xE0 + (value >> 
15 & 0x0F))); + oa->write_character(static_cast(value >> 8 & 0x7F)); + oa->write_character(static_cast(value)); + } + else + { + JSON_ASSERT(value >= 528168); + JSON_ASSERT(value <= 67637031); + value -= 528168; // now at most 67108863, i.e. 26 bits split 3 + 7 + 8 + 8 across the four bytes + oa->write_character(static_cast(0xF0 + (value >> 23 & 0x07))); + oa->write_character(static_cast(value >> 16 & 0x7F)); + oa->write_character(static_cast(value >> 8)); + oa->write_character(static_cast(value)); + } + } + + static constexpr CharType get_bon8_float_prefix(float /*unused*/) + { + return to_char_type(0x8E); + } + + static constexpr CharType get_bon8_float_prefix(double /*unused*/) + { + return to_char_type(0x8F); + } + /////////////////////// // Utility functions // /////////////////////// @@ -16804,20 +17392,54 @@ class binary_writer #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" #endif - if (static_cast(n) >= static_cast(std::numeric_limits::lowest()) && - static_cast(n) <= static_cast((std::numeric_limits::max)()) && - static_cast(static_cast(n)) == static_cast(n)) + if (std::isnan(n) || std::isinf(n) || (static_cast(n) >= static_cast(std::numeric_limits::lowest()) && + static_cast(n) <= static_cast((std::numeric_limits::max)()) && + static_cast(static_cast(n)) == static_cast(n))) { - oa->write_character(format == detail::input_format_t::cbor - ? 
get_cbor_float_prefix(static_cast(n)) - : get_msgpack_float_prefix(static_cast(n))); + switch (format) + { + case input_format_t::cbor: + oa->write_character(get_cbor_float_prefix(static_cast(n))); + break; + case input_format_t::msgpack: + oa->write_character(get_msgpack_float_prefix(static_cast(n))); + break; + case input_format_t::bon8: + oa->write_character(get_bon8_float_prefix(static_cast(n))); + break; + // LCOV_EXCL_START + case input_format_t::bson: + case input_format_t::json: + case input_format_t::ubjson: + case input_format_t::bjdata: + default: + break; + // LCOV_EXCL_STOP + } write_number(static_cast(n)); } else { - oa->write_character(format == detail::input_format_t::cbor - ? get_cbor_float_prefix(n) - : get_msgpack_float_prefix(n)); + switch (format) + { + case input_format_t::cbor: + oa->write_character(get_cbor_float_prefix(n)); + break; + case input_format_t::msgpack: + oa->write_character(get_msgpack_float_prefix(n)); + break; + case input_format_t::bon8: + oa->write_character(get_bon8_float_prefix(n)); + break; + // LCOV_EXCL_START + case input_format_t::bson: + case input_format_t::json: + case input_format_t::ubjson: + case input_format_t::bjdata: + default: + break; + // LCOV_EXCL_STOP + } write_number(n); } #ifdef __GNUC__ @@ -23658,6 +24280,29 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec binary_writer(o).write_bson(j); } + /// @brief create a BON8 serialization of a given JSON value + /// @sa https://json.nlohmann.me/api/basic_json/to_bon8/ + static std::vector to_bon8(const basic_json& j) + { + std::vector result; + to_bon8(j, result); + return result; + } + + /// @brief create a BON8 serialization of a given JSON value + /// @sa https://json.nlohmann.me/api/basic_json/to_bon8/ + static void to_bon8(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bon8(j); + } + + /// @brief create a BON8 serialization of a given JSON value + /// @sa 
https://json.nlohmann.me/api/basic_json/to_bon8/ + static void to_bon8(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bon8(j); + } + /// @brief create a JSON value from an input in CBOR format /// @sa https://json.nlohmann.me/api/basic_json/from_cbor/ template @@ -23907,6 +24552,36 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } + + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(InputType&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::forward(i)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bon8(InputType&&, const bool, const bool) + */ + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(IteratorType first, IteratorType last, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + /// @} ////////////////////////// diff --git a/tests/src/unit-bon8.cpp b/tests/src/unit-bon8.cpp new file mode 100644 index 000000000..f69286af2 --- /dev/null +++ b/tests/src/unit-bon8.cpp @@ -0,0 +1,902 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.10.2 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . 
+SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "doctest_compatibility.h" + +#include +using nlohmann::json; +#ifdef JSON_TEST_NO_GLOBAL_UDLS + using namespace nlohmann::literals; // NOLINT(google-build-using-namespace) +#endif + +#include +#include +#include +#include +#include "test_utils.hpp" + +namespace +{ +class SaxCountdown +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() + { + return events_left-- > 0; + } + + bool boolean(bool /*unused*/) + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t /*unused*/) + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t /*unused*/) + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t /*unused*/, const std::string& /*unused*/) + { + return events_left-- > 0; + } + + bool string(std::string& /*unused*/) + { + return events_left-- > 0; + } + + bool binary(std::vector& /*unused*/) + { + return events_left-- > 0; + } + + bool start_object(std::size_t /*unused*/) + { + return events_left-- > 0; + } + + bool key(std::string& /*unused*/) + { + return events_left-- > 0; + } + + bool end_object() + { + return events_left-- > 0; + } + + bool start_array(std::size_t /*unused*/) + { + return events_left-- > 0; + } + + bool end_array() + { + return events_left-- > 0; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const json::exception& /*unused*/) // NOLINT(readability-convert-member-functions-to-static) + { + return false; + } + + private: + int events_left = 0; +}; +} // namespace + +TEST_CASE("BON8") +{ + SECTION("individual values") + { + SECTION("discarded") + { + // discarded values are not serialized + json j = json::value_t::discarded; + const auto result = json::to_bon8(j); + CHECK(result.empty()); + } + + SECTION("null") + { + json j = nullptr; + std::vector expected = {0xFA}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + 
SECTION("boolean") + { + SECTION("true") + { + json j = true; + std::vector expected = {0xF9}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("false") + { + json j = false; + std::vector expected = {0xF8}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("unsigned integers") + { + SECTION("0..39") + { + SECTION("0") + { + json j = 0U; + std::vector expected = {0x90}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("39") + { + json j = 39U; + std::vector expected = {0xB7}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("40..3879") + { + SECTION("40") + { + json j = 40U; + std::vector expected = {0xC2, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("3879") + { + json j = 3879U; + std::vector expected = {0xDF, 0x7F}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("3880..524287") + { + SECTION("3880") + { + json j = 3880U; + std::vector expected = {0xE0, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("528167") + { + json j = 528167U; + std::vector expected = {0xEF, 0x7F, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("528168..67637031") + { + SECTION("528168") + { + json j = 528168U; + std::vector expected = {0xF0, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("67637031") + { + json j = 67637031U; + std::vector expected = {0xF7, 0x7F, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("67637032..2147483647 (int32max)") + { + SECTION("67637032") + { + json j = 67637032U; + 
std::vector expected = {0x8C, 0x04, 0x08, 0x0F, 0x28}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("2147483647 (int32max)") + { + json j = 2147483647U; + std::vector expected = {0x8C, 0x7F, 0xFF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("2147483648..9223372036854775807 (int64max)") + { + SECTION("2147483648") + { + json j = 2147483648U; + std::vector expected = {0x8D, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("9223372036854775807 (int64max)") + { + json j = 9223372036854775807U; + std::vector expected = {0x8D, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("greater than int64max") + { + json j = 9223372036854775808U; + CHECK_THROWS_WITH_AS(json::to_bon8(j), "[json.exception.out_of_range.407] integer number 9223372036854775808 cannot be represented by BON8 as it does not fit int64", json::out_of_range); + } + } + + SECTION("signed integers") + { + SECTION("-9223372036854775808 (int64min)..-2147483649") + { + SECTION("-9223372036854775808") + { + json j = INT64_MIN; + std::vector expected = {0x8D, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("-2147483649") + { + // cannot use -2147483649 directly, see https://developercommunity.visualstudio.com/t/-2147483648-c4146-error/141813#T-N229960 + json j = static_cast(-2147483647) - 2; + std::vector expected = {0x8D, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == 
j); + } + } + + SECTION("-2147483648 (int32min)..-33818507") + { + SECTION("-2147483648") + { + // cannot use -2147483648 directly, see https://developercommunity.visualstudio.com/t/-2147483648-c4146-error/141813#T-N229960 + json j = -2147483647 - 1; + std::vector expected = {0x8C, 0x80, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("-33818507") + { + json j = -33818507; + std::vector expected = {0x8C, 0xFD, 0xFB, 0xF8, 0x75}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("-33818506..-264075") + { + SECTION("-33818506") + { + json j = -33818506; + std::vector expected = {0xF7, 0xFF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("-264075") + { + json j = -264075; + std::vector expected = {0xF0, 0xC0, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("-264074..-1931") + { + SECTION("-264074") + { + json j = -264074; + std::vector expected = {0xEF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("-1931") + { + json j = -1931; + std::vector expected = {0xE0, 0xC0, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("-1930..-11") + { + SECTION("-1930") + { + json j = -1930; + std::vector expected = {0xDF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("-11") + { + json j = -11; + std::vector expected = {0xC2, 0xC0}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("-10..-1") + { + SECTION("-10") + { + json j = -10; + std::vector expected = {0xC1}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("-1") + { + json j = -1; + std::vector expected 
= {0xB8}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("0..39") + { + SECTION("0") + { + json j = 0; + std::vector expected = {0x90}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("39") + { + json j = 39; + std::vector expected = {0xB7}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("40..3879") + { + SECTION("40") + { + json j = 40; + std::vector expected = {0xC2, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("3879") + { + json j = 3879; + std::vector expected = {0xDF, 0x7F}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("3880..524287") + { + SECTION("3880") + { + json j = 3880; + std::vector expected = {0xE0, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("528167") + { + json j = 528167; + std::vector expected = {0xEF, 0x7F, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("528168..67637031") + { + SECTION("528168") + { + json j = 528168; + std::vector expected = {0xF0, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("67637031") + { + json j = 67637031; + std::vector expected = {0xF7, 0x7F, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("67637032..2147483647 (int32max)") + { + SECTION("67637032") + { + json j = 67637032; + std::vector expected = {0x8C, 0x04, 0x08, 0x0F, 0x28}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("2147483647 (int32max)") + { + json j = 2147483647; + std::vector expected = {0x8C, 0x7F, 0xFF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == 
expected); + } + } + + SECTION("2147483648..9223372036854775807 (int64max)") + { + SECTION("2147483648") + { + json j = 2147483648; + std::vector expected = {0x8D, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("9223372036854775807 (int64max)") + { + json j = 9223372036854775807; + std::vector expected = {0x8D, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + } + + SECTION("floating-point numbers") + { + SECTION("special values") + { + SECTION("-1.0") + { + json j = -1.0; + std::vector expected = {0xFB}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("0.0") + { + json j = 0.0; + std::vector expected = {0xFC}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("1.0") + { + json j = 1.0; + std::vector expected = {0xFD}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("-0.0") + { + json j = -0.0; + std::vector expected = {0x8E, 0x80, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("NAN") + { + json j = NAN; + std::vector expected = {0x8E, 0x7F, 0x80, 0x00, 0x01}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + json::number_float_t d{json::from_bon8(result)}; + CHECK(std::isnan(d)); + } + + SECTION("infinity") + { + json j = INFINITY; + std::vector expected = {0x8E, 0x7F, 0x80, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("-infinity") + { + json j = -INFINITY; + std::vector expected = {0x8E, 0xFF, 0x80, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == 
expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("floats") + { + SECTION("2.0") + { + json j = 2.0; + std::vector expected = {0x8E, 0x40, 0x00, 0x00, 0x00}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("doubles") + { + SECTION("100000000.1") + { + json j = 100000000.1; + std::vector expected = {0x8F, 0x41, 0x97, 0xD7, 0x84, 0x00, 0x66, 0x66, 0x66}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + } + + SECTION("string") + { + SECTION("empty string") + { + json j = ""; + std::vector expected = {0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("other strings") + { + json j = "This is a string."; + std::vector expected = {'T', 'h', 'i', 's', ' ', 'i', 's', ' ', 'a', ' ', 's', 't', 'r', 'i', 'n', 'g', '.', 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("multi-byte, 2 bytes") + { + json j = "\xC2\xA3"; + std::vector expected = {0xC2, 0xA3, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("multi-byte, 3 bytes") + { + json j = "\xEF\xB8\xBB"; + std::vector expected = {0xEF, 0xB8, 0xBB, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("multi-byte, 4 bytes") + { + json j = "\xF0\x9F\x80\x84"; + std::vector expected = {0xF0, 0x9F, 0x80, 0x84, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("invalid string") + { + std::vector v = {0xF0, 0x9F, 0x80, 0x84}; + json j; + CHECK_THROWS_WITH_AS(j = json::from_bon8(v), "[json.exception.parse_error.110] parse error at byte 5: syntax error while parsing BON8 
string: unexpected end of input", json::parse_error); + } + } + + SECTION("array") + { + SECTION("array with count") + { + SECTION("empty array") + { + json j = json::array(); + std::vector expected = {0x80}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[false]") + { + json j = {false}; + std::vector expected = {0x81, 0xF8}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[false, null]") + { + json j = {false, nullptr}; + std::vector expected = {0x82, 0xF8, 0xFA}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[false, null, true]") + { + json j = {false, nullptr, true}; + std::vector expected = {0x83, 0xF8, 0xFA, 0xF9}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[false, null, true, 1.0]") + { + json j = {false, nullptr, true, 1.0}; + std::vector expected = {0x84, 0xF8, 0xFA, 0xF9, 0xFD}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[\"s\", \"s\"]") + { + json j = {"s", "s"}; + std::vector expected = {0x82, 's', 0xFF, 's', 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("[\"\", \"s\"]") + { + json j = {"", "s"}; + std::vector expected = {0x82, 0xFF, 's', 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("[[[\"foo\"]]]") + { + json j = R"([[["foo"]]])"_json; + std::vector expected = {0x81, 0x81, 0x81, 'f', 'o', 'o', 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[[[1]]]") + { + json j = R"([[[1]]])"_json; + std::vector expected = {0x81, 0x81, 0x81, 0x91}; + const auto result = json::to_bon8(j); + CHECK(result 
== expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("[[[\"\"]]]") + { + json j = R"([[[""]]])"_json; + std::vector expected = {0x81, 0x81, 0x81, 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + + SECTION("array without count") + { + SECTION("[false, null, true, 1.0, [], 0.0]") + { + json j = {false, nullptr, true, 1.0, json::array(), 0.0}; + std::vector expected = {0x85, 0xF8, 0xFA, 0xF9, 0xFD, 0x80, 0xFC, 0xFE}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + } + + SECTION("object") + { + SECTION("object with count") + { + SECTION("empty object") + { + json j = json::object(); + std::vector expected = {0x86}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("{\"foo\": null}") + { + json j = {{"foo", nullptr}}; + std::vector expected = {0x87, 'f', 'o', 'o', 0xFA}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("{\"\": true, \"foo\": null}") + { + json j = {{"", true}, {"foo", nullptr}}; + std::vector expected = {0x88, 0xFF, 0xF9, 'f', 'o', 'o', 0xFA}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + + SECTION("{\"a\": \"\", \"c\": \"d\"}") + { + json j = {{"a", ""}, {"c", "d"}}; + std::vector expected = {0x88, 'a', 0xFF, 0xFF, 'c', 0xFF, 'd', 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + + SECTION("{\"a\": \"b\", \"c\": \"d\"}") + { + json j = {{"a", "b"}, {"c", "d"}}; + std::vector expected = {0x88, 'a', 0xFF, 'b', 0xFF, 'c', 0xFF, 'd', 0xFF}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + } + } + + SECTION("object without count") + { + SECTION("{\"one\": 1, \"two\": 2, \"three\": 3, \"four\": 4, \"five\": 5}") + { + json j = 
R"({"one": 1, "two": 2, "three": 3, "four": 4, "five": 5})"_json; + std::vector expected = {0x8b, 'f', 'i', 'v', 'e', 0x95, 'f', 'o', 'u', 'r', 0x94, 'o', 'n', 'e', 0x91, 't', 'h', 'r', 'e', 'e', 0x93, 't', 'w', 'o', 0x92, 0xFE}; + const auto result = json::to_bon8(j); + CHECK(result == expected); + CHECK(json::from_bon8(result) == j); + } + } + } + } + + SECTION("SAX aborts") + { + SECTION("start_array(len)") + { + std::vector v = {0x80}; + SaxCountdown scp(0); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + + SECTION("error in array with size") + { + std::vector v = {0x81}; + SaxCountdown scp(1000); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + + SECTION("error in array without size") + { + std::vector v = {0x85}; + SaxCountdown scp(1000); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + + SECTION("start_object(len)") + { + std::vector v = {0x86}; + SaxCountdown scp(0); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + + SECTION("key()") + { + std::vector v = {0x87, 'f', 'o', 'o', 0xFF, 0xFA}; + SaxCountdown scp(1); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + + SECTION("error in object with size") + { + std::vector v = {0x87, 'f', 'o', 'o', 0xFF}; + SaxCountdown scp(1000); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + + SECTION("error in object without size") + { + std::vector v = {0x8B, 'f', 'o', 'o', 0xFF}; + SaxCountdown scp(1000); + CHECK(!json::sax_parse(v, &scp, json::input_format_t::bon8)); + } + } +}