diff --git a/src/json.hpp b/src/json.hpp index 0a2c6fde0..55ade790b 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -715,6 +715,73 @@ class basic_json private: + + /*! + @brief a type to hold JSON type information + + This bitfield type holds information about JSON types. It is internally + used to hold the basic JSON type enumeration, as well as additional + information in the case of values that have been parsed from a string + including whether or not it was created directly or parsed, and + in the case of floating point numbers the number of significant + figures in the original representation and if it was in exponential + form, if a '+' was included in the exponent and the capitalization of + the exponent marker. The sole purpose of this information is to permit + accurate round trips. + + @since version 2.0.0 + */ + union type_data_t { + struct { + uint16_t type : 4; // basic JSON type enumeration (value_t) + uint16_t parsed : 1; // set if the value was parsed from a string + uint16_t has_exp : 1; // set if the original was in exponential form + uint16_t exp_plus : 1; // set if the exponent included a '+' + uint16_t exp_cap : 1; // set if the exponent marker was a capital 'E' + uint16_t precision : 8; // precision used when re-serializing a parsed float + } bits; + uint16_t data; // all fields viewed as one word; used to clear/copy them together + + // Return the JSON type (the round-trip formatting fields are ignored) + value_t get() const noexcept + { + return static_cast<value_t>(bits.type); + } + + // Test type for equality (ignore other fields) + bool operator==(const value_t& rhs) const noexcept + { + return static_cast<value_t>(bits.type) == rhs; + } + + // Assign a new type; the round-trip formatting fields are left unchanged + type_data_t & operator=(value_t rhs) + { + bits.type = static_cast<uint16_t>(rhs); + return *this; + } + + // Construct from value_t with every other field cleared + type_data_t(value_t t) noexcept + { + data = 0; + bits.type = static_cast<uint16_t>(t); + } + + // Default constructor: null type with every other field cleared + type_data_t() noexcept + { + data = 0; + bits.type = static_cast<uint16_t>(value_t::null); + } + + // Copy constructor: copies the type and all formatting fields + type_data_t(const type_data_t& t) noexcept + { + data = t.data; + } + }; + /// helper for exception-safe object creation template static T* create(Args&& ... 
args) @@ -1752,7 +1819,7 @@ class basic_json } // check if iterator range is complete for primitive values - switch (m_type) + switch (m_type.get()) { case value_t::boolean: case value_t::number_float: @@ -1773,7 +1840,7 @@ class basic_json } } - switch (m_type) + switch (m_type.get()) { case value_t::number_integer: { @@ -1857,7 +1924,7 @@ class basic_json basic_json(const basic_json& other) : m_type(other.m_type) { - switch (m_type) + switch (m_type.get()) { case value_t::object: { @@ -1987,7 +2054,7 @@ class basic_json */ ~basic_json() { - switch (m_type) + switch (m_type.get()) { case value_t::object: { @@ -2087,7 +2154,7 @@ class basic_json */ value_t type() const noexcept { - return m_type; + return m_type.get(); } /*! @@ -2370,7 +2437,7 @@ class basic_json */ operator value_t() const noexcept { - return m_type; + return m_type.get(); } /// @} @@ -2525,7 +2592,7 @@ class basic_json , int>::type = 0> T get_impl(T*) const { - switch (m_type) + switch (m_type.get()) { case value_t::number_integer: { @@ -3657,7 +3724,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (m_type.get()) { case value_t::boolean: case value_t::number_float: @@ -3676,7 +3743,7 @@ class basic_json m_value.string = nullptr; } - m_type = value_t::null; + m_type = type_data_t(value_t::null); break; } @@ -3763,7 +3830,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (m_type.get()) { case value_t::boolean: case value_t::number_float: @@ -3782,7 +3849,7 @@ class basic_json m_value.string = nullptr; } - m_type = value_t::null; + m_type = type_data_t(value_t::null); break; } @@ -4271,7 +4338,7 @@ class basic_json */ bool empty() const noexcept { - switch (m_type) + switch (m_type.get()) { case value_t::null: { @@ -4329,7 +4396,7 @@ class basic_json */ size_type size() const noexcept { - switch (m_type) + switch (m_type.get()) { case value_t::null: { @@ -4391,7 +4458,7 @@ class basic_json */ size_type max_size() const noexcept { - switch 
(m_type) + switch (m_type.get()) { case value_t::array: { @@ -4450,7 +4517,7 @@ class basic_json */ void clear() noexcept { - switch (m_type) + switch (m_type.get()) { case value_t::number_integer: { @@ -4543,7 +4610,7 @@ class basic_json assert(m_value.array != nullptr); m_value.array->push_back(std::move(val)); // invalidate object - val.m_type = value_t::null; + val.m_type = type_data_t(value_t::null); } /*! @@ -5568,7 +5635,7 @@ class basic_json /// return the type as string string_t type_name() const { - switch (m_type) + switch (m_type.get()) { case value_t::null: return "null"; @@ -5774,7 +5841,7 @@ class basic_json // variable to hold indentation for recursive calls unsigned int new_indent = current_indent; - switch (m_type) + switch (m_type.get()) { case value_t::object: { @@ -5885,23 +5952,60 @@ class basic_json case value_t::number_float: { - // If the number is an integer then output as a fixed with with - // precision 1 to output "0.0", "1.0" etc as expected for some - // round trip tests otherwise 15 digits of precision allows - // round-trip IEEE 754 string->double->string; to be safe, we - // read this value from - // std::numeric_limits::digits10 - if (std::fmod(m_value.number_float, 1) == 0) + // Buffer size: precision (2^8-1 = 255) + other ('-.e-xxx' = 7) + null (1) + char buf[263]; + int len; + + if (m_type.bits.parsed) { - o << std::fixed << std::setprecision(1); + // Number was parsed from a string so use the same precision + if (m_type.bits.has_exp) + { + // Exponent - output in exponential form - handle capitalization of e/E + if (m_type.bits.exp_cap) + { + len = snprintf(buf, sizeof(buf), "%.*E", m_type.bits.precision, m_value.number_float) + 1; + } + else + { + len = snprintf(buf, sizeof(buf), "%.*e", m_type.bits.precision, m_value.number_float) + 1; + } + + // Remove '+' sign from the exponent if necessary + if (!m_type.bits.exp_plus) + { + if (len > static_cast(sizeof(buf))) len = sizeof(buf); + for (int i = 0; i < len; i++) + { + if 
(buf[i] == '+') + { + for (; i + 1 < len; i++) buf[i] = buf[i + 1]; + } + } + } + } + else + { + // No exponent - output as a decimal + snprintf(buf, sizeof(buf), "%.*f", m_type.bits.precision, m_value.number_float); + } + } + else if (m_value.number_float == 0) + { + // Special case for zero to get "0.0"/"-0.0" + if (std::signbit(m_value.number_float)) o << "-0.0"; + else o << "0.0"; + return; } else { - // std::defaultfloat not supported in gcc version < 5 - o.unsetf(std::ios_base::floatfield); - o << std::setprecision(std::numeric_limits::digits10); + // Otherwise 6, 15 or 16 digits of precision allows round-trip IEEE 754 + // string->float->string, string->double->string or string->long double->string; + // to be safe, we read this value from std::numeric_limits::digits10 + snprintf(buf, sizeof(buf), "%.*g", std::numeric_limits::digits10, m_value.number_float); } - o << m_value.number_float; + + o << buf; return; } @@ -5925,7 +6029,7 @@ class basic_json ////////////////////// /// the type of the current element - value_t m_type = value_t::null; + type_data_t m_type = type_data_t(value_t::null); /// the value of the current element json_value m_value = {}; @@ -6149,7 +6253,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6176,7 +6280,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6222,7 +6326,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6258,7 +6362,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6288,7 +6392,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case 
basic_json::value_t::object: { @@ -6328,7 +6432,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6371,7 +6475,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6408,7 +6512,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6443,7 +6547,7 @@ class basic_json assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6479,7 +6583,7 @@ class basic_json assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6521,7 +6625,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6571,7 +6675,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6595,7 +6699,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -7106,789 +7210,386 @@ class basic_json m_start = m_cursor; assert(m_start != nullptr); - - { - lexer_char_t yych; - unsigned int yyaccept = 0; - static const unsigned char yybm[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 32, 32, 0, 0, 32, 0, 0, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 96, 64, 0, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 0, 64, 64, 64, - 64, 64, 64, 64, 
64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - }; - if ((m_limit - m_cursor) < 5) - { - yyfill(); // LCOV_EXCL_LINE; + + { + lexer_char_t yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 32, 32, 0, 0, 32, 0, 0, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 96, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 0, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + }; + if ((m_limit - m_cursor) < 5) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; 
+ if (yych <= ':') { + if (yych <= ' ') { + if (yych <= '\n') { + if (yych <= 0x00) goto basic_json_parser_28; + if (yych <= 0x08) goto basic_json_parser_30; + if (yych >= '\n') goto basic_json_parser_4; + } else { + if (yych == '\r') goto basic_json_parser_2; + if (yych <= 0x1F) goto basic_json_parser_30; } - yych = *m_cursor; - if (yych <= ':') - { - if (yych <= ' ') - { - if (yych <= '\n') - { - if (yych <= 0x00) - { - goto basic_json_parser_28; - } - if (yych <= 0x08) - { - goto basic_json_parser_30; - } - if (yych >= '\n') - { - goto basic_json_parser_4; - } - } - else - { - if (yych == '\r') - { - goto basic_json_parser_2; - } - if (yych <= 0x1F) - { - goto basic_json_parser_30; - } - } + } else { + if (yych <= ',') { + if (yych == '"') goto basic_json_parser_27; + if (yych <= '+') goto basic_json_parser_30; + goto basic_json_parser_16; + } else { + if (yych <= '/') { + if (yych <= '-') goto basic_json_parser_23; + goto basic_json_parser_30; + } else { + if (yych <= '0') goto basic_json_parser_24; + if (yych <= '9') goto basic_json_parser_26; + goto basic_json_parser_18; } - else - { - if (yych <= ',') - { - if (yych == '"') - { - goto basic_json_parser_27; - } - if (yych <= '+') - { - goto basic_json_parser_30; - } - goto basic_json_parser_16; - } - else - { - if (yych <= '/') - { - if (yych <= '-') - { - goto basic_json_parser_23; - } - goto basic_json_parser_30; - } - else - { - if (yych <= '0') - { - goto basic_json_parser_24; - } - if (yych <= '9') - { - goto basic_json_parser_26; - } - goto basic_json_parser_18; - } - } - } - } - else - { - if (yych <= 'n') - { - if (yych <= ']') - { - if (yych == '[') - { - goto basic_json_parser_8; - } - if (yych <= '\\') - { - goto basic_json_parser_30; - } - goto basic_json_parser_10; - } - else - { - if (yych == 'f') - { - goto basic_json_parser_22; - } - if (yych <= 'm') - { - goto basic_json_parser_30; - } - goto basic_json_parser_20; - } - } - else - { - if (yych <= '{') - { - if (yych == 't') - { - goto 
basic_json_parser_21; - } - if (yych <= 'z') - { - goto basic_json_parser_30; - } - goto basic_json_parser_12; - } - else - { - if (yych <= '}') - { - if (yych <= '|') - { - goto basic_json_parser_30; - } - goto basic_json_parser_14; - } - else - { - if (yych == 0xEF) - { - goto basic_json_parser_6; - } - goto basic_json_parser_30; - } - } - } - } -basic_json_parser_2: - ++m_cursor; - yych = *m_cursor; - goto basic_json_parser_5; -basic_json_parser_3: - { - return scan(); - } -basic_json_parser_4: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; -basic_json_parser_5: - if (yybm[0 + yych] & 32) - { - goto basic_json_parser_4; - } - goto basic_json_parser_3; -basic_json_parser_6: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 0xBB) - { - goto basic_json_parser_64; - } -basic_json_parser_7: - { - return token_type::parse_error; - } -basic_json_parser_8: - ++m_cursor; - { - return token_type::begin_array; - } -basic_json_parser_10: - ++m_cursor; - { - return token_type::end_array; - } -basic_json_parser_12: - ++m_cursor; - { - return token_type::begin_object; - } -basic_json_parser_14: - ++m_cursor; - { - return token_type::end_object; - } -basic_json_parser_16: - ++m_cursor; - { - return token_type::value_separator; - } -basic_json_parser_18: - ++m_cursor; - { - return token_type::name_separator; - } -basic_json_parser_20: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'u') - { - goto basic_json_parser_60; - } - goto basic_json_parser_7; -basic_json_parser_21: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'r') - { - goto basic_json_parser_56; - } - goto basic_json_parser_7; -basic_json_parser_22: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych == 'a') - { - goto basic_json_parser_51; - } - goto basic_json_parser_7; -basic_json_parser_23: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_7; - } - if (yych <= '0') - { - goto 
basic_json_parser_50; - } - if (yych <= '9') - { - goto basic_json_parser_41; - } - goto basic_json_parser_7; -basic_json_parser_24: - yyaccept = 1; - yych = *(m_marker = ++m_cursor); - if (yych <= 'D') - { - if (yych == '.') - { - goto basic_json_parser_43; - } - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_44; - } - if (yych == 'e') - { - goto basic_json_parser_44; - } - } -basic_json_parser_25: - { - return token_type::value_number; - } -basic_json_parser_26: - yyaccept = 1; - yych = *(m_marker = ++m_cursor); - goto basic_json_parser_42; -basic_json_parser_27: - yyaccept = 0; - yych = *(m_marker = ++m_cursor); - if (yych <= 0x0F) - { - goto basic_json_parser_7; - } - goto basic_json_parser_32; -basic_json_parser_28: - ++m_cursor; - { - return token_type::end_of_input; - } -basic_json_parser_30: - yych = *++m_cursor; - goto basic_json_parser_7; -basic_json_parser_31: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; -basic_json_parser_32: - if (yybm[0 + yych] & 64) - { - goto basic_json_parser_31; - } - if (yych <= 0x0F) - { - goto basic_json_parser_33; - } - if (yych <= '"') - { - goto basic_json_parser_35; - } - goto basic_json_parser_34; -basic_json_parser_33: - m_cursor = m_marker; - if (yyaccept == 0) - { - goto basic_json_parser_7; - } - else - { - goto basic_json_parser_25; - } -basic_json_parser_34: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= 'e') - { - if (yych <= '/') - { - if (yych == '"') - { - goto basic_json_parser_31; - } - if (yych <= '.') - { - goto basic_json_parser_33; - } - goto basic_json_parser_31; - } - else - { - if (yych <= '\\') - { - if (yych <= '[') - { - goto basic_json_parser_33; - } - goto basic_json_parser_31; - } - else - { - if (yych == 'b') - { - goto basic_json_parser_31; - } - goto basic_json_parser_33; - } - } - } - else - { - if (yych <= 'q') - { - if (yych <= 'f') - { - goto 
basic_json_parser_31; - } - if (yych == 'n') - { - goto basic_json_parser_31; - } - goto basic_json_parser_33; - } - else - { - if (yych <= 's') - { - if (yych <= 'r') - { - goto basic_json_parser_31; - } - goto basic_json_parser_33; - } - else - { - if (yych <= 't') - { - goto basic_json_parser_31; - } - if (yych <= 'u') - { - goto basic_json_parser_37; - } - goto basic_json_parser_33; - } - } - } -basic_json_parser_35: - ++m_cursor; - { - return token_type::value_string; - } -basic_json_parser_37: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_33; - } - if (yych >= ':') - { - goto basic_json_parser_33; - } - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_38; - } - if (yych <= '`') - { - goto basic_json_parser_33; - } - if (yych >= 'g') - { - goto basic_json_parser_33; - } - } -basic_json_parser_38: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_33; - } - if (yych >= ':') - { - goto basic_json_parser_33; - } - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_39; - } - if (yych <= '`') - { - goto basic_json_parser_33; - } - if (yych >= 'g') - { - goto basic_json_parser_33; - } - } -basic_json_parser_39: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto basic_json_parser_33; - } - if (yych >= ':') - { - goto basic_json_parser_33; - } - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_40; - } - if (yych <= '`') - { - goto basic_json_parser_33; - } - if (yych >= 'g') - { - goto basic_json_parser_33; - } - } -basic_json_parser_40: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= '@') - { - if (yych <= '/') - { - goto 
basic_json_parser_33; - } - if (yych <= '9') - { - goto basic_json_parser_31; - } - goto basic_json_parser_33; - } - else - { - if (yych <= 'F') - { - goto basic_json_parser_31; - } - if (yych <= '`') - { - goto basic_json_parser_33; - } - if (yych <= 'f') - { - goto basic_json_parser_31; - } - goto basic_json_parser_33; - } -basic_json_parser_41: - yyaccept = 1; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; -basic_json_parser_42: - if (yybm[0 + yych] & 128) - { - goto basic_json_parser_41; - } - if (yych <= 'D') - { - if (yych != '.') - { - goto basic_json_parser_25; - } - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_44; - } - if (yych == 'e') - { - goto basic_json_parser_44; - } - goto basic_json_parser_25; - } -basic_json_parser_43: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_33; - } - if (yych <= '9') - { - goto basic_json_parser_48; - } - goto basic_json_parser_33; -basic_json_parser_44: - yych = *++m_cursor; - if (yych <= ',') - { - if (yych != '+') - { - goto basic_json_parser_33; - } - } - else - { - if (yych <= '-') - { - goto basic_json_parser_45; - } - if (yych <= '/') - { - goto basic_json_parser_33; - } - if (yych <= '9') - { - goto basic_json_parser_46; - } - goto basic_json_parser_33; - } -basic_json_parser_45: - yych = *++m_cursor; - if (yych <= '/') - { - goto basic_json_parser_33; - } - if (yych >= ':') - { - goto basic_json_parser_33; - } -basic_json_parser_46: - ++m_cursor; - if (m_limit <= m_cursor) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= '/') - { - goto basic_json_parser_25; - } - if (yych <= '9') - { - goto basic_json_parser_46; - } - goto basic_json_parser_25; -basic_json_parser_48: - yyaccept = 1; - m_marker = ++m_cursor; - if ((m_limit - m_cursor) < 3) - { - yyfill(); // LCOV_EXCL_LINE; - } - yych = *m_cursor; - if (yych <= 'D') - { - if (yych <= '/') - { - goto basic_json_parser_25; - } - if 
(yych <= '9') - { - goto basic_json_parser_48; - } - goto basic_json_parser_25; - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_44; - } - if (yych == 'e') - { - goto basic_json_parser_44; - } - goto basic_json_parser_25; - } -basic_json_parser_50: - yyaccept = 1; - yych = *(m_marker = ++m_cursor); - if (yych <= 'D') - { - if (yych == '.') - { - goto basic_json_parser_43; - } - goto basic_json_parser_25; - } - else - { - if (yych <= 'E') - { - goto basic_json_parser_44; - } - if (yych == 'e') - { - goto basic_json_parser_44; - } - goto basic_json_parser_25; - } -basic_json_parser_51: - yych = *++m_cursor; - if (yych != 'l') - { - goto basic_json_parser_33; - } - yych = *++m_cursor; - if (yych != 's') - { - goto basic_json_parser_33; - } - yych = *++m_cursor; - if (yych != 'e') - { - goto basic_json_parser_33; - } - ++m_cursor; - { - return token_type::literal_false; - } -basic_json_parser_56: - yych = *++m_cursor; - if (yych != 'u') - { - goto basic_json_parser_33; - } - yych = *++m_cursor; - if (yych != 'e') - { - goto basic_json_parser_33; - } - ++m_cursor; - { - return token_type::literal_true; - } -basic_json_parser_60: - yych = *++m_cursor; - if (yych != 'l') - { - goto basic_json_parser_33; - } - yych = *++m_cursor; - if (yych != 'l') - { - goto basic_json_parser_33; - } - ++m_cursor; - { - return token_type::literal_null; - } -basic_json_parser_64: - yych = *++m_cursor; - if (yych != 0xBF) - { - goto basic_json_parser_33; - } - ++m_cursor; - { - return scan(); } } + } else { + if (yych <= 'n') { + if (yych <= ']') { + if (yych == '[') goto basic_json_parser_8; + if (yych <= '\\') goto basic_json_parser_30; + goto basic_json_parser_10; + } else { + if (yych == 'f') goto basic_json_parser_22; + if (yych <= 'm') goto basic_json_parser_30; + goto basic_json_parser_20; + } + } else { + if (yych <= '{') { + if (yych == 't') goto basic_json_parser_21; + if (yych <= 'z') goto basic_json_parser_30; + goto basic_json_parser_12; + } else { + if (yych <= 
'}') { + if (yych <= '|') goto basic_json_parser_30; + goto basic_json_parser_14; + } else { + if (yych == 0xEF) goto basic_json_parser_6; + goto basic_json_parser_30; + } + } + } + } +basic_json_parser_2: + ++m_cursor; + yych = *m_cursor; + goto basic_json_parser_5; +basic_json_parser_3: + { return scan(); } +basic_json_parser_4: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; +basic_json_parser_5: + if (yybm[0+yych] & 32) { + goto basic_json_parser_4; + } + goto basic_json_parser_3; +basic_json_parser_6: + yyaccept = 0; + yych = *(m_marker = ++m_cursor); + if (yych == 0xBB) goto basic_json_parser_64; +basic_json_parser_7: + { return token_type::parse_error; } +basic_json_parser_8: + ++m_cursor; + { return token_type::begin_array; } +basic_json_parser_10: + ++m_cursor; + { return token_type::end_array; } +basic_json_parser_12: + ++m_cursor; + { return token_type::begin_object; } +basic_json_parser_14: + ++m_cursor; + { return token_type::end_object; } +basic_json_parser_16: + ++m_cursor; + { return token_type::value_separator; } +basic_json_parser_18: + ++m_cursor; + { return token_type::name_separator; } +basic_json_parser_20: + yyaccept = 0; + yych = *(m_marker = ++m_cursor); + if (yych == 'u') goto basic_json_parser_60; + goto basic_json_parser_7; +basic_json_parser_21: + yyaccept = 0; + yych = *(m_marker = ++m_cursor); + if (yych == 'r') goto basic_json_parser_56; + goto basic_json_parser_7; +basic_json_parser_22: + yyaccept = 0; + yych = *(m_marker = ++m_cursor); + if (yych == 'a') goto basic_json_parser_51; + goto basic_json_parser_7; +basic_json_parser_23: + yych = *++m_cursor; + if (yych <= '/') goto basic_json_parser_7; + if (yych <= '0') goto basic_json_parser_50; + if (yych <= '9') goto basic_json_parser_41; + goto basic_json_parser_7; +basic_json_parser_24: + yyaccept = 1; + yych = *(m_marker = ++m_cursor); + if (yych <= 'D') { + if (yych == '.') goto basic_json_parser_43; + } else { + if (yych <= 'E') goto 
basic_json_parser_44; + if (yych == 'e') goto basic_json_parser_44; + } +basic_json_parser_25: + { return token_type::value_number; } +basic_json_parser_26: + yyaccept = 1; + yych = *(m_marker = ++m_cursor); + goto basic_json_parser_42; +basic_json_parser_27: + yyaccept = 0; + yych = *(m_marker = ++m_cursor); + if (yych <= 0x0F) goto basic_json_parser_7; + goto basic_json_parser_32; +basic_json_parser_28: + ++m_cursor; + { return token_type::end_of_input; } +basic_json_parser_30: + yych = *++m_cursor; + goto basic_json_parser_7; +basic_json_parser_31: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; +basic_json_parser_32: + if (yybm[0+yych] & 64) { + goto basic_json_parser_31; + } + if (yych <= 0x0F) goto basic_json_parser_33; + if (yych <= '"') goto basic_json_parser_35; + goto basic_json_parser_34; +basic_json_parser_33: + m_cursor = m_marker; + if (yyaccept == 0) { + goto basic_json_parser_7; + } else { + goto basic_json_parser_25; + } +basic_json_parser_34: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= 'e') { + if (yych <= '/') { + if (yych == '"') goto basic_json_parser_31; + if (yych <= '.') goto basic_json_parser_33; + goto basic_json_parser_31; + } else { + if (yych <= '\\') { + if (yych <= '[') goto basic_json_parser_33; + goto basic_json_parser_31; + } else { + if (yych == 'b') goto basic_json_parser_31; + goto basic_json_parser_33; + } + } + } else { + if (yych <= 'q') { + if (yych <= 'f') goto basic_json_parser_31; + if (yych == 'n') goto basic_json_parser_31; + goto basic_json_parser_33; + } else { + if (yych <= 's') { + if (yych <= 'r') goto basic_json_parser_31; + goto basic_json_parser_33; + } else { + if (yych <= 't') goto basic_json_parser_31; + if (yych <= 'u') goto basic_json_parser_37; + goto basic_json_parser_33; + } + } + } +basic_json_parser_35: + ++m_cursor; + { return token_type::value_string; } +basic_json_parser_37: + ++m_cursor; + if 
(m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= '@') { + if (yych <= '/') goto basic_json_parser_33; + if (yych >= ':') goto basic_json_parser_33; + } else { + if (yych <= 'F') goto basic_json_parser_38; + if (yych <= '`') goto basic_json_parser_33; + if (yych >= 'g') goto basic_json_parser_33; + } +basic_json_parser_38: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= '@') { + if (yych <= '/') goto basic_json_parser_33; + if (yych >= ':') goto basic_json_parser_33; + } else { + if (yych <= 'F') goto basic_json_parser_39; + if (yych <= '`') goto basic_json_parser_33; + if (yych >= 'g') goto basic_json_parser_33; + } +basic_json_parser_39: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= '@') { + if (yych <= '/') goto basic_json_parser_33; + if (yych >= ':') goto basic_json_parser_33; + } else { + if (yych <= 'F') goto basic_json_parser_40; + if (yych <= '`') goto basic_json_parser_33; + if (yych >= 'g') goto basic_json_parser_33; + } +basic_json_parser_40: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= '@') { + if (yych <= '/') goto basic_json_parser_33; + if (yych <= '9') goto basic_json_parser_31; + goto basic_json_parser_33; + } else { + if (yych <= 'F') goto basic_json_parser_31; + if (yych <= '`') goto basic_json_parser_33; + if (yych <= 'f') goto basic_json_parser_31; + goto basic_json_parser_33; + } +basic_json_parser_41: + yyaccept = 1; + m_marker = ++m_cursor; + if ((m_limit - m_cursor) < 3) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; +basic_json_parser_42: + if (yybm[0+yych] & 128) { + goto basic_json_parser_41; + } + if (yych <= 'D') { + if (yych != '.') goto basic_json_parser_25; + } else { + if (yych <= 'E') goto basic_json_parser_44; + if (yych == 'e') goto basic_json_parser_44; + goto basic_json_parser_25; + } +basic_json_parser_43: + yych = 
*++m_cursor; + if (yych <= '/') goto basic_json_parser_33; + if (yych <= '9') goto basic_json_parser_48; + goto basic_json_parser_33; +basic_json_parser_44: + yych = *++m_cursor; + if (yych <= ',') { + if (yych != '+') goto basic_json_parser_33; + } else { + if (yych <= '-') goto basic_json_parser_45; + if (yych <= '/') goto basic_json_parser_33; + if (yych <= '9') goto basic_json_parser_46; + goto basic_json_parser_33; + } +basic_json_parser_45: + yych = *++m_cursor; + if (yych <= '/') goto basic_json_parser_33; + if (yych >= ':') goto basic_json_parser_33; +basic_json_parser_46: + ++m_cursor; + if (m_limit <= m_cursor) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= '/') goto basic_json_parser_25; + if (yych <= '9') goto basic_json_parser_46; + goto basic_json_parser_25; +basic_json_parser_48: + yyaccept = 1; + m_marker = ++m_cursor; + if ((m_limit - m_cursor) < 3) yyfill(); // LCOV_EXCL_LINE; + yych = *m_cursor; + if (yych <= 'D') { + if (yych <= '/') goto basic_json_parser_25; + if (yych <= '9') goto basic_json_parser_48; + goto basic_json_parser_25; + } else { + if (yych <= 'E') goto basic_json_parser_44; + if (yych == 'e') goto basic_json_parser_44; + goto basic_json_parser_25; + } +basic_json_parser_50: + yyaccept = 1; + yych = *(m_marker = ++m_cursor); + if (yych <= 'D') { + if (yych == '.') goto basic_json_parser_43; + goto basic_json_parser_25; + } else { + if (yych <= 'E') goto basic_json_parser_44; + if (yych == 'e') goto basic_json_parser_44; + goto basic_json_parser_25; + } +basic_json_parser_51: + yych = *++m_cursor; + if (yych != 'l') goto basic_json_parser_33; + yych = *++m_cursor; + if (yych != 's') goto basic_json_parser_33; + yych = *++m_cursor; + if (yych != 'e') goto basic_json_parser_33; + ++m_cursor; + { return token_type::literal_false; } +basic_json_parser_56: + yych = *++m_cursor; + if (yych != 'u') goto basic_json_parser_33; + yych = *++m_cursor; + if (yych != 'e') goto basic_json_parser_33; + ++m_cursor; + { return 
token_type::literal_true; } +basic_json_parser_60: + yych = *++m_cursor; + if (yych != 'l') goto basic_json_parser_33; + yych = *++m_cursor; + if (yych != 'l') goto basic_json_parser_33; + ++m_cursor; + { return token_type::literal_null; } +basic_json_parser_64: + yych = *++m_cursor; + if (yych != 0xBF) goto basic_json_parser_33; + ++m_cursor; + { return scan(); } + } } @@ -8114,124 +7815,138 @@ basic_json_parser_64: return std::strtof(reinterpret_cast(m_start), endptr); } - /*! - @brief static_cast between two types and indicate if it results in error - - This function performs a static_cast between @a source and @a dest. It - then checks if a static_cast back to @a dest produces an error. - - @param[in] source the value to cast from - - @param[out] dest the value to cast to - - @return @a true if the cast was performed without error, @a false otherwise - */ - template - bool attempt_cast(T_A source, T_B& dest) const - { - dest = static_cast(source); - return (source == static_cast(dest)); - } - /*! @brief return number value for number tokens - This function translates the last token into the most appropriate - number type (either integer, unsigned integer or floating point), which - is passed back to the caller via the result parameter. The pointer @a - m_start points to the beginning of the parsed number. We first examine - the first character to determine the sign of the number and then pass - this pointer to either @a std::strtoull (if positive) or @a - std::strtoll (if negative), both of which set @a endptr to the first - character past the converted number. If this pointer is not the same as - @a m_cursor, then either more or less characters have been used during - the comparison. + This function translates the last token into the most appropriate number + type (either integer, unsigned integer or floating point), which is + passed back to the caller via the result parameter. 
- This can happen for inputs like "01" which will be treated like number - 0 followed by number 1. This will also occur for valid floating point - inputs like "12e3" will be incorrectly read as 12. Numbers that are too - large or too small for a signed/unsigned long long will cause a range - error (@a errno set to ERANGE). The parsed number is cast to a @ref - number_integer_t/@ref number_unsigned_t using the helper function @ref - attempt_cast, which returns @a false if the cast could not be peformed - without error. + This function parses the integer component up to the radix point or exponent + while collecting information about the 'floating point representation', which + it stores in the result parameter. If there is no radix point or exponent, + and the number can fit into a @ref number_integer_t or @ref number_unsigned_t + then it sets the result parameter accordingly. + + The 'floating point representation' includes the number of significant figures + after the radix point, whether the number is in exponential or decimal form, + the capitalization of the exponent marker, and if the optional '+' is present + in the exponent. This information is necessary to perform accurate round trips + of floating point numbers. - In any of these cases (more/less characters read, range error or a cast - error) the pointer is passed to @a std:strtod, which also sets @a - endptr to the first character past the converted number. The resulting - @ref number_float_t is then cast to a @ref number_integer_t/@ref - number_unsigned_t using @ref attempt_cast and if no error occurs is - stored in that form, otherwise it is stored as a @ref number_float_t. + If the number is a floating point number the number is then parsed using + @a std::strtod (or @a std::strtof or @a std::strtold). - A final comparison is made of @a endptr and if still not the same as - @ref m_cursor a bad input is assumed and @a result parameter is set to - NAN.
- - @param[out] result @ref basic_json object to receive the number, or NAN - if the conversion read past the current token. The latter case needs to - be treated by the caller function. + @param[out] result @ref basic_json object to receive the number, or NAN if the + conversion read past the current token. The latter case needs to be + treated by the caller function. */ void get_number(basic_json& result) const { - typename string_t::value_type* endptr; assert(m_start != nullptr); - errno = 0; - // attempt to parse it as an integer - first checking for a - // negative number - if (*reinterpret_cast(m_start) != '-') - { - // positive, parse with strtoull and attempt cast to - // number_unsigned_t - if (attempt_cast(std::strtoull(reinterpret_cast(m_start), &endptr, - 10), result.m_value.number_unsigned)) - { - result.m_type = value_t::number_unsigned; - } - else - { - // cast failed due to overflow - store as float - result.m_type = value_t::number_float; - } + const lexer::lexer_char_t *curptr = m_start; + result.m_type.bits.parsed = true; + + // 'found_radix_point' will be set to 0xFF upon finding a radix point + // and later used to mask in/out the precision depending whether a + // radix is found i.e. 
'precision &= found_radix_point' + uint8_t found_radix_point = 0; + uint8_t precision = 0; + + // Accumulate the integer conversion result (unsigned for now) + number_unsigned_t value = 0; + + // Maximum absolute value of the relevant integer type + number_unsigned_t max; + + // Temporarily store the type to avoid unnecessary bitfield access + value_t type; + + // Look for sign + if (*curptr == '-') { + type = value_t::number_integer; + max = static_cast(std::numeric_limits::max()) + 1; + curptr++; } - else - { - // Negative, parse with strtoll and attempt cast to - // number_integer_t - if (attempt_cast(std::strtoll(reinterpret_cast(m_start), &endptr, - 10), result.m_value.number_unsigned)) - { - result.m_type = value_t::number_integer; - } - else - { - // cast failed due to overflow - store as float - result.m_type = value_t::number_float; - } + else { + type = value_t::number_unsigned; + max = static_cast(std::numeric_limits::max()); + if (*curptr == '+') curptr++; } - // check the end of the number was reached and no range error - // occurred - if (reinterpret_cast(endptr) != m_cursor || errno == ERANGE) + // Count the significant figures + for (; curptr < m_cursor; curptr++) { - result.m_type = value_t::number_float; - } - if (result.m_type == value_t::number_float) - { - // either the number won't fit in an integer (range error from - // strtoull/strtoll or overflow on cast) or there was something - // else after the number, which could be an exponent - - // parse with strtod - result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); - - // anything after the number is an error - if (reinterpret_cast(endptr) != m_cursor) + // Quickly skip tests if a digit + if (*curptr < '0' || *curptr > '9') { - throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); + if (*curptr == '.') + { + // Don't count '.'
but change to float + type = value_t::number_float; + + // Reset precision count + precision = 0; + found_radix_point = 0xFF; + continue; + } + // Assume exponent (if not then will fail parse): change to + // float, stop counting and record exponent details + type = value_t::number_float; + result.m_type.bits.has_exp = true; + + // Exponent capitalization + result.m_type.bits.exp_cap = (*curptr == 'E'); + + // Exponent '+' sign + result.m_type.bits.exp_plus = (*(++curptr) == '+'); + break; } + + // Skip if definitely not an integer + if (type != value_t::number_float) { + + // Multiply last value by ten and add the new digit + auto temp = value * 10 + *curptr - 0x30; + + // Test for overflow + if (temp < value || temp > max) + { + // Overflow + type = value_t::number_float; + } + else + { + // No overflow - save it + value = temp; + } + } + precision++; } + + // If no radix point was found then precision would now be set to + // the number of digits, which is wrong - clear it + result.m_type.bits.precision = precision & found_radix_point; + + // Save the value (if not a float) + if (type == value_t::number_unsigned) + { + result.m_value.number_unsigned = value; + } + else if (type == value_t::number_integer) + { + result.m_value.number_integer = -static_cast(value); + } + else + { + // Parse with strtod + result.m_value.number_float = str_to_float_t(static_cast(nullptr), NULL); + } + + // Save the type + result.m_type = type; } private: diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index c80a2bea2..7d6a72115 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -715,6 +715,73 @@ class basic_json private: + + /*! + @brief a type to hold JSON type information + + This bitfield type holds information about JSON types. 
It is internally + used to hold the basic JSON type enumeration, as well as additional + information in the case of values that have been parsed from a string + including whether or not it was created directly or parsed, and + in the case of floating point numbers the number of significant + figures in the original representation and if it was in exponential + form, if a '+' was included in the exponent and the capitalization of + the exponent marker. The sole purpose of this information is to permit + accurate round trips. + + @since version 2.0.0 + */ + union type_data_t { + struct { + uint16_t type : 4; + uint16_t parsed : 1; + uint16_t has_exp : 1; + uint16_t exp_plus : 1; + uint16_t exp_cap : 1; + uint16_t precision : 8; + } bits; + uint16_t data; + + // Return the type + value_t get() const + { + return static_cast(bits.type); + } + + // Test type for equality (ignore other fields) + bool operator==(const value_t& rhs) const + { + return static_cast(bits.type) == rhs; + } + + // Assignment + type_data_t & operator=(value_t rhs) + { + bits.type = static_cast(rhs); + return *this; + } + + // Construct from value_t + type_data_t(value_t t) noexcept + { + *reinterpret_cast(this) = 0; + bits.type = static_cast(t); + } + + // Default constructor + type_data_t() noexcept + { + data = 0; + bits.type = reinterpret_cast(value_t::null); + } + + // Copy constructor + type_data_t(const type_data_t& t) noexcept + { + data = t.data; + } + }; + /// helper for exception-safe object creation template static T* create(Args&& ...
args) @@ -1752,7 +1819,7 @@ class basic_json } // check if iterator range is complete for primitive values - switch (m_type) + switch (m_type.get()) { case value_t::boolean: case value_t::number_float: @@ -1773,7 +1840,7 @@ class basic_json } } - switch (m_type) + switch (m_type.get()) { case value_t::number_integer: { @@ -1857,7 +1924,7 @@ class basic_json basic_json(const basic_json& other) : m_type(other.m_type) { - switch (m_type) + switch (m_type.get()) { case value_t::object: { @@ -1987,7 +2054,7 @@ class basic_json */ ~basic_json() { - switch (m_type) + switch (m_type.get()) { case value_t::object: { @@ -2087,7 +2154,7 @@ class basic_json */ value_t type() const noexcept { - return m_type; + return m_type.get(); } /*! @@ -2370,7 +2437,7 @@ class basic_json */ operator value_t() const noexcept { - return m_type; + return m_type.get(); } /// @} @@ -2525,7 +2592,7 @@ class basic_json , int>::type = 0> T get_impl(T*) const { - switch (m_type) + switch (m_type.get()) { case value_t::number_integer: { @@ -3657,7 +3724,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (m_type.get()) { case value_t::boolean: case value_t::number_float: @@ -3676,7 +3743,7 @@ class basic_json m_value.string = nullptr; } - m_type = value_t::null; + m_type = type_data_t(value_t::null); break; } @@ -3763,7 +3830,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (m_type.get()) { case value_t::boolean: case value_t::number_float: @@ -3782,7 +3849,7 @@ class basic_json m_value.string = nullptr; } - m_type = value_t::null; + m_type = type_data_t(value_t::null); break; } @@ -4271,7 +4338,7 @@ class basic_json */ bool empty() const noexcept { - switch (m_type) + switch (m_type.get()) { case value_t::null: { @@ -4329,7 +4396,7 @@ class basic_json */ size_type size() const noexcept { - switch (m_type) + switch (m_type.get()) { case value_t::null: { @@ -4391,7 +4458,7 @@ class basic_json */ size_type max_size() const noexcept { - switch 
(m_type) + switch (m_type.get()) { case value_t::array: { @@ -4450,7 +4517,7 @@ class basic_json */ void clear() noexcept { - switch (m_type) + switch (m_type.get()) { case value_t::number_integer: { @@ -4543,7 +4610,7 @@ class basic_json assert(m_value.array != nullptr); m_value.array->push_back(std::move(val)); // invalidate object - val.m_type = value_t::null; + val.m_type = type_data_t(value_t::null); } /*! @@ -5568,7 +5635,7 @@ class basic_json /// return the type as string string_t type_name() const { - switch (m_type) + switch (m_type.get()) { case value_t::null: return "null"; @@ -5774,7 +5841,7 @@ class basic_json // variable to hold indentation for recursive calls unsigned int new_indent = current_indent; - switch (m_type) + switch (m_type.get()) { case value_t::object: { @@ -5885,23 +5952,60 @@ class basic_json case value_t::number_float: { - // If the number is an integer then output as a fixed with with - // precision 1 to output "0.0", "1.0" etc as expected for some - // round trip tests otherwise 15 digits of precision allows - // round-trip IEEE 754 string->double->string; to be safe, we - // read this value from - // std::numeric_limits::digits10 - if (std::fmod(m_value.number_float, 1) == 0) + // Buffer size: precision (2^8-1 = 255) + other ('-.e-xxx' = 7) + null (1) + char buf[263]; + int len; + + if (m_type.bits.parsed) { - o << std::fixed << std::setprecision(1); + // Number was parsed from a string so use the same precision + if (m_type.bits.has_exp) + { + // Exponent - output in exponential form - handle capitalization of e/E + if (m_type.bits.exp_cap) + { + len = snprintf(buf, sizeof(buf), "%.*E", m_type.bits.precision, m_value.number_float) + 1; + } + else + { + len = snprintf(buf, sizeof(buf), "%.*e", m_type.bits.precision, m_value.number_float) + 1; + } + + // Remove '+' sign from the exponent if necessary + if (!m_type.bits.exp_plus) + { + if (len > static_cast(sizeof(buf))) len = sizeof(buf); + for (int i = 0; i < len; i++) + { + if 
(buf[i] == '+') + { + for (; i + 1 < len; i++) buf[i] = buf[i + 1]; + } + } + } + } + else + { + // No exponent - output as a decimal + snprintf(buf, sizeof(buf), "%.*f", m_type.bits.precision, m_value.number_float); + } + } + else if (m_value.number_float == 0) + { + // Special case for zero to get "0.0"/"-0.0" + if (std::signbit(m_value.number_float)) o << "-0.0"; + else o << "0.0"; + return; } else { - // std::defaultfloat not supported in gcc version < 5 - o.unsetf(std::ios_base::floatfield); - o << std::setprecision(std::numeric_limits::digits10); + // Otherwise 6, 15 or 16 digits of precision allows round-trip IEEE 754 + // string->float->string, string->double->string or string->long double->string; + // to be safe, we read this value from std::numeric_limits::digits10 + snprintf(buf, sizeof(buf), "%.*g", std::numeric_limits::digits10, m_value.number_float); } - o << m_value.number_float; + + o << buf; return; } @@ -5925,7 +6029,7 @@ class basic_json ////////////////////// /// the type of the current element - value_t m_type = value_t::null; + type_data_t m_type = type_data_t(value_t::null); /// the value of the current element json_value m_value = {}; @@ -6149,7 +6253,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6176,7 +6280,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6222,7 +6326,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6258,7 +6362,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6288,7 +6392,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case 
basic_json::value_t::object: { @@ -6328,7 +6432,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6371,7 +6475,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6408,7 +6512,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6443,7 +6547,7 @@ class basic_json assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6479,7 +6583,7 @@ class basic_json assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6521,7 +6625,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6571,7 +6675,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -6595,7 +6699,7 @@ class basic_json { assert(m_object != nullptr); - switch (m_object->m_type) + switch (m_object->m_type.get()) { case basic_json::value_t::object: { @@ -7393,124 +7497,138 @@ class basic_json return std::strtof(reinterpret_cast(m_start), endptr); } - /*! - @brief static_cast between two types and indicate if it results in error - - This function performs a static_cast between @a source and @a dest. It - then checks if a static_cast back to @a dest produces an error. 
- - @param[in] source the value to cast from - - @param[out] dest the value to cast to - - @return @a true if the cast was performed without error, @a false otherwise - */ - template - bool attempt_cast(T_A source, T_B& dest) const - { - dest = static_cast(source); - return (source == static_cast(dest)); - } - /*! @brief return number value for number tokens - This function translates the last token into the most appropriate - number type (either integer, unsigned integer or floating point), which - is passed back to the caller via the result parameter. The pointer @a - m_start points to the beginning of the parsed number. We first examine - the first character to determine the sign of the number and then pass - this pointer to either @a std::strtoull (if positive) or @a - std::strtoll (if negative), both of which set @a endptr to the first - character past the converted number. If this pointer is not the same as - @a m_cursor, then either more or less characters have been used during - the comparison. + This function translates the last token into the most appropriate number + type (either integer, unsigned integer or floating point), which is + passed back to the caller via the result parameter. - This can happen for inputs like "01" which will be treated like number - 0 followed by number 1. This will also occur for valid floating point - inputs like "12e3" will be incorrectly read as 12. Numbers that are too - large or too small for a signed/unsigned long long will cause a range - error (@a errno set to ERANGE). The parsed number is cast to a @ref - number_integer_t/@ref number_unsigned_t using the helper function @ref - attempt_cast, which returns @a false if the cast could not be peformed - without error. + This function parses the integer component up to the radix point or exponent + while collecting information about the 'floating point representation', which + it stores in the result parameter. 
If there is no radix point or exponent, + and the number can fit into a @ref number_integer_t or @ref number_unsigned_t + then it sets the result parameter accordingly. - In any of these cases (more/less characters read, range error or a cast - error) the pointer is passed to @a std:strtod, which also sets @a - endptr to the first character past the converted number. The resulting - @ref number_float_t is then cast to a @ref number_integer_t/@ref - number_unsigned_t using @ref attempt_cast and if no error occurs is - stored in that form, otherwise it is stored as a @ref number_float_t. + The 'floating point representation' includes the number of significant figures + after the radix point, whether the number is in exponential or decimal form, + the capitalization of the exponent marker, and if the optional '+' is present + in the exponent. This information is necessary to perform accurate round trips + of floating point numbers. - A final comparison is made of @a endptr and if still not the same as - @ref m_cursor a bad input is assumed and @a result parameter is set to - NAN. + If the number is a floating point number the number is then parsed using + @a std::strtod (or @a std::strtof or @a std::strtold). - @param[out] result @ref basic_json object to receive the number, or NAN - if the conversion read past the current token. The latter case needs to - be treated by the caller function. + @param[out] result @ref basic_json object to receive the number, or NAN if the + conversion read past the current token. The latter case needs to be + treated by the caller function.
*/ void get_number(basic_json& result) const { - typename string_t::value_type* endptr; assert(m_start != nullptr); - errno = 0; - // attempt to parse it as an integer - first checking for a - // negative number - if (*reinterpret_cast(m_start) != '-') + const lexer::lexer_char_t *curptr = m_start; + result.m_type.bits.parsed = true; + + // 'found_radix_point' will be set to 0xFF upon finding a radix point + // and later used to mask in/out the precision depending whether a + // radix is found i.e. 'precision &= found_radix_point' + uint8_t found_radix_point = 0; + uint8_t precision = 0; + + // Accumulate the integer conversion result (unsigned for now) + number_unsigned_t value = 0; + + // Maximum absolute value of the relevant integer type + number_unsigned_t max; + + // Temporarily store the type to avoid unnecessary bitfield access + value_t type; + + // Look for sign + if (*curptr == '-') { + type = value_t::number_integer; + max = static_cast(std::numeric_limits::max()) + 1; + curptr++; + } + else { + type = value_t::number_unsigned; + max = static_cast(std::numeric_limits::max()); + if (*curptr == '+') curptr++; + } + + // Count the significant figures + for (; curptr < m_cursor; curptr++) { - // positive, parse with strtoull and attempt cast to - // number_unsigned_t - if (attempt_cast(std::strtoull(reinterpret_cast(m_start), &endptr, - 10), result.m_value.number_unsigned)) + + // Quickly skip tests if a digit + if (*curptr < '0' || *curptr > '9') { - result.m_type = value_t::number_unsigned; + if (*curptr == '.') + { + // Don't count '.'
but change to float + type = value_t::number_float; + + // Reset precision count + precision = 0; + found_radix_point = 0xFF; + continue; + } + // Assume exponent (if not then will fail parse): change to + // float, stop counting and record exponent details + type = value_t::number_float; + result.m_type.bits.has_exp = true; + + // Exponent capitalization + result.m_type.bits.exp_cap = (*curptr == 'E'); + + // Exponent '+' sign + result.m_type.bits.exp_plus = (*(++curptr) == '+'); + break; } - else - { - // cast failed due to overflow - store as float - result.m_type = value_t::number_float; + + // Skip if definitely not an integer + if (type != value_t::number_float) { + + // Multiply last value by ten and add the new digit + auto temp = value * 10 + *curptr - 0x30; + + // Test for overflow + if (temp < value || temp > max) + { + // Overflow + type = value_t::number_float; + } + else + { + // No overflow - save it + value = temp; + } } + precision++; + } + + // If no radix point was found then precision would now be set to + // the number of digits, which is wrong - clear it + result.m_type.bits.precision = precision & found_radix_point; + + // Save the value (if not a float) + if (type == value_t::number_unsigned) + { + result.m_value.number_unsigned = value; + } + else if (type == value_t::number_integer) + { + result.m_value.number_integer = -static_cast(value); } else { - // Negative, parse with strtoll and attempt cast to - // number_integer_t - if (attempt_cast(std::strtoll(reinterpret_cast(m_start), &endptr, - 10), result.m_value.number_unsigned)) - { - result.m_type = value_t::number_integer; - } - else - { - // cast failed due to overflow - store as float - result.m_type = value_t::number_float; - } + // Parse with strtod + result.m_value.number_float = str_to_float_t(static_cast(nullptr), NULL); } - // check the end of the number was reached and no range error - // occurred - if (reinterpret_cast(endptr) != m_cursor || errno == ERANGE) - { - 
result.m_type = value_t::number_float; - } - - if (result.m_type == value_t::number_float) - { - // either the number won't fit in an integer (range error from - // strtoull/strtoll or overflow on cast) or there was something - // else after the number, which could be an exponent - - // parse with strtod - result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); - - // anything after the number is an error - if (reinterpret_cast(endptr) != m_cursor) - { - throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); - } - } + // Save the type + result.m_type = type; } private: diff --git a/test/json_roundtrip/roundtrip28.json b/test/json_roundtrip/roundtrip28.json new file mode 100644 index 000000000..749ffaf93 --- /dev/null +++ b/test/json_roundtrip/roundtrip28.json @@ -0,0 +1 @@ +[4.940656458412e-324] \ No newline at end of file diff --git a/test/json_roundtrip/roundtrip29.json b/test/json_roundtrip/roundtrip29.json new file mode 100644 index 000000000..4bf65ec4b --- /dev/null +++ b/test/json_roundtrip/roundtrip29.json @@ -0,0 +1 @@ +[2.2250738585072e-308] \ No newline at end of file diff --git a/test/json_roundtrip/roundtrip30.json b/test/json_roundtrip/roundtrip30.json new file mode 100644 index 000000000..fd357651b --- /dev/null +++ b/test/json_roundtrip/roundtrip30.json @@ -0,0 +1 @@ +[1.2345E-30] \ No newline at end of file diff --git a/test/json_roundtrip/roundtrip31.json b/test/json_roundtrip/roundtrip31.json new file mode 100644 index 000000000..1428d1443 --- /dev/null +++ b/test/json_roundtrip/roundtrip31.json @@ -0,0 +1 @@ +[1.2345E+30] \ No newline at end of file diff --git a/test/json_roundtrip/roundtrip32.json b/test/json_roundtrip/roundtrip32.json new file mode 100644 index 000000000..d6f34011f --- /dev/null +++ b/test/json_roundtrip/roundtrip32.json @@ -0,0 +1 @@ +[1.2345e+30] \ No newline at end of file diff --git a/test/unit.cpp b/test/unit.cpp index 507c55cca..7cf6e9051 100644 --- 
a/test/unit.cpp +++ b/test/unit.cpp @@ -9719,7 +9719,8 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("-0e-:").parse(), std::invalid_argument); CHECK_THROWS_AS(json::parser("-0f").parse(), std::invalid_argument); - CHECK_THROWS_WITH(json::parser("01").parse(), "parse error - 0 is not a number"); + CHECK_THROWS_WITH(json::parser("01").parse(), + "parse error - unexpected number literal; expected end of input"); CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("1.").parse(), "parse error - unexpected '.'; expected end of input"); @@ -11766,10 +11767,15 @@ TEST_CASE("compliance tests from nativejson-benchmark") "test/json_roundtrip/roundtrip21.json", "test/json_roundtrip/roundtrip22.json", "test/json_roundtrip/roundtrip23.json", - //"test/json_roundtrip/roundtrip24.json", - //"test/json_roundtrip/roundtrip25.json", - //"test/json_roundtrip/roundtrip26.json", - //"test/json_roundtrip/roundtrip27.json" + "test/json_roundtrip/roundtrip24.json", + "test/json_roundtrip/roundtrip25.json", + "test/json_roundtrip/roundtrip26.json", + "test/json_roundtrip/roundtrip27.json", + "test/json_roundtrip/roundtrip28.json", + "test/json_roundtrip/roundtrip29.json", + "test/json_roundtrip/roundtrip30.json", + "test/json_roundtrip/roundtrip31.json", + "test/json_roundtrip/roundtrip32.json" }) { CAPTURE(filename); @@ -12097,17 +12103,17 @@ TEST_CASE("regression tests") // unsigned integer parsing - expected to overflow and be stored as a float j = custom_json::parse("4294967296"); // 2^32 CHECK(static_cast(j.type()) == static_cast(custom_json::value_t::number_float)); - CHECK(j.get() == 4294967296.0); + CHECK(j.get() == 4294967296.0f); // integer object creation - expected to wrap and still be stored as an integer j = -2147483649LL; // -2^31-1 CHECK(static_cast(j.type()) == static_cast(custom_json::value_t::number_integer)); - CHECK(j.get() == 2147483647.0); // Wrap + CHECK(j.get() == 2147483647); // Wrap - // 
integer parsing - expected to overflow and be stored as a float - j = custom_json::parse("-2147483648"); // -2^31 + // integer parsing - expected to overflow and be stored as a float with rounding + j = custom_json::parse("-2147483649"); // -2^31 CHECK(static_cast(j.type()) == static_cast(custom_json::value_t::number_float)); - CHECK(j.get() == -2147483648.0); + CHECK(j.get() == -2147483650.0f); } SECTION("issue #93 reverse_iterator operator inheritance problem")