From f35f60c844c10eadcd62ebde2a15297298ba5ff0 Mon Sep 17 00:00:00 2001 From: Trevor Welsby Date: Wed, 27 Jan 2016 12:53:56 +1000 Subject: [PATCH 1/3] Change parse to record float precision --- src/json.hpp | 117 ++++++++++++++++++++++++++++++---------------- src/json.hpp.re2c | 117 ++++++++++++++++++++++++++++++---------------- test/unit.cpp | 8 ++-- 3 files changed, 158 insertions(+), 84 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 90941b71c..d188a91fc 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -694,7 +694,7 @@ class basic_json @since version 1.0.0 */ - enum class value_t : uint8_t + enum class value_t : uint16_t { null, ///< null value object, ///< object (unordered set of name/value pairs) @@ -704,7 +704,8 @@ class basic_json number_integer, ///< number value (integer) number_unsigned,///< number value (unsigned integer) number_float, ///< number value (floating-point) - discarded ///< discarded by the the parser callback function + discarded, ///< discarded by the the parser callback function + precision_mask = 0xFF }; @@ -1746,7 +1747,7 @@ class basic_json } // check if iterator range is complete for primitive values - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::boolean: case value_t::number_float: @@ -1767,7 +1768,7 @@ class basic_json } } - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::number_integer: { @@ -1851,7 +1852,7 @@ class basic_json basic_json(const basic_json& other) : m_type(other.m_type) { - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::object: { @@ -2081,7 +2082,7 @@ class basic_json */ value_t type() const noexcept { - return m_type; + return static_cast(static_cast(m_type) & static_cast(value_t::precision_mask)); } /*! @@ -2257,7 +2258,7 @@ class basic_json */ bool is_number_float() const noexcept { - return m_type == value_t::number_float; + return (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) == value_t::number_float; } /*! @@ -2358,7 +2359,7 @@ class basic_json */ operator value_t() const noexcept { - return m_type; + return (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))); } /// @} @@ -2513,7 +2514,7 @@ class basic_json , int>::type = 0> T get_impl(T*) const { - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::number_integer: { @@ -3645,7 +3646,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::boolean: case value_t::number_float: @@ -3751,7 +3752,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::boolean: case value_t::number_float: @@ -5762,7 +5763,7 @@ class basic_json // variable to hold indentation for recursive calls unsigned int new_indent = current_indent; - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::object: { @@ -5873,22 +5874,24 @@ class basic_json case value_t::number_float: { - // If the number is an integer then output as a fixed with with - // precision 1 to output "0.0", "1.0" etc as expected for some - // round trip tests otherwise 15 digits of precision allows - // round-trip IEEE 754 string->double->string; to be safe, we - // read this value from + // If the number was parsed from a string use the same precision + // otherwise 15 digits of precision allows round-trip IEEE 754 + // string->double->string; to be safe, we read this value from // std::numeric_limits::digits10 - if (std::fmod(m_value.number_float, 1) == 0) - { - o << std::fixed << std::setprecision(1); - } - else - { - // std::defaultfloat not supported in gcc version < 5 - o.unsetf(std::ios_base::floatfield); - o << std::setprecision(std::numeric_limits::digits10); - } + int precision = static_cast(m_type) >> 8; + if (!precision) precision = std::numeric_limits::digits10; + + // Special case for zero - use fixed precision to get "0.0" + if (m_value.number_float == 0) + { + o << std::fixed << std::setprecision(1); + } + else + { + // std::defaultfloat not supported in gcc version < 5 + o.unsetf(std::ios_base::floatfield); + o << std::setprecision(precision); + } o << m_value.number_float; return; } @@ -7755,39 +7758,73 @@ basic_json_parser_64: */ void get_number(basic_json& result) const { - typename string_t::value_type* endptr; assert(m_start != nullptr); + + // Count the significant figures + int precision = 0; + { + const lexer::lexer_char_t *curptr; + + // Assume unsigned integer for now + result.m_type = value_t::number_unsigned; + for (curptr = m_start; curptr < m_cursor; curptr++) { + switch (*curptr) { + case '-': + // Found minus sign: change to integer + result.m_type = value_t::number_integer; + case '.': + // Don't count either '.' or '-' + continue; + case 'e': + case 'E': + // Found exponent: change to float and stop counting + result.m_type = value_t::number_float; + break; + default: + // Found a signficant figure + precision++; + continue; + } + break; + } + + // Characters after number - shouldn't happen, but try parsing as float + if (curptr != m_cursor) result.m_type = value_t::number_float; + } + errno = 0; - + typename string_t::value_type* endptr = 0; + // Attempt to parse it as an integer - first checking for a negative number - if (*reinterpret_cast(m_start) != '-') + if (result.m_type == value_t::number_unsigned) { // Positive, parse with strtoull and attempt cast to number_unsigned_t - if (attempt_cast(std::strtoull(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) - result.m_type = value_t::number_unsigned; - else result.m_type = value_t::number_float; // Cast failed due to overflow - store as float + if (!attempt_cast(std::strtoull(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) + result.m_type = value_t::number_float; // Cast failed due to overflow - store as float } - else + else if (result.m_type == value_t::number_integer) { // Negative, parse with strtoll and attempt cast to number_integer_t - if (attempt_cast(std::strtoll(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) - result.m_type = value_t::number_integer; - else result.m_type = value_t::number_float; // Cast failed due to overflow - store as float + if (!attempt_cast(std::strtoll(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) + result.m_type = value_t::number_float; // Cast failed due to overflow - store as float } // Check the end of the number was reached and no range error occurred if (reinterpret_cast(endptr) != m_cursor || errno == ERANGE) result.m_type = value_t::number_float; - if (result.m_type == value_t::number_float) + if (result.m_type == value_t::number_float) { // Either the number won't fit in an integer (range error from strtoull/strtoll or overflow on cast) or there was // something else after the number, which could be an exponent - + // Parse with strtod result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); + // Add the precision bits + result.m_type = static_cast(static_cast(result.m_type) | (precision << 8)); + // Anything after the number is an error - if(reinterpret_cast(endptr) != m_cursor) + if (reinterpret_cast(endptr) != m_cursor && *m_cursor != '.') throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); } } diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 4939f1947..6c8311d01 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -694,7 +694,7 @@ class basic_json @since version 1.0.0 */ - enum class value_t : uint8_t + enum class value_t : uint16_t { null, ///< null value object, ///< object (unordered set of name/value pairs) @@ -704,7 +704,8 @@ class basic_json number_integer, ///< number value (integer) number_unsigned,///< number value (unsigned integer) number_float, ///< number value (floating-point) - discarded ///< discarded by the the parser callback function + discarded, ///< discarded by the the parser callback function + precision_mask = 0xFF }; @@ -1746,7 +1747,7 @@ class basic_json } // check if iterator range is complete for primitive values - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::boolean: case value_t::number_float: @@ -1767,7 +1768,7 @@ class basic_json } } - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::number_integer: { @@ -1851,7 +1852,7 @@ class basic_json basic_json(const basic_json& other) : m_type(other.m_type) { - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::object: { @@ -2081,7 +2082,7 @@ class basic_json */ value_t type() const noexcept { - return m_type; + return static_cast(static_cast(m_type) & static_cast(value_t::precision_mask)); } /*! @@ -2257,7 +2258,7 @@ class basic_json */ bool is_number_float() const noexcept { - return m_type == value_t::number_float; + return (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) == value_t::number_float; } /*! @@ -2358,7 +2359,7 @@ class basic_json */ operator value_t() const noexcept { - return m_type; + return (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))); } /// @} @@ -2513,7 +2514,7 @@ class basic_json , int>::type = 0> T get_impl(T*) const { - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::number_integer: { @@ -3645,7 +3646,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::boolean: case value_t::number_float: @@ -3751,7 +3752,7 @@ class basic_json InteratorType result = end(); - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::boolean: case value_t::number_float: @@ -5762,7 +5763,7 @@ class basic_json // variable to hold indentation for recursive calls unsigned int new_indent = current_indent; - switch (m_type) + switch (static_cast(static_cast(m_type) & static_cast(value_t::precision_mask))) { case value_t::object: { @@ -5873,22 +5874,24 @@ class basic_json case value_t::number_float: { - // If the number is an integer then output as a fixed with with - // precision 1 to output "0.0", "1.0" etc as expected for some - // round trip tests otherwise 15 digits of precision allows - // round-trip IEEE 754 string->double->string; to be safe, we - // read this value from + // If the number was parsed from a string use the same precision + // otherwise 15 digits of precision allows round-trip IEEE 754 + // string->double->string; to be safe, we read this value from // std::numeric_limits::digits10 - if (std::fmod(m_value.number_float, 1) == 0) - { - o << std::fixed << std::setprecision(1); - } - else - { - // std::defaultfloat not supported in gcc version < 5 - o.unsetf(std::ios_base::floatfield); - o << std::setprecision(std::numeric_limits::digits10); - } + int precision = static_cast(m_type) >> 8; + if (!precision) precision = std::numeric_limits::digits10; + + // Special case for zero - use fixed precision to get "0.0" + if (m_value.number_float == 0) + { + o << std::fixed << std::setprecision(1); + } + else + { + // std::defaultfloat not supported in gcc version < 5 + o.unsetf(std::ios_base::floatfield); + o << std::setprecision(precision); + } o << m_value.number_float; return; } @@ -7437,39 +7440,73 @@ class basic_json */ void get_number(basic_json& result) const { - typename string_t::value_type* endptr; assert(m_start != nullptr); + + // Count the significant figures + int precision = 0; + { + const lexer::lexer_char_t *curptr; + + // Assume unsigned integer for now + result.m_type = value_t::number_unsigned; + for (curptr = m_start; curptr < m_cursor; curptr++) { + switch (*curptr) { + case '-': + // Found minus sign: change to integer + result.m_type = value_t::number_integer; + case '.': + // Don't count either '.' or '-' + continue; + case 'e': + case 'E': + // Found exponent: change to float and stop counting + result.m_type = value_t::number_float; + break; + default: + // Found a signficant figure + precision++; + continue; + } + break; + } + + // Characters after number - shouldn't happen, but try parsing as float + if (curptr != m_cursor) result.m_type = value_t::number_float; + } + errno = 0; - + typename string_t::value_type* endptr = 0; + // Attempt to parse it as an integer - first checking for a negative number - if (*reinterpret_cast(m_start) != '-') + if (result.m_type == value_t::number_unsigned) { // Positive, parse with strtoull and attempt cast to number_unsigned_t - if (attempt_cast(std::strtoull(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) - result.m_type = value_t::number_unsigned; - else result.m_type = value_t::number_float; // Cast failed due to overflow - store as float + if (!attempt_cast(std::strtoull(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) + result.m_type = value_t::number_float; // Cast failed due to overflow - store as float } - else + else if (result.m_type == value_t::number_integer) { // Negative, parse with strtoll and attempt cast to number_integer_t - if (attempt_cast(std::strtoll(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) - result.m_type = value_t::number_integer; - else result.m_type = value_t::number_float; // Cast failed due to overflow - store as float + if (!attempt_cast(std::strtoll(reinterpret_cast(m_start), &endptr, 10), result.m_value.number_unsigned)) + result.m_type = value_t::number_float; // Cast failed due to overflow - store as float } // Check the end of the number was reached and no range error occurred if (reinterpret_cast(endptr) != m_cursor || errno == ERANGE) result.m_type = value_t::number_float; - if (result.m_type == value_t::number_float) + if (result.m_type == value_t::number_float) { // Either the number won't fit in an integer (range error from strtoull/strtoll or overflow on cast) or there was // something else after the number, which could be an exponent - + // Parse with strtod result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); + // Add the precision bits + result.m_type = static_cast(static_cast(result.m_type) | (precision << 8)); + // Anything after the number is an error - if(reinterpret_cast(endptr) != m_cursor) + if (reinterpret_cast(endptr) != m_cursor && *m_cursor != '.') throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); } } diff --git a/test/unit.cpp b/test/unit.cpp index eb4061603..7ba03c084 100644 --- a/test/unit.cpp +++ b/test/unit.cpp @@ -11762,10 +11762,10 @@ TEST_CASE("compliance tests from nativejson-benchmark") "test/json_roundtrip/roundtrip21.json", "test/json_roundtrip/roundtrip22.json", "test/json_roundtrip/roundtrip23.json", - //"test/json_roundtrip/roundtrip24.json", - //"test/json_roundtrip/roundtrip25.json", - //"test/json_roundtrip/roundtrip26.json", - //"test/json_roundtrip/roundtrip27.json" + "test/json_roundtrip/roundtrip24.json", + "test/json_roundtrip/roundtrip25.json", + "test/json_roundtrip/roundtrip26.json", + "test/json_roundtrip/roundtrip27.json" }) { CAPTURE(filename); From 21a00fccc81871136656f413f1cb090c1ba107e1 Mon Sep 17 00:00:00 2001 From: Trevor Welsby Date: Sat, 30 Jan 2016 11:55:11 +1000 Subject: [PATCH 2/3] Shift integer parsing to guess_type() and rename to get_integer() --- src/json.hpp | 158 +++++++++++++++++++++------------------------- src/json.hpp.re2c | 158 +++++++++++++++++++++------------------------- test/unit.cpp | 2 +- 3 files changed, 147 insertions(+), 171 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index bf1700d89..e72c90e11 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -7814,51 +7814,50 @@ basic_json_parser_64: } /*! - @brief static_cast between two types and indicate if it results in error + @brief attempt to parse an integer, otherwise get the floating point representation - This function performs a static_cast between @a source and @a dest. It - then checks if a static_cast back to @a dest produces an error. + This function parses the integer component up to the radix point or exponent. + It also collects information about the floating point representation, which + it stores in the result parameter. If there is no radix point or exponent, + and the number can fit into a @ref number_integer_t or @ref number_unsigned_t + then it sets the result parameter accordingly. The 'floating point + representation' includes the number of significant figures after the radix + point, whether the number is in exponential or decimal form, the + capitalization of the exponent marker, and if the optional '+' is present in + the exponent. This information is necessary to perform accurate round trips + of floating point numbers. - @param[in] source the value to cast from - - @param[out] dest the value to cast to - - @return @a true if the cast was performed without error, @a false otherwise + @param[out] result @ref basic_json object to receive the result. */ - template - bool attempt_cast(T_A source, T_B& dest) const - { - dest = static_cast(source); - return (source == static_cast(dest)); - } - - /*! - @brief peek ahead and guess the number type and floating point representation - - This function scans the number to identify the number type. In addition it - counts the significant figures after the decimal point, whether the - number is in exponential or decimal form, the capitalization of the - exponent marker, and if the optional '+' is present in the exponent. This - information is necessary to perform accurate round trips of floating point - numbers. - - @param[out] type @ref type_data_t object to receive the type information. - */ - void guess_type(type_data_t & type) const + value_t get_integer(basic_json& result) const { const lexer::lexer_char_t *curptr = m_start; - type.bits.parsed = true; + result.m_type.bits.parsed = true; + // 'found_radix_point' will be set to 0xFF upon finding a radix point + // and later used to mask in/out the precision depending whether a + // radix is found i.e. 'precision &= found_radix_point' uint8_t found_radix_point = 0; uint8_t precision = 0; + // Accumulate the integer conversion result (unsigned for now) + number_unsigned_t value = 0; + + // Maximum absolute value of the relevant integer type + uint64_t max; + + // Temporarily store the type to avoid unecessary bitfield access + value_t type; + // Look for sign if (*curptr == '-') { type = value_t::number_integer; + max = static_cast(std::numeric_limits::max()) + 1; curptr++; } else { type = value_t::number_unsigned; + max = static_cast(std::numeric_limits::max()); if (*curptr == '+') curptr++; } @@ -7879,25 +7878,50 @@ basic_json_parser_64: found_radix_point = 0xFF; continue; } - // Assume exponent (if not it is a bad number and will fail - // parse anyway - could throw here instead): change to + // Assume exponent (if not then will fail parse): change to // float, stop counting and record exponent details type = value_t::number_float; - type.bits.has_exp = true; + result.m_type.bits.has_exp = true; // Exponent capitalization - type.bits.exp_cap = (*curptr == 'E'); + result.m_type.bits.exp_cap = (*curptr == 'E'); // Exponent '+' sign - type.bits.exp_plus = (*(++curptr) == '+'); + result.m_type.bits.exp_plus = (*(++curptr) == '+'); break; } + + // Skip if definitely not an integer + if (type != value_t::number_float) { + + // Multiply last value by ten and add the new digit + auto temp = value * 10 + *curptr - 0x30; + + // Test for overflow + if (temp < value || temp > max) + { + // Overflow + type = value_t::number_float; + } + else + { + // No overflow - save it + value = temp; + } + } precision++; } - // If no radix was found then precision would now be set to + // If no radix point was found then precision would now be set to // the number of digits, which is wrong - clear it - type.bits.precision = precision & found_radix_point; + result.m_type.bits.precision = precision & found_radix_point; + + // Save the value (if not a float) + if (type == value_t::number_unsigned) result.m_value.number_unsigned = value; + else if (type == value_t::number_integer) result.m_value.number_integer = -static_cast(value); + + // Return the type (don't save it yet) + return type; } /*! @@ -7907,23 +7931,15 @@ basic_json_parser_64: type (either integer, unsigned integer or floating point), which is passed back to the caller via the result parameter. - First @ref guess_type() is called to determine the type and to retrieve - information about the floating point representation (if applicable) - that can be used to accurately render the number to a string later. + First @ref guess_type() is called to attempt to parse as an integer + and to retrieve information about the floating point representation + (if applicable) that can be used to accurately render the number to a + string later. - Depending on the type, either @a std::strtoull (if number_unsigned_t) or - @a std::strtoll (if number_integer_t) is then called to attempt to parse the - number as an integer. Numbers that are too large or too small for a - signed/unsigned long long will cause a range error (@a errno set to ERANGE). - The parsed number is cast to a @ref number_integer_t/@ref number_unsigned_t - using the helper function @ref attempt_cast, which returns @a false if the - cast could not be peformed without error. - - In either of these cases (range error or a cast error) the number is parsed - using @a std:strtod (or @a std:strtof or @a std::strtold), which sets - @a endptr to the first character past the converted number. If it is not - the same as @ref m_cursor a bad input is assumed and @a result parameter is - set to NAN. + If the number is a floating point number the number is then parsed using + @a std:strtod (or @a std:strtof or @a std::strtold), which sets @a endptr + to the first character past the converted number. If it is not the same as + @ref m_cursor a bad input is assumed and @a result parameter is set to NAN. @param[out] result @ref basic_json object to receive the number, or NAN if the conversion read past the current token. The latter case needs to be @@ -7933,49 +7949,21 @@ basic_json_parser_64: { assert(m_start != nullptr); - guess_type(result.m_type); + value_t type = get_integer(result); - errno = 0; - - // Attempt to parse it as an integer - if (result.m_type == value_t::number_unsigned) + if (type == value_t::number_float) { - // Positive, parse with strtoull and attempt cast to number_unsigned_t - if (!attempt_cast(std::strtoull(reinterpret_cast(m_start), NULL, - 10), result.m_value.number_unsigned)) - { - result.m_type = value_t::number_float; // Cast failed due to overflow - store as float - } - } - else if (result.m_type == value_t::number_integer) - { - // Negative, parse with strtoll and attempt cast to number_integer_t - if (!attempt_cast(std::strtoll(reinterpret_cast(m_start), NULL, - 10), result.m_value.number_integer)) - { - result.m_type = value_t::number_float; // Cast failed due to overflow - store as float - } - } - - // Check the end of the number was reached and no range error occurred - if (errno == ERANGE) result.m_type = value_t::number_float; - - if (result.m_type == value_t::number_float) - { - // Either the number won't fit in an integer (range error from - // strtoull/strtoll or overflow on cast) or there was something - // else after the number, which could be an exponent - // Parse with strtod typename string_t::value_type* endptr; result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); // Anything after the number is an error if (reinterpret_cast(endptr) != m_cursor && *m_cursor != '.') - { throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); - } } + + // Save the type + result.m_type = type; } private: diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 4ec693572..9879126e9 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7496,51 +7496,50 @@ class basic_json } /*! - @brief static_cast between two types and indicate if it results in error + @brief attempt to parse an integer, otherwise get the floating point representation - This function performs a static_cast between @a source and @a dest. It - then checks if a static_cast back to @a dest produces an error. + This function parses the integer component up to the radix point or exponent. + It also collects information about the floating point representation, which + it stores in the result parameter. If there is no radix point or exponent, + and the number can fit into a @ref number_integer_t or @ref number_unsigned_t + then it sets the result parameter accordingly. The 'floating point + representation' includes the number of significant figures after the radix + point, whether the number is in exponential or decimal form, the + capitalization of the exponent marker, and if the optional '+' is present in + the exponent. This information is necessary to perform accurate round trips + of floating point numbers. - @param[in] source the value to cast from - - @param[out] dest the value to cast to - - @return @a true if the cast was performed without error, @a false otherwise + @param[out] result @ref basic_json object to receive the result. */ - template - bool attempt_cast(T_A source, T_B& dest) const - { - dest = static_cast(source); - return (source == static_cast(dest)); - } - - /*! - @brief peek ahead and guess the number type and floating point representation - - This function scans the number to identify the number type. In addition it - counts the significant figures after the decimal point, whether the - number is in exponential or decimal form, the capitalization of the - exponent marker, and if the optional '+' is present in the exponent. This - information is necessary to perform accurate round trips of floating point - numbers. - - @param[out] type @ref type_data_t object to receive the type information. - */ - void guess_type(type_data_t & type) const + value_t get_integer(basic_json& result) const { const lexer::lexer_char_t *curptr = m_start; - type.bits.parsed = true; + result.m_type.bits.parsed = true; + // 'found_radix_point' will be set to 0xFF upon finding a radix point + // and later used to mask in/out the precision depending whether a + // radix is found i.e. 'precision &= found_radix_point' uint8_t found_radix_point = 0; uint8_t precision = 0; + // Accumulate the integer conversion result (unsigned for now) + number_unsigned_t value = 0; + + // Maximum absolute value of the relevant integer type + uint64_t max; + + // Temporarily store the type to avoid unecessary bitfield access + value_t type; + // Look for sign if (*curptr == '-') { type = value_t::number_integer; + max = static_cast(std::numeric_limits::max()) + 1; curptr++; } else { type = value_t::number_unsigned; + max = static_cast(std::numeric_limits::max()); if (*curptr == '+') curptr++; } @@ -7561,25 +7560,50 @@ class basic_json found_radix_point = 0xFF; continue; } - // Assume exponent (if not it is a bad number and will fail - // parse anyway - could throw here instead): change to + // Assume exponent (if not then will fail parse): change to // float, stop counting and record exponent details type = value_t::number_float; - type.bits.has_exp = true; + result.m_type.bits.has_exp = true; // Exponent capitalization - type.bits.exp_cap = (*curptr == 'E'); + result.m_type.bits.exp_cap = (*curptr == 'E'); // Exponent '+' sign - type.bits.exp_plus = (*(++curptr) == '+'); + result.m_type.bits.exp_plus = (*(++curptr) == '+'); break; } + + // Skip if definitely not an integer + if (type != value_t::number_float) { + + // Multiply last value by ten and add the new digit + auto temp = value * 10 + *curptr - 0x30; + + // Test for overflow + if (temp < value || temp > max) + { + // Overflow + type = value_t::number_float; + } + else + { + // No overflow - save it + value = temp; + } + } precision++; } - // If no radix was found then precision would now be set to + // If no radix point was found then precision would now be set to // the number of digits, which is wrong - clear it - type.bits.precision = precision & found_radix_point; + result.m_type.bits.precision = precision & found_radix_point; + + // Save the value (if not a float) + if (type == value_t::number_unsigned) result.m_value.number_unsigned = value; + else if (type == value_t::number_integer) result.m_value.number_integer = -static_cast(value); + + // Return the type (don't save it yet) + return type; } /*! @@ -7589,23 +7613,15 @@ class basic_json type (either integer, unsigned integer or floating point), which is passed back to the caller via the result parameter. - First @ref guess_type() is called to determine the type and to retrieve - information about the floating point representation (if applicable) - that can be used to accurately render the number to a string later. + First @ref guess_type() is called to attempt to parse as an integer + and to retrieve information about the floating point representation + (if applicable) that can be used to accurately render the number to a + string later. - Depending on the type, either @a std::strtoull (if number_unsigned_t) or - @a std::strtoll (if number_integer_t) is then called to attempt to parse the - number as an integer. Numbers that are too large or too small for a - signed/unsigned long long will cause a range error (@a errno set to ERANGE). - The parsed number is cast to a @ref number_integer_t/@ref number_unsigned_t - using the helper function @ref attempt_cast, which returns @a false if the - cast could not be peformed without error. - - In either of these cases (range error or a cast error) the number is parsed - using @a std:strtod (or @a std:strtof or @a std::strtold), which sets - @a endptr to the first character past the converted number. If it is not - the same as @ref m_cursor a bad input is assumed and @a result parameter is - set to NAN. + If the number is a floating point number the number is then parsed using + @a std:strtod (or @a std:strtof or @a std::strtold), which sets @a endptr + to the first character past the converted number. If it is not the same as + @ref m_cursor a bad input is assumed and @a result parameter is set to NAN. @param[out] result @ref basic_json object to receive the number, or NAN if the conversion read past the current token. The latter case needs to be @@ -7615,49 +7631,21 @@ class basic_json { assert(m_start != nullptr); - guess_type(result.m_type); + value_t type = get_integer(result); - errno = 0; - - // Attempt to parse it as an integer - if (result.m_type == value_t::number_unsigned) + if (type == value_t::number_float) { - // Positive, parse with strtoull and attempt cast to number_unsigned_t - if (!attempt_cast(std::strtoull(reinterpret_cast(m_start), NULL, - 10), result.m_value.number_unsigned)) - { - result.m_type = value_t::number_float; // Cast failed due to overflow - store as float - } - } - else if (result.m_type == value_t::number_integer) - { - // Negative, parse with strtoll and attempt cast to number_integer_t - if (!attempt_cast(std::strtoll(reinterpret_cast(m_start), NULL, - 10), result.m_value.number_integer)) - { - result.m_type = value_t::number_float; // Cast failed due to overflow - store as float - } - } - - // Check the end of the number was reached and no range error occurred - if (errno == ERANGE) result.m_type = value_t::number_float; - - if (result.m_type == value_t::number_float) - { - // Either the number won't fit in an integer (range error from - // strtoull/strtoll or overflow on cast) or there was something - // else after the number, which could be an exponent - // Parse with strtod typename string_t::value_type* endptr; result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); // Anything after the number is an error if (reinterpret_cast(endptr) != m_cursor && *m_cursor != '.') - { throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); - } } + + // Save the type + result.m_type = type; } private: diff --git a/test/unit.cpp b/test/unit.cpp index 615cd2e64..7cf6e9051 100644 --- a/test/unit.cpp +++ b/test/unit.cpp @@ -12108,7 +12108,7 @@ TEST_CASE("regression tests") // integer object creation - expected to wrap and still be stored as an integer j = -2147483649LL; // -2^31-1 CHECK(static_cast(j.type()) == static_cast(custom_json::value_t::number_integer)); - CHECK(j.get() == 2147483647.0f); // Wrap + CHECK(j.get() == 2147483647); // Wrap // integer parsing - expected to overflow and be stored as a float with rounding j = custom_json::parse("-2147483649"); // -2^31 From e9517958a3e2143bd4bbbd6b5f7f08d2f6135509 Mon Sep 17 00:00:00 2001 From: Trevor Welsby Date: Sat, 30 Jan 2016 17:58:02 +1000 Subject: [PATCH 3/3] Merge get_number()/get_integer() + changes suggested by @gregmarr --- src/json.hpp | 103 +++++++++++++++++++--------------------------- src/json.hpp.re2c | 103 +++++++++++++++++++--------------------------- 2 files changed, 86 insertions(+), 120 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index e72c90e11..55ade790b 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -5974,12 +5974,12 @@ class basic_json // Remove '+' sign from the exponent if necessary if (!m_type.bits.exp_plus) { - if (static_cast(len) > sizeof(buf)) len = sizeof(buf); - for (size_t i = 0; i < static_cast(len); i++) + if (len > static_cast(sizeof(buf))) len = sizeof(buf); + for (int i = 0; i < len; i++) { if (buf[i] == '+') { - for (; i + 1 < static_cast(len); i++) buf[i] = buf[i + 1]; + for (; i + 1 < len; i++) buf[i] = buf[i + 1]; } } } @@ -5992,14 +5992,16 @@ class basic_json } else if (m_value.number_float == 0) { - // Special case for zero - use fixed precision to get "0.0" - snprintf(buf, sizeof(buf), "%#.1f", m_value.number_float); + // Special case for zero to get "0.0"/"-0.0" + if (std::signbit(m_value.number_float)) o << "-0.0"; + else o << "0.0"; + return; } else { - // Otherwise 15 digits of precision allows round-trip IEEE 754 - // string->double->string; to be safe, we read this value from - // std::numeric_limits::digits10 + // Otherwise 6, 15 or 16 digits of precision allows round-trip IEEE 754 + // string->float->string, string->double->string or string->long double->string; + // to be safe, we read this value from std::numeric_limits::digits10 snprintf(buf, sizeof(buf), "%.*g", std::numeric_limits::digits10, m_value.number_float); } @@ -7814,23 +7816,35 @@ basic_json_parser_64: } /*! - @brief attempt to parse an integer, otherwise get the floating point representation + @brief return number value for number tokens - This function parses the integer component up to the radix point or exponent. - It also collects information about the floating point representation, which + This function translates the last token into the most appropriate number + type (either integer, unsigned integer or floating point), which is + passed back to the caller via the result parameter. + + This function parses the integer component up to the radix point or exponent + while collecting information about the 'floating point representation', which it stores in the result parameter. If there is no radix point or exponent, and the number can fit into a @ref number_integer_t or @ref number_unsigned_t - then it sets the result parameter accordingly. The 'floating point - representation' includes the number of significant figures after the radix - point, whether the number is in exponential or decimal form, the - capitalization of the exponent marker, and if the optional '+' is present in - the exponent. This information is necessary to perform accurate round trips + then it sets the result parameter accordingly. + + The 'floating point representation' includes the number of significant figures + after the radix point, whether the number is in exponential or decimal form, + the capitalization of the exponent marker, and if the optional '+' is present + in the exponent. This information is necessary to perform accurate round trips of floating point numbers. - @param[out] result @ref basic_json object to receive the result. + If the number is a floating point number the number is then parsed using + @a std:strtod (or @a std:strtof or @a std::strtold). + + @param[out] result @ref basic_json object to receive the number, or NAN if the + conversion read past the current token. The latter case needs to be + treated by the caller function. */ - value_t get_integer(basic_json& result) const + void get_number(basic_json& result) const { + assert(m_start != nullptr); + const lexer::lexer_char_t *curptr = m_start; result.m_type.bits.parsed = true; @@ -7844,7 +7858,7 @@ basic_json_parser_64: number_unsigned_t value = 0; // Maximum absolute value of the relevant integer type - uint64_t max; + number_unsigned_t max; // Temporarily store the type to avoid unecessary bitfield access value_t type; @@ -7917,49 +7931,18 @@ basic_json_parser_64: result.m_type.bits.precision = precision & found_radix_point; // Save the value (if not a float) - if (type == value_t::number_unsigned) result.m_value.number_unsigned = value; - else if (type == value_t::number_integer) result.m_value.number_integer = -static_cast(value); - - // Return the type (don't save it yet) - return type; - } - - /*! - @brief return number value for number tokens - - This function translates the last token into the most appropriate number - type (either integer, unsigned integer or floating point), which is - passed back to the caller via the result parameter. - - First @ref guess_type() is called to attempt to parse as an integer - and to retrieve information about the floating point representation - (if applicable) that can be used to accurately render the number to a - string later. - - If the number is a floating point number the number is then parsed using - @a std:strtod (or @a std:strtof or @a std::strtold), which sets @a endptr - to the first character past the converted number. If it is not the same as - @ref m_cursor a bad input is assumed and @a result parameter is set to NAN. - - @param[out] result @ref basic_json object to receive the number, or NAN if the - conversion read past the current token. The latter case needs to be - treated by the caller function. - */ - void get_number(basic_json& result) const - { - assert(m_start != nullptr); - - value_t type = get_integer(result); - - if (type == value_t::number_float) + if (type == value_t::number_unsigned) + { + result.m_value.number_unsigned = value; + } + else if (type == value_t::number_integer) + { + result.m_value.number_integer = -static_cast(value); + } + else { // Parse with strtod - typename string_t::value_type* endptr; - result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); - - // Anything after the number is an error - if (reinterpret_cast(endptr) != m_cursor && *m_cursor != '.') - throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); + result.m_value.number_float = str_to_float_t(static_cast(nullptr), NULL); } // Save the type diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 9879126e9..7d6a72115 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -5974,12 +5974,12 @@ class basic_json // Remove '+' sign from the exponent if necessary if (!m_type.bits.exp_plus) { - if (static_cast(len) > sizeof(buf)) len = sizeof(buf); - for (size_t i = 0; i < static_cast(len); i++) + if (len > static_cast(sizeof(buf))) len = sizeof(buf); + for (int i = 0; i < len; i++) { if (buf[i] == '+') { - for (; i + 1 < static_cast(len); i++) buf[i] = buf[i + 1]; + for (; i + 1 < len; i++) buf[i] = buf[i + 1]; } } } @@ -5992,14 +5992,16 @@ class basic_json } else if (m_value.number_float == 0) { - // Special case for zero - use fixed precision to get "0.0" - snprintf(buf, sizeof(buf), "%#.1f", m_value.number_float); + // Special case for zero to get "0.0"/"-0.0" + if (std::signbit(m_value.number_float)) o << "-0.0"; + else o << "0.0"; + return; } else { - // Otherwise 15 digits of precision allows round-trip IEEE 754 - // string->double->string; to be safe, we read this value from - // std::numeric_limits::digits10 + // Otherwise 6, 15 or 16 digits of precision allows round-trip IEEE 754 + // string->float->string, string->double->string or string->long double->string; + // to be safe, we read this value from std::numeric_limits::digits10 snprintf(buf, sizeof(buf), "%.*g", std::numeric_limits::digits10, m_value.number_float); } @@ -7496,23 +7498,35 @@ class basic_json } /*! - @brief attempt to parse an integer, otherwise get the floating point representation + @brief return number value for number tokens - This function parses the integer component up to the radix point or exponent. - It also collects information about the floating point representation, which + This function translates the last token into the most appropriate number + type (either integer, unsigned integer or floating point), which is + passed back to the caller via the result parameter. + + This function parses the integer component up to the radix point or exponent + while collecting information about the 'floating point representation', which it stores in the result parameter. If there is no radix point or exponent, and the number can fit into a @ref number_integer_t or @ref number_unsigned_t - then it sets the result parameter accordingly. The 'floating point - representation' includes the number of significant figures after the radix - point, whether the number is in exponential or decimal form, the - capitalization of the exponent marker, and if the optional '+' is present in - the exponent. This information is necessary to perform accurate round trips + then it sets the result parameter accordingly. + + The 'floating point representation' includes the number of significant figures + after the radix point, whether the number is in exponential or decimal form, + the capitalization of the exponent marker, and if the optional '+' is present + in the exponent. This information is necessary to perform accurate round trips of floating point numbers. - @param[out] result @ref basic_json object to receive the result. + If the number is a floating point number the number is then parsed using + @a std:strtod (or @a std:strtof or @a std::strtold). + + @param[out] result @ref basic_json object to receive the number, or NAN if the + conversion read past the current token. The latter case needs to be + treated by the caller function. */ - value_t get_integer(basic_json& result) const + void get_number(basic_json& result) const { + assert(m_start != nullptr); + const lexer::lexer_char_t *curptr = m_start; result.m_type.bits.parsed = true; @@ -7526,7 +7540,7 @@ class basic_json number_unsigned_t value = 0; // Maximum absolute value of the relevant integer type - uint64_t max; + number_unsigned_t max; // Temporarily store the type to avoid unecessary bitfield access value_t type; @@ -7599,49 +7613,18 @@ class basic_json result.m_type.bits.precision = precision & found_radix_point; // Save the value (if not a float) - if (type == value_t::number_unsigned) result.m_value.number_unsigned = value; - else if (type == value_t::number_integer) result.m_value.number_integer = -static_cast(value); - - // Return the type (don't save it yet) - return type; - } - - /*! - @brief return number value for number tokens - - This function translates the last token into the most appropriate number - type (either integer, unsigned integer or floating point), which is - passed back to the caller via the result parameter. - - First @ref guess_type() is called to attempt to parse as an integer - and to retrieve information about the floating point representation - (if applicable) that can be used to accurately render the number to a - string later. - - If the number is a floating point number the number is then parsed using - @a std:strtod (or @a std:strtof or @a std::strtold), which sets @a endptr - to the first character past the converted number. If it is not the same as - @ref m_cursor a bad input is assumed and @a result parameter is set to NAN. - - @param[out] result @ref basic_json object to receive the number, or NAN if the - conversion read past the current token. The latter case needs to be - treated by the caller function. - */ - void get_number(basic_json& result) const - { - assert(m_start != nullptr); - - value_t type = get_integer(result); - - if (type == value_t::number_float) + if (type == value_t::number_unsigned) + { + result.m_value.number_unsigned = value; + } + else if (type == value_t::number_integer) + { + result.m_value.number_integer = -static_cast(value); + } + else { // Parse with strtod - typename string_t::value_type* endptr; - result.m_value.number_float = str_to_float_t(static_cast(nullptr), &endptr); - - // Anything after the number is an error - if (reinterpret_cast(endptr) != m_cursor && *m_cursor != '.') - throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number"); + result.m_value.number_float = str_to_float_t(static_cast(nullptr), NULL); } // Save the type