diff --git a/src/json.hpp b/src/json.hpp index 00204f6bd..e7fc76cc8 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -602,7 +602,7 @@ class basic_json @sa @ref number_integer_t -- type for number values (integer) - @since version 1.0.0 + @since version 2.0.0 */ using number_unsigned_t = NumberUnsignedType; @@ -699,9 +699,9 @@ class basic_json string, ///< string value boolean, ///< boolean value number_integer, ///< number value (integer) + number_unsigned,///< number value (unsigned integer) number_float, ///< number value (floating-point) - discarded, ///< discarded by the the parser callback function - number_unsigned ///< number value (unsigned integer) + discarded ///< discarded by the the parser callback function }; @@ -1343,7 +1343,7 @@ class basic_json @sa @ref basic_json(const CompatibleNumberUnsignedType) -- create a number value (unsigned integer) from a compatible number type - @since version 1.0.0 + @since version 2.0.0 */ template order = {{ + static constexpr std::array order = {{ 0, // null 3, // object 4, // array 5, // string 1, // boolean 2, // integer + 2, // unsigned 2, // float - 0, // filler for discarded (preserves existing value_t values) - 2 // unsigned } }; @@ -7482,53 +7481,99 @@ basic_json_parser_64: /*! @brief return number value for number tokens - This function translates the last token into a floating point number. - The pointer m_start points to the beginning of the parsed number. We - pass this pointer to std::strtod which sets endptr to the first - character past the converted number. If this pointer is not the same as - m_cursor, then either more or less characters have been used during the - comparison. This can happen for inputs like "01" which will be treated - like number 0 followed by number 1. + This function translates the last token into the most appropriate + number type (either integer, unsigned integer or floating point), + which is passed back to the caller via the result parameter. 
The pointer + m_start points to the beginning of the parsed number. We first examine + the first character to determine the sign of the number and then pass + this pointer to either std::strtoull (if positive) or std::strtoll + (if negative), both of which set endptr to the first character past the + converted number. If this pointer is not the same as m_cursor, then + either more or fewer characters have been used during the conversion. + + This can happen for inputs like "01" which will be treated like number 0 + followed by number 1. This will also occur for valid floating point + inputs like "12e3", which the integer conversion reads as just 12. Numbers that are too + large or too small to be stored in the number_integer_t or + number_unsigned_t types will cause a range error (errno set to ERANGE). + In both cases (more/fewer characters read, or a range error) the pointer + is passed to std::strtod, which also sets endptr to the first character + past the converted number. + + The resulting number_float_t is then cast to a number_integer_t or, + if positive, to a number_unsigned_t and compared to the original. If + there is no loss of precision then it is stored as a number_integer_t + or, if positive, a number_unsigned_t; otherwise as a number_float_t. + + A final comparison is made of endptr and if it is still not the same as + m_cursor, bad input is assumed and the result parameter is set to NAN. - @return the result of the number conversion or NAN if the conversion - read past the current token. The latter case needs to be treated by the - caller function. - - @throw std::range_error if passed value is out of range + @param[out] result basic_json object to receive the number, or NAN if the + conversion read past the current token. The latter case needs to be + treated by the caller function. 
*/ void get_number(basic_json& result) const { typename string_t::value_type* endptr; assert(m_start != nullptr); - - // Parse it as an integer - if(*reinterpret_cast(m_start) != '-') { - // Unsigned - result.m_value.number_unsigned = strtoull(reinterpret_cast(m_start),&endptr,10); + + // Attempt to parse it as an integer - first checking for a negative number + if(*reinterpret_cast(m_start) != '-') + { + // Positive, parse with strtoull + result.m_value.number_unsigned = std::strtoull(reinterpret_cast(m_start),&endptr,10); result.m_type = value_t::number_unsigned; } - else { - // Signed - result.m_value.number_integer = strtoll(reinterpret_cast(m_start),&endptr,10); + else + { + // Negative, parse with strtoll + result.m_value.number_integer = std::strtoll(reinterpret_cast(m_start),&endptr,10); result.m_type = value_t::number_integer; } - // Parse it as a double - const auto float_val = strtold(reinterpret_cast(m_start),&endptr); - long double int_part; - const auto frac_part = std::modf(float_val, &int_part); + // Check the end of the number was reached and no range error occurred + if(reinterpret_cast(endptr) != m_cursor || errno == ERANGE) + { + // Either the number won't fit in an integer (range error) or there was + // something else after the number, which could be an exponent + + // Parse with strtod + result.m_value.number_float = std::strtod(reinterpret_cast(m_start),&endptr); + + // Check if it can be stored as an integer without loss of precision e.g. 
1.2e3 = 1200 + if (result.m_type == value_t::number_integer) + { + auto int_val = static_cast(result.m_value.number_float); + if (approx(result.m_value.number_float, static_cast(int_val))) + { + // we would not lose precision -> return int + result.m_value.number_integer = int_val; + } + else + { + result.m_type = value_t::number_float; + } + } + else + { + auto int_val = static_cast(result.m_value.number_float); + if (approx(result.m_value.number_float, static_cast(int_val))) + { + // we would not lose precision -> return int + result.m_value.number_unsigned = int_val; + } + else + { + result.m_type = value_t::number_float; + } + } - // Test if the double or integer is a better representation - if(!approx(frac_part, static_cast(0)) || - (result.m_type == value_t::number_unsigned && !approx(int_part, static_cast(result.m_value.number_unsigned))) || - (result.m_type == value_t::number_integer && !approx(int_part, static_cast(result.m_value.number_integer)))) { - result.m_value.number_float = float_val; - result.m_type = value_t::number_float; - } - - if(reinterpret_cast(endptr) != m_cursor) { - result.m_value.number_float = NAN; - result.m_type = value_t::number_float; + // Anything after the number is an error + if(reinterpret_cast(endptr) != m_cursor) + { + result.m_value.number_float = NAN; + result.m_type = value_t::number_float; + } } } diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index e10ffc3b8..f253e7f71 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -602,7 +602,7 @@ class basic_json @sa @ref number_integer_t -- type for number values (integer) - @since version 1.0.0 + @since version 2.0.0 */ using number_unsigned_t = NumberUnsignedType; @@ -699,9 +699,9 @@ class basic_json string, ///< string value boolean, ///< boolean value number_integer, ///< number value (integer) + number_unsigned,///< number value (unsigned integer) number_float, ///< number value (floating-point) - discarded, ///< discarded by the the parser callback function - 
number_unsigned ///< number value (unsigned integer) + discarded ///< discarded by the the parser callback function }; @@ -1343,7 +1343,7 @@ class basic_json @sa @ref basic_json(const CompatibleNumberUnsignedType) -- create a number value (unsigned integer) from a compatible number type - @since version 1.0.0 + @since version 2.0.0 */ template order = {{ + static constexpr std::array order = {{ 0, // null 3, // object 4, // array 5, // string 1, // boolean 2, // integer + 2, // unsigned 2, // float - 0, // filler for discarded (preserves existing value_t values) - 2 // unsigned } }; @@ -7164,53 +7163,99 @@ class basic_json /*! @brief return number value for number tokens - This function translates the last token into a floating point number. - The pointer m_start points to the beginning of the parsed number. We - pass this pointer to std::strtod which sets endptr to the first - character past the converted number. If this pointer is not the same as - m_cursor, then either more or less characters have been used during the - comparison. This can happen for inputs like "01" which will be treated - like number 0 followed by number 1. + This function translates the last token into the most appropriate + number type (either integer, unsigned integer or floating point), + which is passed back to the caller via the result parameter. The pointer + m_start points to the beginning of the parsed number. We first examine + the first character to determine the sign of the number and then pass + this pointer to either std::strtoull (if positive) or std::strtoll + (if negative), both of which set endptr to the first character past the + converted number. If this pointer is not the same as m_cursor, then + either more or less characters have been used during the comparison. + + This can happen for inputs like "01" which will be treated like number 0 + followed by number 1. This will also occur for valid floating point + inputs like "12e3" will be incorrectly read as 12. 
Numbers that are too + large or too small to be stored in the number_integer_t or + number_unsigned_t types will cause a range error (errno set to ERANGE). + In both cases (more/fewer characters read, or a range error) the pointer + is passed to std::strtod, which also sets endptr to the first character + past the converted number. + + The resulting number_float_t is then cast to a number_integer_t or, + if positive, to a number_unsigned_t and compared to the original. If + there is no loss of precision then it is stored as a number_integer_t + or, if positive, a number_unsigned_t; otherwise as a number_float_t. + + A final comparison is made of endptr and if it is still not the same as + m_cursor, bad input is assumed and the result parameter is set to NAN. - @return the result of the number conversion or NAN if the conversion - read past the current token. The latter case needs to be treated by the - caller function. - - @throw std::range_error if passed value is out of range + @param[out] result basic_json object to receive the number, or NAN if the + conversion read past the current token. The latter case needs to be + treated by the caller function. 
*/ void get_number(basic_json& result) const { typename string_t::value_type* endptr; assert(m_start != nullptr); - - // Parse it as an integer - if(*reinterpret_cast(m_start) != '-') { - // Unsigned - result.m_value.number_unsigned = strtoull(reinterpret_cast(m_start),&endptr,10); + + // Attempt to parse it as an integer - first checking for a negative number + if(*reinterpret_cast(m_start) != '-') + { + // Positive, parse with strtoull + result.m_value.number_unsigned = std::strtoull(reinterpret_cast(m_start),&endptr,10); result.m_type = value_t::number_unsigned; } - else { - // Signed - result.m_value.number_integer = strtoll(reinterpret_cast(m_start),&endptr,10); + else + { + // Negative, parse with strtoll + result.m_value.number_integer = std::strtoll(reinterpret_cast(m_start),&endptr,10); result.m_type = value_t::number_integer; } - // Parse it as a double - const auto float_val = strtold(reinterpret_cast(m_start),&endptr); - long double int_part; - const auto frac_part = std::modf(float_val, &int_part); + // Check the end of the number was reached and no range error occurred + if(reinterpret_cast(endptr) != m_cursor || errno == ERANGE) + { + // Either the number won't fit in an integer (range error) or there was + // something else after the number, which could be an exponent + + // Parse with strtod + result.m_value.number_float = std::strtod(reinterpret_cast(m_start),&endptr); + + // Check if it can be stored as an integer without loss of precision e.g. 
1.2e3 = 1200 + if (result.m_type == value_t::number_integer) + { + auto int_val = static_cast(result.m_value.number_float); + if (approx(result.m_value.number_float, static_cast(int_val))) + { + // we would not lose precision -> return int + result.m_value.number_integer = int_val; + } + else + { + result.m_type = value_t::number_float; + } + } + else + { + auto int_val = static_cast(result.m_value.number_float); + if (approx(result.m_value.number_float, static_cast(int_val))) + { + // we would not lose precision -> return int + result.m_value.number_unsigned = int_val; + } + else + { + result.m_type = value_t::number_float; + } + } - // Test if the double or integer is a better representation - if(!approx(frac_part, static_cast(0)) || - (result.m_type == value_t::number_unsigned && !approx(int_part, static_cast(result.m_value.number_unsigned))) || - (result.m_type == value_t::number_integer && !approx(int_part, static_cast(result.m_value.number_integer)))) { - result.m_value.number_float = float_val; - result.m_type = value_t::number_float; - } - - if(reinterpret_cast(endptr) != m_cursor) { - result.m_value.number_float = NAN; - result.m_type = value_t::number_float; + // Anything after the number is an error + if(reinterpret_cast(endptr) != m_cursor) + { + result.m_value.number_float = NAN; + result.m_type = value_t::number_float; + } } } diff --git a/test/unit.cpp b/test/unit.cpp index 7ddb95dc8..7b5c8d898 100644 --- a/test/unit.cpp +++ b/test/unit.cpp @@ -9529,12 +9529,12 @@ TEST_CASE("parser class") CHECK_THROWS_WITH(json::parser("01").parse(), "parse error - 0 is not a number"); CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'"); - CHECK_THROWS_WITH(json::parser("1.").parse(), "parse error - 1 is not a number"); + CHECK_THROWS_WITH(json::parser("1.").parse(), "parse error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("1E").parse(), "parse error - unexpected 'E'; expected end of input"); 
CHECK_THROWS_WITH(json::parser("1E-").parse(), "parse error - unexpected 'E'; expected end of input"); - CHECK_THROWS_WITH(json::parser("1.E1").parse(), "parse error - 1 is not a number"); + CHECK_THROWS_WITH(json::parser("1.E1").parse(), "parse error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-1E").parse(), "parse error - unexpected 'E'; expected end of input"); CHECK_THROWS_WITH(json::parser("-0E#").parse(), @@ -9576,18 +9576,18 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("1E.").parse(), std::invalid_argument); CHECK_THROWS_AS(json::parser("1E/").parse(), std::invalid_argument); CHECK_THROWS_AS(json::parser("1E:").parse(), std::invalid_argument); - CHECK_THROWS_WITH(json::parser("0.").parse(), "parse error - 0 is not a number"); + CHECK_THROWS_WITH(json::parser("0.").parse(), "parse error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("--").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("-0.").parse(), - "parse error - -0 is not a number"); + "parse error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("-.").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("-:").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("0.:").parse(), - "parse error - 0 is not a number"); + "parse error - unexpected '.'; expected end of input"); CHECK_THROWS_WITH(json::parser("e.").parse(), "parse error - unexpected 'e'"); CHECK_THROWS_WITH(json::parser("1e.").parse(),