Shift integer parsing to guess_type() and rename to get_integer()

This commit is contained in:
Trevor Welsby 2016-01-30 11:55:11 +10:00
parent 7d08aa759b
commit 21a00fccc8
3 changed files with 147 additions and 171 deletions

View File

@ -7814,51 +7814,50 @@ basic_json_parser_64:
}
/*!
@brief static_cast between two types and indicate if it results in error
@brief attempt to parse an integer, otherwise get the floating point representation
This function performs a static_cast between @a source and @a dest. It
then checks whether a static_cast back to the type of @a source reproduces the original value.
This function parses the integer component up to the radix point or exponent.
It also collects information about the floating point representation, which
it stores in the result parameter. If there is no radix point or exponent,
and the number can fit into a @ref number_integer_t or @ref number_unsigned_t
then it sets the result parameter accordingly. The 'floating point
representation' includes the number of significant figures after the radix
point, whether the number is in exponential or decimal form, the
capitalization of the exponent marker, and if the optional '+' is present in
the exponent. This information is necessary to perform accurate round trips
of floating point numbers.
@param[in] source the value to cast from
@param[out] dest the value to cast to
@return @a true if the cast was performed without error, @a false otherwise
@param[out] result @ref basic_json object to receive the result.
*/
template <typename T_A, typename T_B>
bool attempt_cast(T_A source, T_B& dest) const
{
    // Forward conversion: the converted value is always handed to the caller
    dest = static_cast<T_B>(source);

    // Backward conversion: a cast that lost nothing reproduces the source
    const T_A round_trip = static_cast<T_A>(dest);
    const bool lossless = (source == round_trip);
    return lossless;
}
/*!
@brief peek ahead and guess the number type and floating point representation
This function scans the number to identify the number type. In addition it
counts the significant figures after the decimal point, whether the
number is in exponential or decimal form, the capitalization of the
exponent marker, and if the optional '+' is present in the exponent. This
information is necessary to perform accurate round trips of floating point
numbers.
@param[out] type @ref type_data_t object to receive the type information.
*/
void guess_type(type_data_t & type) const
value_t get_integer(basic_json& result) const
{
const lexer::lexer_char_t *curptr = m_start;
type.bits.parsed = true;
result.m_type.bits.parsed = true;
// 'found_radix_point' will be set to 0xFF upon finding a radix point
// and later used to mask in/out the precision depending whether a
// radix is found i.e. 'precision &= found_radix_point'
uint8_t found_radix_point = 0;
uint8_t precision = 0;
// Accumulate the integer conversion result (unsigned for now)
number_unsigned_t value = 0;
// Maximum absolute value of the relevant integer type
uint64_t max;
// Temporarily store the type to avoid unnecessary bitfield access
value_t type;
// Look for sign
if (*curptr == '-') {
type = value_t::number_integer;
max = static_cast<uint64_t>(std::numeric_limits<number_integer_t>::max()) + 1;
curptr++;
}
else {
type = value_t::number_unsigned;
max = static_cast<uint64_t>(std::numeric_limits<number_unsigned_t>::max());
if (*curptr == '+') curptr++;
}
@ -7879,25 +7878,50 @@ basic_json_parser_64:
found_radix_point = 0xFF;
continue;
}
// Assume exponent (if not it is a bad number and will fail
// parse anyway - could throw here instead): change to
// Assume exponent (if not then will fail parse): change to
// float, stop counting and record exponent details
type = value_t::number_float;
type.bits.has_exp = true;
result.m_type.bits.has_exp = true;
// Exponent capitalization
type.bits.exp_cap = (*curptr == 'E');
result.m_type.bits.exp_cap = (*curptr == 'E');
// Exponent '+' sign
type.bits.exp_plus = (*(++curptr) == '+');
result.m_type.bits.exp_plus = (*(++curptr) == '+');
break;
}
// Skip if definitely not an integer
if (type != value_t::number_float) {
// Multiply last value by ten and add the new digit
auto temp = value * 10 + *curptr - 0x30;
// Test for overflow
if (temp < value || temp > max)
{
// Overflow
type = value_t::number_float;
}
else
{
// No overflow - save it
value = temp;
}
}
precision++;
}
// If no radix was found then precision would now be set to
// If no radix point was found then precision would now be set to
// the number of digits, which is wrong - clear it
type.bits.precision = precision & found_radix_point;
result.m_type.bits.precision = precision & found_radix_point;
// Save the value (if not a float)
if (type == value_t::number_unsigned) result.m_value.number_unsigned = value;
else if (type == value_t::number_integer) result.m_value.number_integer = -static_cast<number_integer_t>(value);
// Return the type (don't save it yet)
return type;
}
/*!
@ -7907,23 +7931,15 @@ basic_json_parser_64:
type (either integer, unsigned integer or floating point), which is
passed back to the caller via the result parameter.
First @ref guess_type() is called to determine the type and to retrieve
information about the floating point representation (if applicable)
that can be used to accurately render the number to a string later.
First @ref get_integer() is called to attempt to parse as an integer
and to retrieve information about the floating point representation
(if applicable) that can be used to accurately render the number to a
string later.
Depending on the type, either @a std::strtoull (if number_unsigned_t) or
@a std::strtoll (if number_integer_t) is then called to attempt to parse the
number as an integer. Numbers that are too large or too small for a
signed/unsigned long long will cause a range error (@a errno set to ERANGE).
The parsed number is cast to a @ref number_integer_t/@ref number_unsigned_t
using the helper function @ref attempt_cast, which returns @a false if the
cast could not be performed without error.
In either of these cases (range error or a cast error) the number is parsed
using @a std::strtod (or @a std::strtof or @a std::strtold), which sets
@a endptr to the first character past the converted number. If it is not
the same as @ref m_cursor a bad input is assumed and @a result parameter is
set to NAN.
If the number is a floating point number the number is then parsed using
@a std::strtod (or @a std::strtof or @a std::strtold), which sets @a endptr
to the first character past the converted number. If it is not the same as
@ref m_cursor a bad input is assumed and @a result parameter is set to NAN.
@param[out] result @ref basic_json object to receive the number, or NAN if the
conversion read past the current token. The latter case needs to be
@ -7933,49 +7949,21 @@ basic_json_parser_64:
{
assert(m_start != nullptr);
guess_type(result.m_type);
value_t type = get_integer(result);
errno = 0;
// Attempt to parse it as an integer
if (result.m_type == value_t::number_unsigned)
if (type == value_t::number_float)
{
// Positive, parse with strtoull and attempt cast to number_unsigned_t
if (!attempt_cast(std::strtoull(reinterpret_cast<typename string_t::const_pointer>(m_start), NULL,
10), result.m_value.number_unsigned))
{
result.m_type = value_t::number_float; // Cast failed due to overflow - store as float
}
}
else if (result.m_type == value_t::number_integer)
{
// Negative, parse with strtoll and attempt cast to number_integer_t
if (!attempt_cast(std::strtoll(reinterpret_cast<typename string_t::const_pointer>(m_start), NULL,
10), result.m_value.number_integer))
{
result.m_type = value_t::number_float; // Cast failed due to overflow - store as float
}
}
// Check the end of the number was reached and no range error occurred
if (errno == ERANGE) result.m_type = value_t::number_float;
if (result.m_type == value_t::number_float)
{
// Either the number won't fit in an integer (range error from
// strtoull/strtoll or overflow on cast) or there was something
// else after the number, which could be an exponent
// Parse with strtod
typename string_t::value_type* endptr;
result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), &endptr);
// Anything after the number is an error
if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor && *m_cursor != '.')
{
throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number");
}
}
// Save the type
result.m_type = type;
}
private:

View File

@ -7496,51 +7496,50 @@ class basic_json
}
/*!
@brief static_cast between two types and indicate if it results in error
@brief attempt to parse an integer, otherwise get the floating point representation
This function performs a static_cast between @a source and @a dest. It
then checks whether a static_cast back to the type of @a source reproduces the original value.
This function parses the integer component up to the radix point or exponent.
It also collects information about the floating point representation, which
it stores in the result parameter. If there is no radix point or exponent,
and the number can fit into a @ref number_integer_t or @ref number_unsigned_t
then it sets the result parameter accordingly. The 'floating point
representation' includes the number of significant figures after the radix
point, whether the number is in exponential or decimal form, the
capitalization of the exponent marker, and if the optional '+' is present in
the exponent. This information is necessary to perform accurate round trips
of floating point numbers.
@param[in] source the value to cast from
@param[out] dest the value to cast to
@return @a true if the cast was performed without error, @a false otherwise
@param[out] result @ref basic_json object to receive the result.
*/
template <typename T_A, typename T_B>
bool attempt_cast(T_A source, T_B& dest) const
{
    // Forward conversion: the converted value is always handed to the caller
    dest = static_cast<T_B>(source);

    // Backward conversion: a cast that lost nothing reproduces the source
    const T_A round_trip = static_cast<T_A>(dest);
    const bool lossless = (source == round_trip);
    return lossless;
}
/*!
@brief peek ahead and guess the number type and floating point representation
This function scans the number to identify the number type. In addition it
counts the significant figures after the decimal point, whether the
number is in exponential or decimal form, the capitalization of the
exponent marker, and if the optional '+' is present in the exponent. This
information is necessary to perform accurate round trips of floating point
numbers.
@param[out] type @ref type_data_t object to receive the type information.
*/
void guess_type(type_data_t & type) const
value_t get_integer(basic_json& result) const
{
const lexer::lexer_char_t *curptr = m_start;
type.bits.parsed = true;
result.m_type.bits.parsed = true;
// 'found_radix_point' will be set to 0xFF upon finding a radix point
// and later used to mask in/out the precision depending whether a
// radix is found i.e. 'precision &= found_radix_point'
uint8_t found_radix_point = 0;
uint8_t precision = 0;
// Accumulate the integer conversion result (unsigned for now)
number_unsigned_t value = 0;
// Maximum absolute value of the relevant integer type
uint64_t max;
// Temporarily store the type to avoid unnecessary bitfield access
value_t type;
// Look for sign
if (*curptr == '-') {
type = value_t::number_integer;
max = static_cast<uint64_t>(std::numeric_limits<number_integer_t>::max()) + 1;
curptr++;
}
else {
type = value_t::number_unsigned;
max = static_cast<uint64_t>(std::numeric_limits<number_unsigned_t>::max());
if (*curptr == '+') curptr++;
}
@ -7561,25 +7560,50 @@ class basic_json
found_radix_point = 0xFF;
continue;
}
// Assume exponent (if not it is a bad number and will fail
// parse anyway - could throw here instead): change to
// Assume exponent (if not then will fail parse): change to
// float, stop counting and record exponent details
type = value_t::number_float;
type.bits.has_exp = true;
result.m_type.bits.has_exp = true;
// Exponent capitalization
type.bits.exp_cap = (*curptr == 'E');
result.m_type.bits.exp_cap = (*curptr == 'E');
// Exponent '+' sign
type.bits.exp_plus = (*(++curptr) == '+');
result.m_type.bits.exp_plus = (*(++curptr) == '+');
break;
}
// Skip if definitely not an integer
if (type != value_t::number_float) {
// Multiply last value by ten and add the new digit
auto temp = value * 10 + *curptr - 0x30;
// Test for overflow
if (temp < value || temp > max)
{
// Overflow
type = value_t::number_float;
}
else
{
// No overflow - save it
value = temp;
}
}
precision++;
}
// If no radix was found then precision would now be set to
// If no radix point was found then precision would now be set to
// the number of digits, which is wrong - clear it
type.bits.precision = precision & found_radix_point;
result.m_type.bits.precision = precision & found_radix_point;
// Save the value (if not a float)
if (type == value_t::number_unsigned) result.m_value.number_unsigned = value;
else if (type == value_t::number_integer) result.m_value.number_integer = -static_cast<number_integer_t>(value);
// Return the type (don't save it yet)
return type;
}
/*!
@ -7589,23 +7613,15 @@ class basic_json
type (either integer, unsigned integer or floating point), which is
passed back to the caller via the result parameter.
First @ref guess_type() is called to determine the type and to retrieve
information about the floating point representation (if applicable)
that can be used to accurately render the number to a string later.
First @ref get_integer() is called to attempt to parse as an integer
and to retrieve information about the floating point representation
(if applicable) that can be used to accurately render the number to a
string later.
Depending on the type, either @a std::strtoull (if number_unsigned_t) or
@a std::strtoll (if number_integer_t) is then called to attempt to parse the
number as an integer. Numbers that are too large or too small for a
signed/unsigned long long will cause a range error (@a errno set to ERANGE).
The parsed number is cast to a @ref number_integer_t/@ref number_unsigned_t
using the helper function @ref attempt_cast, which returns @a false if the
cast could not be performed without error.
In either of these cases (range error or a cast error) the number is parsed
using @a std::strtod (or @a std::strtof or @a std::strtold), which sets
@a endptr to the first character past the converted number. If it is not
the same as @ref m_cursor a bad input is assumed and @a result parameter is
set to NAN.
If the number is a floating point number the number is then parsed using
@a std::strtod (or @a std::strtof or @a std::strtold), which sets @a endptr
to the first character past the converted number. If it is not the same as
@ref m_cursor a bad input is assumed and @a result parameter is set to NAN.
@param[out] result @ref basic_json object to receive the number, or NAN if the
conversion read past the current token. The latter case needs to be
@ -7615,49 +7631,21 @@ class basic_json
{
assert(m_start != nullptr);
guess_type(result.m_type);
value_t type = get_integer(result);
errno = 0;
// Attempt to parse it as an integer
if (result.m_type == value_t::number_unsigned)
if (type == value_t::number_float)
{
// Positive, parse with strtoull and attempt cast to number_unsigned_t
if (!attempt_cast(std::strtoull(reinterpret_cast<typename string_t::const_pointer>(m_start), NULL,
10), result.m_value.number_unsigned))
{
result.m_type = value_t::number_float; // Cast failed due to overflow - store as float
}
}
else if (result.m_type == value_t::number_integer)
{
// Negative, parse with strtoll and attempt cast to number_integer_t
if (!attempt_cast(std::strtoll(reinterpret_cast<typename string_t::const_pointer>(m_start), NULL,
10), result.m_value.number_integer))
{
result.m_type = value_t::number_float; // Cast failed due to overflow - store as float
}
}
// Check the end of the number was reached and no range error occurred
if (errno == ERANGE) result.m_type = value_t::number_float;
if (result.m_type == value_t::number_float)
{
// Either the number won't fit in an integer (range error from
// strtoull/strtoll or overflow on cast) or there was something
// else after the number, which could be an exponent
// Parse with strtod
typename string_t::value_type* endptr;
result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), &endptr);
// Anything after the number is an error
if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor && *m_cursor != '.')
{
throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number");
}
}
// Save the type
result.m_type = type;
}
private:

View File

@ -12108,7 +12108,7 @@ TEST_CASE("regression tests")
// integer object creation - expected to wrap and still be stored as an integer
j = -2147483649LL; // -2^31-1
CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_integer));
CHECK(j.get<int32_t>() == 2147483647.0f); // Wrap
CHECK(j.get<int32_t>() == 2147483647); // Wrap
// integer parsing - expected to overflow and be stored as a float with rounding
j = custom_json::parse("-2147483649"); // -2^31-1