Merge e9517958a3 into ff6f723195

2016-01-30 07:58:29 +00:00 · 2016-01-30 07:58:29 +00:00 · 9b4eb1447b
commit 9b4eb1447b
parent ff6f723195 e9517958a3
8 changed files with 917 additions and 1073 deletions
--- a/src/json.hpp
+++ b/src/json.hpp
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@ -715,6 +715,73 @@ class basic_json
  private:
    /*!
    @brief a type to hold JSON type information

    This bitfield type holds information about JSON types. It is internally
    used to hold the basic JSON type enumeration, as well as additional
    information in the case of values that have been parsed from a string,
    including whether or not it was created directly or parsed, and, in the
    case of floating point numbers, the number of significant figures in the
    original representation, whether it was in exponential form, whether a
    '+' was included in the exponent, and the capitalization of the exponent
    marker. The sole purpose of this information is to permit accurate round
    trips.

    @note The flag fields and @a data share the same 16 bits of storage:
    writing @a data overwrites every field of @a bits at once.

    @since version 2.0.0
    */
    union type_data_t {
        struct {
            /// the JSON type, stored as the numeric value of a value_t
            uint16_t type : 4;
            /// set when the value was parsed from a string
            uint16_t parsed : 1;
            /// set when the parsed number was written in exponential form
            uint16_t has_exp : 1;
            /// set when the exponent of the parsed number carried a '+'
            uint16_t exp_plus : 1;
            /// set when the exponent marker of the parsed number was 'E'
            uint16_t exp_cap : 1;
            /// precision recorded for the parsed number
            uint16_t precision : 8;
        } bits;
        /// all fields of @a bits viewed as a single 16 bit word
        uint16_t data;

        /// return the stored type as a value_t
        value_t get() const
        {
            return static_cast<value_t>(bits.type);
        }

        /// test the stored type for equality (all other fields are ignored)
        bool operator==(const value_t& rhs) const
        {
            return static_cast<value_t>(bits.type) == rhs;
        }

        /// assign a new type; the remaining fields are left untouched
        type_data_t & operator=(value_t rhs)
        {
            bits.type = static_cast<uint16_t>(rhs);
            return *this;
        }

        /// construct from a value_t with every other field cleared
        type_data_t(value_t t) noexcept
        {
            // clear the whole word through the union member provided for it
            data = 0;
            bits.type = static_cast<uint16_t>(t);
        }

        /// default constructor: type value_t::null with every other field cleared
        type_data_t() noexcept
        {
            data = 0;
            // an enumeration must be converted with static_cast;
            // reinterpret_cast to an integer type is ill-formed
            bits.type = static_cast<uint16_t>(value_t::null);
        }

        /// copy constructor: copies all fields at once via the raw word
        type_data_t(const type_data_t& t) noexcept
        {
            data = t.data;
        }
    };
    /// helper for exception-safe object creation
    template<typename T, typename... Args>
    static T* create(Args&& ... args)
@ -1752,7 +1819,7 @@ class basic_json
        }
        // check if iterator range is complete for primitive values
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::boolean:
            case value_t::number_float:
@ -1773,7 +1840,7 @@ class basic_json
            }
        }
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::number_integer:
            {
@ -1857,7 +1924,7 @@ class basic_json
    basic_json(const basic_json& other)
        : m_type(other.m_type)
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::object:
            {
@ -1987,7 +2054,7 @@ class basic_json
    */
    ~basic_json()
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::object:
            {
@ -2087,7 +2154,7 @@ class basic_json
    */
    value_t type() const noexcept
    {
-        return m_type;
+        return m_type.get();
    }
    /*!
@ -2370,7 +2437,7 @@ class basic_json
    */
    operator value_t() const noexcept
    {
-        return m_type;
+        return m_type.get();
    }
    /// @}
@ -2525,7 +2592,7 @@ class basic_json
                 , int>::type = 0>
    T get_impl(T*) const
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::number_integer:
            {
@ -3657,7 +3724,7 @@ class basic_json
        InteratorType result = end();
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::boolean:
            case value_t::number_float:
@ -3676,7 +3743,7 @@ class basic_json
                    m_value.string = nullptr;
                }
-                m_type = value_t::null;
+                m_type = type_data_t(value_t::null);
                break;
            }
@ -3763,7 +3830,7 @@ class basic_json
        InteratorType result = end();
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::boolean:
            case value_t::number_float:
@ -3782,7 +3849,7 @@ class basic_json
                    m_value.string = nullptr;
                }
-                m_type = value_t::null;
+                m_type = type_data_t(value_t::null);
                break;
            }
@ -4271,7 +4338,7 @@ class basic_json
    */
    bool empty() const noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::null:
            {
@ -4329,7 +4396,7 @@ class basic_json
    */
    size_type size() const noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::null:
            {
@ -4391,7 +4458,7 @@ class basic_json
    */
    size_type max_size() const noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::array:
            {
@ -4450,7 +4517,7 @@ class basic_json
    */
    void clear() noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::number_integer:
            {
@ -4543,7 +4610,7 @@ class basic_json
        assert(m_value.array != nullptr);
        m_value.array->push_back(std::move(val));
        // invalidate object
-        val.m_type = value_t::null;
+        val.m_type = type_data_t(value_t::null);
    }
    /*!
@ -5568,7 +5635,7 @@ class basic_json
    /// return the type as string
    string_t type_name() const
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::null:
                return "null";
@ -5774,7 +5841,7 @@ class basic_json
        // variable to hold indentation for recursive calls
        unsigned int new_indent = current_indent;
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::object:
            {
@ -5885,23 +5952,60 @@ class basic_json
            case value_t::number_float:
            {
-                // If the number is an integer then output as a fixed with with
+                // Buffer size: precision (2^8-1 = 255) + other ('-.e-xxx' = 7) + null (1)
-                // precision 1 to output "0.0", "1.0" etc as expected for some
+                char buf[263];
-                // round trip tests otherwise  15 digits of precision allows
+                int len;
-                // round-trip IEEE 754 string->double->string; to be safe, we
+
-                // read this value from
+                if (m_type.bits.parsed)
                // std::numeric_limits<number_float_t>::digits10
                if (std::fmod(m_value.number_float, 1) == 0)
                {
-                    o << std::fixed << std::setprecision(1);
+                    // Number was parsed from a string so use the same precision
                    if (m_type.bits.has_exp)
                    {
                        // Exponent - output in exponential form - handle capitalization of e/E
                        if (m_type.bits.exp_cap)
                        {
                            len = snprintf(buf, sizeof(buf), "%.*E", m_type.bits.precision, m_value.number_float) + 1;
                        }
                        else
                        {
                            len = snprintf(buf, sizeof(buf), "%.*e", m_type.bits.precision, m_value.number_float) + 1;
                        }
                        // Remove '+' sign from the exponent if necessary
                        if (!m_type.bits.exp_plus)
                        {
                            if (len > static_cast<int>(sizeof(buf))) len = sizeof(buf);
                            for (int i = 0; i < len; i++)
                            {
                                if (buf[i] == '+')
                                {
                                    for (; i + 1 < len; i++) buf[i] = buf[i + 1];
                                }
                            }
                        }
                    }
                    else
                    {
                        // No exponent - output as a decimal
                        snprintf(buf, sizeof(buf), "%.*f", m_type.bits.precision, m_value.number_float);
                    }
                }
                else if (m_value.number_float == 0)
                {
                    // Special case for zero to get "0.0"/"-0.0"
                    if (std::signbit(m_value.number_float)) o << "-0.0";
                    else o << "0.0";
                    return;
                }
                else
                {
-                    // std::defaultfloat not supported in gcc version < 5
+                    // Otherwise 6, 15 or 16 digits of precision allows round-trip IEEE 754
-                    o.unsetf(std::ios_base::floatfield);
+                    // string->float->string, string->double->string or string->long double->string;
-                    o << std::setprecision(std::numeric_limits<double>::digits10);
+                    // to be safe, we read this value from std::numeric_limits<number_float_t>::digits10
                    snprintf(buf, sizeof(buf), "%.*g", std::numeric_limits<double>::digits10, m_value.number_float);
                }
-                o << m_value.number_float;
+
                o << buf;
                return;
            }
@ -5925,7 +6029,7 @@ class basic_json
    //////////////////////
    /// the type of the current element
-    value_t m_type = value_t::null;
+    type_data_t m_type = type_data_t(value_t::null);
    /// the value of the current element
    json_value m_value = {};
@ -6149,7 +6253,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6176,7 +6280,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6222,7 +6326,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6258,7 +6362,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6288,7 +6392,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6328,7 +6432,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6371,7 +6475,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6408,7 +6512,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6443,7 +6547,7 @@ class basic_json
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6479,7 +6583,7 @@ class basic_json
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6521,7 +6625,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6571,7 +6675,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6595,7 +6699,7 @@ class basic_json
        {
            assert(m_object != nullptr);
-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -7393,124 +7497,138 @@ class basic_json
            return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
        }
        /*!
        @brief convert a value to another type and report whether it survived

        The value @a source is converted to the destination type with a
        static_cast and stored in @a dest. The stored value is then converted
        back to the source type and compared with the original @a source; the
        conversion counts as successful only if both compare equal.

        @param[in] source  the value to cast from
        @param[out] dest  receives the converted value (written even when the
                          function returns @a false)

        @return @a true if the cast was performed without error, @a false otherwise
        */
        template <typename T_A, typename T_B>
        bool attempt_cast(T_A source, T_B& dest) const
        {
            // forward conversion: always stored in the output parameter
            const T_B converted = static_cast<T_B>(source);
            dest = converted;

            // backward conversion: must reproduce the original value
            const T_A round_tripped = static_cast<T_A>(converted);
            return source == round_tripped;
        }
        /*!
        @brief return number value for number tokens
-        This function translates the last token into the most appropriate
+        This function translates the last token into the most appropriate number
-        number type (either integer, unsigned integer or floating point), which
+        type (either integer, unsigned integer or floating point), which is
-        is passed back to the caller via the result parameter. The pointer @a
+        passed back to the caller via the result parameter.
        m_start points to the beginning of the parsed number. We first examine
        the first character to determine the sign of the number and then pass
        this pointer to either @a std::strtoull (if positive) or @a
        std::strtoll (if negative), both of which set @a endptr to the first
        character past the converted number. If this pointer is not the same as
        @a m_cursor, then either more or less characters have been used during
        the comparison.
-        This can happen for inputs like "01" which will be treated like number
+        This function parses the integer component up to the radix point or exponent
-        0 followed by number 1. This will also occur for valid floating point
+        while collecting information about the 'floating point representation', which
-        inputs like "12e3" will be incorrectly read as 12. Numbers that are too
+        it stores in the result parameter.  If there is no radix point or exponent,
-        large or too small for a signed/unsigned long long will cause a range
+        and the number can fit into a @ref number_integer_t or @ref number_unsigned_t
-        error (@a errno set to ERANGE). The parsed number is cast to a @ref
+        then it sets the result parameter accordingly.
        number_integer_t/@ref number_unsigned_t using the helper function @ref
        attempt_cast, which returns @a false if the cast could not be performed
        without error.
-        In any of these cases (more/less characters read, range error or a cast
+        The 'floating point representation' includes the number of significant figures
-        error) the pointer is passed to @a std:strtod, which also sets @a
+        after the radix point, whether the number is in exponential or decimal form,
-        endptr to the first character past the converted number. The resulting
+        the capitalization of the exponent marker, and if the optional '+' is present
-        @ref number_float_t is then cast to a @ref number_integer_t/@ref
+        in the exponent. This information is necessary to perform accurate round trips
-        number_unsigned_t using @ref attempt_cast and if no error occurs is
+        of floating point numbers.
        stored in that form, otherwise it is stored as a @ref number_float_t.
-        A final comparison is made of @a endptr and if still not the same as
+        If the number is a floating point number the number is then parsed using
-        @ref m_cursor a bad input is assumed and @a result parameter is set to
+        @a std::strtod (or @a std::strtof or @a std::strtold).
        NAN.
-        @param[out] result @ref basic_json object to receive the number, or NAN
+        @param[out] result  @ref basic_json object to receive the number, or NAN if the
-        if the conversion read past the current token. The latter case needs to
+        conversion read past the current token. The latter case needs to be
-        be treated by the caller function.
+        treated by the caller function.
        */
        void get_number(basic_json& result) const
        {
            typename string_t::value_type* endptr;
            assert(m_start != nullptr);
            errno = 0;
-            // attempt to parse it as an integer - first checking for a
+            const lexer::lexer_char_t *curptr = m_start;
-            // negative number
+            result.m_type.bits.parsed = true;
-            if (*reinterpret_cast<typename string_t::const_pointer>(m_start) != '-')
+
            // 'found_radix_point' will be set to 0xFF upon finding a radix point 
            // and later used to mask in/out the precision depending whether a
            // radix is found i.e. 'precision &= found_radix_point'
            uint8_t found_radix_point = 0;
            uint8_t precision = 0;
            // Accumulate the integer conversion result (unsigned for now)
            number_unsigned_t value = 0;
            // Maximum absolute value of the relevant integer type
            number_unsigned_t max;
            // Temporarily store the type to avoid unnecessary bitfield access
            value_t type;
            // Look for sign
            if (*curptr == '-') {
                type = value_t::number_integer;
                max = static_cast<uint64_t>(std::numeric_limits<number_integer_t>::max()) + 1;
                curptr++;
            }
            else {
                type = value_t::number_unsigned;
                max = static_cast<uint64_t>(std::numeric_limits<number_unsigned_t>::max());
                if (*curptr == '+') curptr++;
            }
            // Count the significant figures
            for (; curptr < m_cursor; curptr++)
            {
-                // positive, parse with strtoull and attempt cast to
+
-                // number_unsigned_t
+                // Quickly skip tests if a digit
-                if (attempt_cast(std::strtoull(reinterpret_cast<typename string_t::const_pointer>(m_start), &endptr,
+                if (*curptr < '0' || *curptr > '9')
                                               10), result.m_value.number_unsigned))
                {
-                    result.m_type = value_t::number_unsigned;
+                    if (*curptr == '.')
                    {
                        // Don't count '.' but change to float
                        type = value_t::number_float;
                        // Reset precision count
                        precision = 0;
                        found_radix_point = 0xFF;
                        continue;
                    }
                    // Assume exponent (if not then will fail parse): change to 
                    // float, stop counting and record exponent details
                    type = value_t::number_float;
                    result.m_type.bits.has_exp = true;
                    // Exponent capitalization
                    result.m_type.bits.exp_cap = (*curptr == 'E');
                    // Exponent '+' sign
                    result.m_type.bits.exp_plus = (*(++curptr) == '+');
                    break;
                }
-                else
+
-                {
+                // Skip if definitely not an integer 
-                    // cast failed due to overflow - store as float
+                if (type != value_t::number_float) {
-                    result.m_type = value_t::number_float;
+
                    // Multiply last value by ten and add the new digit
                    auto temp = value * 10 + *curptr - 0x30;
                    // Test for overflow
                    if (temp < value || temp > max)
                    {
                        // Overflow
                        type = value_t::number_float;
                    }
                    else
                    {
                        // No overflow - save it
                        value = temp;
                    }
                }
                precision++;
            }
            // If no radix point was found then precision would now be set to 
            // the number of digits, which is wrong - clear it
            result.m_type.bits.precision = precision & found_radix_point;
            // Save the value (if not a float)
            if (type == value_t::number_unsigned)
            {
                result.m_value.number_unsigned = value;
            }
            else if (type == value_t::number_integer)
            {
                result.m_value.number_integer = -static_cast<number_integer_t>(value);
            }
            else
            {
-                // Negative, parse with strtoll and attempt cast to
+                // Parse with strtod
-                // number_integer_t
+                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
                if (attempt_cast(std::strtoll(reinterpret_cast<typename string_t::const_pointer>(m_start), &endptr,
                                              10), result.m_value.number_unsigned))
                {
                    result.m_type = value_t::number_integer;
                }
                else
                {
                    // cast failed due to overflow - store as float
                    result.m_type = value_t::number_float;
                }
            }
-            // check the end of the number was reached and no range error
+            // Save the type
-            // occurred
+            result.m_type = type;
            if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor || errno == ERANGE)
            {
                result.m_type = value_t::number_float;
            }
            if (result.m_type  == value_t::number_float)
            {
                // either the number won't fit in an integer (range error from
                // strtoull/strtoll or overflow on cast) or there was something
                // else after the number, which could be an exponent
                // parse with strtod
                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), &endptr);
                // anything after the number is an error
                if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor)
                {
                    throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number");
                }
            }
        }
      private:
--- a/test/json_roundtrip/roundtrip28.json
+++ b/test/json_roundtrip/roundtrip28.json
@ -0,0 +1 @@
 [4.940656458412e-324]
--- a/test/json_roundtrip/roundtrip29.json
+++ b/test/json_roundtrip/roundtrip29.json
@ -0,0 +1 @@
 [2.2250738585072e-308]
--- a/test/json_roundtrip/roundtrip30.json
+++ b/test/json_roundtrip/roundtrip30.json
@ -0,0 +1 @@
 [1.2345E-30]
--- a/test/json_roundtrip/roundtrip31.json
+++ b/test/json_roundtrip/roundtrip31.json
@ -0,0 +1 @@
 [1.2345E+30]
--- a/test/json_roundtrip/roundtrip32.json
+++ b/test/json_roundtrip/roundtrip32.json
@ -0,0 +1 @@
 [1.2345e+30]
--- a/test/unit.cpp
+++ b/test/unit.cpp
@ -9719,7 +9719,8 @@ TEST_CASE("parser class")
                CHECK_THROWS_AS(json::parser("-0e-:").parse(), std::invalid_argument);
                CHECK_THROWS_AS(json::parser("-0f").parse(), std::invalid_argument);
-                CHECK_THROWS_WITH(json::parser("01").parse(), "parse error - 0 is not a number");
+                CHECK_THROWS_WITH(json::parser("01").parse(),
                                  "parse error - unexpected number literal; expected end of input");
                CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
                CHECK_THROWS_WITH(json::parser("1.").parse(),
                                  "parse error - unexpected '.'; expected end of input");
@ -11766,10 +11767,15 @@ TEST_CASE("compliance tests from nativejson-benchmark")
                    "test/json_roundtrip/roundtrip21.json",
                    "test/json_roundtrip/roundtrip22.json",
                    "test/json_roundtrip/roundtrip23.json",
-                    //"test/json_roundtrip/roundtrip24.json",
+                    "test/json_roundtrip/roundtrip24.json",
-                    //"test/json_roundtrip/roundtrip25.json",
+                    "test/json_roundtrip/roundtrip25.json",
-                    //"test/json_roundtrip/roundtrip26.json",
+                    "test/json_roundtrip/roundtrip26.json",
-                    //"test/json_roundtrip/roundtrip27.json"
+                    "test/json_roundtrip/roundtrip27.json",
                    "test/json_roundtrip/roundtrip28.json",
                    "test/json_roundtrip/roundtrip29.json",
                    "test/json_roundtrip/roundtrip30.json",
                    "test/json_roundtrip/roundtrip31.json",
                    "test/json_roundtrip/roundtrip32.json"
                })
        {
            CAPTURE(filename);
@ -12097,17 +12103,17 @@ TEST_CASE("regression tests")
        // unsigned integer parsing - expected to overflow and be stored as a float
        j = custom_json::parse("4294967296"); // 2^32
        CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_float));
-        CHECK(j.get<float>() == 4294967296.0);
+        CHECK(j.get<float>() == 4294967296.0f);
        // integer object creation - expected to wrap and still be stored as an integer
        j = -2147483649LL; // -2^31-1
        CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_integer));
-        CHECK(j.get<int32_t>() == 2147483647.0);  // Wrap
+        CHECK(j.get<int32_t>() == 2147483647);  // Wrap
-        // integer parsing - expected to overflow and be stored as a float
+        // integer parsing - expected to overflow and be stored as a float with rounding
-        j = custom_json::parse("-2147483648"); // -2^31
+        j = custom_json::parse("-2147483649"); // -2^31-1
        CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_float));
-        CHECK(j.get<float>() == -2147483648.0);
+        CHECK(j.get<float>() == -2147483650.0f);
    }
    SECTION("issue #93 reverse_iterator operator inheritance problem")