Merge e9517958a3 into ff6f723195

2016-01-30 07:58:29 +00:00 · 2016-01-30 07:58:29 +00:00 · 9b4eb1447b
commit 9b4eb1447b
parent ff6f723195 e9517958a3
8 changed files with 917 additions and 1073 deletions
--- a/src/json.hpp
+++ b/src/json.hpp
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@ -715,6 +715,73 @@ class basic_json


  private:
+
+    /*!
+    @brief a type to hold JSON type information
+
+    This bitfield type holds information about JSON types. It is internally
+    used to hold the basic JSON type enumeration, as well as additional
+    information in the case of values that have been parsed from a string
+    including whether or not it was created directly or parsed, and
+    in the case of floating point numbers the number of significant
+    figures in the original representation and if it was in exponential
+    form, if a '+' was included in the exponent and the capitalization of
+    the exponent marker.  The sole purpose of this information is to permit
+    accurate round trips.
+
+    @since version 2.0.0
+    */
+    union type_data_t {
+        struct {
+            uint16_t type : 4;       // value_t enumerator
+            uint16_t parsed : 1;     // set when the value was parsed from a string
+            uint16_t has_exp : 1;    // parsed float was written in exponential form
+            uint16_t exp_plus : 1;   // exponent carried an explicit '+' sign
+            uint16_t exp_cap : 1;    // exponent marker was 'E' rather than 'e'
+            uint16_t precision : 8;  // digits after the radix point of a parsed float
+        } bits;
+        uint16_t data;               // all fields viewed as one word (bulk clear/copy)
+
+        // Return the type (all other fields are ignored)
+        value_t get() const
+        {
+            return static_cast<value_t>(bits.type);
+        }
+
+        // Test type for equality (ignore other fields)
+        bool operator==(const value_t& rhs) const
+        {
+            return get() == rhs;
+        }
+
+        // Assign a new type; the round-trip formatting fields are left untouched
+        type_data_t & operator=(value_t rhs)
+        {
+            bits.type = static_cast<uint16_t>(rhs);
+            return *this;
+        }
+
+        // Construct from value_t with every other field cleared
+        type_data_t(value_t t) noexcept
+        {
+            data = 0;
+            bits.type = static_cast<uint16_t>(t);
+        }
+
+        // Default constructor: value_t::null with every other field cleared
+        type_data_t() noexcept
+        {
+            data = 0;
+            bits.type = static_cast<uint16_t>(value_t::null);
+        }
+
+        // Copy constructor: copies all fields at once through the raw word
+        type_data_t(const type_data_t& t) noexcept
+        {
+            data = t.data;
+        }
+    };
+
    /// helper for exception-safe object creation
    template<typename T, typename... Args>
    static T* create(Args&& ... args)
@ -1752,7 +1819,7 @@ class basic_json
        }

        // check if iterator range is complete for primitive values
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::boolean:
            case value_t::number_float:
@ -1773,7 +1840,7 @@ class basic_json
            }
        }

-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::number_integer:
            {
@ -1857,7 +1924,7 @@ class basic_json
    basic_json(const basic_json& other)
        : m_type(other.m_type)
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::object:
            {
@ -1987,7 +2054,7 @@ class basic_json
    */
    ~basic_json()
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::object:
            {
@ -2087,7 +2154,7 @@ class basic_json
    */
    value_t type() const noexcept
    {
-        return m_type;
+        // m_type is a type_data_t; get() yields only its value_t field
+        return m_type.get();
    }

    /*!
@ -2370,7 +2437,7 @@ class basic_json
    */
    operator value_t() const noexcept
    {
-        return m_type;
+        // m_type is a type_data_t; get() yields only its value_t field
+        return m_type.get();
    }

    /// @}
@ -2525,7 +2592,7 @@ class basic_json
                 , int>::type = 0>
    T get_impl(T*) const
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::number_integer:
            {
@ -3657,7 +3724,7 @@ class basic_json

        InteratorType result = end();

-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::boolean:
            case value_t::number_float:
@ -3676,7 +3743,7 @@ class basic_json
                    m_value.string = nullptr;
                }

-                m_type = value_t::null;
+                m_type = type_data_t(value_t::null);
                break;
            }

@ -3763,7 +3830,7 @@ class basic_json

        InteratorType result = end();

-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::boolean:
            case value_t::number_float:
@ -3782,7 +3849,7 @@ class basic_json
                    m_value.string = nullptr;
                }

-                m_type = value_t::null;
+                m_type = type_data_t(value_t::null);
                break;
            }

@ -4271,7 +4338,7 @@ class basic_json
    */
    bool empty() const noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::null:
            {
@ -4329,7 +4396,7 @@ class basic_json
    */
    size_type size() const noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::null:
            {
@ -4391,7 +4458,7 @@ class basic_json
    */
    size_type max_size() const noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::array:
            {
@ -4450,7 +4517,7 @@ class basic_json
    */
    void clear() noexcept
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::number_integer:
            {
@ -4543,7 +4610,7 @@ class basic_json
        assert(m_value.array != nullptr);
        m_value.array->push_back(std::move(val));
        // invalidate object
-        val.m_type = value_t::null;
+        val.m_type = type_data_t(value_t::null);
    }

    /*!
@ -5568,7 +5635,7 @@ class basic_json
    /// return the type as string
    string_t type_name() const
    {
-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::null:
                return "null";
@ -5774,7 +5841,7 @@ class basic_json
        // variable to hold indentation for recursive calls
        unsigned int new_indent = current_indent;

-        switch (m_type)
+        switch (m_type.get())
        {
            case value_t::object:
            {
@ -5885,23 +5952,60 @@ class basic_json

            case value_t::number_float:
            {
-                // If the number is an integer then output as a fixed with with
-                // precision 1 to output "0.0", "1.0" etc as expected for some
-                // round trip tests otherwise  15 digits of precision allows
-                // round-trip IEEE 754 string->double->string; to be safe, we
-                // read this value from
-                // std::numeric_limits<number_float_t>::digits10
-                if (std::fmod(m_value.number_float, 1) == 0)
+                // Buffer size: precision (2^8-1 = 255) + other ('-.e-xxx' = 7) + null (1)
+                char buf[263];
+                int len;
+
+                if (m_type.bits.parsed)
                {
-                    o << std::fixed << std::setprecision(1);
+                    // Number was parsed from a string so use the same precision
+                    if (m_type.bits.has_exp)
+                    {
+                        // Exponent - output in exponential form - handle capitalization of e/E
+                        if (m_type.bits.exp_cap)
+                        {
+                            len = snprintf(buf, sizeof(buf), "%.*E", m_type.bits.precision, m_value.number_float) + 1;
+                        }
+                        else
+                        {
+                            len = snprintf(buf, sizeof(buf), "%.*e", m_type.bits.precision, m_value.number_float) + 1;
+                        }
+
+                        // Remove '+' sign from the exponent if necessary
+                        if (!m_type.bits.exp_plus)
+                        {
+                            if (len > static_cast<int>(sizeof(buf))) len = sizeof(buf);
+                            for (int i = 0; i < len; i++)
+                            {
+                                if (buf[i] == '+')
+                                {
+                                    for (; i + 1 < len; i++) buf[i] = buf[i + 1];
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        // No exponent - output as a decimal
+                        snprintf(buf, sizeof(buf), "%.*f", m_type.bits.precision, m_value.number_float);
+                    }
+                }
+                else if (m_value.number_float == 0)
+                {
+                    // Special case for zero to get "0.0"/"-0.0"
+                    if (std::signbit(m_value.number_float)) o << "-0.0";
+                    else o << "0.0";
+                    return;
                }
                else
                {
-                    // std::defaultfloat not supported in gcc version < 5
-                    o.unsetf(std::ios_base::floatfield);
-                    o << std::setprecision(std::numeric_limits<double>::digits10);
+                    // Otherwise 6, 15 or 16 digits of precision allows round-trip IEEE 754
+                    // string->float->string, string->double->string or string->long double->string.
+                    // NOTE(review): the call below uses numeric_limits<double>, not number_float_t - confirm
+                    snprintf(buf, sizeof(buf), "%.*g", std::numeric_limits<double>::digits10, m_value.number_float);
                }
-                o << m_value.number_float;
+
+                o << buf;
                return;
            }

@ -5925,7 +6029,7 @@ class basic_json
    //////////////////////

    /// the type of the current element
-    value_t m_type = value_t::null;
+    type_data_t m_type = type_data_t(value_t::null);

    /// the value of the current element
    json_value m_value = {};
@ -6149,7 +6253,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6176,7 +6280,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6222,7 +6326,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6258,7 +6362,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6288,7 +6392,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6328,7 +6432,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6371,7 +6475,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6408,7 +6512,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6443,7 +6547,7 @@ class basic_json

            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6479,7 +6583,7 @@ class basic_json

            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6521,7 +6625,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6571,7 +6675,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -6595,7 +6699,7 @@ class basic_json
        {
            assert(m_object != nullptr);

-            switch (m_object->m_type)
+            switch (m_object->m_type.get())
            {
                case basic_json::value_t::object:
                {
@ -7393,124 +7497,138 @@ class basic_json
            return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
        }

-        /*!
-        @brief static_cast between two types and indicate if it results in error
-
-        This function performs a static_cast between @a source and @a dest.  It
-        then checks if a static_cast back to @a dest produces an error.
-
-        @param[in] source  the value to cast from
-
-        @param[out] dest  the value to cast to
-
-        @return @a true if the cast was performed without error, @a false otherwise
-        */
-        template <typename T_A, typename T_B>
-        bool attempt_cast(T_A source, T_B& dest) const
-        {
-            dest = static_cast<T_B>(source);
-            return (source == static_cast<T_A>(dest));
-        }
-
        /*!
        @brief return number value for number tokens

-        This function translates the last token into the most appropriate
-        number type (either integer, unsigned integer or floating point), which
-        is passed back to the caller via the result parameter. The pointer @a
-        m_start points to the beginning of the parsed number. We first examine
-        the first character to determine the sign of the number and then pass
-        this pointer to either @a std::strtoull (if positive) or @a
-        std::strtoll (if negative), both of which set @a endptr to the first
-        character past the converted number. If this pointer is not the same as
-        @a m_cursor, then either more or less characters have been used during
-        the comparison.
+        This function translates the last token into the most appropriate number
+        type (either integer, unsigned integer or floating point), which is
+        passed back to the caller via the result parameter.

-        This can happen for inputs like "01" which will be treated like number
-        0 followed by number 1. This will also occur for valid floating point
-        inputs like "12e3" will be incorrectly read as 12. Numbers that are too
-        large or too small for a signed/unsigned long long will cause a range
-        error (@a errno set to ERANGE). The parsed number is cast to a @ref
-        number_integer_t/@ref number_unsigned_t using the helper function @ref
-        attempt_cast, which returns @a false if the cast could not be peformed
-        without error.
+        This function parses the integer component up to the radix point or exponent
+        while collecting information about the 'floating point representation', which
+        it stores in the result parameter.  If there is no radix point or exponent,
+        and the number can fit into a @ref number_integer_t or @ref number_unsigned_t
+        then it sets the result parameter accordingly.

-        In any of these cases (more/less characters read, range error or a cast
-        error) the pointer is passed to @a std:strtod, which also sets @a
-        endptr to the first character past the converted number. The resulting
-        @ref number_float_t is then cast to a @ref number_integer_t/@ref
-        number_unsigned_t using @ref attempt_cast and if no error occurs is
-        stored in that form, otherwise it is stored as a @ref number_float_t.
+        The 'floating point representation' includes the number of significant figures
+        after the radix point, whether the number is in exponential or decimal form,
+        the capitalization of the exponent marker, and if the optional '+' is present
+        in the exponent. This information is necessary to perform accurate round trips
+        of floating point numbers.

-        A final comparison is made of @a endptr and if still not the same as
-        @ref m_cursor a bad input is assumed and @a result parameter is set to
-        NAN.
+        If the number is a floating point number, it is then parsed using
+        @a std::strtod (or @a std::strtof or @a std::strtold).

-        @param[out] result @ref basic_json object to receive the number, or NAN
-        if the conversion read past the current token. The latter case needs to
-        be treated by the caller function.
+        @param[out] result  @ref basic_json object to receive the number, or NAN if the
+        conversion read past the current token. The latter case needs to be
+        treated by the caller function.
        */
        void get_number(basic_json& result) const
        {
-            typename string_t::value_type* endptr;
            assert(m_start != nullptr);
-            errno = 0;

-            // attempt to parse it as an integer - first checking for a
-            // negative number
-            if (*reinterpret_cast<typename string_t::const_pointer>(m_start) != '-')
+            const lexer::lexer_char_t *curptr = m_start;
+            result.m_type.bits.parsed = true;
+
+            // 'found_radix_point' will be set to 0xFF upon finding a radix point
+            // and later used to mask in/out the precision depending whether a
+            // radix is found i.e. 'precision &= found_radix_point'
+            uint8_t found_radix_point = 0;
+            uint8_t precision = 0;
+
+            // Accumulate the integer conversion result (unsigned for now)
+            number_unsigned_t value = 0;
+
+            // Maximum absolute value of the relevant integer type
+            number_unsigned_t max;
+
+            // Temporarily store the type to avoid unnecessary bitfield access
+            value_t type;
+
+            // Look for sign
+            if (*curptr == '-') {
+                type = value_t::number_integer;
+                max = static_cast<uint64_t>(std::numeric_limits<number_integer_t>::max()) + 1;
+                curptr++;
+            }
+            else {
+                type = value_t::number_unsigned;
+                max = static_cast<uint64_t>(std::numeric_limits<number_unsigned_t>::max());
+                if (*curptr == '+') curptr++;
+            }
+
+            // Count the significant figures
+            for (; curptr < m_cursor; curptr++)
            {
-                // positive, parse with strtoull and attempt cast to
-                // number_unsigned_t
-                if (attempt_cast(std::strtoull(reinterpret_cast<typename string_t::const_pointer>(m_start), &endptr,
-                                               10), result.m_value.number_unsigned))
+
+                // Quickly skip tests if a digit
+                if (*curptr < '0' || *curptr > '9')
                {
-                    result.m_type = value_t::number_unsigned;
+                    if (*curptr == '.')
+                    {
+                        // Don't count '.' but change to float
+                        type = value_t::number_float;
+
+                        // Reset precision count
+                        precision = 0;
+                        found_radix_point = 0xFF;
+                        continue;
+                    }
+                    // Assume exponent (if not then will fail parse): change to
+                    // float, stop counting and record exponent details
+                    type = value_t::number_float;
+                    result.m_type.bits.has_exp = true;
+
+                    // Exponent capitalization
+                    result.m_type.bits.exp_cap = (*curptr == 'E');
+
+                    // Exponent '+' sign
+                    result.m_type.bits.exp_plus = (*(++curptr) == '+');
+                    break;
                }
-                else
-                {
-                    // cast failed due to overflow - store as float
-                    result.m_type = value_t::number_float;
+
+                // Skip if definitely not an integer
+                if (type != value_t::number_float) {
+
+                    // Numeric value of the current decimal digit
+                    const number_unsigned_t digit = static_cast<number_unsigned_t>(*curptr - '0');
+
+                    // Test for overflow BEFORE multiplying: 'value * 10 + digit'
+                    // can wrap around to a result that is still larger than
+                    // 'value' (e.g. 2^62 * 10 == 2^63 mod 2^64), so comparing
+                    // the wrapped result afterwards misses some overflows
+                    if (digit > max || value > (max - digit) / 10)
+                    {
+                        // Overflow
+                        type = value_t::number_float;
+                    }
+                    else
+                    {
+                        // No overflow - save it
+                        value = static_cast<number_unsigned_t>(value * 10 + digit);
+                    }
                }
+
+                // Saturate instead of wrapping: the precision bitfield is 8 bits wide
+                if (precision < 0xFF) precision++;
+            }
+
+            // If no radix point was found then precision would now be set to
+            // the number of digits, which is wrong - clear it
+            result.m_type.bits.precision = precision & found_radix_point;
+
+            // Save the value (if not a float)
+            if (type == value_t::number_unsigned)
+            {
+                result.m_value.number_unsigned = value;
+            }
+            else if (type == value_t::number_integer)
+            {
+                // Negate in unsigned arithmetic: 'value' may equal max + 1 (the
+                // magnitude of the most negative integer), which cannot be
+                // negated as a signed value without overflow
+                result.m_value.number_integer = static_cast<number_integer_t>(static_cast<number_unsigned_t>(0) - value);
            }
            else
            {
-                // Negative, parse with strtoll and attempt cast to
-                // number_integer_t
-                if (attempt_cast(std::strtoll(reinterpret_cast<typename string_t::const_pointer>(m_start), &endptr,
-                                              10), result.m_value.number_unsigned))
-                {
-                    result.m_type = value_t::number_integer;
-                }
-                else
-                {
-                    // cast failed due to overflow - store as float
-                    result.m_type = value_t::number_float;
-                }
+                // Parse with strtod
+                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), nullptr);
            }

-            // check the end of the number was reached and no range error
-            // occurred
-            if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor || errno == ERANGE)
-            {
-                result.m_type = value_t::number_float;
-            }
-
-            if (result.m_type  == value_t::number_float)
-            {
-                // either the number won't fit in an integer (range error from
-                // strtoull/strtoll or overflow on cast) or there was something
-                // else after the number, which could be an exponent
-
-                // parse with strtod
-                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), &endptr);
-
-                // anything after the number is an error
-                if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor)
-                {
-                    throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number");
-                }
-            }
+            // Save the type (only the type field is written; the parsed/exponent/
+            // precision fields recorded above are kept)
+            result.m_type = type;
        }

      private:
--- a/test/json_roundtrip/roundtrip28.json
+++ b/test/json_roundtrip/roundtrip28.json
@ -0,0 +1 @@
+[4.940656458412e-324]
--- a/test/json_roundtrip/roundtrip29.json
+++ b/test/json_roundtrip/roundtrip29.json
@ -0,0 +1 @@
+[2.2250738585072e-308]
--- a/test/json_roundtrip/roundtrip30.json
+++ b/test/json_roundtrip/roundtrip30.json
@ -0,0 +1 @@
+[1.2345E-30]
--- a/test/json_roundtrip/roundtrip31.json
+++ b/test/json_roundtrip/roundtrip31.json
@ -0,0 +1 @@
+[1.2345E+30]
--- a/test/json_roundtrip/roundtrip32.json
+++ b/test/json_roundtrip/roundtrip32.json
@ -0,0 +1 @@
+[1.2345e+30]
--- a/test/unit.cpp
+++ b/test/unit.cpp
@ -9719,7 +9719,8 @@ TEST_CASE("parser class")
                CHECK_THROWS_AS(json::parser("-0e-:").parse(), std::invalid_argument);
                CHECK_THROWS_AS(json::parser("-0f").parse(), std::invalid_argument);

-                CHECK_THROWS_WITH(json::parser("01").parse(), "parse error - 0 is not a number");
+                CHECK_THROWS_WITH(json::parser("01").parse(),
+                                  "parse error - unexpected number literal; expected end of input");
                CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
                CHECK_THROWS_WITH(json::parser("1.").parse(),
                                  "parse error - unexpected '.'; expected end of input");
@ -11766,10 +11767,15 @@ TEST_CASE("compliance tests from nativejson-benchmark")
                    "test/json_roundtrip/roundtrip21.json",
                    "test/json_roundtrip/roundtrip22.json",
                    "test/json_roundtrip/roundtrip23.json",
-                    //"test/json_roundtrip/roundtrip24.json",
-                    //"test/json_roundtrip/roundtrip25.json",
-                    //"test/json_roundtrip/roundtrip26.json",
-                    //"test/json_roundtrip/roundtrip27.json"
+                    "test/json_roundtrip/roundtrip24.json",
+                    "test/json_roundtrip/roundtrip25.json",
+                    "test/json_roundtrip/roundtrip26.json",
+                    "test/json_roundtrip/roundtrip27.json",
+                    "test/json_roundtrip/roundtrip28.json",
+                    "test/json_roundtrip/roundtrip29.json",
+                    "test/json_roundtrip/roundtrip30.json",
+                    "test/json_roundtrip/roundtrip31.json",
+                    "test/json_roundtrip/roundtrip32.json"
                })
        {
            CAPTURE(filename);
@ -12097,17 +12103,17 @@ TEST_CASE("regression tests")
        // unsigned integer parsing - expected to overflow and be stored as a float
        j = custom_json::parse("4294967296"); // 2^32
        CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_float));
-        CHECK(j.get<float>() == 4294967296.0);
+        CHECK(j.get<float>() == 4294967296.0f);

        // integer object creation - expected to wrap and still be stored as an integer
        j = -2147483649LL; // -2^31-1
        CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_integer));
-        CHECK(j.get<int32_t>() == 2147483647.0);  // Wrap
+        CHECK(j.get<int32_t>() == 2147483647);  // Wrap

-        // integer parsing - expected to overflow and be stored as a float
-        j = custom_json::parse("-2147483648"); // -2^31
+        // integer parsing - expected to overflow and be stored as a float with rounding
+        j = custom_json::parse("-2147483649"); // -2^31
        CHECK(static_cast<int>(j.type()) == static_cast<int>(custom_json::value_t::number_float));
-        CHECK(j.get<float>() == -2147483648.0);
+        CHECK(j.get<float>() == -2147483650.0f);
    }

    SECTION("issue #93 reverse_iterator operator inheritance problem")