Merge d46cf99a85 into 4639bb2c8f

2018-05-18 06:19:31 +00:00 · 2018-05-18 06:19:31 +00:00 · c0ff2ad581
commit c0ff2ad581
parent 4639bb2c8f d46cf99a85
4 changed files with 656 additions and 105 deletions
--- a/include/nlohmann/detail/input/input_adapters.hpp
+++ b/include/nlohmann/detail/input/input_adapters.hpp
@ -48,77 +48,214 @@ struct input_adapter_protocol
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 /*!
-Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
+A helper function to skip the UTF-8 byte order mark.
-beginning of input. Does not support changing the underlying std::streambuf
+
-in mid-input. Maintains underlying std::istream and std::streambuf to support
+If a complete BOM has been skipped, or if an incomplete BOM has been detected
-subsequent use of standard std::istream operations to process any input
+and the stream has been successfully rewound to the start of the BOM, returns
-characters following those used in parsing the JSON input.  Clears the
+goodbit.
-std::istream flags; any input errors (e.g., EOF) will be detected by the first
+If an internal operation fails, returns badbit, and the streambuf should no
-subsequent call for input from the std::istream.
+longer be used.
 Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears
 the eofbit. However, some implementations keep the eofbit if is.unget() fails,
 others do not.
 Note: The streambuf must be non-null.
 */
 inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
 {
    using traits_type = std::char_traits<char>;
    assert(sb != nullptr);

    // The three bytes of the UTF-8 byte order mark, expressed as the values
    // sgetc() yields for them.
    const traits_type::int_type bom[] = {0xEF, 0xBB, 0xBF};
    const int bom_size = 3;

    // Consume BOM bytes for as long as the upcoming character matches the
    // next expected byte. Each byte is peeked first and only then consumed.
    int consumed = 0;
    while (consumed < bom_size && sb->sgetc() == bom[consumed])
    {
        sb->sbumpc();
        ++consumed;
    }

    // A complete BOM has been skipped; nothing has to be put back.
    if (consumed == bom_size)
    {
        return std::ios_base::goodbit;
    }

    // No BOM or only a prefix of it: step back over every byte consumed so
    // far, one at a time. A failing sungetc() means the read position could
    // not be restored, which is reported as badbit.
    for (; consumed > 0; --consumed)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }
    return std::ios_base::goodbit;
 }
 /*!
 Input adapter for a (caching) istream.
 Ignores a UTF Byte Order Mark at beginning of input.
 Does not support changing the underlying std::streambuf in mid-input.
 */
 #if 0
 class input_stream_adapter : public input_adapter_protocol
 {
  public:
-    ~input_stream_adapter() override
+    using traits_type = std::char_traits<char>;
    {
        // clear stream flags; we use underlying streambuf I/O, do not
        // maintain ifstream flags
        is.clear();
    }
    explicit input_stream_adapter(std::istream& i)
-        : is(i), sb(*i.rdbuf())
+        : is(i)
    {
-        // skip byte order mark
+        // Skip byte order mark
-        std::char_traits<char>::int_type c;
+        if (is.peek() == 0xEF)
        if ((c = get_character()) == 0xEF)
        {
-            if ((c = get_character()) == 0xBB)
+            is.ignore();
            if (is.peek() == 0xBB)
            {
-                if ((c = get_character()) == 0xBF)
+                is.ignore();
                if (is.peek() == 0xBF)
                {
-                    return; // Ignore BOM
+                    is.ignore();
                    return; // Found a complete BOM.
                }
-                else if (c != std::char_traits<char>::eof())
+
                {
                    is.unget();
                }
                is.putback('\xBB');
            }
            else if (c != std::char_traits<char>::eof())
            {
                is.unget();
            }
-            is.putback('\xEF');
+
-        }
+            is.unget();
        else if (c != std::char_traits<char>::eof())
        {
            is.unget(); // no byte order mark; process as usual
        }
    }
    // delete because of pointer members
    input_stream_adapter(const input_stream_adapter&) = delete;
-    input_stream_adapter& operator=(input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
-    // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
+    traits_type::int_type get_character() override
    // ensure that std::char_traits<char>::eof() and the character 0xFF do not
    // end up as the same value, eg. 0xFFFFFFFF.
    std::char_traits<char>::int_type get_character() override
    {
-        return sb.sbumpc();
+        // Only try to get a character if the stream is good!
        if (is.good())
        {
            const auto ch = is.peek();
            // If peek() returns EOF, the following call to ignore() will set
            // the failbit, but we do not want to set the failbit here.
            if (ch != traits_type::eof())
            {
                is.ignore();
                return ch;
            }
        }
        return traits_type::eof();
    }
    void unget_character() override
    {
-        sb.sungetc();  // is.unget() avoided for performance
+        is.unget();
    }
  private:
    /// the associated input stream
    std::istream& is;
    std::streambuf& sb;
 };
 #else
 class input_stream_adapter : public input_adapter_protocol
 {
    //
    // Input adapter that reads characters straight from the std::streambuf
    // of a std::istream and records the outcome in the stream's state flags.
    //
    // NOTE on exceptions:
    //
    // The behavior differs slightly from a reference implementation written
    // purely against the std::istream interface.
    //
    //      From N4659:
    //      30.7.4.3 Unformatted input functions
    //
    //      [...]
    //      If an exception is thrown during input then `ios::badbit` is turned
    //      on[310] in `*this`'s error state. (Exceptions thrown from
    //      `basic_ios<>::clear()` are not caught or rethrown.)
    //      If `(exceptions() & badbit) != 0` then the exception is rethrown.
    //
    //      [310] This is done without causing an `ios::failure` to be thrown.
    //
    // There is no (portable) way to turn on the `badbit` in `is` without
    // throwing an exception. Exceptions raised by streambuf operations are
    // therefore neither caught nor (possibly) rethrown here. The observable
    // difference when an internal operation throws during input:
    //
    // - badbit is turned ON in `is.exceptions()`:
    //      The badbit will **not** be set in `is`'s error state.
    //
    // - badbit is turned OFF in `is.exceptions()`:
    //      The badbit will **not** be set in `is`'s error state and the
    //      exception is **not** swallowed.
    //
  public:
    using traits_type = std::char_traits<char>;

    /// Prepares `i` for unformatted input (sentry, whitespace is not skipped)
    /// and skips a leading UTF-8 byte order mark.
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        // Assume the stream is "not ok" (failbit) unless the sentry says
        // otherwise; in that case the result of skipping the BOM (goodbit or
        // badbit) becomes the state to report.
        std::ios_base::iostate state = std::ios_base::failbit;
        if (ok)
        {
            state = nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }

        // Publish the state. If the failbit or the badbit is part of `state`,
        // this call might throw an ios::failure.
        is.setstate(state);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    /// Returns the next character, or traits_type::eof() if the stream is
    /// not good or no more characters are available.
    traits_type::int_type get_character() override
    {
        // Never touch the streambuf of a stream that is not good.
        if (!is.good())
        {
            return traits_type::eof();
        }

        const auto ch = is.rdbuf()->sbumpc();
        if (ch == traits_type::eof())
        {
            // sbumpc failed: no more characters are available. Set eofbit.
            is.setstate(std::ios_base::eofbit);
        }
        return ch;
    }

    /// Steps back over the character most recently returned.
    void unget_character() override
    {
        // Callers invoke this only right after a successful get_character()
        // (i.e. one that did not return EOF). The stream is therefore good
        // and the sungetc call below is guaranteed to succeed.
        is.rdbuf()->sungetc();
    }

  private:
    /// the associated input stream
    std::istream& is;
    /// sentry constructed from the stream (noskipws); tested in the constructor
    std::istream::sentry const ok;
 };
 #endif
 /// input adapter for buffer input
 class input_buffer_adapter : public input_adapter_protocol
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@ -1622,77 +1622,214 @@ struct input_adapter_protocol
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 /*!
-Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
+A helper function to skip the UTF-8 byte order mark.
-beginning of input. Does not support changing the underlying std::streambuf
+
-in mid-input. Maintains underlying std::istream and std::streambuf to support
+If a complete BOM has been skipped, or if an incomplete BOM has been detected
-subsequent use of standard std::istream operations to process any input
+and the stream has been successfully rewound to the start of the BOM, returns
-characters following those used in parsing the JSON input.  Clears the
+goodbit.
-std::istream flags; any input errors (e.g., EOF) will be detected by the first
+If an internal operation fails, returns badbit, and the streambuf should no
-subsequent call for input from the std::istream.
+longer be used.
 Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears
 the eofbit. However, some implementations keep the eofbit if is.unget() fails,
 others do not.
 Note: The streambuf must be non-null.
 */
 inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
 {
    using traits_type = std::char_traits<char>;
    assert(sb != nullptr);

    // The three bytes of the UTF-8 byte order mark, expressed as the values
    // sgetc() yields for them.
    const traits_type::int_type bom[] = {0xEF, 0xBB, 0xBF};
    const int bom_size = 3;

    // Consume BOM bytes for as long as the upcoming character matches the
    // next expected byte. Each byte is peeked first and only then consumed.
    int consumed = 0;
    while (consumed < bom_size && sb->sgetc() == bom[consumed])
    {
        sb->sbumpc();
        ++consumed;
    }

    // A complete BOM has been skipped; nothing has to be put back.
    if (consumed == bom_size)
    {
        return std::ios_base::goodbit;
    }

    // No BOM or only a prefix of it: step back over every byte consumed so
    // far, one at a time. A failing sungetc() means the read position could
    // not be restored, which is reported as badbit.
    for (; consumed > 0; --consumed)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }
    return std::ios_base::goodbit;
 }
 /*!
 Input adapter for a (caching) istream.
 Ignores a UTF Byte Order Mark at beginning of input.
 Does not support changing the underlying std::streambuf in mid-input.
 */
 #if 0
 class input_stream_adapter : public input_adapter_protocol
 {
  public:
-    ~input_stream_adapter() override
+    using traits_type = std::char_traits<char>;
    {
        // clear stream flags; we use underlying streambuf I/O, do not
        // maintain ifstream flags
        is.clear();
    }
    explicit input_stream_adapter(std::istream& i)
-        : is(i), sb(*i.rdbuf())
+        : is(i)
    {
-        // skip byte order mark
+        // Skip byte order mark
-        std::char_traits<char>::int_type c;
+        if (is.peek() == 0xEF)
        if ((c = get_character()) == 0xEF)
        {
-            if ((c = get_character()) == 0xBB)
+            is.ignore();
            if (is.peek() == 0xBB)
            {
-                if ((c = get_character()) == 0xBF)
+                is.ignore();
                if (is.peek() == 0xBF)
                {
-                    return; // Ignore BOM
+                    is.ignore();
                    return; // Found a complete BOM.
                }
-                else if (c != std::char_traits<char>::eof())
+
                {
                    is.unget();
                }
                is.putback('\xBB');
            }
            else if (c != std::char_traits<char>::eof())
            {
                is.unget();
            }
-            is.putback('\xEF');
+
-        }
+            is.unget();
        else if (c != std::char_traits<char>::eof())
        {
            is.unget(); // no byte order mark; process as usual
        }
    }
    // delete because of pointer members
    input_stream_adapter(const input_stream_adapter&) = delete;
-    input_stream_adapter& operator=(input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
-    // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
+    traits_type::int_type get_character() override
    // ensure that std::char_traits<char>::eof() and the character 0xFF do not
    // end up as the same value, eg. 0xFFFFFFFF.
    std::char_traits<char>::int_type get_character() override
    {
-        return sb.sbumpc();
+        // Only try to get a character if the stream is good!
        if (is.good())
        {
            const auto ch = is.peek();
            // If peek() returns EOF, the following call to ignore() will set
            // the failbit, but we do not want to set the failbit here.
            if (ch != traits_type::eof())
            {
                is.ignore();
                return ch;
            }
        }
        return traits_type::eof();
    }
    void unget_character() override
    {
-        sb.sungetc();  // is.unget() avoided for performance
+        is.unget();
    }
  private:
    /// the associated input stream
    std::istream& is;
    std::streambuf& sb;
 };
 #else
 class input_stream_adapter : public input_adapter_protocol
 {
    //
    // Input adapter that reads characters straight from the std::streambuf
    // of a std::istream and records the outcome in the stream's state flags.
    //
    // NOTE on exceptions:
    //
    // The behavior differs slightly from a reference implementation written
    // purely against the std::istream interface.
    //
    //      From N4659:
    //      30.7.4.3 Unformatted input functions
    //
    //      [...]
    //      If an exception is thrown during input then `ios::badbit` is turned
    //      on[310] in `*this`'s error state. (Exceptions thrown from
    //      `basic_ios<>::clear()` are not caught or rethrown.)
    //      If `(exceptions() & badbit) != 0` then the exception is rethrown.
    //
    //      [310] This is done without causing an `ios::failure` to be thrown.
    //
    // There is no (portable) way to turn on the `badbit` in `is` without
    // throwing an exception. Exceptions raised by streambuf operations are
    // therefore neither caught nor (possibly) rethrown here. The observable
    // difference when an internal operation throws during input:
    //
    // - badbit is turned ON in `is.exceptions()`:
    //      The badbit will **not** be set in `is`'s error state.
    //
    // - badbit is turned OFF in `is.exceptions()`:
    //      The badbit will **not** be set in `is`'s error state and the
    //      exception is **not** swallowed.
    //
  public:
    using traits_type = std::char_traits<char>;

    /// Prepares `i` for unformatted input (sentry, whitespace is not skipped)
    /// and skips a leading UTF-8 byte order mark.
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        // Assume the stream is "not ok" (failbit) unless the sentry says
        // otherwise; in that case the result of skipping the BOM (goodbit or
        // badbit) becomes the state to report.
        std::ios_base::iostate state = std::ios_base::failbit;
        if (ok)
        {
            state = nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }

        // Publish the state. If the failbit or the badbit is part of `state`,
        // this call might throw an ios::failure.
        is.setstate(state);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    /// Returns the next character, or traits_type::eof() if the stream is
    /// not good or no more characters are available.
    traits_type::int_type get_character() override
    {
        // Never touch the streambuf of a stream that is not good.
        if (!is.good())
        {
            return traits_type::eof();
        }

        const auto ch = is.rdbuf()->sbumpc();
        if (ch == traits_type::eof())
        {
            // sbumpc failed: no more characters are available. Set eofbit.
            is.setstate(std::ios_base::eofbit);
        }
        return ch;
    }

    /// Steps back over the character most recently returned.
    void unget_character() override
    {
        // Callers invoke this only right after a successful get_character()
        // (i.e. one that did not return EOF). The stream is therefore good
        // and the sungetc call below is guaranteed to succeed.
        is.rdbuf()->sungetc();
    }

  private:
    /// the associated input stream
    std::istream& is;
    /// sentry constructed from the stream (noskipws); tested in the constructor
    std::istream::sentry const ok;
 };
 #endif
 /// input adapter for buffer input
 class input_buffer_adapter : public input_adapter_protocol
--- a/test/src/unit-deserialization.cpp
+++ b/test/src/unit-deserialization.cpp
@ -35,6 +35,57 @@ using nlohmann::json;
 #include <iostream>
 #include <valarray>
 // HACK to keep the tests running when exceptions are disabled on the command
 // line via the "-e/--nothrow" flag. In that case the expressions passed to
 // CHECK_THROWS and similar macros are never evaluated, so subsequent checks
 // that rely on the side effects of those expressions may or may not fail.
 //
 // IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr) performs CHECK(expr) only if the
 // expression handed to CHECK_THROWS is actually evaluated at runtime; it
 // detects this with a probe lambda that sets a flag and then throws.
 #define IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr)                                                  \
    {                                                                                           \
        bool _exceptions_enabled_ = false;                                                      \
        /* The next line sets the `_exceptions_enabled_` flag to true, iff the expression in */ \
        /* the CHECK_THROWS macro actually gets ever evaluated. It's not if the "-e" flag    */ \
        /* has been specified on the command line.                                           */ \
        CHECK_THROWS([&](){ _exceptions_enabled_ = true; throw std::runtime_error("ok"); }());  \
        if (_exceptions_enabled_)                                                               \
        {                                                                                       \
            CHECK(expr);                                                                        \
        }                                                                                       \
    }                                                                                           \
    /**/
 namespace
 {
    // Test helper: a read-only streambuf over the characters [str, str + len)
    // whose get area never spans more than one character. Because there is
    // never more than one character to step back over, multiple successive
    // calls to sungetc will fail.
    // Both sgetc and sbumpc refill the get area (through underflow) and so
    // count as a "read" operation; sbumpc is the equivalent of sgetc followed
    // by gbump(1).
    class unget_fails_stringbuf : public std::streambuf
    {
        // one past the final readable character
        const char* last;

      public:
        explicit unget_fails_stringbuf(char const* str, size_t len)
            : last(str + len)
        {
            // Begin with an empty get area positioned at the first character;
            // the first read will go through underflow().
            char* const begin = const_cast<char*>(str);
            setg(begin, begin, begin);
        }

      protected:
        // Re-bases the get area at the current read position: it exposes
        // exactly one character, or nothing at all once the end is reached
        // (in which case EOF is returned).
        traits_type::int_type underflow() override
        {
            char* const cur = gptr();
            const bool exhausted = (cur == last);

            setg(cur, cur, exhausted ? cur : cur + 1);
            return exhausted ? traits_type::eof() : traits_type::to_int_type(*cur);
        }
    };
 }
 TEST_CASE("deserialization")
 {
    SECTION("successful deserialization")
@ -45,6 +96,9 @@ TEST_CASE("deserialization")
            ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]";
            ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]";
            json j = json::parse(ss1);
            CHECK(!ss1.fail());
            CHECK(!ss1.bad());
            CHECK(ss1.eof()); // Strict parsing.
            CHECK(json::accept(ss2));
            CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
        }
@ -71,6 +125,12 @@ TEST_CASE("deserialization")
            ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
            json j;
            j << ss;
            CHECK(!ss.fail());
            CHECK(!ss.bad());
            // operator>> uses non-strict parsing.
            // We have read the closing ']' and we're done. The parser should
            // not have read the EOF marker.
            CHECK(!ss.eof());
            CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
        }
@ -91,6 +151,18 @@ TEST_CASE("deserialization")
    SECTION("unsuccessful deserialization")
    {
        SECTION("null streambuf")
        {
            std::streambuf* sb = nullptr;
            std::istream iss(sb);
            CHECK(iss.bad());
            CHECK_THROWS_WITH(json::parse(iss),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.fail()); // Tests the badbit too.
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.bad());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.eof());
        }
        SECTION("stream")
        {
            std::stringstream ss1, ss2, ss3, ss4;
@ -99,12 +171,15 @@ TEST_CASE("deserialization")
            ss3 << "[\"foo\",1,2,3,false,{\"one\":1}";
            ss4 << "[\"foo\",1,2,3,false,{\"one\":1}";
            CHECK_THROWS_AS(json::parse(ss1), json::parse_error&);
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
            CHECK_THROWS_WITH(json::parse(ss2),
                              "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
            CHECK(not json::accept(ss3));
            json j_error;
-            CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false));
+            CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false));
            CHECK(j_error.is_discarded());
        }
@ -128,6 +203,9 @@ TEST_CASE("deserialization")
            ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
            json j;
            CHECK_THROWS_AS(j << ss1, json::parse_error&);
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
            CHECK_THROWS_WITH(j << ss2,
                              "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
        }
@ -139,6 +217,9 @@ TEST_CASE("deserialization")
            ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
            json j;
            CHECK_THROWS_AS(ss1 >> j, json::parse_error&);
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
            CHECK_THROWS_WITH(ss2 >> j,
                              "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
        }
@ -448,7 +529,7 @@ TEST_CASE("deserialization")
    SECTION("ignoring byte-order marks")
    {
-        std::string bom = "\xEF\xBB\xBF";
+        const std::string bom = "\xEF\xBB\xBF";
        SECTION("BOM only")
        {
@ -456,7 +537,11 @@ TEST_CASE("deserialization")
            CHECK_THROWS_WITH(json::parse(bom),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&);
+            std::istringstream iss(bom);
            CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.fail());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.bad());
            IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.eof());
            CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
        }
@ -464,29 +549,132 @@ TEST_CASE("deserialization")
        SECTION("BOM and content")
        {
            CHECK(json::parse(bom + "1") == 1);
-            CHECK(json::parse(std::istringstream(bom + "1")) == 1);
+
            std::istringstream iss(bom + "1");
            CHECK(json::parse(iss) == 1);
            CHECK(!iss.bad());
            CHECK(!iss.fail());
            // Strict parsing: stream should be at EOF now.
            CHECK(iss.eof());
            iss.str(bom + "1");
            iss.clear();
            json j;
            CHECK_NOTHROW(iss >> j);
            CHECK(j == 1);
            CHECK(!iss.fail());
            CHECK(!iss.bad());
            // Non-strict parsing:
            // EOF bit is set only if we tried to read a character past the end of the file.
            // In this case: parsing the complete number requires reading past the end of the file.
            CHECK(iss.eof());
            iss.str(bom + "\"1\"");
            iss.clear();
            CHECK(json::parse(iss) == "1");
            CHECK(!iss.fail());
            CHECK(!iss.bad());
            CHECK(iss.eof()); // Strict...
            iss.str(bom + "\"1\"");
            iss.clear();
            CHECK_NOTHROW(iss >> j);
            CHECK(j == "1");
            CHECK(!iss.fail());
            CHECK(!iss.bad());
            CHECK(!iss.eof()); // Non-strict...
        }
        SECTION("2 byte of BOM")
        {
-            CHECK_THROWS_AS(json::parse(bom.substr(0, 2)), json::parse_error&);
+            const std::string bom2 = bom.substr(0, 2);
            CHECK_THROWS_WITH(json::parse(bom),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&);
+            CHECK_THROWS_AS(json::parse(bom2), json::parse_error&);
-            CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
+            CHECK_THROWS_WITH(json::parse(bom2),
-                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
            std::istringstream iss(bom2);
            CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
            CHECK(!iss.fail());
            CHECK(!iss.bad());
            CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
            CHECK(iss.good());
            CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
        }
        SECTION("2 byte of BOM - incomplete")
        {
            {
                unget_fails_stringbuf sb("\xEF\xBB ", 3);
                std::istream is(&sb);
                json j;
                CHECK_THROWS_WITH(is >> j,
                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
                // Do not check the eofbit.
                // Some implementations keep the eofbit if is.unget() fails, some do not.
            }
            {
                unget_fails_stringbuf sb("\xEF\xBB", 2);
                std::istream is(&sb);
                json j;
                CHECK_THROWS_WITH(is >> j,
                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
                // Do not check the eofbit.
                // Some implementations keep the eofbit if is.unget() fails, some do not.
            }
        }
        SECTION("1 byte of BOM")
        {
-            CHECK_THROWS_AS(json::parse(bom.substr(0, 1)), json::parse_error&);
+            const std::string bom1 = bom.substr(0, 1);
            CHECK_THROWS_WITH(json::parse(bom),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&);
+            CHECK_THROWS_AS(json::parse(bom1), json::parse_error&);
-            CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
+            CHECK_THROWS_WITH(json::parse(bom1),
-                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
            std::istringstream iss(bom1);
            CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
            CHECK(!iss.fail());
            CHECK(!iss.bad());
            CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
            CHECK(iss.good());
            CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)),
                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
        }
        SECTION("1 byte of BOM - incomplete")
        {
            {
                unget_fails_stringbuf sb("\xEF  ", 3);
                std::istream is(&sb);
                json j;
                CHECK_THROWS_WITH(is >> j,
                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
                // Do not check the eofbit.
                // Some implementations keep the eofbit if is.unget() fails, some do not.
            }
            {
                unget_fails_stringbuf sb("\xEF", 1);
                std::istream is(&sb);
                json j;
                CHECK_THROWS_WITH(is >> j,
                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
                // Do not check the eofbit.
                // Some implementations keep the eofbit if is.unget() fails, some do not.
            }
        }
        SECTION("variations")
@ -526,14 +714,80 @@ TEST_CASE("deserialization")
            }
        }
-        SECTION("preserve state after parsing")
+        SECTION("preserve state after parsing - strings")
        {
            std::istringstream s(bom + "\"123\" \"456\"");
            json j;
            s >> j;
            CHECK(j == "123");
            CHECK(s.good());
            s >> j;
            CHECK(j == "456");
            CHECK(s.good());
            s.peek();
            CHECK(s.eof());
        }
        SECTION("preserve state after parsing - numbers (ref)")
        {
            std::istringstream s("123 456");
            int j;
            s >> j;
            CHECK(j == 123);
            CHECK(s.good());
            s >> j;
            CHECK(j == 456);
            CHECK(!s.good());
            CHECK(!s.fail());
            CHECK(!s.bad());
            // The stream now has the eofbit set (since to determine whether the number has completely
            // parsed, the lexer needs to read past the end of the file).
            CHECK(s.eof());
        }
        SECTION("preserve state after parsing - numbers")
        {
            std::istringstream s(bom + "123 456");
            json j;
-            j << s;
+            s >> j;
            CHECK(j == 123);
-            j << s;
+            CHECK(s.good());
            s >> j;
            CHECK(j == 456);
            CHECK(!s.good());
            CHECK(!s.fail());
            CHECK(!s.bad());
            // The stream now has the eofbit set (since to determine whether the number has completely
            // parsed, the lexer needs to read past the end of the file).
            CHECK(s.eof());
        }
        SECTION("preserve state after parsing - numbers (trailing space) (ref)")
        {
            std::istringstream s("123 456 ");
            int j;
            s >> j;
            CHECK(j == 123);
            CHECK(s.good());
            s >> j;
            CHECK(j == 456);
            // The trailing space at the end is the end of the number.
            // The stream should not have the eofbit set.
            CHECK(s.good());
            CHECK(s.peek() == static_cast<unsigned char>(' '));
        }
        SECTION("preserve state after parsing - numbers (trailing space)")
        {
            std::istringstream s(bom + "123 456 ");
            json j;
            s >> j;
            CHECK(j == 123);
            CHECK(s.good());
            s >> j;
            CHECK(j == 456);
            // The trailing space at the end is the end of the number.
            // The stream should not have the eofbit set.
            CHECK(s.good());
            CHECK(s.peek() == static_cast<unsigned char>(' '));
        }
    }
 }
--- a/test/src/unit-regression.cpp
+++ b/test/src/unit-regression.cpp
@ -1477,6 +1477,29 @@ TEST_CASE("regression tests")
        my_json foo = R"([1, 2, 3])"_json;
    }
    SECTION("issue #976 - istream >> json --- 1st character skipped in stream")
    {
        json j;
        std::istringstream iss;
        iss.clear();
        iss.str("10");
        iss.setstate(std::ios_base::failbit);
        CHECK_THROWS_WITH(iss >> j,
            "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
        CHECK(iss.fail());
        iss.clear();
        iss.str("10");
        iss.setstate(std::ios_base::failbit);
        CHECK_THROWS_WITH(json::parse(iss),
            "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
        CHECK(iss.fail());
    }
    SECTION("issue #977 - Assigning between different json types")
    {
        foo_json lj = ns::foo{3};