From 9fca09b00e1ec92c841f2d4167aa8b515c5e2372 Mon Sep 17 00:00:00 2001
From: abolz <lt.morris.schaffer@googlemail.com>
Date: Mon, 12 Mar 2018 10:30:52 +0100
Subject: [PATCH 1/3] Fix BOM tests

---
 test/src/unit-deserialization.cpp | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp
index 6e46abe31..6b56474b2 100644
--- a/test/src/unit-deserialization.cpp
+++ b/test/src/unit-deserialization.cpp
@@ -447,7 +447,7 @@ TEST_CASE("deserialization")
 
     SECTION("ignoring byte-order marks")
     {
-        std::string bom = "\xEF\xBB\xBF";
+        const std::string bom = "\xEF\xBB\xBF";
 
         SECTION("BOM only")
         {
@@ -468,24 +468,28 @@ TEST_CASE("deserialization")
 
         SECTION("2 byte of BOM")
         {
-            CHECK_THROWS_AS(json::parse(bom.substr(0, 2)), json::parse_error&);
-            CHECK_THROWS_WITH(json::parse(bom),
-                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+            const std::string bom2 = bom.substr(0, 2);
 
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&);
-            CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
-                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+            CHECK_THROWS_AS(json::parse(bom2), json::parse_error&);
+            CHECK_THROWS_WITH(json::parse(bom2),
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
+
+            CHECK_THROWS_AS(json::parse(std::istringstream(bom2)), json::parse_error&);
+            CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)),
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
         }
 
         SECTION("1 byte of BOM")
         {
-            CHECK_THROWS_AS(json::parse(bom.substr(0, 1)), json::parse_error&);
-            CHECK_THROWS_WITH(json::parse(bom),
-                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+            const std::string bom1 = bom.substr(0, 1);
 
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&);
-            CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
-                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+            CHECK_THROWS_AS(json::parse(bom1), json::parse_error&);
+            CHECK_THROWS_WITH(json::parse(bom1),
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
+
+            CHECK_THROWS_AS(json::parse(std::istringstream(bom1)), json::parse_error&);
+            CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)),
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
         }
 
         SECTION("variations")

From b487afcbaa31457fa0818198ed34aaf01effa4d1 Mon Sep 17 00:00:00 2001
From: abolz <lt.morris.schaffer@googlemail.com>
Date: Mon, 12 Mar 2018 10:38:16 +0100
Subject: [PATCH 2/3] Use the `std::istream` interface to implement
 `input_stream_adapter` (fix #976)

---
 .../nlohmann/detail/input/input_adapters.hpp  |  77 +++--
 single_include/nlohmann/json.hpp              |  77 +++--
 test/src/unit-deserialization.cpp             | 266 +++++++++++++++++-
 test/src/unit-regression.cpp                  |  23 ++
 4 files changed, 349 insertions(+), 94 deletions(-)

diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp
index ef66948d1..e26e706a2 100644
--- a/include/nlohmann/detail/input/input_adapters.hpp
+++ b/include/nlohmann/detail/input/input_adapters.hpp
@@ -48,76 +48,67 @@ struct input_adapter_protocol
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 
 /*!
-Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
-beginning of input. Does not support changing the underlying std::streambuf
-in mid-input. Maintains underlying std::istream and std::streambuf to support
-subsequent use of standard std::istream operations to process any input
-characters following those used in parsing the JSON input.  Clears the
-std::istream flags; any input errors (e.g., EOF) will be detected by the first
-subsequent call for input from the std::istream.
+Input adapter for a (caching) istream.
+Ignores a UTF Byte Order Mark at beginning of input.
+
+Does not support changing the underlying std::streambuf in mid-input.
 */
 class input_stream_adapter : public input_adapter_protocol
 {
   public:
-    ~input_stream_adapter() override
-    {
-        // clear stream flags; we use underlying streambuf I/O, do not
-        // maintain ifstream flags
-        is.clear();
-    }
+    using traits_type = std::char_traits<char>;
 
     explicit input_stream_adapter(std::istream& i)
-        : is(i), sb(*i.rdbuf())
+        : is(i)
     {
-        // skip byte order mark
-        std::char_traits<char>::int_type c;
-        if ((c = get_character()) == 0xEF)
+        // Skip byte order mark
+        if (is.peek() == 0xEF)
         {
-            if ((c = get_character()) == 0xBB)
+            is.ignore();
+            if (is.peek() == 0xBB)
             {
-                if ((c = get_character()) == 0xBF)
+                is.ignore();
+                if (is.peek() == 0xBF)
                 {
-                    return; // Ignore BOM
+                    is.ignore();
+                    return; // Found a complete BOM.
                 }
-                else if (c != std::char_traits<char>::eof())
-                {
-                    is.unget();
-                }
-                is.putback('\xBB');
-            }
-            else if (c != std::char_traits<char>::eof())
-            {
+
                 is.unget();
             }
-            is.putback('\xEF');
-        }
-        else if (c != std::char_traits<char>::eof())
-        {
-            is.unget(); // no byte order mark; process as usual
+
+            is.unget();
         }
     }
 
-    // delete because of pointer members
     input_stream_adapter(const input_stream_adapter&) = delete;
-    input_stream_adapter& operator=(input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
 
-    // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
-    // ensure that std::char_traits<char>::eof() and the character 0xFF do not
-    // end up as the same value, eg. 0xFFFFFFFF.
-    std::char_traits<char>::int_type get_character() override
+    traits_type::int_type get_character() override
     {
-        return sb.sbumpc();
+        // Only try to get a character if the stream is good!
+        if (is.good())
+        {
+            const auto ch = is.peek();
+            // If peek() returns EOF, the following call to ignore() will set
+            // the failbit, but we do not want to set the failbit here.
+            if (ch != traits_type::eof())
+            {
+                is.ignore();
+                return ch;
+            }
+        }
+
+        return traits_type::eof();
     }
 
     void unget_character() override
     {
-        sb.sungetc();  // is.unget() avoided for performance
+        is.unget();
     }
 
   private:
-    /// the associated input stream
     std::istream& is;
-    std::streambuf& sb;
 };
 
 /// input adapter for buffer input
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp
index 1d8e4e82c..b71502749 100644
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -1621,76 +1621,67 @@ struct input_adapter_protocol
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 
 /*!
-Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
-beginning of input. Does not support changing the underlying std::streambuf
-in mid-input. Maintains underlying std::istream and std::streambuf to support
-subsequent use of standard std::istream operations to process any input
-characters following those used in parsing the JSON input.  Clears the
-std::istream flags; any input errors (e.g., EOF) will be detected by the first
-subsequent call for input from the std::istream.
+Input adapter for a (caching) istream.
+Ignores a UTF Byte Order Mark at beginning of input.
+
+Does not support changing the underlying std::streambuf in mid-input.
 */
 class input_stream_adapter : public input_adapter_protocol
 {
   public:
-    ~input_stream_adapter() override
-    {
-        // clear stream flags; we use underlying streambuf I/O, do not
-        // maintain ifstream flags
-        is.clear();
-    }
+    using traits_type = std::char_traits<char>;
 
     explicit input_stream_adapter(std::istream& i)
-        : is(i), sb(*i.rdbuf())
+        : is(i)
     {
-        // skip byte order mark
-        std::char_traits<char>::int_type c;
-        if ((c = get_character()) == 0xEF)
+        // Skip byte order mark
+        if (is.peek() == 0xEF)
         {
-            if ((c = get_character()) == 0xBB)
+            is.ignore();
+            if (is.peek() == 0xBB)
             {
-                if ((c = get_character()) == 0xBF)
+                is.ignore();
+                if (is.peek() == 0xBF)
                 {
-                    return; // Ignore BOM
+                    is.ignore();
+                    return; // Found a complete BOM.
                 }
-                else if (c != std::char_traits<char>::eof())
-                {
-                    is.unget();
-                }
-                is.putback('\xBB');
-            }
-            else if (c != std::char_traits<char>::eof())
-            {
+
                 is.unget();
             }
-            is.putback('\xEF');
-        }
-        else if (c != std::char_traits<char>::eof())
-        {
-            is.unget(); // no byte order mark; process as usual
+
+            is.unget();
         }
     }
 
-    // delete because of pointer members
     input_stream_adapter(const input_stream_adapter&) = delete;
-    input_stream_adapter& operator=(input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
 
-    // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
-    // ensure that std::char_traits<char>::eof() and the character 0xFF do not
-    // end up as the same value, eg. 0xFFFFFFFF.
-    std::char_traits<char>::int_type get_character() override
+    traits_type::int_type get_character() override
     {
-        return sb.sbumpc();
+        // Only try to get a character if the stream is good!
+        if (is.good())
+        {
+            const auto ch = is.peek();
+            // If peek() returns EOF, the following call to ignore() will set
+            // the failbit, but we do not want to set the failbit here.
+            if (ch != traits_type::eof())
+            {
+                is.ignore();
+                return ch;
+            }
+        }
+
+        return traits_type::eof();
     }
 
     void unget_character() override
     {
-        sb.sungetc();  // is.unget() avoided for performance
+        is.unget();
     }
 
   private:
-    /// the associated input stream
     std::istream& is;
-    std::streambuf& sb;
 };
 
 /// input adapter for buffer input
diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp
index 6b56474b2..ddfba20de 100644
--- a/test/src/unit-deserialization.cpp
+++ b/test/src/unit-deserialization.cpp
@@ -34,6 +34,57 @@ using nlohmann::json;
 #include <iostream>
 #include <valarray>
 
+// HACK to get the tests running if exceptions are disabled on the command line
+// using the "-e/--nothrow" flag. In this case the expressions in CHECK_THROWS
+// and similar macros are never executed and subsequent checks relying on the
+// side effects of the expression may or may not fail.
+#define IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr)                                                  \
+    {                                                                                           \
+        bool _exceptions_enabled_ = false;                                                      \
+        /* The next line sets the `_exceptions_enabled_` flag to true, iff the expression in */ \
+        /* the CHECK_THROWS macro actually ever gets evaluated. It's not if the "-e" flag    */ \
+        /* has been specified on the command line.                                           */ \
+        CHECK_THROWS([&](){ _exceptions_enabled_ = true; throw std::runtime_error("ok"); }());  \
+        if (_exceptions_enabled_)                                                               \
+        {                                                                                       \
+            CHECK(expr);                                                                        \
+        }                                                                                       \
+    }                                                                                           \
+    /**/
+
+namespace
+{
+    // A stringbuf which only ever has a get-area of exactly one character.
+    // I.e. multiple successive calls to sungetc will fail.
+    // Note that sgetc and sbumpc both update the get-area and count as a "read" operation.
+    // (sbumpc is equivalent to sgetc + gbump(1).)
+    class unget_fails_stringbuf : public std::streambuf
+    {
+        const char* last; // one past the final input character (str + len)
+
+      public:
+        explicit unget_fails_stringbuf(char const* str, size_t len)
+            : last(str + len)
+        {
+            char* first = const_cast<char*>(str); // setg() only accepts non-const char*
+            this->setg(first, first, first); // empty get-area, so the first read goes through underflow()
+        }
+
+      protected:
+        virtual traits_type::int_type underflow() override
+        {
+            char* pos = this->gptr();
+            if (pos == last)
+            {
+                this->setg(pos, pos, pos); // end of input: empty (and no longer valid) get-area
+                return traits_type::eof();
+            }
+            this->setg(pos, pos, pos + 1); // one-character get-area beginning at pos; nothing before pos can be put back
+            return traits_type::to_int_type(*pos);
+        }
+    };
+}
+
 TEST_CASE("deserialization")
 {
     SECTION("successful deserialization")
@@ -44,6 +95,9 @@ TEST_CASE("deserialization")
             ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]";
             ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]";
             json j = json::parse(ss1);
+            CHECK(!ss1.fail());
+            CHECK(!ss1.bad());
+            CHECK(ss1.eof()); // Strict parsing.
             CHECK(json::accept(ss2));
             CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
         }
@@ -70,6 +124,12 @@ TEST_CASE("deserialization")
             ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
             json j;
             j << ss;
+            CHECK(!ss.fail());
+            CHECK(!ss.bad());
+            // operator>> uses non-strict parsing.
+            // We have read the closing ']' and we're done. The parser should
+            // not have read the EOF marker.
+            CHECK(!ss.eof());
             CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
         }
 
@@ -90,6 +150,18 @@ TEST_CASE("deserialization")
 
     SECTION("unsuccessful deserialization")
     {
+        SECTION("null streambuf")
+        {
+            std::streambuf* sb = nullptr;
+            std::istream iss(sb);
+            CHECK(iss.bad());
+            CHECK_THROWS_WITH(json::parse(iss),
+                              "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.fail()); // Tests the badbit too.
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.bad());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.eof());
+        }
+
         SECTION("stream")
         {
             std::stringstream ss1, ss2, ss3, ss4;
@@ -98,12 +170,15 @@ TEST_CASE("deserialization")
             ss3 << "[\"foo\",1,2,3,false,{\"one\":1}";
             ss4 << "[\"foo\",1,2,3,false,{\"one\":1}";
             CHECK_THROWS_AS(json::parse(ss1), json::parse_error&);
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
             CHECK_THROWS_WITH(json::parse(ss2),
                               "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
             CHECK(not json::accept(ss3));
 
             json j_error;
-            CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false));
+            CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false));
             CHECK(j_error.is_discarded());
         }
 
@@ -127,6 +202,9 @@ TEST_CASE("deserialization")
             ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
             json j;
             CHECK_THROWS_AS(j << ss1, json::parse_error&);
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
             CHECK_THROWS_WITH(j << ss2,
                               "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
         }
@@ -138,6 +216,9 @@ TEST_CASE("deserialization")
             ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
             json j;
             CHECK_THROWS_AS(ss1 >> j, json::parse_error&);
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
             CHECK_THROWS_WITH(ss2 >> j,
                               "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
         }
@@ -455,7 +536,11 @@ TEST_CASE("deserialization")
             CHECK_THROWS_WITH(json::parse(bom),
                               "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
 
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&);
+            std::istringstream iss(bom);
+            CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.fail());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.bad());
+            IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.eof());
             CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
                               "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
         }
@@ -463,7 +548,40 @@ TEST_CASE("deserialization")
         SECTION("BOM and content")
         {
             CHECK(json::parse(bom + "1") == 1);
-            CHECK(json::parse(std::istringstream(bom + "1")) == 1);
+
+            std::istringstream iss(bom + "1");
+            CHECK(json::parse(iss) == 1);
+            CHECK(!iss.bad());
+            CHECK(!iss.fail());
+            // Strict parsing: stream should be at EOF now.
+            CHECK(iss.eof());
+
+            iss.str(bom + "1");
+            iss.clear();
+            json j;
+            CHECK_NOTHROW(iss >> j);
+            CHECK(j == 1);
+            CHECK(!iss.fail());
+            CHECK(!iss.bad());
+            // Non-strict parsing:
+            // EOF bit is set only if we tried to read a character past the end of the file.
+            // In this case: parsing the complete number requires reading past the end of the file.
+            CHECK(iss.eof());
+
+            iss.str(bom + "\"1\"");
+            iss.clear();
+            CHECK(json::parse(iss) == "1");
+            CHECK(!iss.fail());
+            CHECK(!iss.bad());
+            CHECK(iss.eof()); // Strict...
+
+            iss.str(bom + "\"1\"");
+            iss.clear();
+            CHECK_NOTHROW(iss >> j);
+            CHECK(j == "1");
+            CHECK(!iss.fail());
+            CHECK(!iss.bad());
+            CHECK(!iss.eof()); // Non-strict...
         }
 
         SECTION("2 byte of BOM")
@@ -474,11 +592,44 @@ TEST_CASE("deserialization")
             CHECK_THROWS_WITH(json::parse(bom2),
                               "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
 
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom2)), json::parse_error&);
+            std::istringstream iss(bom2);
+            CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
+            CHECK(!iss.fail());
+            CHECK(!iss.bad());
+            CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
+            CHECK(iss.good());
             CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)),
                               "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
         }
 
+        SECTION("2 byte of BOM - incomplete")
+        {
+            {
+                unget_fails_stringbuf sb("\xEF\xBB ", 3);
+                std::istream is(&sb);
+
+                json j;
+                CHECK_THROWS_WITH(is >> j,
+                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
+                // Do not check the eofbit.
+                // Some implementations keep the eofbit if is.unget() fails, some do not.
+            }
+            {
+                unget_fails_stringbuf sb("\xEF\xBB", 2);
+                std::istream is(&sb);
+
+                json j;
+                CHECK_THROWS_WITH(is >> j,
+                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
+                // Do not check the eofbit.
+                // Some implementations keep the eofbit if is.unget() fails, some do not.
+            }
+        }
+
         SECTION("1 byte of BOM")
         {
             const std::string bom1 = bom.substr(0, 1);
@@ -487,11 +638,44 @@ TEST_CASE("deserialization")
             CHECK_THROWS_WITH(json::parse(bom1),
                               "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
 
-            CHECK_THROWS_AS(json::parse(std::istringstream(bom1)), json::parse_error&);
+            std::istringstream iss(bom1);
+            CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
+            CHECK(!iss.fail());
+            CHECK(!iss.bad());
+            CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
+            CHECK(iss.good());
             CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)),
                               "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
         }
 
+        SECTION("1 byte of BOM - incomplete")
+        {
+            {
+                unget_fails_stringbuf sb("\xEF  ", 3);
+                std::istream is(&sb);
+
+                json j;
+                CHECK_THROWS_WITH(is >> j,
+                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
+                // Do not check the eofbit.
+                // Some implementations keep the eofbit if is.unget() fails, some do not.
+            }
+            {
+                unget_fails_stringbuf sb("\xEF", 1);
+                std::istream is(&sb);
+
+                json j;
+                CHECK_THROWS_WITH(is >> j,
+                                "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
+                IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
+                // Do not check the eofbit.
+                // Some implementations keep the eofbit if is.unget() fails, some do not.
+            }
+        }
+
         SECTION("variations")
         {
             // calculate variations of each byte of the BOM to make sure
@@ -529,14 +713,80 @@ TEST_CASE("deserialization")
             }
         }
 
-        SECTION("preserve state after parsing")
+        SECTION("preserve state after parsing - strings")
+        {
+            std::istringstream s(bom + "\"123\" \"456\"");
+            json j;
+            s >> j;
+            CHECK(j == "123");
+            CHECK(s.good());
+            s >> j;
+            CHECK(j == "456");
+            CHECK(s.good());
+            s.peek();
+            CHECK(s.eof());
+        }
+
+        SECTION("preserve state after parsing - numbers (ref)")
+        {
+            std::istringstream s("123 456");
+            int j;
+            s >> j;
+            CHECK(j == 123);
+            CHECK(s.good());
+            s >> j;
+            CHECK(j == 456);
+            CHECK(!s.good());
+            CHECK(!s.fail());
+            CHECK(!s.bad());
+            // The stream now has the eofbit set (since to determine whether the number has been
+            // completely parsed, the extraction needs to read past the end of the file).
+            CHECK(s.eof());
+        }
+        SECTION("preserve state after parsing - numbers")
         {
             std::istringstream s(bom + "123 456");
             json j;
-            j << s;
+            s >> j;
             CHECK(j == 123);
-            j << s;
+            CHECK(s.good());
+            s >> j;
             CHECK(j == 456);
+            CHECK(!s.good());
+            CHECK(!s.fail());
+            CHECK(!s.bad());
+            // The stream now has the eofbit set (since to determine whether the number has completely
+            // parsed, the lexer needs to read past the end of the file).
+            CHECK(s.eof());
+        }
+
+        SECTION("preserve state after parsing - numbers (trailing space) (ref)")
+        {
+            std::istringstream s("123 456 ");
+            int j;
+            s >> j;
+            CHECK(j == 123);
+            CHECK(s.good());
+            s >> j;
+            CHECK(j == 456);
+            // The trailing space at the end is the end of the number.
+            // The stream should not have the eofbit set.
+            CHECK(s.good());
+            CHECK(s.peek() == static_cast<unsigned char>(' '));
+        }
+        SECTION("preserve state after parsing - numbers (trailing space)")
+        {
+            std::istringstream s(bom + "123 456 ");
+            json j;
+            s >> j;
+            CHECK(j == 123);
+            CHECK(s.good());
+            s >> j;
+            CHECK(j == 456);
+            // The trailing space at the end is the end of the number.
+            // The stream should not have the eofbit set.
+            CHECK(s.good());
+            CHECK(s.peek() == static_cast<unsigned char>(' '));
         }
     }
 }
diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp
index 604def6cb..3bced8590 100644
--- a/test/src/unit-regression.cpp
+++ b/test/src/unit-regression.cpp
@@ -1504,6 +1504,29 @@ TEST_CASE("regression tests")
         my_json foo = R"([1, 2, 3])"_json;
     }
 
+    SECTION("issue #976 - istream >> json --- 1st character skipped in stream")
+    {
+        json j;
+
+        std::istringstream iss;
+
+        iss.clear();
+        iss.str("10");
+        iss.setstate(std::ios_base::failbit);
+
+        CHECK_THROWS_WITH(iss >> j,
+            "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+        CHECK(iss.fail());
+
+        iss.clear();
+        iss.str("10");
+        iss.setstate(std::ios_base::failbit);
+
+        CHECK_THROWS_WITH(json::parse(iss),
+            "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
+        CHECK(iss.fail());
+    }
+
     SECTION("issue #977 - Assigning between different json types")
     {
         foo_json lj = ns::foo{3};

From d46cf99a856c932100224873b78e3b89bf79f77a Mon Sep 17 00:00:00 2001
From: abolz <lt.morris.schaffer@googlemail.com>
Date: Mon, 12 Mar 2018 13:09:56 +0100
Subject: [PATCH 3/3] Improve performance of `input_stream_adapter`

Use the underlying streambuf to extract characters instead of the istream
interface and manually set the istream error state.

This slightly changes the behavior in case a streambuf operation throws
an exception.
---
 .../nlohmann/detail/input/input_adapters.hpp  | 146 ++++++++++++++++++
 single_include/nlohmann/json.hpp              | 146 ++++++++++++++++++
 2 files changed, 292 insertions(+)

diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp
index e26e706a2..1f2b1aa8d 100644
--- a/include/nlohmann/detail/input/input_adapters.hpp
+++ b/include/nlohmann/detail/input/input_adapters.hpp
@@ -47,12 +47,61 @@ struct input_adapter_protocol
 /// a type to simplify interfaces
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 
+/*!
+A helper function to skip the UTF-8 byte order mark.
+
+If a complete BOM has been skipped, or if an incomplete BOM has been detected
+and the stream has been successfully rewind to the start of the BOM, returns
+goodbit.
+If an internal operation fails, returns badbit, and the streambuf should no
+longer be used.
+
+Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears
+the eofbit. However, some implementations keep the eofbit if is.unget() fails,
+others do not.
+
+Note: The streambuf must be non-null.
+*/
+inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
+{
+    using traits_type = std::char_traits<char>;
+
+    assert(sb != nullptr);
+
+    if (sb->sgetc() == 0xEF)
+    {
+        sb->sbumpc();
+        if (sb->sgetc() == 0xBB)
+        {
+            sb->sbumpc();
+            if (sb->sgetc() == 0xBF)
+            {
+                sb->sbumpc();
+                return std::ios_base::goodbit;
+            }
+
+            if (sb->sungetc() == traits_type::eof())
+            {
+                return std::ios_base::badbit;
+            }
+        }
+
+        if (sb->sungetc() == traits_type::eof())
+        {
+            return std::ios_base::badbit;
+        }
+    }
+
+    return std::ios_base::goodbit;
+}
+
 /*!
 Input adapter for a (caching) istream.
 Ignores a UTF Byte Order Mark at beginning of input.
 
 Does not support changing the underlying std::streambuf in mid-input.
 */
+#if 0
 class input_stream_adapter : public input_adapter_protocol
 {
   public:
@@ -110,6 +159,103 @@ class input_stream_adapter : public input_adapter_protocol
   private:
     std::istream& is;
 };
+#else
+class input_stream_adapter : public input_adapter_protocol
+{
+    // Reads characters directly from the std::streambuf of a std::istream.
+    // NOTE:
+    //
+    // This implementation differs slightly from the reference implementation
+    // (using the std::istream interface):
+    //
+    //      From N4659:
+    //      30.7.4.3 Unformatted input functions
+    //
+    //      [...]
+    //      If an exception is thrown during input then `ios::badbit` is turned
+    //      on[310] in `*this`'s error state. (Exceptions thrown from
+    //      `basic_ios<>::clear()` are not caught or rethrown.)
+    //      If `(exceptions() & badbit) != 0` then the exception is rethrown.
+    //
+    //      [310] This is done without causing an `ios::failure` to be thrown.
+    //
+    // However, there is no (portable) way to turn on the `badbit` in `is`
+    // without throwing an exception, so here we don't catch (and possibly
+    // rethrow) exceptions from streambuf operations.
+    // If an internal operation throws an exception, the behavior of this
+    // implementation is therefore slightly different from the reference
+    // implementation:
+    //
+    // If an exception is thrown during input and
+    //
+    // - badbit is turned ON in `is.exceptions()`:
+    //      The badbit will **not** be set in `is`'s error state.
+    //
+    // - badbit is turned OFF in `is.exceptions()`:
+    //      The badbit will **not** be set in `is`'s error state and the
+    //      exception is **not** swallowed.
+    //
+
+  public:
+    using traits_type = std::char_traits<char>;
+    // Constructs a sentry for `i` (noskipws) and, if it is ok, skips a UTF-8 BOM.
+    explicit input_stream_adapter(std::istream& i)
+        : is(i)
+        , ok(i, /* noskipws */ true)
+    {
+        std::ios_base::iostate state = std::ios_base::goodbit;
+        if (ok)
+        {
+            state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf());
+        }
+        else
+        {
+            state |= std::ios_base::failbit;
+        }
+
+        // Update the stream state. In case skip_byte_order_mark() failed (but
+        // did not throw an exception), `state` now has the badbit set and the
+        // call to setstate might throw an ios::failure. Likewise, if the stream
+        // is "not ok" then the failbit will be set, which might throw an
+        // exception, too.
+        is.setstate(state);
+    }
+    // The adapter is neither copy-constructible nor copy-assignable.
+    input_stream_adapter(const input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
+    // Returns the next character from the streambuf, or traits_type::eof().
+    traits_type::int_type get_character() override
+    {
+        // Only try to get a character if the stream is good!
+        if (is.good())
+        {
+            const auto ch = is.rdbuf()->sbumpc();
+            if (ch != traits_type::eof())
+            {
+                return ch;
+            }
+
+            // sbumpc failed.
+            // No more characters are available. Set eofbit.
+            is.setstate(std::ios_base::eofbit);
+        }
+
+        return traits_type::eof();
+    }
+    // Puts the most recently read character back into the streambuf.
+    void unget_character() override
+    {
+        // This method is only ever called if the last call to get_character was
+        // successful (i.e. not EOF). This implies that the stream is good and
+        // that the call to sungetc below is guaranteed to succeed.
+        is.rdbuf()->sungetc();
+    }
+
+  private:
+    std::istream& is; // the wrapped input stream
+    std::istream::sentry const ok; // sentry for `is`; tested in the constructor
+};
+#endif
 
 /// input adapter for buffer input
 class input_buffer_adapter : public input_adapter_protocol
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp
index b71502749..c0d6d27c9 100644
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -1620,12 +1620,61 @@ struct input_adapter_protocol
 /// a type to simplify interfaces
 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
 
+/*!
+Skips a UTF-8 byte order mark (the bytes 0xEF 0xBB 0xBF) at the current read
+position of the given streambuf.
+
+Returns goodbit if a complete BOM has been skipped, if no BOM is present, or if
+an incomplete BOM has been detected and the streambuf has been successfully
+rewound to the start of the BOM.
+Returns badbit if putting a consumed byte back fails; the streambuf should then
+no longer be used.
+
+Note: Doesn't handle the eofbit, as only the streambuf is accessed here. When
+rewinding via is.unget() instead, the eofbit is cleared first, and some
+implementations keep the eofbit if is.unget() fails, while others do not.
+Note: The streambuf must be non-null.
+*/
+inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
+{
+    using traits_type = std::char_traits<char>;
+
+    assert(sb != nullptr);
+    // Peek with sgetc() and only consume a byte once it matches the BOM.
+    if (sb->sgetc() == 0xEF)
+    {
+        sb->sbumpc(); // consume 0xEF
+        if (sb->sgetc() == 0xBB)
+        {
+            sb->sbumpc(); // consume 0xBB
+            if (sb->sgetc() == 0xBF)
+            {
+                sb->sbumpc(); // consume 0xBF: the complete BOM has been skipped
+                return std::ios_base::goodbit;
+            }
+            // Third byte is not 0xBF: put back the second byte (0xBB).
+            if (sb->sungetc() == traits_type::eof())
+            {
+                return std::ios_base::badbit;
+            }
+        }
+        // Incomplete BOM: put back the first byte (0xEF).
+        if (sb->sungetc() == traits_type::eof())
+        {
+            return std::ios_base::badbit;
+        }
+    }
+
+    return std::ios_base::goodbit;
+}
+
 /*!
 Input adapter for a (caching) istream.
 Ignores a UTF Byte Order Mark at beginning of input.
 
 Does not support changing the underlying std::streambuf in mid-input.
 */
+#if 0
 class input_stream_adapter : public input_adapter_protocol
 {
   public:
@@ -1683,6 +1732,103 @@ class input_stream_adapter : public input_adapter_protocol
   private:
     std::istream& is;
 };
+#else
+class input_stream_adapter : public input_adapter_protocol
+{
+    // Reads characters directly from the std::streambuf of a std::istream.
+    // NOTE:
+    //
+    // This implementation differs slightly from the reference implementation
+    // (using the std::istream interface):
+    //
+    //      From N4659:
+    //      30.7.4.3 Unformatted input functions
+    //
+    //      [...]
+    //      If an exception is thrown during input then `ios::badbit` is turned
+    //      on[310] in `*this`'s error state. (Exceptions thrown from
+    //      `basic_ios<>::clear()` are not caught or rethrown.)
+    //      If `(exceptions() & badbit) != 0` then the exception is rethrown.
+    //
+    //      [310] This is done without causing an `ios::failure` to be thrown.
+    //
+    // However, there is no (portable) way to turn on the `badbit` in `is`
+    // without throwing an exception, so here we don't catch (and possibly
+    // rethrow) exceptions from streambuf operations.
+    // If an internal operation throws an exception, the behavior of this
+    // implementation is therefore slightly different from the reference
+    // implementation:
+    //
+    // If an exception is thrown during input and
+    //
+    // - badbit is turned ON in `is.exceptions()`:
+    //      The badbit will **not** be set in `is`'s error state.
+    //
+    // - badbit is turned OFF in `is.exceptions()`:
+    //      The badbit will **not** be set in `is`'s error state and the
+    //      exception is **not** swallowed.
+    //
+
+  public:
+    using traits_type = std::char_traits<char>;
+    // Constructs a sentry for `i` (noskipws) and, if it is ok, skips a UTF-8 BOM.
+    explicit input_stream_adapter(std::istream& i)
+        : is(i)
+        , ok(i, /* noskipws */ true)
+    {
+        std::ios_base::iostate state = std::ios_base::goodbit;
+        if (ok)
+        {
+            state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf());
+        }
+        else
+        {
+            state |= std::ios_base::failbit;
+        }
+
+        // Update the stream state. In case skip_byte_order_mark() failed (but
+        // did not throw an exception), `state` now has the badbit set and the
+        // call to setstate might throw an ios::failure. Likewise, if the stream
+        // is "not ok" then the failbit will be set, which might throw an
+        // exception, too.
+        is.setstate(state);
+    }
+    // The adapter is neither copy-constructible nor copy-assignable.
+    input_stream_adapter(const input_stream_adapter&) = delete;
+    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
+    // Returns the next character from the streambuf, or traits_type::eof().
+    traits_type::int_type get_character() override
+    {
+        // Only try to get a character if the stream is good!
+        if (is.good())
+        {
+            const auto ch = is.rdbuf()->sbumpc();
+            if (ch != traits_type::eof())
+            {
+                return ch;
+            }
+
+            // sbumpc failed.
+            // No more characters are available. Set eofbit.
+            is.setstate(std::ios_base::eofbit);
+        }
+
+        return traits_type::eof();
+    }
+    // Puts the most recently read character back into the streambuf.
+    void unget_character() override
+    {
+        // This method is only ever called if the last call to get_character was
+        // successful (i.e. not EOF). This implies that the stream is good and
+        // that the call to sungetc below is guaranteed to succeed.
+        is.rdbuf()->sungetc();
+    }
+
+  private:
+    std::istream& is; // the wrapped input stream
+    std::istream::sentry const ok; // sentry for `is`; tested in the constructor
+};
+#endif
 
 /// input adapter for buffer input
 class input_buffer_adapter : public input_adapter_protocol