diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index ef66948d1..e26e706a2 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -48,76 +48,67 @@ struct input_adapter_protocol using input_adapter_t = std::shared_ptr; /*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. +Input adapter for a (caching) istream. +Ignores a UTF Byte Order Mark at beginning of input. + +Does not support changing the underlying std::streambuf in mid-input. */ class input_stream_adapter : public input_adapter_protocol { public: - ~input_stream_adapter() override - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags - is.clear(); - } + using traits_type = std::char_traits; explicit input_stream_adapter(std::istream& i) - : is(i), sb(*i.rdbuf()) + : is(i) { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) + // Skip byte order mark + if (is.peek() == 0xEF) { - if ((c = get_character()) == 0xBB) + is.ignore(); + if (is.peek() == 0xBB) { - if ((c = get_character()) == 0xBF) + is.ignore(); + if (is.peek() == 0xBF) { - return; // Ignore BOM + is.ignore(); + return; // Found a complete BOM. 
} - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { + is.unget(); } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual + + is.unget(); } } - // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; - // std::istream/std::streambuf use std::char_traits::to_int_type, to - // ensure that std::char_traits::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. - std::char_traits::int_type get_character() override + traits_type::int_type get_character() override { - return sb.sbumpc(); + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.peek(); + // If peek() returns EOF, the following call to ignore() will set + // the failbit, but we do not want to set the failbit here. + if (ch != traits_type::eof()) + { + is.ignore(); + return ch; + } + } + + return traits_type::eof(); } void unget_character() override { - sb.sungetc(); // is.unget() avoided for performance + is.unget(); } private: - /// the associated input stream std::istream& is; - std::streambuf& sb; }; /// input adapter for buffer input diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 1d8e4e82c..b71502749 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1621,76 +1621,67 @@ struct input_adapter_protocol using input_adapter_t = std::shared_ptr; /*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. 
Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. +Input adapter for a (caching) istream. +Ignores a UTF Byte Order Mark at beginning of input. + +Does not support changing the underlying std::streambuf in mid-input. */ class input_stream_adapter : public input_adapter_protocol { public: - ~input_stream_adapter() override - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags - is.clear(); - } + using traits_type = std::char_traits; explicit input_stream_adapter(std::istream& i) - : is(i), sb(*i.rdbuf()) + : is(i) { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) + // Skip byte order mark + if (is.peek() == 0xEF) { - if ((c = get_character()) == 0xBB) + is.ignore(); + if (is.peek() == 0xBB) { - if ((c = get_character()) == 0xBF) + is.ignore(); + if (is.peek() == 0xBF) { - return; // Ignore BOM + is.ignore(); + return; // Found a complete BOM. } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { + is.unget(); } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual + + is.unget(); } } - // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; - // std::istream/std::streambuf use std::char_traits::to_int_type, to - // ensure that std::char_traits::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. 
- std::char_traits::int_type get_character() override + traits_type::int_type get_character() override { - return sb.sbumpc(); + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.peek(); + // If peek() returns EOF, the following call to ignore() will set + // the failbit, but we do not want to set the failbit here. + if (ch != traits_type::eof()) + { + is.ignore(); + return ch; + } + } + + return traits_type::eof(); } void unget_character() override { - sb.sungetc(); // is.unget() avoided for performance + is.unget(); } private: - /// the associated input stream std::istream& is; - std::streambuf& sb; }; /// input adapter for buffer input diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 6b56474b2..ddfba20de 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,6 +34,57 @@ using nlohmann::json; #include #include +// HACK to get the tests running if exceptions are disabled on the command line +// using the "-e/--nothrow" flag. In this case the expression in CHECK_THROWS +// and similar macros is never evaluated, and subsequent checks relying on the +// side effects of the expression may or may not fail. +#define IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr) \ + { \ + bool _exceptions_enabled_ = false; \ + /* The next line sets the `_exceptions_enabled_` flag to true iff the expression in */ \ + /* the CHECK_THROWS macro is actually evaluated. It is not if the "-e" flag */ \ + /* has been specified on the command line. */ \ + CHECK_THROWS([&](){ _exceptions_enabled_ = true; throw std::runtime_error("ok"); }()); \ + if (_exceptions_enabled_) \ + { \ + CHECK(expr); \ + } \ + } \ + /**/ + +namespace +{ + // A stringbuf which only ever has a get-area of exactly one character. + // I.e. multiple successive calls to sungetc will fail. + // Note that sgetc and sbumpc both update the get-area and count as a "read" operation. 
+ // (sbumpc is the equivalent to sgetc + gbump(1).) + class unget_fails_stringbuf : public std::streambuf + { + const char* last; + + public: + explicit unget_fails_stringbuf(char const* str, size_t len) + : last(str + len) + { + char* first = const_cast(str); + this->setg(first, first, first); + } + + protected: + virtual traits_type::int_type underflow() override + { + char* pos = this->gptr(); + if (pos == last) + { + this->setg(pos, pos, pos); // empty. and invalid. + return traits_type::eof(); + } + this->setg(pos, pos, pos + 1); + return traits_type::to_int_type(*pos); + } + }; +} + TEST_CASE("deserialization") { SECTION("successful deserialization") @@ -44,6 +95,9 @@ TEST_CASE("deserialization") ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]"; json j = json::parse(ss1); + CHECK(!ss1.fail()); + CHECK(!ss1.bad()); + CHECK(ss1.eof()); // Strict parsing. CHECK(json::accept(ss2)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); } @@ -70,6 +124,12 @@ TEST_CASE("deserialization") ss << "[\"foo\",1,2,3,false,{\"one\":1}]"; json j; j << ss; + CHECK(!ss.fail()); + CHECK(!ss.bad()); + // operator>> uses non-strict parsing. + // We have read the closing ']' and we're done. The parser should + // not have read the EOF marker. + CHECK(!ss.eof()); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); } @@ -90,6 +150,18 @@ TEST_CASE("deserialization") SECTION("unsuccessful deserialization") { + SECTION("null streambuf") + { + std::streambuf* sb = nullptr; + std::istream iss(sb); + CHECK(iss.bad()); + CHECK_THROWS_WITH(json::parse(iss), + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.fail()); // Tests the badbit too. 
+ IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.eof()); + } + SECTION("stream") { std::stringstream ss1, ss2, ss3, ss4; @@ -98,12 +170,15 @@ TEST_CASE("deserialization") ss3 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss4 << "[\"foo\",1,2,3,false,{\"one\":1}"; CHECK_THROWS_AS(json::parse(ss1), json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof()); CHECK_THROWS_WITH(json::parse(ss2), "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); CHECK(not json::accept(ss3)); json j_error; - CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false)); + CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false)); CHECK(j_error.is_discarded()); } @@ -127,6 +202,9 @@ TEST_CASE("deserialization") ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; json j; CHECK_THROWS_AS(j << ss1, json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof()); CHECK_THROWS_WITH(j << ss2, "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } @@ -138,6 +216,9 @@ TEST_CASE("deserialization") ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; json j; CHECK_THROWS_AS(ss1 >> j, json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof()); CHECK_THROWS_WITH(ss2 >> j, "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } @@ -455,7 +536,11 @@ TEST_CASE("deserialization") CHECK_THROWS_WITH(json::parse(bom), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); - CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&); + 
std::istringstream iss(bom); + CHECK_THROWS_AS(json::parse(iss), json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.eof()); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); } @@ -463,7 +548,40 @@ TEST_CASE("deserialization") SECTION("BOM and content") { CHECK(json::parse(bom + "1") == 1); - CHECK(json::parse(std::istringstream(bom + "1")) == 1); + + std::istringstream iss(bom + "1"); + CHECK(json::parse(iss) == 1); + CHECK(!iss.bad()); + CHECK(!iss.fail()); + // Strict parsing: stream should be at EOF now. + CHECK(iss.eof()); + + iss.str(bom + "1"); + iss.clear(); + json j; + CHECK_NOTHROW(iss >> j); + CHECK(j == 1); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + // Non-strict parsing: + // EOF bit is set only if we tried to read a character past the end of the file. + // In this case: parsing the complete number requires reading past the end of the file. + CHECK(iss.eof()); + + iss.str(bom + "\"1\""); + iss.clear(); + CHECK(json::parse(iss) == "1"); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(iss.eof()); // Strict... + + iss.str(bom + "\"1\""); + iss.clear(); + CHECK_NOTHROW(iss >> j); + CHECK(j == "1"); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(!iss.eof()); // Non-strict... } SECTION("2 byte of BOM") @@ -474,11 +592,44 @@ TEST_CASE("deserialization") CHECK_THROWS_WITH(json::parse(bom2), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); - CHECK_THROWS_AS(json::parse(std::istringstream(bom2)), json::parse_error&); + std::istringstream iss(bom2); + CHECK_THROWS_AS(json::parse(iss), json::parse_error&); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file. 
+ CHECK(iss.good()); CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); } + SECTION("2 byte of BOM - incomplete") + { + { + unget_fails_stringbuf sb("\xEF\xBB ", 3); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + { + unget_fails_stringbuf sb("\xEF\xBB", 2); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + } + SECTION("1 byte of BOM") { const std::string bom1 = bom.substr(0, 1); @@ -487,11 +638,44 @@ TEST_CASE("deserialization") CHECK_THROWS_WITH(json::parse(bom1), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); - CHECK_THROWS_AS(json::parse(std::istringstream(bom1)), json::parse_error&); + std::istringstream iss(bom1); + CHECK_THROWS_AS(json::parse(iss), json::parse_error&); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file. 
+ CHECK(iss.good()); CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); } + SECTION("1 byte of BOM - incomplete") + { + { + unget_fails_stringbuf sb("\xEF ", 3); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + { + unget_fails_stringbuf sb("\xEF", 1); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + } + SECTION("variations") { // calculate variations of each byte of the BOM to make sure @@ -529,14 +713,80 @@ TEST_CASE("deserialization") } } - SECTION("preserve state after parsing") + SECTION("preserve state after parsing - strings") + { + std::istringstream s(bom + "\"123\" \"456\""); + json j; + s >> j; + CHECK(j == "123"); + CHECK(s.good()); + s >> j; + CHECK(j == "456"); + CHECK(s.good()); + s.peek(); + CHECK(s.eof()); + } + + SECTION("preserve state after parsing - numbers (ref)") + { + std::istringstream s("123 456"); + int j; + s >> j; + CHECK(j == 123); + CHECK(s.good()); + s >> j; + CHECK(j == 456); + CHECK(!s.good()); + CHECK(!s.fail()); + CHECK(!s.bad()); + // The stream now has the eofbit set (since to determine whether the number has completely + // parsed, the lexer needs to read past the end of the file). 
+ CHECK(s.eof()); + } + SECTION("preserve state after parsing - numbers") { std::istringstream s(bom + "123 456"); json j; - j << s; + s >> j; CHECK(j == 123); - j << s; + CHECK(s.good()); + s >> j; CHECK(j == 456); + CHECK(!s.good()); + CHECK(!s.fail()); + CHECK(!s.bad()); + // The stream now has the eofbit set (since to determine whether the number has completely + // parsed, the lexer needs to read past the end of the file). + CHECK(s.eof()); + } + + SECTION("preserve state after parsing - numbers (trailing space) (ref)") + { + std::istringstream s("123 456 "); + int j; + s >> j; + CHECK(j == 123); + CHECK(s.good()); + s >> j; + CHECK(j == 456); + // The trailing space at the end is the end of the number. + // The stream should not have the eofbit set. + CHECK(s.good()); + CHECK(s.peek() == static_cast(' ')); + } + SECTION("preserve state after parsing - numbers (trailing space)") + { + std::istringstream s(bom + "123 456 "); + json j; + s >> j; + CHECK(j == 123); + CHECK(s.good()); + s >> j; + CHECK(j == 456); + // The trailing space at the end is the end of the number. + // The stream should not have the eofbit set. 
+ CHECK(s.good()); + CHECK(s.peek() == static_cast(' ')); } } } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 604def6cb..3bced8590 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -1504,6 +1504,29 @@ TEST_CASE("regression tests") my_json foo = R"([1, 2, 3])"_json; } + SECTION("issue #976 - istream >> json --- 1st character skipped in stream") + { + json j; + + std::istringstream iss; + + iss.clear(); + iss.str("10"); + iss.setstate(std::ios_base::failbit); + + CHECK_THROWS_WITH(iss >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK(iss.fail()); + + iss.clear(); + iss.str("10"); + iss.setstate(std::ios_base::failbit); + + CHECK_THROWS_WITH(json::parse(iss), + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK(iss.fail()); + } + SECTION("issue #977 - Assigning between different json types") { foo_json lj = ns::foo{3};