From 9fca09b00e1ec92c841f2d4167aa8b515c5e2372 Mon Sep 17 00:00:00 2001 From: abolz Date: Mon, 12 Mar 2018 10:30:52 +0100 Subject: [PATCH 1/3] Fix BOM tests --- test/src/unit-deserialization.cpp | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 6e46abe31..6b56474b2 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -447,7 +447,7 @@ TEST_CASE("deserialization") SECTION("ignoring byte-order marks") { - std::string bom = "\xEF\xBB\xBF"; + const std::string bom = "\xEF\xBB\xBF"; SECTION("BOM only") { @@ -468,24 +468,28 @@ TEST_CASE("deserialization") SECTION("2 byte of BOM") { - CHECK_THROWS_AS(json::parse(bom.substr(0, 2)), json::parse_error&); - CHECK_THROWS_WITH(json::parse(bom), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + const std::string bom2 = bom.substr(0, 2); - CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&); - CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK_THROWS_AS(json::parse(bom2), json::parse_error&); + CHECK_THROWS_WITH(json::parse(bom2), + "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); + + CHECK_THROWS_AS(json::parse(std::istringstream(bom2)), json::parse_error&); + CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)), + "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); } SECTION("1 byte of BOM") { - CHECK_THROWS_AS(json::parse(bom.substr(0, 1)), json::parse_error&); - CHECK_THROWS_WITH(json::parse(bom), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', 
'{', or a literal"); + const std::string bom1 = bom.substr(0, 1); - CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&); - CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), - "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK_THROWS_AS(json::parse(bom1), json::parse_error&); + CHECK_THROWS_WITH(json::parse(bom1), + "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); + + CHECK_THROWS_AS(json::parse(std::istringstream(bom1)), json::parse_error&); + CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)), + "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); } SECTION("variations") From b487afcbaa31457fa0818198ed34aaf01effa4d1 Mon Sep 17 00:00:00 2001 From: abolz Date: Mon, 12 Mar 2018 10:38:16 +0100 Subject: [PATCH 2/3] Use the `std::istream` interface to implement `input_stream_adapter` (fix #976) --- .../nlohmann/detail/input/input_adapters.hpp | 77 +++-- single_include/nlohmann/json.hpp | 77 +++-- test/src/unit-deserialization.cpp | 266 +++++++++++++++++- test/src/unit-regression.cpp | 23 ++ 4 files changed, 349 insertions(+), 94 deletions(-) diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index ef66948d1..e26e706a2 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -48,76 +48,67 @@ struct input_adapter_protocol using input_adapter_t = std::shared_ptr; /*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. 
Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. +Input adapter for a (caching) istream. +Ignores a UTF Byte Order Mark at beginning of input. + +Does not support changing the underlying std::streambuf in mid-input. */ class input_stream_adapter : public input_adapter_protocol { public: - ~input_stream_adapter() override - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags - is.clear(); - } + using traits_type = std::char_traits; explicit input_stream_adapter(std::istream& i) - : is(i), sb(*i.rdbuf()) + : is(i) { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) + // Skip byte order mark + if (is.peek() == 0xEF) { - if ((c = get_character()) == 0xBB) + is.ignore(); + if (is.peek() == 0xBB) { - if ((c = get_character()) == 0xBF) + is.ignore(); + if (is.peek() == 0xBF) { - return; // Ignore BOM + is.ignore(); + return; // Found a complete BOM. } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { + is.unget(); } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual + + is.unget(); } } - // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; - // std::istream/std::streambuf use std::char_traits::to_int_type, to - // ensure that std::char_traits::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. 
- std::char_traits::int_type get_character() override + traits_type::int_type get_character() override { - return sb.sbumpc(); + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.peek(); + // If peek() returns EOF, the following call to ignore() will set + // the failbit, but we do not want to set the failbit here. + if (ch != traits_type::eof()) + { + is.ignore(); + return ch; + } + } + + return traits_type::eof(); } void unget_character() override { - sb.sungetc(); // is.unget() avoided for performance + is.unget(); } private: - /// the associated input stream std::istream& is; - std::streambuf& sb; }; /// input adapter for buffer input diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 1d8e4e82c..b71502749 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1621,76 +1621,67 @@ struct input_adapter_protocol using input_adapter_t = std::shared_ptr; /*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. +Input adapter for a (caching) istream. +Ignores a UTF Byte Order Mark at beginning of input. + +Does not support changing the underlying std::streambuf in mid-input. 
*/ class input_stream_adapter : public input_adapter_protocol { public: - ~input_stream_adapter() override - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags - is.clear(); - } + using traits_type = std::char_traits; explicit input_stream_adapter(std::istream& i) - : is(i), sb(*i.rdbuf()) + : is(i) { - // skip byte order mark - std::char_traits::int_type c; - if ((c = get_character()) == 0xEF) + // Skip byte order mark + if (is.peek() == 0xEF) { - if ((c = get_character()) == 0xBB) + is.ignore(); + if (is.peek() == 0xBB) { - if ((c = get_character()) == 0xBF) + is.ignore(); + if (is.peek() == 0xBF) { - return; // Ignore BOM + is.ignore(); + return; // Found a complete BOM. } - else if (c != std::char_traits::eof()) - { - is.unget(); - } - is.putback('\xBB'); - } - else if (c != std::char_traits::eof()) - { + is.unget(); } - is.putback('\xEF'); - } - else if (c != std::char_traits::eof()) - { - is.unget(); // no byte order mark; process as usual + + is.unget(); } } - // delete because of pointer members input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; - // std::istream/std::streambuf use std::char_traits::to_int_type, to - // ensure that std::char_traits::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. - std::char_traits::int_type get_character() override + traits_type::int_type get_character() override { - return sb.sbumpc(); + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.peek(); + // If peek() returns EOF, the following call to ignore() will set + // the failbit, but we do not want to set the failbit here. 
+ if (ch != traits_type::eof()) + { + is.ignore(); + return ch; + } + } + + return traits_type::eof(); } void unget_character() override { - sb.sungetc(); // is.unget() avoided for performance + is.unget(); } private: - /// the associated input stream std::istream& is; - std::streambuf& sb; }; /// input adapter for buffer input diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 6b56474b2..ddfba20de 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -34,6 +34,57 @@ using nlohmann::json; #include #include +// HACK to get the tests running if exceptions are disabled on the command line +// using the "-e/--nothrow" flag. In this case the expression in CHECK_THROWS +// and similar macros is never executed and subsequent checks relying on the +// side effects of the expression may or may not fail. +#define IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr) \ + { \ + bool _exceptions_enabled_ = false; \ + /* The next line sets the `_exceptions_enabled_` flag to true, iff the expression in */ \ + /* the CHECK_THROWS macro actually ever gets evaluated. It's not if the "-e" flag */ \ + /* has been specified on the command line. */ \ + CHECK_THROWS([&](){ _exceptions_enabled_ = true; throw std::runtime_error("ok"); }()); \ + if (_exceptions_enabled_) \ + { \ + CHECK(expr); \ + } \ + } \ + /**/ + +namespace +{ + // A stringbuf which only ever has a get-area of exactly one character. + // I.e. multiple successive calls to sungetc will fail. + // Note that sgetc and sbumpc both update the get-area and count as a "read" operation. + // (sbumpc is the equivalent to sgetc + gbump(1).) 
+ class unget_fails_stringbuf : public std::streambuf + { + const char* last; + + public: + explicit unget_fails_stringbuf(char const* str, size_t len) + : last(str + len) + { + char* first = const_cast(str); + this->setg(first, first, first); + } + + protected: + virtual traits_type::int_type underflow() override + { + char* pos = this->gptr(); + if (pos == last) + { + this->setg(pos, pos, pos); // empty. and invalid. + return traits_type::eof(); + } + this->setg(pos, pos, pos + 1); + return traits_type::to_int_type(*pos); + } + }; +} + TEST_CASE("deserialization") { SECTION("successful deserialization") @@ -44,6 +95,9 @@ TEST_CASE("deserialization") ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]"; json j = json::parse(ss1); + CHECK(!ss1.fail()); + CHECK(!ss1.bad()); + CHECK(ss1.eof()); // Strict parsing. CHECK(json::accept(ss2)); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); } @@ -70,6 +124,12 @@ TEST_CASE("deserialization") ss << "[\"foo\",1,2,3,false,{\"one\":1}]"; json j; j << ss; + CHECK(!ss.fail()); + CHECK(!ss.bad()); + // operator>> uses non-strict parsing. + // We have read the closing ']' and we're done. The parser should + // not have read the EOF marker. + CHECK(!ss.eof()); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); } @@ -90,6 +150,18 @@ TEST_CASE("deserialization") SECTION("unsuccessful deserialization") { + SECTION("null streambuf") + { + std::streambuf* sb = nullptr; + std::istream iss(sb); + CHECK(iss.bad()); + CHECK_THROWS_WITH(json::parse(iss), + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.fail()); // Tests the badbit too. 
+ IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.eof()); + } + SECTION("stream") { std::stringstream ss1, ss2, ss3, ss4; @@ -98,12 +170,15 @@ TEST_CASE("deserialization") ss3 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss4 << "[\"foo\",1,2,3,false,{\"one\":1}"; CHECK_THROWS_AS(json::parse(ss1), json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof()); CHECK_THROWS_WITH(json::parse(ss2), "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); CHECK(not json::accept(ss3)); json j_error; - CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false)); + CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false)); CHECK(j_error.is_discarded()); } @@ -127,6 +202,9 @@ TEST_CASE("deserialization") ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; json j; CHECK_THROWS_AS(j << ss1, json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof()); CHECK_THROWS_WITH(j << ss2, "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } @@ -138,6 +216,9 @@ TEST_CASE("deserialization") ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; json j; CHECK_THROWS_AS(ss1 >> j, json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof()); CHECK_THROWS_WITH(ss2 >> j, "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); } @@ -455,7 +536,11 @@ TEST_CASE("deserialization") CHECK_THROWS_WITH(json::parse(bom), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); - CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&); + 
std::istringstream iss(bom); + CHECK_THROWS_AS(json::parse(iss), json::parse_error&); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.fail()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.bad()); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.eof()); CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); } @@ -463,7 +548,40 @@ TEST_CASE("deserialization") SECTION("BOM and content") { CHECK(json::parse(bom + "1") == 1); - CHECK(json::parse(std::istringstream(bom + "1")) == 1); + + std::istringstream iss(bom + "1"); + CHECK(json::parse(iss) == 1); + CHECK(!iss.bad()); + CHECK(!iss.fail()); + // Strict parsing: stream should be at EOF now. + CHECK(iss.eof()); + + iss.str(bom + "1"); + iss.clear(); + json j; + CHECK_NOTHROW(iss >> j); + CHECK(j == 1); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + // Non-strict parsing: + // EOF bit is set only if we tried to read a character past the end of the file. + // In this case: parsing the complete number requires reading past the end of the file. + CHECK(iss.eof()); + + iss.str(bom + "\"1\""); + iss.clear(); + CHECK(json::parse(iss) == "1"); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(iss.eof()); // Strict... + + iss.str(bom + "\"1\""); + iss.clear(); + CHECK_NOTHROW(iss >> j); + CHECK(j == "1"); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(!iss.eof()); // Non-strict... } SECTION("2 byte of BOM") @@ -474,11 +592,44 @@ TEST_CASE("deserialization") CHECK_THROWS_WITH(json::parse(bom2), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); - CHECK_THROWS_AS(json::parse(std::istringstream(bom2)), json::parse_error&); + std::istringstream iss(bom2); + CHECK_THROWS_AS(json::parse(iss), json::parse_error&); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file. 
+ CHECK(iss.good()); CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); } + SECTION("2 byte of BOM - incomplete") + { + { + unget_fails_stringbuf sb("\xEF\xBB ", 3); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + { + unget_fails_stringbuf sb("\xEF\xBB", 2); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + } + SECTION("1 byte of BOM") { const std::string bom1 = bom.substr(0, 1); @@ -487,11 +638,44 @@ TEST_CASE("deserialization") CHECK_THROWS_WITH(json::parse(bom1), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); - CHECK_THROWS_AS(json::parse(std::istringstream(bom1)), json::parse_error&); + std::istringstream iss(bom1); + CHECK_THROWS_AS(json::parse(iss), json::parse_error&); + CHECK(!iss.fail()); + CHECK(!iss.bad()); + CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file. 
+ CHECK(iss.good()); CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)), "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'"); } + SECTION("1 byte of BOM - incomplete") + { + { + unget_fails_stringbuf sb("\xEF ", 3); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + { + unget_fails_stringbuf sb("\xEF", 1); + std::istream is(&sb); + + json j; + CHECK_THROWS_WITH(is >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too + IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad()); + // Do not check the eofbit. + // Some implementations keep the eofbit if is.unget() fails, some do not. + } + } + SECTION("variations") { // calculate variations of each byte of the BOM to make sure @@ -529,14 +713,80 @@ TEST_CASE("deserialization") } } - SECTION("preserve state after parsing") + SECTION("preserve state after parsing - strings") + { + std::istringstream s(bom + "\"123\" \"456\""); + json j; + s >> j; + CHECK(j == "123"); + CHECK(s.good()); + s >> j; + CHECK(j == "456"); + CHECK(s.good()); + s.peek(); + CHECK(s.eof()); + } + + SECTION("preserve state after parsing - numbers (ref)") + { + std::istringstream s("123 456"); + int j; + s >> j; + CHECK(j == 123); + CHECK(s.good()); + s >> j; + CHECK(j == 456); + CHECK(!s.good()); + CHECK(!s.fail()); + CHECK(!s.bad()); + // The stream now has the eofbit set (since to determine whether the number has completely + // parsed, the lexer needs to read past the end of the file). 
+ CHECK(s.eof()); + } + SECTION("preserve state after parsing - numbers") { std::istringstream s(bom + "123 456"); json j; - j << s; + s >> j; CHECK(j == 123); - j << s; + CHECK(s.good()); + s >> j; CHECK(j == 456); + CHECK(!s.good()); + CHECK(!s.fail()); + CHECK(!s.bad()); + // The stream now has the eofbit set (since to determine whether the number has completely + // parsed, the lexer needs to read past the end of the file). + CHECK(s.eof()); + } + + SECTION("preserve state after parsing - numbers (trailing space) (ref)") + { + std::istringstream s("123 456 "); + int j; + s >> j; + CHECK(j == 123); + CHECK(s.good()); + s >> j; + CHECK(j == 456); + // The trailing space at the end is the end of the number. + // The stream should not have the eofbit set. + CHECK(s.good()); + CHECK(s.peek() == static_cast(' ')); + } + SECTION("preserve state after parsing - numbers (trailing space)") + { + std::istringstream s(bom + "123 456 "); + json j; + s >> j; + CHECK(j == 123); + CHECK(s.good()); + s >> j; + CHECK(j == 456); + // The trailing space at the end is the end of the number. + // The stream should not have the eofbit set. 
+ CHECK(s.good()); + CHECK(s.peek() == static_cast(' ')); } } } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 604def6cb..3bced8590 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -1504,6 +1504,29 @@ TEST_CASE("regression tests") my_json foo = R"([1, 2, 3])"_json; } + SECTION("issue #976 - istream >> json --- 1st character skipped in stream") + { + json j; + + std::istringstream iss; + + iss.clear(); + iss.str("10"); + iss.setstate(std::ios_base::failbit); + + CHECK_THROWS_WITH(iss >> j, + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK(iss.fail()); + + iss.clear(); + iss.str("10"); + iss.setstate(std::ios_base::failbit); + + CHECK_THROWS_WITH(json::parse(iss), + "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); + CHECK(iss.fail()); + } + SECTION("issue #977 - Assigning between different json types") { foo_json lj = ns::foo{3}; From d46cf99a856c932100224873b78e3b89bf79f77a Mon Sep 17 00:00:00 2001 From: abolz Date: Mon, 12 Mar 2018 13:09:56 +0100 Subject: [PATCH 3/3] Improve performance of `input_stream_adapter` Use the underlying streambuf to extract characters instead of the istream interface and manually set the istream error state. This slightly changes the behavior in case a streambuf operation throws an exception. 
--- .../nlohmann/detail/input/input_adapters.hpp | 146 ++++++++++++++++++ single_include/nlohmann/json.hpp | 146 ++++++++++++++++++ 2 files changed, 292 insertions(+) diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index e26e706a2..1f2b1aa8d 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -47,12 +47,61 @@ struct input_adapter_protocol /// a type to simplify interfaces using input_adapter_t = std::shared_ptr; +/*! +A helper function to skip the UTF-8 byte order mark. + +If a complete BOM has been skipped, or if an incomplete BOM has been detected +and the stream has been successfully rewound to the start of the BOM, returns +goodbit. +If an internal operation fails, returns badbit, and the streambuf should no +longer be used. + +Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears +the eofbit. However, some implementations keep the eofbit if is.unget() fails, +others do not. + +Note: The streambuf must be non-null. +*/ +inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb) +{ + using traits_type = std::char_traits; + + assert(sb != nullptr); + + if (sb->sgetc() == 0xEF) + { + sb->sbumpc(); + if (sb->sgetc() == 0xBB) + { + sb->sbumpc(); + if (sb->sgetc() == 0xBF) + { + sb->sbumpc(); + return std::ios_base::goodbit; + } + + if (sb->sungetc() == traits_type::eof()) + { + return std::ios_base::badbit; + } + } + + if (sb->sungetc() == traits_type::eof()) + { + return std::ios_base::badbit; + } + } + + return std::ios_base::goodbit; +} + /*! Input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at beginning of input. Does not support changing the underlying std::streambuf in mid-input. 
*/ +#if 0 class input_stream_adapter : public input_adapter_protocol { public: @@ -110,6 +159,103 @@ class input_stream_adapter : public input_adapter_protocol private: std::istream& is; }; +#else +class input_stream_adapter : public input_adapter_protocol +{ + // + // NOTE: + // + // This implementation differs slightly from the reference implementation + // (using the std::istream interface): + // + // From N4659: + // 30.7.4.3 Unformatted input functions + // + // [...] + // If an exception is thrown during input then `ios::badbit` is turned + // on[310] in `*this`'s error state. (Exceptions thrown from + // `basic_ios<>::clear()` are not caught or rethrown.) + // If `(exceptions() & badbit) != 0` then the exception is rethrown. + // + // [310] This is done without causing an `ios::failure` to be thrown. + // + // However, there is no (portable) way to turn on the `badbit` in `is` + // without throwing an exception, so here we don't catch (and possibly) + // rethrow exceptions from streambuf operations. + // If an internal operation throws an exception, the behavior of this + // implementation is therefore slightly different from the reference + // implementation: + // + // If an exception is thrown during input and + // + // - badbit is turned ON in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state. + // + // - badbit is turned OFF in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state and the + // exception is **not** swallowed. + // + + public: + using traits_type = std::char_traits; + + explicit input_stream_adapter(std::istream& i) + : is(i) + , ok(i, /* noskipws */ true) + { + std::ios_base::iostate state = std::ios_base::goodbit; + if (ok) + { + state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf()); + } + else + { + state |= std::ios_base::failbit; + } + + // Update the stream state. 
In case skip_byte_order_mark() failed (but + // did not throw an exception), `state` now has the badbit set and the + // call to setstate might throw an ios::failure. Likewise, if the stream + // is "not ok" then the failbit will be set, which might throw an + // exception, too. + is.setstate(state); + } + + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; + + traits_type::int_type get_character() override + { + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.rdbuf()->sbumpc(); + if (ch != traits_type::eof()) + { + return ch; + } + + // sbumpc failed. + // No more characters are available. Set eofbit. + is.setstate(std::ios_base::eofbit); + } + + return traits_type::eof(); + } + + void unget_character() override + { + // This method is only ever called if the last call to get_character was + // successful (i.e. not EOF). This implies that the stream is good and + // that the call to sungetc below is guaranteed to succeed. + is.rdbuf()->sungetc(); + } + + private: + std::istream& is; + std::istream::sentry const ok; +}; +#endif /// input adapter for buffer input class input_buffer_adapter : public input_adapter_protocol diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index b71502749..c0d6d27c9 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1620,12 +1620,61 @@ struct input_adapter_protocol /// a type to simplify interfaces using input_adapter_t = std::shared_ptr; +/*! +A helper function to skip the UTF-8 byte order mark. + +If a complete BOM has been skipped, or if an incomplete BOM has been detected +and the stream has been successfully rewound to the start of the BOM, returns +goodbit. +If an internal operation fails, returns badbit, and the streambuf should no +longer be used. + +Note: Doesn't handle the eofbit. 
Before doing anything else is.unget() clears +the eofbit. However, some implementations keep the eofbit if is.unget() fails, +others do not. + +Note: The streambuf must be non-null. +*/ +inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb) +{ + using traits_type = std::char_traits; + + assert(sb != nullptr); + + if (sb->sgetc() == 0xEF) + { + sb->sbumpc(); + if (sb->sgetc() == 0xBB) + { + sb->sbumpc(); + if (sb->sgetc() == 0xBF) + { + sb->sbumpc(); + return std::ios_base::goodbit; + } + + if (sb->sungetc() == traits_type::eof()) + { + return std::ios_base::badbit; + } + } + + if (sb->sungetc() == traits_type::eof()) + { + return std::ios_base::badbit; + } + } + + return std::ios_base::goodbit; +} + /*! Input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at beginning of input. Does not support changing the underlying std::streambuf in mid-input. */ +#if 0 class input_stream_adapter : public input_adapter_protocol { public: @@ -1683,6 +1732,103 @@ class input_stream_adapter : public input_adapter_protocol private: std::istream& is; }; +#else +class input_stream_adapter : public input_adapter_protocol +{ + // + // NOTE: + // + // This implementation differs slightly from the reference implementation + // (using the std::istream interface): + // + // From N4659: + // 30.7.4.3 Unformatted input functions + // + // [...] + // If an exception is thrown during input then `ios::badbit` is turned + // on[310] in `*this`'s error state. (Exceptions thrown from + // `basic_ios<>::clear()` are not caught or rethrown.) + // If `(exceptions() & badbit) != 0` then the exception is rethrown. + // + // [310] This is done without causing an `ios::failure` to be thrown. + // + // However, there is no (portable) way to turn on the `badbit` in `is` + // without throwing an exception, so here we don't catch (and possibly) + // rethrow exceptions from streambuf operations. 
+ // If an internal operation throws an exception, the behavior of this + // implementation is therefore slightly different from the reference + // implementation: + // + // If an exception is thrown during input and + // + // - badbit is turned ON in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state. + // + // - badbit is turned OFF in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state and the + // exception is **not** swallowed. + // + + public: + using traits_type = std::char_traits; + + explicit input_stream_adapter(std::istream& i) + : is(i) + , ok(i, /* noskipws */ true) + { + std::ios_base::iostate state = std::ios_base::goodbit; + if (ok) + { + state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf()); + } + else + { + state |= std::ios_base::failbit; + } + + // Update the stream state. In case skip_byte_order_mark() failed (but + // did not throw an exception), `state` now has the badbit set and the + // call to setstate might throw an ios::failure. Likewise, if the stream + // is "not ok" then the failbit will be set, which might throw an + // exception, too. + is.setstate(state); + } + + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; + + traits_type::int_type get_character() override + { + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.rdbuf()->sbumpc(); + if (ch != traits_type::eof()) + { + return ch; + } + + // sbumpc failed. + // No more characters are available. Set eofbit. + is.setstate(std::ios_base::eofbit); + } + + return traits_type::eof(); + } + + void unget_character() override + { + // This method is only ever called if the last call to get_character was + // successful (i.e. not EOF). This implies that the stream is good and + // that the call to sungetc below is guaranteed to succeed. 
+ is.rdbuf()->sungetc(); + } + + private: + std::istream& is; + std::istream::sentry const ok; +}; +#endif /// input adapter for buffer input class input_buffer_adapter : public input_adapter_protocol