This commit is contained in:
abolz 2018-05-18 06:19:31 +00:00 committed by GitHub
commit c0ff2ad581
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 656 additions and 105 deletions

View File

@ -48,77 +48,214 @@ struct input_adapter_protocol
using input_adapter_t = std::shared_ptr<input_adapter_protocol>; using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*! /*!
Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at A helper function to skip the UTF-8 byte order mark.
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support If a complete BOM has been skipped, or if an incomplete BOM has been detected
subsequent use of standard std::istream operations to process any input and the stream has been successfully rewound to the start of the BOM, returns
characters following those used in parsing the JSON input. Clears the goodbit.
std::istream flags; any input errors (e.g., EOF) will be detected by the first If an internal operation fails, returns badbit, and the streambuf should no
subsequent call for input from the std::istream. longer be used.
Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears
the eofbit. However, some implementations keep the eofbit if is.unget() fails,
others do not.
Note: The streambuf must be non-null.
*/ */
/*!
@brief Consume a leading UTF-8 byte order mark (EF BB BF) from a streambuf.

Bytes are consumed only while they match the BOM prefix. When the full mark
has been consumed the function returns immediately. When only a proper prefix
matched, every consumed byte is handed back with sungetc() so the read
position ends up where it started.

@param[in] sb  stream buffer to read from; must not be null

@return std::ios_base::goodbit if a complete BOM was skipped, or if no byte
        had to be restored, or if all consumed bytes were restored;
        std::ios_base::badbit as soon as one sungetc() call reports EOF
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;

    assert(sb != nullptr);

    // the three bytes of the UTF-8 BOM, in stream order
    const traits_type::int_type mark[] = {0xEF, 0xBB, 0xBF};
    const int mark_length = 3;

    // peek first and only consume a byte once it is known to match
    int consumed = 0;
    while (consumed < mark_length && sb->sgetc() == mark[consumed])
    {
        sb->sbumpc();
        ++consumed;
    }

    if (consumed == mark_length)
    {
        // complete BOM: nothing to restore
        return std::ios_base::goodbit;
    }

    // partial (or no) match: put back exactly what was taken
    for (; consumed > 0; --consumed)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}
/*!
Input adapter for a (caching) istream.
Ignores a UTF Byte Order Mark at beginning of input.
Does not support changing the underlying std::streambuf in mid-input.
*/
#if 0
class input_stream_adapter : public input_adapter_protocol class input_stream_adapter : public input_adapter_protocol
{ {
public: public:
~input_stream_adapter() override using traits_type = std::char_traits<char>;
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags
is.clear();
}
explicit input_stream_adapter(std::istream& i) explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf()) : is(i)
{ {
// skip byte order mark // Skip byte order mark
std::char_traits<char>::int_type c; if (is.peek() == 0xEF)
if ((c = get_character()) == 0xEF)
{ {
if ((c = get_character()) == 0xBB) is.ignore();
if (is.peek() == 0xBB)
{ {
if ((c = get_character()) == 0xBF) is.ignore();
if (is.peek() == 0xBF)
{ {
return; // Ignore BOM is.ignore();
return; // Found a complete BOM.
} }
else if (c != std::char_traits<char>::eof())
{
is.unget();
}
is.putback('\xBB');
}
else if (c != std::char_traits<char>::eof())
{
is.unget(); is.unget();
} }
is.putback('\xEF');
} is.unget();
else if (c != std::char_traits<char>::eof())
{
is.unget(); // no byte order mark; process as usual
} }
} }
// delete because of pointer members
input_stream_adapter(const input_stream_adapter&) = delete; input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete; input_stream_adapter& operator=(const input_stream_adapter&) = delete;
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to traits_type::int_type get_character() override
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
std::char_traits<char>::int_type get_character() override
{ {
return sb.sbumpc(); // Only try to get a character if the stream is good!
if (is.good())
{
const auto ch = is.peek();
// If peek() returns EOF, the following call to ignore() will set
// the failbit, but we do not want to set the failbit here.
if (ch != traits_type::eof())
{
is.ignore();
return ch;
}
}
return traits_type::eof();
} }
void unget_character() override void unget_character() override
{ {
sb.sungetc(); // is.unget() avoided for performance is.unget();
} }
private: private:
/// the associated input stream
std::istream& is; std::istream& is;
std::streambuf& sb;
}; };
#else
/*!
Input adapter reading characters from a std::istream through its streambuf.

Characters are fetched directly from `is.rdbuf()`; the stream's error state
is updated by hand (failbit/badbit in the constructor, eofbit once the
buffer is exhausted).

Difference to an implementation built on the std::istream unformatted input
functions (see N4659, 30.7.4.3): those functions catch exceptions thrown
during input, turn on badbit without raising ios::failure, and rethrow only
if `(exceptions() & badbit) != 0`. There is no portable way to turn on
badbit in `is` without possibly throwing, so exceptions escaping from
streambuf operations are neither caught nor rethrown here. Consequently, if
an exception is thrown during input:

- with badbit ON in `is.exceptions()`: badbit is NOT set in `is`;
- with badbit OFF in `is.exceptions()`: badbit is NOT set in `is`, and the
  exception is NOT swallowed.
*/
class input_stream_adapter : public input_adapter_protocol
{
  public:
    using traits_type = std::char_traits<char>;

    /*!
    Prepares the stream via a sentry (without skipping whitespace) and, if
    the sentry succeeded, skips a leading UTF-8 byte order mark.

    The resulting state is merged into the stream with setstate(): badbit
    when skipping the BOM failed without throwing, failbit when the sentry
    reported that the stream is not usable. Either may cause setstate() to
    throw ios::failure, depending on `is.exceptions()`.
    */
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        // assume the stream is unusable until the sentry says otherwise
        std::ios_base::iostate result = std::ios_base::failbit;
        if (ok)
        {
            result = nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }

        is.setstate(result);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    /// Returns the next character, or EOF if the stream is not good or the
    /// buffer is exhausted (in the latter case eofbit is set on the stream).
    traits_type::int_type get_character() override
    {
        // never touch the buffer of a stream that is not good
        if (!is.good())
        {
            return traits_type::eof();
        }

        const traits_type::int_type next = is.rdbuf()->sbumpc();
        if (next == traits_type::eof())
        {
            // nothing left to read: record that on the stream
            is.setstate(std::ios_base::eofbit);
        }

        return next;
    }

    /// Steps the streambuf back by one character.
    void unget_character() override
    {
        // Callers invoke this only directly after a get_character() call
        // that returned a real character, so the stream is good and the
        // buffer has one character available to step back over.
        is.rdbuf()->sungetc();
    }

  private:
    /// the stream whose buffer is read and whose state is maintained
    std::istream& is;
    /// sentry constructed in the constructor; kept for the adapter's lifetime
    std::istream::sentry const ok;
};
#endif
/// input adapter for buffer input /// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol class input_buffer_adapter : public input_adapter_protocol

View File

@ -1622,77 +1622,214 @@ struct input_adapter_protocol
using input_adapter_t = std::shared_ptr<input_adapter_protocol>; using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*! /*!
Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at A helper function to skip the UTF-8 byte order mark.
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support If a complete BOM has been skipped, or if an incomplete BOM has been detected
subsequent use of standard std::istream operations to process any input and the stream has been successfully rewound to the start of the BOM, returns
characters following those used in parsing the JSON input. Clears the goodbit.
std::istream flags; any input errors (e.g., EOF) will be detected by the first If an internal operation fails, returns badbit, and the streambuf should no
subsequent call for input from the std::istream. longer be used.
Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears
the eofbit. However, some implementations keep the eofbit if is.unget() fails,
others do not.
Note: The streambuf must be non-null.
*/ */
/*!
@brief Consume a leading UTF-8 byte order mark (EF BB BF) from a streambuf.

Bytes are consumed only while they match the BOM prefix. When the full mark
has been consumed the function returns immediately. When only a proper prefix
matched, every consumed byte is handed back with sungetc() so the read
position ends up where it started.

@param[in] sb  stream buffer to read from; must not be null

@return std::ios_base::goodbit if a complete BOM was skipped, or if no byte
        had to be restored, or if all consumed bytes were restored;
        std::ios_base::badbit as soon as one sungetc() call reports EOF
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;

    assert(sb != nullptr);

    // the three bytes of the UTF-8 BOM, in stream order
    const traits_type::int_type mark[] = {0xEF, 0xBB, 0xBF};
    const int mark_length = 3;

    // peek first and only consume a byte once it is known to match
    int consumed = 0;
    while (consumed < mark_length && sb->sgetc() == mark[consumed])
    {
        sb->sbumpc();
        ++consumed;
    }

    if (consumed == mark_length)
    {
        // complete BOM: nothing to restore
        return std::ios_base::goodbit;
    }

    // partial (or no) match: put back exactly what was taken
    for (; consumed > 0; --consumed)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}
/*!
Input adapter for a (caching) istream.
Ignores a UTF Byte Order Mark at beginning of input.
Does not support changing the underlying std::streambuf in mid-input.
*/
#if 0
class input_stream_adapter : public input_adapter_protocol class input_stream_adapter : public input_adapter_protocol
{ {
public: public:
~input_stream_adapter() override using traits_type = std::char_traits<char>;
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags
is.clear();
}
explicit input_stream_adapter(std::istream& i) explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf()) : is(i)
{ {
// skip byte order mark // Skip byte order mark
std::char_traits<char>::int_type c; if (is.peek() == 0xEF)
if ((c = get_character()) == 0xEF)
{ {
if ((c = get_character()) == 0xBB) is.ignore();
if (is.peek() == 0xBB)
{ {
if ((c = get_character()) == 0xBF) is.ignore();
if (is.peek() == 0xBF)
{ {
return; // Ignore BOM is.ignore();
return; // Found a complete BOM.
} }
else if (c != std::char_traits<char>::eof())
{
is.unget();
}
is.putback('\xBB');
}
else if (c != std::char_traits<char>::eof())
{
is.unget(); is.unget();
} }
is.putback('\xEF');
} is.unget();
else if (c != std::char_traits<char>::eof())
{
is.unget(); // no byte order mark; process as usual
} }
} }
// delete because of pointer members
input_stream_adapter(const input_stream_adapter&) = delete; input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete; input_stream_adapter& operator=(const input_stream_adapter&) = delete;
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to traits_type::int_type get_character() override
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
std::char_traits<char>::int_type get_character() override
{ {
return sb.sbumpc(); // Only try to get a character if the stream is good!
if (is.good())
{
const auto ch = is.peek();
// If peek() returns EOF, the following call to ignore() will set
// the failbit, but we do not want to set the failbit here.
if (ch != traits_type::eof())
{
is.ignore();
return ch;
}
}
return traits_type::eof();
} }
void unget_character() override void unget_character() override
{ {
sb.sungetc(); // is.unget() avoided for performance is.unget();
} }
private: private:
/// the associated input stream
std::istream& is; std::istream& is;
std::streambuf& sb;
}; };
#else
/*!
Input adapter reading characters from a std::istream through its streambuf.

Characters are fetched directly from `is.rdbuf()`; the stream's error state
is updated by hand (failbit/badbit in the constructor, eofbit once the
buffer is exhausted).

Difference to an implementation built on the std::istream unformatted input
functions (see N4659, 30.7.4.3): those functions catch exceptions thrown
during input, turn on badbit without raising ios::failure, and rethrow only
if `(exceptions() & badbit) != 0`. There is no portable way to turn on
badbit in `is` without possibly throwing, so exceptions escaping from
streambuf operations are neither caught nor rethrown here. Consequently, if
an exception is thrown during input:

- with badbit ON in `is.exceptions()`: badbit is NOT set in `is`;
- with badbit OFF in `is.exceptions()`: badbit is NOT set in `is`, and the
  exception is NOT swallowed.
*/
class input_stream_adapter : public input_adapter_protocol
{
  public:
    using traits_type = std::char_traits<char>;

    /*!
    Prepares the stream via a sentry (without skipping whitespace) and, if
    the sentry succeeded, skips a leading UTF-8 byte order mark.

    The resulting state is merged into the stream with setstate(): badbit
    when skipping the BOM failed without throwing, failbit when the sentry
    reported that the stream is not usable. Either may cause setstate() to
    throw ios::failure, depending on `is.exceptions()`.
    */
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        // assume the stream is unusable until the sentry says otherwise
        std::ios_base::iostate result = std::ios_base::failbit;
        if (ok)
        {
            result = nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }

        is.setstate(result);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    /// Returns the next character, or EOF if the stream is not good or the
    /// buffer is exhausted (in the latter case eofbit is set on the stream).
    traits_type::int_type get_character() override
    {
        // never touch the buffer of a stream that is not good
        if (!is.good())
        {
            return traits_type::eof();
        }

        const traits_type::int_type next = is.rdbuf()->sbumpc();
        if (next == traits_type::eof())
        {
            // nothing left to read: record that on the stream
            is.setstate(std::ios_base::eofbit);
        }

        return next;
    }

    /// Steps the streambuf back by one character.
    void unget_character() override
    {
        // Callers invoke this only directly after a get_character() call
        // that returned a real character, so the stream is good and the
        // buffer has one character available to step back over.
        is.rdbuf()->sungetc();
    }

  private:
    /// the stream whose buffer is read and whose state is maintained
    std::istream& is;
    /// sentry constructed in the constructor; kept for the adapter's lifetime
    std::istream::sentry const ok;
};
#endif
/// input adapter for buffer input /// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol class input_buffer_adapter : public input_adapter_protocol

View File

@ -35,6 +35,57 @@ using nlohmann::json;
#include <iostream> #include <iostream>
#include <valarray> #include <valarray>
// HACK to get the tests running if exceptions are disabled on the command line
// using the "-e/--nothrow" flag. In this case the expressions in CHECK_THROWS
// and similar macros are never executed, and subsequent checks relying on the
// side effects of those expressions may or may not fail.
//
// IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr) forwards `expr` to CHECK only if a
// probe expression handed to CHECK_THROWS was actually evaluated.
//
// The body is wrapped in do { ... } while (false) so that the macro together
// with its trailing semicolon forms exactly one statement; a bare { ... }
// block followed by ';' would break an unbraced if/else around the macro.
#define IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr)                                                     \
    do                                                                                             \
    {                                                                                              \
        bool _exceptions_enabled_ = false;                                                         \
        /* The next line sets the `_exceptions_enabled_` flag to true iff the expression in */    \
        /* the CHECK_THROWS macro actually gets evaluated. It is not evaluated if the "-e"  */    \
        /* flag has been specified on the command line.                                     */    \
        CHECK_THROWS([&](){ _exceptions_enabled_ = true; throw std::runtime_error("ok"); }());     \
        if (_exceptions_enabled_)                                                                  \
        {                                                                                          \
            CHECK(expr);                                                                           \
        }                                                                                          \
    } while (false)
namespace
{
// Read-only streambuf over a caller-owned character range whose get area is
// never wider than a single character.
//
// Because the get area holds at most one character and pbackfail() is not
// overridden, a second sungetc() in a row fails. Both sgetc() and sbumpc()
// refill the get area through underflow() and therefore count as a "read"
// (sbumpc is sgetc followed by gbump(1)).
class unget_fails_stringbuf : public std::streambuf
{
  public:
    // str/len describe the characters to serve; the range is not copied and
    // is only ever read through the get area.
    explicit unget_fails_stringbuf(char const* str, size_t len)
        : last(str + len)
    {
        // begin with an empty get area positioned on the first character
        char* const start = const_cast<char*>(str);
        setg(start, start, start);
    }

  protected:
    // Exposes exactly the character at the current position, or reports EOF
    // (leaving an empty get area) once the end of the range is reached.
    traits_type::int_type underflow() override
    {
        char* const cur = gptr();
        const bool exhausted = (cur == last);

        // one-character window while data remains, empty window otherwise
        setg(cur, cur, exhausted ? cur : cur + 1);

        return exhausted ? traits_type::eof() : traits_type::to_int_type(*cur);
    }

  private:
    // one past the final character of the served range
    const char* last;
};
}
TEST_CASE("deserialization") TEST_CASE("deserialization")
{ {
SECTION("successful deserialization") SECTION("successful deserialization")
@ -45,6 +96,9 @@ TEST_CASE("deserialization")
ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]";
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]";
json j = json::parse(ss1); json j = json::parse(ss1);
CHECK(!ss1.fail());
CHECK(!ss1.bad());
CHECK(ss1.eof()); // Strict parsing.
CHECK(json::accept(ss2)); CHECK(json::accept(ss2));
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
} }
@ -71,6 +125,12 @@ TEST_CASE("deserialization")
ss << "[\"foo\",1,2,3,false,{\"one\":1}]"; ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
json j; json j;
j << ss; j << ss;
CHECK(!ss.fail());
CHECK(!ss.bad());
// operator>> uses non-strict parsing.
// We have read the closing ']' and we're done. The parser should
// not have read the EOF marker.
CHECK(!ss.eof());
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}})); CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
} }
@ -91,6 +151,18 @@ TEST_CASE("deserialization")
SECTION("unsuccessful deserialization") SECTION("unsuccessful deserialization")
{ {
SECTION("null streambuf")
{
std::streambuf* sb = nullptr;
std::istream iss(sb);
CHECK(iss.bad());
CHECK_THROWS_WITH(json::parse(iss),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.fail()); // Tests the badbit too.
IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.eof());
}
SECTION("stream") SECTION("stream")
{ {
std::stringstream ss1, ss2, ss3, ss4; std::stringstream ss1, ss2, ss3, ss4;
@ -99,12 +171,15 @@ TEST_CASE("deserialization")
ss3 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss3 << "[\"foo\",1,2,3,false,{\"one\":1}";
ss4 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss4 << "[\"foo\",1,2,3,false,{\"one\":1}";
CHECK_THROWS_AS(json::parse(ss1), json::parse_error&); CHECK_THROWS_AS(json::parse(ss1), json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
CHECK_THROWS_WITH(json::parse(ss2), CHECK_THROWS_WITH(json::parse(ss2),
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
CHECK(not json::accept(ss3)); CHECK(not json::accept(ss3));
json j_error; json j_error;
CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false)); CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false));
CHECK(j_error.is_discarded()); CHECK(j_error.is_discarded());
} }
@ -128,6 +203,9 @@ TEST_CASE("deserialization")
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
json j; json j;
CHECK_THROWS_AS(j << ss1, json::parse_error&); CHECK_THROWS_AS(j << ss1, json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
CHECK_THROWS_WITH(j << ss2, CHECK_THROWS_WITH(j << ss2,
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
} }
@ -139,6 +217,9 @@ TEST_CASE("deserialization")
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}"; ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
json j; json j;
CHECK_THROWS_AS(ss1 >> j, json::parse_error&); CHECK_THROWS_AS(ss1 >> j, json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
CHECK_THROWS_WITH(ss2 >> j, CHECK_THROWS_WITH(ss2 >> j,
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'"); "[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
} }
@ -448,7 +529,7 @@ TEST_CASE("deserialization")
SECTION("ignoring byte-order marks") SECTION("ignoring byte-order marks")
{ {
std::string bom = "\xEF\xBB\xBF"; const std::string bom = "\xEF\xBB\xBF";
SECTION("BOM only") SECTION("BOM only")
{ {
@ -456,7 +537,11 @@ TEST_CASE("deserialization")
CHECK_THROWS_WITH(json::parse(bom), CHECK_THROWS_WITH(json::parse(bom),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&); std::istringstream iss(bom);
CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.eof());
CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); "[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
} }
@ -464,29 +549,132 @@ TEST_CASE("deserialization")
SECTION("BOM and content") SECTION("BOM and content")
{ {
CHECK(json::parse(bom + "1") == 1); CHECK(json::parse(bom + "1") == 1);
CHECK(json::parse(std::istringstream(bom + "1")) == 1);
std::istringstream iss(bom + "1");
CHECK(json::parse(iss) == 1);
CHECK(!iss.bad());
CHECK(!iss.fail());
// Strict parsing: stream should be at EOF now.
CHECK(iss.eof());
iss.str(bom + "1");
iss.clear();
json j;
CHECK_NOTHROW(iss >> j);
CHECK(j == 1);
CHECK(!iss.fail());
CHECK(!iss.bad());
// Non-strict parsing:
// EOF bit is set only if we tried to read a character past the end of the file.
// In this case: parsing the complete number requires reading past the end of the file.
CHECK(iss.eof());
iss.str(bom + "\"1\"");
iss.clear();
CHECK(json::parse(iss) == "1");
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(iss.eof()); // Strict...
iss.str(bom + "\"1\"");
iss.clear();
CHECK_NOTHROW(iss >> j);
CHECK(j == "1");
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(!iss.eof()); // Non-strict...
} }
SECTION("2 byte of BOM") SECTION("2 byte of BOM")
{ {
CHECK_THROWS_AS(json::parse(bom.substr(0, 2)), json::parse_error&); const std::string bom2 = bom.substr(0, 2);
CHECK_THROWS_WITH(json::parse(bom),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 2))), json::parse_error&); CHECK_THROWS_AS(json::parse(bom2), json::parse_error&);
CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), CHECK_THROWS_WITH(json::parse(bom2),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
std::istringstream iss(bom2);
CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
CHECK(iss.good());
CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)),
"[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
}
SECTION("2 byte of BOM - incomplete")
{
{
unget_fails_stringbuf sb("\xEF\xBB ", 3);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
{
unget_fails_stringbuf sb("\xEF\xBB", 2);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
} }
SECTION("1 byte of BOM") SECTION("1 byte of BOM")
{ {
CHECK_THROWS_AS(json::parse(bom.substr(0, 1)), json::parse_error&); const std::string bom1 = bom.substr(0, 1);
CHECK_THROWS_WITH(json::parse(bom),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK_THROWS_AS(json::parse(std::istringstream(bom.substr(0, 1))), json::parse_error&); CHECK_THROWS_AS(json::parse(bom1), json::parse_error&);
CHECK_THROWS_WITH(json::parse(std::istringstream(bom)), CHECK_THROWS_WITH(json::parse(bom1),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal"); "[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
std::istringstream iss(bom1);
CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
CHECK(iss.good());
CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)),
"[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
}
SECTION("1 byte of BOM - incomplete")
{
{
unget_fails_stringbuf sb("\xEF ", 3);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
{
unget_fails_stringbuf sb("\xEF", 1);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
} }
SECTION("variations") SECTION("variations")
@ -526,14 +714,80 @@ TEST_CASE("deserialization")
} }
} }
SECTION("preserve state after parsing") SECTION("preserve state after parsing - strings")
{
std::istringstream s(bom + "\"123\" \"456\"");
json j;
s >> j;
CHECK(j == "123");
CHECK(s.good());
s >> j;
CHECK(j == "456");
CHECK(s.good());
s.peek();
CHECK(s.eof());
}
SECTION("preserve state after parsing - numbers (ref)")
{
std::istringstream s("123 456");
int j;
s >> j;
CHECK(j == 123);
CHECK(s.good());
s >> j;
CHECK(j == 456);
CHECK(!s.good());
CHECK(!s.fail());
CHECK(!s.bad());
// The stream now has the eofbit set (since to determine whether the number has completely
// parsed, the lexer needs to read past the end of the file).
CHECK(s.eof());
}
SECTION("preserve state after parsing - numbers")
{ {
std::istringstream s(bom + "123 456"); std::istringstream s(bom + "123 456");
json j; json j;
j << s; s >> j;
CHECK(j == 123); CHECK(j == 123);
j << s; CHECK(s.good());
s >> j;
CHECK(j == 456); CHECK(j == 456);
CHECK(!s.good());
CHECK(!s.fail());
CHECK(!s.bad());
// The stream now has the eofbit set (since to determine whether the number has completely
// parsed, the lexer needs to read past the end of the file).
CHECK(s.eof());
}
SECTION("preserve state after parsing - numbers (trailing space) (ref)")
{
std::istringstream s("123 456 ");
int j;
s >> j;
CHECK(j == 123);
CHECK(s.good());
s >> j;
CHECK(j == 456);
// The trailing space at the end is the end of the number.
// The stream should not have the eofbit set.
CHECK(s.good());
CHECK(s.peek() == static_cast<unsigned char>(' '));
}
SECTION("preserve state after parsing - numbers (trailing space)")
{
std::istringstream s(bom + "123 456 ");
json j;
s >> j;
CHECK(j == 123);
CHECK(s.good());
s >> j;
CHECK(j == 456);
// The trailing space at the end is the end of the number.
// The stream should not have the eofbit set.
CHECK(s.good());
CHECK(s.peek() == static_cast<unsigned char>(' '));
} }
} }
} }

View File

@ -1477,6 +1477,29 @@ TEST_CASE("regression tests")
my_json foo = R"([1, 2, 3])"_json; my_json foo = R"([1, 2, 3])"_json;
} }
SECTION("issue #976 - istream >> json --- 1st character skipped in stream")
{
json j;
std::istringstream iss;
iss.clear();
iss.str("10");
iss.setstate(std::ios_base::failbit);
CHECK_THROWS_WITH(iss >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK(iss.fail());
iss.clear();
iss.str("10");
iss.setstate(std::ios_base::failbit);
CHECK_THROWS_WITH(json::parse(iss),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK(iss.fail());
}
SECTION("issue #977 - Assigning between different json types") SECTION("issue #977 - Assigning between different json types")
{ {
foo_json lj = ns::foo{3}; foo_json lj = ns::foo{3};