Use the std::istream interface to implement input_stream_adapter (fix #976)

This commit is contained in:
abolz 2018-03-12 10:38:16 +01:00
parent 9fca09b00e
commit b487afcbaa
4 changed files with 349 additions and 94 deletions

View File

@ -48,76 +48,67 @@ struct input_adapter_protocol
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support
subsequent use of standard std::istream operations to process any input
characters following those used in parsing the JSON input. Clears the
std::istream flags; any input errors (e.g., EOF) will be detected by the first
subsequent call for input from the std::istream.
Input adapter for a (caching) istream.
Ignores a UTF Byte Order Mark at beginning of input.
Does not support changing the underlying std::streambuf in mid-input.
*/
// NOTE(review): this class body looks like a flattened diff. Lines of a
// streambuf-based variant (sb.sbumpc() / sb.sungetc()) and of an istream-based
// variant (is.peek() / is.ignore() / is.unget()) are interleaved below, e.g.
// two mem-initializer lists and two get_character() signatures. Confirm which
// lines are live before relying on the notes in this block.
class input_stream_adapter : public input_adapter_protocol
{
public:
// Resets the error state of the referenced stream when the adapter goes away.
~input_stream_adapter() override
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags
is.clear();
}
using traits_type = std::char_traits<char>;
// Binds the adapter to the stream `i` and inspects the start of the input for
// the byte sequence 0xEF 0xBB 0xBF (byte order mark). A complete sequence is
// consumed; after a partial match the bytes already taken are handed back to
// the stream with putback().
explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf())
: is(i)
{
// skip byte order mark
std::char_traits<char>::int_type c;
if ((c = get_character()) == 0xEF)
// Skip byte order mark
if (is.peek() == 0xEF)
{
if ((c = get_character()) == 0xBB)
is.ignore();
if (is.peek() == 0xBB)
{
if ((c = get_character()) == 0xBF)
is.ignore();
if (is.peek() == 0xBF)
{
return; // Ignore BOM
is.ignore();
return; // Found a complete BOM.
}
else if (c != std::char_traits<char>::eof())
{
is.unget();
}
// third byte did not match: hand the consumed 0xBB back to the stream
is.putback('\xBB');
}
else if (c != std::char_traits<char>::eof())
{
is.unget();
}
// second byte did not match: hand the consumed 0xEF back to the stream
is.putback('\xEF');
}
else if (c != std::char_traits<char>::eof())
{
is.unget(); // no byte order mark; process as usual
is.unget();
}
}
// copying is disabled: the adapter only holds references (see the members
// at the end of the class)
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
input_stream_adapter& operator=(const input_stream_adapter&) = delete;
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
//
// Returns the next input character as an int_type, or the eof() value when
// no character can be delivered. In the istream-based lines below the
// character is first inspected with peek() and only consumed with ignore()
// when it is not eof().
std::char_traits<char>::int_type get_character() override
traits_type::int_type get_character() override
{
return sb.sbumpc();
// Only try to get a character if the stream is good!
if (is.good())
{
const auto ch = is.peek();
// If peek() returns EOF, the following call to ignore() will set
// the failbit, but we do not want to set the failbit here.
if (ch != traits_type::eof())
{
is.ignore();
return ch;
}
}
return traits_type::eof();
}
// Steps the input back by one character.
void unget_character() override
{
sb.sungetc(); // is.unget() avoided for performance
is.unget();
}
private:
/// the associated input stream
std::istream& is;
/// the stream buffer taken from the stream via `*i.rdbuf()` in the constructor
std::streambuf& sb;
};
/// input adapter for buffer input

View File

@ -1621,76 +1621,67 @@ struct input_adapter_protocol
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support
subsequent use of standard std::istream operations to process any input
characters following those used in parsing the JSON input. Clears the
std::istream flags; any input errors (e.g., EOF) will be detected by the first
subsequent call for input from the std::istream.
Input adapter for a (caching) istream.
Ignores a UTF Byte Order Mark at beginning of input.
Does not support changing the underlying std::streambuf in mid-input.
*/
// NOTE(review): this class body looks like a flattened diff. Lines of a
// streambuf-based variant (sb.sbumpc() / sb.sungetc()) and of an istream-based
// variant (is.peek() / is.ignore() / is.unget()) are interleaved below, e.g.
// two mem-initializer lists and two get_character() signatures. Confirm which
// lines are live before relying on the notes in this block.
class input_stream_adapter : public input_adapter_protocol
{
public:
// Resets the error state of the referenced stream when the adapter goes away.
~input_stream_adapter() override
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags
is.clear();
}
using traits_type = std::char_traits<char>;
// Binds the adapter to the stream `i` and inspects the start of the input for
// the byte sequence 0xEF 0xBB 0xBF (byte order mark). A complete sequence is
// consumed; after a partial match the bytes already taken are handed back to
// the stream with putback().
explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf())
: is(i)
{
// skip byte order mark
std::char_traits<char>::int_type c;
if ((c = get_character()) == 0xEF)
// Skip byte order mark
if (is.peek() == 0xEF)
{
if ((c = get_character()) == 0xBB)
is.ignore();
if (is.peek() == 0xBB)
{
if ((c = get_character()) == 0xBF)
is.ignore();
if (is.peek() == 0xBF)
{
return; // Ignore BOM
is.ignore();
return; // Found a complete BOM.
}
else if (c != std::char_traits<char>::eof())
{
is.unget();
}
// third byte did not match: hand the consumed 0xBB back to the stream
is.putback('\xBB');
}
else if (c != std::char_traits<char>::eof())
{
is.unget();
}
// second byte did not match: hand the consumed 0xEF back to the stream
is.putback('\xEF');
}
else if (c != std::char_traits<char>::eof())
{
is.unget(); // no byte order mark; process as usual
is.unget();
}
}
// copying is disabled: the adapter only holds references (see the members
// at the end of the class)
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
input_stream_adapter& operator=(const input_stream_adapter&) = delete;
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
//
// Returns the next input character as an int_type, or the eof() value when
// no character can be delivered. In the istream-based lines below the
// character is first inspected with peek() and only consumed with ignore()
// when it is not eof().
std::char_traits<char>::int_type get_character() override
traits_type::int_type get_character() override
{
return sb.sbumpc();
// Only try to get a character if the stream is good!
if (is.good())
{
const auto ch = is.peek();
// If peek() returns EOF, the following call to ignore() will set
// the failbit, but we do not want to set the failbit here.
if (ch != traits_type::eof())
{
is.ignore();
return ch;
}
}
return traits_type::eof();
}
// Steps the input back by one character.
void unget_character() override
{
sb.sungetc(); // is.unget() avoided for performance
is.unget();
}
private:
/// the associated input stream
std::istream& is;
/// the stream buffer taken from the stream via `*i.rdbuf()` in the constructor
std::streambuf& sb;
};
/// input adapter for buffer input

View File

@ -34,6 +34,57 @@ using nlohmann::json;
#include <iostream>
#include <valarray>
// Workaround that keeps the tests meaningful when exceptions are disabled on
// the command line with the "-e/--nothrow" flag. In that mode the expressions
// passed to CHECK_THROWS and similar macros are never evaluated, so later
// checks that rely on the side effects of such an expression may or may not
// fail. This macro runs CHECK(expr) only if CHECK_THROWS really evaluates its
// argument.
#define IF_EXCEPTIONS_ENABLED_THEN_CHECK(expr) \
{ \
    /* Stays false unless the probe lambda below is actually invoked, i.e. */ \
    /* unless CHECK_THROWS evaluates the expression it is given. */ \
    bool throwing_checks_are_evaluated_ = false; \
    CHECK_THROWS([&throwing_checks_are_evaluated_]() \
    { \
        throwing_checks_are_evaluated_ = true; \
        throw std::runtime_error("ok"); \
    }()); \
    if (throwing_checks_are_evaluated_) \
    { \
        CHECK(expr); \
    } \
} \
/**/
namespace
{
// Read-only stream buffer over a caller-supplied character range whose get
// area never exposes more than a single character. The characters are not
// copied; the buffer only keeps pointers into `str`.
//
// Because the get area is at most one character wide, a second sungetc in a
// row has nothing left to step back over and fails. Both sgetc and sbumpc
// refill the get area and therefore count as a "read" (sbumpc behaves like
// sgetc followed by gbump(1)).
class unget_fails_stringbuf : public std::streambuf
{
    // one past the final readable character
    const char* end_of_input;

public:
    // `str` points at the first character, `len` is the number of characters.
    explicit unget_fails_stringbuf(char const* str, std::size_t len)
        : end_of_input(str + len)
    {
        // setg() wants non-const pointers; start with an empty get area so
        // that the very first read goes through underflow().
        char* const start = const_cast<char*>(str);
        setg(start, start, start);
    }

protected:
    // Re-centres the get area on the current read position: one character
    // wide while input remains, empty once the end has been reached.
    traits_type::int_type underflow() override
    {
        char* const cursor = gptr();
        const bool exhausted = (cursor == end_of_input);

        setg(cursor, cursor, exhausted ? cursor : cursor + 1);

        return exhausted ? traits_type::eof()
               : traits_type::to_int_type(*cursor);
    }
};
}
TEST_CASE("deserialization")
{
SECTION("successful deserialization")
@ -44,6 +95,9 @@ TEST_CASE("deserialization")
ss1 << "[\"foo\",1,2,3,false,{\"one\":1}]";
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}]";
json j = json::parse(ss1);
CHECK(!ss1.fail());
CHECK(!ss1.bad());
CHECK(ss1.eof()); // Strict parsing.
CHECK(json::accept(ss2));
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
}
@ -70,6 +124,12 @@ TEST_CASE("deserialization")
ss << "[\"foo\",1,2,3,false,{\"one\":1}]";
json j;
j << ss;
CHECK(!ss.fail());
CHECK(!ss.bad());
// operator>> uses non-strict parsing.
// We have read the closing ']' and we're done. The parser should
// not have read the EOF marker.
CHECK(!ss.eof());
CHECK(j == json({"foo", 1, 2, 3, false, {{"one", 1}}}));
}
@ -90,6 +150,18 @@ TEST_CASE("deserialization")
SECTION("unsuccessful deserialization")
{
SECTION("null streambuf")
{
std::streambuf* sb = nullptr;
std::istream iss(sb);
CHECK(iss.bad());
CHECK_THROWS_WITH(json::parse(iss),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.fail()); // Tests the badbit too.
IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.eof());
}
SECTION("stream")
{
std::stringstream ss1, ss2, ss3, ss4;
@ -98,12 +170,15 @@ TEST_CASE("deserialization")
ss3 << "[\"foo\",1,2,3,false,{\"one\":1}";
ss4 << "[\"foo\",1,2,3,false,{\"one\":1}";
CHECK_THROWS_AS(json::parse(ss1), json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
CHECK_THROWS_WITH(json::parse(ss2),
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
CHECK(not json::accept(ss3));
json j_error;
CHECK_NOTHROW(j_error = json::parse(ss1, nullptr, false));
CHECK_NOTHROW(j_error = json::parse(ss4, nullptr, false));
CHECK(j_error.is_discarded());
}
@ -127,6 +202,9 @@ TEST_CASE("deserialization")
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
json j;
CHECK_THROWS_AS(j << ss1, json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
CHECK_THROWS_WITH(j << ss2,
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
@ -138,6 +216,9 @@ TEST_CASE("deserialization")
ss2 << "[\"foo\",1,2,3,false,{\"one\":1}";
json j;
CHECK_THROWS_AS(ss1 >> j, json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!ss1.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(ss1.eof());
CHECK_THROWS_WITH(ss2 >> j,
"[json.exception.parse_error.101] parse error at 29: syntax error - unexpected end of input; expected ']'");
}
@ -455,7 +536,11 @@ TEST_CASE("deserialization")
CHECK_THROWS_WITH(json::parse(bom),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK_THROWS_AS(json::parse(std::istringstream(bom)), json::parse_error&);
std::istringstream iss(bom);
CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.fail());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(!iss.bad());
IF_EXCEPTIONS_ENABLED_THEN_CHECK(iss.eof());
CHECK_THROWS_WITH(json::parse(std::istringstream(bom)),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
}
@ -463,7 +548,40 @@ TEST_CASE("deserialization")
SECTION("BOM and content")
{
CHECK(json::parse(bom + "1") == 1);
CHECK(json::parse(std::istringstream(bom + "1")) == 1);
std::istringstream iss(bom + "1");
CHECK(json::parse(iss) == 1);
CHECK(!iss.bad());
CHECK(!iss.fail());
// Strict parsing: stream should be at EOF now.
CHECK(iss.eof());
iss.str(bom + "1");
iss.clear();
json j;
CHECK_NOTHROW(iss >> j);
CHECK(j == 1);
CHECK(!iss.fail());
CHECK(!iss.bad());
// Non-strict parsing:
// EOF bit is set only if we tried to read a character past the end of the file.
// In this case: parsing the complete number requires reading past the end of the file.
CHECK(iss.eof());
iss.str(bom + "\"1\"");
iss.clear();
CHECK(json::parse(iss) == "1");
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(iss.eof()); // Strict...
iss.str(bom + "\"1\"");
iss.clear();
CHECK_NOTHROW(iss >> j);
CHECK(j == "1");
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(!iss.eof()); // Non-strict...
}
SECTION("2 byte of BOM")
@ -474,11 +592,44 @@ TEST_CASE("deserialization")
CHECK_THROWS_WITH(json::parse(bom2),
"[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
CHECK_THROWS_AS(json::parse(std::istringstream(bom2)), json::parse_error&);
std::istringstream iss(bom2);
CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
CHECK(iss.good());
CHECK_THROWS_WITH(json::parse(std::istringstream(bom2)),
"[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
}
SECTION("2 byte of BOM - incomplete")
{
{
unget_fails_stringbuf sb("\xEF\xBB ", 3);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
{
unget_fails_stringbuf sb("\xEF\xBB", 2);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
}
SECTION("1 byte of BOM")
{
const std::string bom1 = bom.substr(0, 1);
@ -487,11 +638,44 @@ TEST_CASE("deserialization")
CHECK_THROWS_WITH(json::parse(bom1),
"[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
CHECK_THROWS_AS(json::parse(std::istringstream(bom1)), json::parse_error&);
std::istringstream iss(bom1);
CHECK_THROWS_AS(json::parse(iss), json::parse_error&);
CHECK(!iss.fail());
CHECK(!iss.bad());
CHECK(!iss.eof()); // EOF bit is set only if we tried to read a character past the end of the file.
CHECK(iss.good());
CHECK_THROWS_WITH(json::parse(std::istringstream(bom1)),
"[json.exception.parse_error.101] parse error at 1: syntax error - invalid literal; last read: '\xEF'");
}
SECTION("1 byte of BOM - incomplete")
{
{
unget_fails_stringbuf sb("\xEF ", 3);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
{
unget_fails_stringbuf sb("\xEF", 1);
std::istream is(&sb);
json j;
CHECK_THROWS_WITH(is >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.fail()); // Tests the badbit too
IF_EXCEPTIONS_ENABLED_THEN_CHECK(is.bad());
// Do not check the eofbit.
// Some implementations keep the eofbit if is.unget() fails, some do not.
}
}
SECTION("variations")
{
// calculate variations of each byte of the BOM to make sure
@ -529,14 +713,80 @@ TEST_CASE("deserialization")
}
}
SECTION("preserve state after parsing")
SECTION("preserve state after parsing - strings")
{
std::istringstream s(bom + "\"123\" \"456\"");
json j;
s >> j;
CHECK(j == "123");
CHECK(s.good());
s >> j;
CHECK(j == "456");
CHECK(s.good());
s.peek();
CHECK(s.eof());
}
SECTION("preserve state after parsing - numbers (ref)")
{
std::istringstream s("123 456");
int j;
s >> j;
CHECK(j == 123);
CHECK(s.good());
s >> j;
CHECK(j == 456);
CHECK(!s.good());
CHECK(!s.fail());
CHECK(!s.bad());
// The stream now has the eofbit set (since to determine whether the number has completely
// parsed, the lexer needs to read past the end of the file).
CHECK(s.eof());
}
SECTION("preserve state after parsing - numbers")
{
std::istringstream s(bom + "123 456");
json j;
j << s;
s >> j;
CHECK(j == 123);
j << s;
CHECK(s.good());
s >> j;
CHECK(j == 456);
CHECK(!s.good());
CHECK(!s.fail());
CHECK(!s.bad());
// The stream now has the eofbit set (since to determine whether the number has completely
// parsed, the lexer needs to read past the end of the file).
CHECK(s.eof());
}
SECTION("preserve state after parsing - numbers (trailing space) (ref)")
{
std::istringstream s("123 456 ");
int j;
s >> j;
CHECK(j == 123);
CHECK(s.good());
s >> j;
CHECK(j == 456);
// The trailing space at the end is the end of the number.
// The stream should not have the eofbit set.
CHECK(s.good());
CHECK(s.peek() == static_cast<unsigned char>(' '));
}
SECTION("preserve state after parsing - numbers (trailing space)")
{
std::istringstream s(bom + "123 456 ");
json j;
s >> j;
CHECK(j == 123);
CHECK(s.good());
s >> j;
CHECK(j == 456);
// The trailing space at the end is the end of the number.
// The stream should not have the eofbit set.
CHECK(s.good());
CHECK(s.peek() == static_cast<unsigned char>(' '));
}
}
}

View File

@ -1504,6 +1504,29 @@ TEST_CASE("regression tests")
my_json foo = R"([1, 2, 3])"_json;
}
SECTION("issue #976 - istream >> json --- 1st character skipped in stream")
{
json j;
std::istringstream iss;
iss.clear();
iss.str("10");
iss.setstate(std::ios_base::failbit);
CHECK_THROWS_WITH(iss >> j,
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK(iss.fail());
iss.clear();
iss.str("10");
iss.setstate(std::ios_base::failbit);
CHECK_THROWS_WITH(json::parse(iss),
"[json.exception.parse_error.101] parse error at 1: syntax error - unexpected end of input; expected '[', '{', or a literal");
CHECK(iss.fail());
}
SECTION("issue #977 - Assigning between different json types")
{
foo_json lj = ns::foo{3};