Improve performance of input_stream_adapter

Use the underlying streambuf to extract characters instead of the istream
interface and manually set the istream error state.

This slightly changes the behavior in case a streambuf operation throws
an exception.
This commit is contained in:
abolz 2018-03-12 13:09:56 +01:00
parent b487afcbaa
commit d46cf99a85
2 changed files with 292 additions and 0 deletions

View File

@ -47,12 +47,61 @@ struct input_adapter_protocol
/// a type to simplify interfaces: a shared-ownership pointer to an input adapter
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Skips a UTF-8 byte order mark (the bytes EF BB BF) at the current read
position of a streambuf.

Bytes are consumed only while they match the BOM. If all three match, they
stay consumed. If only a prefix matches, every consumed byte is put back so
that the read position is again at the first byte that was inspected.

@param sb  the streambuf to read from; must be non-null (asserted)

@return goodbit if a complete BOM was skipped, or if no (or only an
        incomplete) BOM was found and the read position has been rewound
        successfully; badbit if putting a byte back failed, in which case
        the streambuf should no longer be used

@note This function works on the streambuf only: it never reports eofbit and
      does not modify any stream's error state. Applying the returned bits to
      a stream is up to the caller.
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;
    assert(sb != nullptr);

    // the UTF-8 encoded byte order mark, as int_type values
    const traits_type::int_type bom[] = {0xEF, 0xBB, 0xBF};
    const int bom_length = static_cast<int>(sizeof(bom) / sizeof(bom[0]));

    // Consume BOM bytes one at a time. Each byte is peeked first, so a
    // mismatching byte is never removed from the buffer.
    int matched = 0;
    while (matched < bom_length && sb->sgetc() == bom[matched])
    {
        sb->sbumpc();
        ++matched;
    }

    if (matched == bom_length)
    {
        // complete BOM: leave it consumed
        return std::ios_base::goodbit;
    }

    // Incomplete BOM: put back what was consumed, last byte first. Stop at
    // the first failure, since the read position is then unrecoverable.
    for (; matched > 0; --matched)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}
/*!
Input adapter for a (caching) istream.
Ignores a UTF-8 byte order mark (BOM) at the beginning of the input.
Does not support changing the underlying std::streambuf in mid-input.
*/
#if 0
class input_stream_adapter : public input_adapter_protocol
{
public:
@ -110,6 +159,103 @@ class input_stream_adapter : public input_adapter_protocol
private:
std::istream& is;
};
#else
class input_stream_adapter : public input_adapter_protocol
{
    /*
    Reads characters directly from the stream's std::streambuf and updates
    the error state of the std::istream by hand.

    This differs slightly from an implementation built on the std::istream
    interface when a streambuf operation throws.

    N4659, 30.7.4.3 (Unformatted input functions) says:

        If an exception is thrown during input then `ios::badbit` is turned
        on[310] in `*this`'s error state. (Exceptions thrown from
        `basic_ios<>::clear()` are not caught or rethrown.)
        If `(exceptions() & badbit) != 0` then the exception is rethrown.

        [310] This is done without causing an `ios::failure` to be thrown.

    There is no (portable) way to turn on the `badbit` in `is` without
    throwing an exception, so exceptions from streambuf operations are
    neither caught nor rethrown here. If a streambuf operation throws and

      - badbit is ON in `is.exceptions()`:
        the badbit will NOT be set in `is`'s error state;

      - badbit is OFF in `is.exceptions()`:
        the badbit will NOT be set in `is`'s error state and the exception
        is NOT swallowed.
    */
  public:
    using traits_type = std::char_traits<char>;

    /// Prepares `i` for reading and skips a leading UTF-8 byte order mark.
    /// The stream's error state is updated with the outcome, which may throw
    /// std::ios_base::failure depending on `i.exceptions()`.
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        // Assume the stream is unusable (failbit) unless the sentry says
        // otherwise; then the BOM check decides between goodbit and badbit.
        std::ios_base::iostate outcome = std::ios_base::failbit;
        if (ok)
        {
            outcome = nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }

        // Publish the outcome in the stream. If `outcome` carries badbit
        // (BOM rewind failed without throwing) or failbit (stream was not
        // ok), this call might throw an ios::failure.
        is.setstate(outcome);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    /// Returns the next character, or traits_type::eof() if the stream is
    /// not good or no more characters are available.
    traits_type::int_type get_character() override
    {
        // never touch the streambuf unless the stream is good
        if (!is.good())
        {
            return traits_type::eof();
        }

        const auto next = is.rdbuf()->sbumpc();
        if (next == traits_type::eof())
        {
            // sbumpc failed: the input is exhausted, so record eofbit
            is.setstate(std::ios_base::eofbit);
        }
        return next;
    }

    /// Puts the most recently read character back into the streambuf.
    void unget_character() override
    {
        // Precondition (expected of callers): the preceding call to
        // get_character() succeeded, i.e. did not return EOF. The stream is
        // then good and sungetc is relied upon to succeed, so its result is
        // not checked.
        is.rdbuf()->sungetc();
    }

  private:
    std::istream& is;
    std::istream::sentry const ok;
};
#endif
/// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol

View File

@ -1620,12 +1620,61 @@ struct input_adapter_protocol
/// a type to simplify interfaces: a shared-ownership pointer to an input adapter
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Skips a UTF-8 byte order mark (the bytes EF BB BF) at the current read
position of a streambuf.

Bytes are consumed only while they match the BOM. If all three match, they
stay consumed. If only a prefix matches, every consumed byte is put back so
that the read position is again at the first byte that was inspected.

@param sb  the streambuf to read from; must be non-null (asserted)

@return goodbit if a complete BOM was skipped, or if no (or only an
        incomplete) BOM was found and the read position has been rewound
        successfully; badbit if putting a byte back failed, in which case
        the streambuf should no longer be used

@note This function works on the streambuf only: it never reports eofbit and
      does not modify any stream's error state. Applying the returned bits to
      a stream is up to the caller.
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;
    assert(sb != nullptr);

    // the UTF-8 encoded byte order mark, as int_type values
    const traits_type::int_type bom[] = {0xEF, 0xBB, 0xBF};
    const int bom_length = static_cast<int>(sizeof(bom) / sizeof(bom[0]));

    // Consume BOM bytes one at a time. Each byte is peeked first, so a
    // mismatching byte is never removed from the buffer.
    int matched = 0;
    while (matched < bom_length && sb->sgetc() == bom[matched])
    {
        sb->sbumpc();
        ++matched;
    }

    if (matched == bom_length)
    {
        // complete BOM: leave it consumed
        return std::ios_base::goodbit;
    }

    // Incomplete BOM: put back what was consumed, last byte first. Stop at
    // the first failure, since the read position is then unrecoverable.
    for (; matched > 0; --matched)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}
/*!
Input adapter for a (caching) istream.
Ignores a UTF-8 byte order mark (BOM) at the beginning of the input.
Does not support changing the underlying std::streambuf in mid-input.
*/
#if 0
class input_stream_adapter : public input_adapter_protocol
{
public:
@ -1683,6 +1732,103 @@ class input_stream_adapter : public input_adapter_protocol
private:
std::istream& is;
};
#else
class input_stream_adapter : public input_adapter_protocol
{
    /*
    Reads characters directly from the stream's std::streambuf and updates
    the error state of the std::istream by hand.

    This differs slightly from an implementation built on the std::istream
    interface when a streambuf operation throws.

    N4659, 30.7.4.3 (Unformatted input functions) says:

        If an exception is thrown during input then `ios::badbit` is turned
        on[310] in `*this`'s error state. (Exceptions thrown from
        `basic_ios<>::clear()` are not caught or rethrown.)
        If `(exceptions() & badbit) != 0` then the exception is rethrown.

        [310] This is done without causing an `ios::failure` to be thrown.

    There is no (portable) way to turn on the `badbit` in `is` without
    throwing an exception, so exceptions from streambuf operations are
    neither caught nor rethrown here. If a streambuf operation throws and

      - badbit is ON in `is.exceptions()`:
        the badbit will NOT be set in `is`'s error state;

      - badbit is OFF in `is.exceptions()`:
        the badbit will NOT be set in `is`'s error state and the exception
        is NOT swallowed.
    */
  public:
    using traits_type = std::char_traits<char>;

    /// Prepares `i` for reading and skips a leading UTF-8 byte order mark.
    /// The stream's error state is updated with the outcome, which may throw
    /// std::ios_base::failure depending on `i.exceptions()`.
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        // Assume the stream is unusable (failbit) unless the sentry says
        // otherwise; then the BOM check decides between goodbit and badbit.
        std::ios_base::iostate outcome = std::ios_base::failbit;
        if (ok)
        {
            outcome = nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }

        // Publish the outcome in the stream. If `outcome` carries badbit
        // (BOM rewind failed without throwing) or failbit (stream was not
        // ok), this call might throw an ios::failure.
        is.setstate(outcome);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    /// Returns the next character, or traits_type::eof() if the stream is
    /// not good or no more characters are available.
    traits_type::int_type get_character() override
    {
        // never touch the streambuf unless the stream is good
        if (!is.good())
        {
            return traits_type::eof();
        }

        const auto next = is.rdbuf()->sbumpc();
        if (next == traits_type::eof())
        {
            // sbumpc failed: the input is exhausted, so record eofbit
            is.setstate(std::ios_base::eofbit);
        }
        return next;
    }

    /// Puts the most recently read character back into the streambuf.
    void unget_character() override
    {
        // Precondition (expected of callers): the preceding call to
        // get_character() succeeded, i.e. did not return EOF. The stream is
        // then good and sungetc is relied upon to succeed, so its result is
        // not checked.
        is.rdbuf()->sungetc();
    }

  private:
    std::istream& is;
    std::istream::sentry const ok;
};
#endif
/// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol