Improve performance of input_stream_adapter
Use the underlying streambuf to extract characters instead of the istream interface and manually set the istream error state. This slightly changes the behavior in case a streambuf operation throws an exception.
This commit is contained in:
parent
b487afcbaa
commit
d46cf99a85
@ -47,12 +47,61 @@ struct input_adapter_protocol
|
|||||||
/// a type to simplify interfaces
|
/// a type to simplify interfaces
|
||||||
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
A helper function to skip the UTF-8 byte order mark.
|
||||||
|
|
||||||
|
If a complete BOM has been skipped, or if an incomplete BOM has been detected
|
||||||
|
and the stream has been successfully rewound to the start of the BOM, returns
|
||||||
|
goodbit.
|
||||||
|
If an internal operation fails, returns badbit, and the streambuf should no
|
||||||
|
longer be used.
|
||||||
|
|
||||||
|
Note: Doesn't handle the eofbit. Before doing anything else, is.unget() clears
|
||||||
|
the eofbit. However, some implementations keep the eofbit if is.unget() fails,
|
||||||
|
others do not.
|
||||||
|
|
||||||
|
Note: The streambuf must be non-null.
|
||||||
|
*/
|
||||||
|
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
|
||||||
|
{
|
||||||
|
using traits_type = std::char_traits<char>;
|
||||||
|
|
||||||
|
assert(sb != nullptr);
|
||||||
|
|
||||||
|
if (sb->sgetc() == 0xEF)
|
||||||
|
{
|
||||||
|
sb->sbumpc();
|
||||||
|
if (sb->sgetc() == 0xBB)
|
||||||
|
{
|
||||||
|
sb->sbumpc();
|
||||||
|
if (sb->sgetc() == 0xBF)
|
||||||
|
{
|
||||||
|
sb->sbumpc();
|
||||||
|
return std::ios_base::goodbit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb->sungetc() == traits_type::eof())
|
||||||
|
{
|
||||||
|
return std::ios_base::badbit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb->sungetc() == traits_type::eof())
|
||||||
|
{
|
||||||
|
return std::ios_base::badbit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::ios_base::goodbit;
|
||||||
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Input adapter for a (caching) istream.
|
Input adapter for a (caching) istream.
|
||||||
Ignores a UTF Byte Order Mark at beginning of input.
|
Ignores a UTF Byte Order Mark at beginning of input.
|
||||||
|
|
||||||
Does not support changing the underlying std::streambuf in mid-input.
|
Does not support changing the underlying std::streambuf in mid-input.
|
||||||
*/
|
*/
|
||||||
|
#if 0
|
||||||
class input_stream_adapter : public input_adapter_protocol
|
class input_stream_adapter : public input_adapter_protocol
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -110,6 +159,103 @@ class input_stream_adapter : public input_adapter_protocol
|
|||||||
private:
|
private:
|
||||||
std::istream& is;
|
std::istream& is;
|
||||||
};
|
};
|
||||||
|
#else
|
||||||
|
class input_stream_adapter : public input_adapter_protocol
|
||||||
|
{
|
||||||
|
//
|
||||||
|
// NOTE:
|
||||||
|
//
|
||||||
|
// This implementation differs slightly from the reference implementation
|
||||||
|
// (using the std::istream interface):
|
||||||
|
//
|
||||||
|
// From N4659:
|
||||||
|
// 30.7.4.3 Unformatted input functions
|
||||||
|
//
|
||||||
|
// [...]
|
||||||
|
// If an exception is thrown during input then `ios::badbit` is turned
|
||||||
|
// on[310] in `*this`'s error state. (Exceptions thrown from
|
||||||
|
// `basic_ios<>::clear()` are not caught or rethrown.)
|
||||||
|
// If `(exceptions() & badbit) != 0` then the exception is rethrown.
|
||||||
|
//
|
||||||
|
// [310] This is done without causing an `ios::failure` to be thrown.
|
||||||
|
//
|
||||||
|
// However, there is no (portable) way to turn on the `badbit` in `is`
|
||||||
|
// without throwing an exception, so here we don't catch (and possibly
|
||||||
|
// rethrow) exceptions from streambuf operations.
|
||||||
|
// If an internal operation throws an exception, the behavior of this
|
||||||
|
// implementation is therefore slightly different from the reference
|
||||||
|
// implementation:
|
||||||
|
//
|
||||||
|
// If an exception is thrown during input and
|
||||||
|
//
|
||||||
|
// - badbit is turned ON in `is.exceptions()`:
|
||||||
|
// The badbit will **not** be set in `is`'s error state.
|
||||||
|
//
|
||||||
|
// - badbit is turned OFF in `is.exceptions()`:
|
||||||
|
// The badbit will **not** be set in `is`'s error state and the
|
||||||
|
// exception is **not** swallowed.
|
||||||
|
//
|
||||||
|
|
||||||
|
public:
|
||||||
|
using traits_type = std::char_traits<char>;
|
||||||
|
|
||||||
|
explicit input_stream_adapter(std::istream& i)
|
||||||
|
: is(i)
|
||||||
|
, ok(i, /* noskipws */ true)
|
||||||
|
{
|
||||||
|
std::ios_base::iostate state = std::ios_base::goodbit;
|
||||||
|
if (ok)
|
||||||
|
{
|
||||||
|
state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
state |= std::ios_base::failbit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the stream state. In case skip_byte_order_mark() failed (but
|
||||||
|
// did not throw an exception), `state` now has the badbit set and the
|
||||||
|
// call to setstate might throw an ios::failure. Likewise, if the stream
|
||||||
|
// is "not ok" then the failbit will be set, which might throw an
|
||||||
|
// exception, too.
|
||||||
|
is.setstate(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
input_stream_adapter(const input_stream_adapter&) = delete;
|
||||||
|
input_stream_adapter& operator=(const input_stream_adapter&) = delete;
|
||||||
|
|
||||||
|
traits_type::int_type get_character() override
|
||||||
|
{
|
||||||
|
// Only try to get a character if the stream is good!
|
||||||
|
if (is.good())
|
||||||
|
{
|
||||||
|
const auto ch = is.rdbuf()->sbumpc();
|
||||||
|
if (ch != traits_type::eof())
|
||||||
|
{
|
||||||
|
return ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
// sbumpc failed.
|
||||||
|
// No more characters are available. Set eofbit.
|
||||||
|
is.setstate(std::ios_base::eofbit);
|
||||||
|
}
|
||||||
|
|
||||||
|
return traits_type::eof();
|
||||||
|
}
|
||||||
|
|
||||||
|
void unget_character() override
|
||||||
|
{
|
||||||
|
// This method is only ever called if the last call to get_character was
|
||||||
|
// successful (i.e. not EOF). This implies that the stream is good and
|
||||||
|
// that the call to sungetc below is guaranteed to succeed.
|
||||||
|
is.rdbuf()->sungetc();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::istream& is;
|
||||||
|
std::istream::sentry const ok;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/// input adapter for buffer input
|
/// input adapter for buffer input
|
||||||
class input_buffer_adapter : public input_adapter_protocol
|
class input_buffer_adapter : public input_adapter_protocol
|
||||||
|
|||||||
@ -1620,12 +1620,61 @@ struct input_adapter_protocol
|
|||||||
/// a type to simplify interfaces
|
/// a type to simplify interfaces
|
||||||
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
A helper function to skip the UTF-8 byte order mark.
|
||||||
|
|
||||||
|
If a complete BOM has been skipped, or if an incomplete BOM has been detected
|
||||||
|
and the stream has been successfully rewound to the start of the BOM, returns
|
||||||
|
goodbit.
|
||||||
|
If an internal operation fails, returns badbit, and the streambuf should no
|
||||||
|
longer be used.
|
||||||
|
|
||||||
|
Note: Doesn't handle the eofbit. Before doing anything else, is.unget() clears
|
||||||
|
the eofbit. However, some implementations keep the eofbit if is.unget() fails,
|
||||||
|
others do not.
|
||||||
|
|
||||||
|
Note: The streambuf must be non-null.
|
||||||
|
*/
|
||||||
|
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
|
||||||
|
{
|
||||||
|
using traits_type = std::char_traits<char>;
|
||||||
|
|
||||||
|
assert(sb != nullptr);
|
||||||
|
|
||||||
|
if (sb->sgetc() == 0xEF)
|
||||||
|
{
|
||||||
|
sb->sbumpc();
|
||||||
|
if (sb->sgetc() == 0xBB)
|
||||||
|
{
|
||||||
|
sb->sbumpc();
|
||||||
|
if (sb->sgetc() == 0xBF)
|
||||||
|
{
|
||||||
|
sb->sbumpc();
|
||||||
|
return std::ios_base::goodbit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb->sungetc() == traits_type::eof())
|
||||||
|
{
|
||||||
|
return std::ios_base::badbit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb->sungetc() == traits_type::eof())
|
||||||
|
{
|
||||||
|
return std::ios_base::badbit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::ios_base::goodbit;
|
||||||
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Input adapter for a (caching) istream.
|
Input adapter for a (caching) istream.
|
||||||
Ignores a UTF Byte Order Mark at beginning of input.
|
Ignores a UTF Byte Order Mark at beginning of input.
|
||||||
|
|
||||||
Does not support changing the underlying std::streambuf in mid-input.
|
Does not support changing the underlying std::streambuf in mid-input.
|
||||||
*/
|
*/
|
||||||
|
#if 0
|
||||||
class input_stream_adapter : public input_adapter_protocol
|
class input_stream_adapter : public input_adapter_protocol
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -1683,6 +1732,103 @@ class input_stream_adapter : public input_adapter_protocol
|
|||||||
private:
|
private:
|
||||||
std::istream& is;
|
std::istream& is;
|
||||||
};
|
};
|
||||||
|
#else
|
||||||
|
class input_stream_adapter : public input_adapter_protocol
|
||||||
|
{
|
||||||
|
//
|
||||||
|
// NOTE:
|
||||||
|
//
|
||||||
|
// This implementation differs slightly from the reference implementation
|
||||||
|
// (using the std::istream interface):
|
||||||
|
//
|
||||||
|
// From N4659:
|
||||||
|
// 30.7.4.3 Unformatted input functions
|
||||||
|
//
|
||||||
|
// [...]
|
||||||
|
// If an exception is thrown during input then `ios::badbit` is turned
|
||||||
|
// on[310] in `*this`'s error state. (Exceptions thrown from
|
||||||
|
// `basic_ios<>::clear()` are not caught or rethrown.)
|
||||||
|
// If `(exceptions() & badbit) != 0` then the exception is rethrown.
|
||||||
|
//
|
||||||
|
// [310] This is done without causing an `ios::failure` to be thrown.
|
||||||
|
//
|
||||||
|
// However, there is no (portable) way to turn on the `badbit` in `is`
|
||||||
|
// without throwing an exception, so here we don't catch (and possibly
|
||||||
|
// rethrow) exceptions from streambuf operations.
|
||||||
|
// If an internal operation throws an exception, the behavior of this
|
||||||
|
// implementation is therefore slightly different from the reference
|
||||||
|
// implementation:
|
||||||
|
//
|
||||||
|
// If an exception is thrown during input and
|
||||||
|
//
|
||||||
|
// - badbit is turned ON in `is.exceptions()`:
|
||||||
|
// The badbit will **not** be set in `is`'s error state.
|
||||||
|
//
|
||||||
|
// - badbit is turned OFF in `is.exceptions()`:
|
||||||
|
// The badbit will **not** be set in `is`'s error state and the
|
||||||
|
// exception is **not** swallowed.
|
||||||
|
//
|
||||||
|
|
||||||
|
public:
|
||||||
|
using traits_type = std::char_traits<char>;
|
||||||
|
|
||||||
|
explicit input_stream_adapter(std::istream& i)
|
||||||
|
: is(i)
|
||||||
|
, ok(i, /* noskipws */ true)
|
||||||
|
{
|
||||||
|
std::ios_base::iostate state = std::ios_base::goodbit;
|
||||||
|
if (ok)
|
||||||
|
{
|
||||||
|
state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
state |= std::ios_base::failbit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the stream state. In case skip_byte_order_mark() failed (but
|
||||||
|
// did not throw an exception), `state` now has the badbit set and the
|
||||||
|
// call to setstate might throw an ios::failure. Likewise, if the stream
|
||||||
|
// is "not ok" then the failbit will be set, which might throw an
|
||||||
|
// exception, too.
|
||||||
|
is.setstate(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
input_stream_adapter(const input_stream_adapter&) = delete;
|
||||||
|
input_stream_adapter& operator=(const input_stream_adapter&) = delete;
|
||||||
|
|
||||||
|
traits_type::int_type get_character() override
|
||||||
|
{
|
||||||
|
// Only try to get a character if the stream is good!
|
||||||
|
if (is.good())
|
||||||
|
{
|
||||||
|
const auto ch = is.rdbuf()->sbumpc();
|
||||||
|
if (ch != traits_type::eof())
|
||||||
|
{
|
||||||
|
return ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
// sbumpc failed.
|
||||||
|
// No more characters are available. Set eofbit.
|
||||||
|
is.setstate(std::ios_base::eofbit);
|
||||||
|
}
|
||||||
|
|
||||||
|
return traits_type::eof();
|
||||||
|
}
|
||||||
|
|
||||||
|
void unget_character() override
|
||||||
|
{
|
||||||
|
// This method is only ever called if the last call to get_character was
|
||||||
|
// successful (i.e. not EOF). This implies that the stream is good and
|
||||||
|
// that the call to sungetc below is guaranteed to succeed.
|
||||||
|
is.rdbuf()->sungetc();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::istream& is;
|
||||||
|
std::istream::sentry const ok;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/// input adapter for buffer input
|
/// input adapter for buffer input
|
||||||
class input_buffer_adapter : public input_adapter_protocol
|
class input_buffer_adapter : public input_adapter_protocol
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user