/*!
@brief skip a UTF-8 byte order mark (0xEF 0xBB 0xBF) at the current read position

Reads from @a sb for as long as the upcoming bytes match the BOM. A complete
BOM is consumed. If only a prefix of the BOM is found (0xEF, or 0xEF 0xBB,
followed by any other byte or by the end of input), every byte consumed so far
is put back with sungetc() so that the read position is again at the first
byte that was inspected.

@param[in,out] sb  stream buffer to read from; must be non-null (this is only
                   checked with an assertion)

@return std::ios_base::goodbit if a complete BOM has been skipped, if no BOM
        was present, or if an incomplete BOM has been detected and the stream
        buffer has been successfully rewound to the start of the BOM;
        std::ios_base::badbit if putting a byte back failed, in which case the
        stream buffer should no longer be used.

@note The eofbit is never reported by this function. (Before doing anything
      else, is.unget() clears the eofbit. However, some implementations keep
      the eofbit if is.unget() fails, others do not.)
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;

    assert(sb != nullptr);

    // The bytes of the UTF-8 BOM, in the form sgetc() reports them.
    const traits_type::int_type bom[] = {0xEF, 0xBB, 0xBF};
    const int bom_length = 3;

    // Consume bytes as long as they continue to match the BOM.
    int consumed = 0;
    while (consumed < bom_length && sb->sgetc() == bom[consumed])
    {
        sb->sbumpc();
        ++consumed;
    }

    if (consumed == bom_length)
    {
        // complete BOM skipped
        return std::ios_base::goodbit;
    }

    // Incomplete (or no) BOM: put back whatever has been consumed, one byte
    // at a time. A failing sungetc() leaves the read position somewhere
    // inside the partial BOM, so report it as a hard error.
    for (; consumed > 0; --consumed)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}
*/ +#if 0 class input_stream_adapter : public input_adapter_protocol { public: @@ -110,6 +159,103 @@ class input_stream_adapter : public input_adapter_protocol private: std::istream& is; }; +#else +class input_stream_adapter : public input_adapter_protocol +{ + // + // NOTE: + // + // This implementation differs slightly from the reference implementation + // (using the std::istream interface): + // + // From N4659: + // 30.7.4.3 Unformatted input functions + // + // [...] + // If an exception is thrown during input then `ios::badbit` is turned + // on[310] in `*this`'s error state. (Exceptions thrown from + // `basic_ios<>::clear()` are not caught or rethrown.) + // If `(exceptions() & badbit) != 0` then the exception is rethrown. + // + // [310] This is done without causing an `ios::failure` to be thrown. + // + // However, there is no (portable) way to turn on the `badbit` in `is` + // without throwing an exception, so here we don't catch (and possibly) + // rethrow exceptions from streambuf operations. + // If an internal operation throws an exception, the behavior of this + // implementation is therefore slightly different from the reference + // implementation: + // + // If an exception is thrown during input and + // + // - badbit is turned ON in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state. + // + // - badbit is turned OFF in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state and the + // exception is **not** swallowed. + // + + public: + using traits_type = std::char_traits; + + explicit input_stream_adapter(std::istream& i) + : is(i) + , ok(i, /* noskipws */ true) + { + std::ios_base::iostate state = std::ios_base::goodbit; + if (ok) + { + state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf()); + } + else + { + state |= std::ios_base::failbit; + } + + // Update the stream state. 
In case skip_byte_order_mark() failed (but + // did not throw an exception), `state` now has the badbit set and the + // call to setstate might throw an ios::failure. Likewise, if the stream + // is "not ok" then the failbit will be set, which might throw an + // exception, too. + is.setstate(state); + } + + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; + + traits_type::int_type get_character() override + { + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.rdbuf()->sbumpc(); + if (ch != traits_type::eof()) + { + return ch; + } + + // sbumpc failed. + // No more characters are available. Set eofbit. + is.setstate(std::ios_base::eofbit); + } + + return traits_type::eof(); + } + + void unget_character() override + { + // This method is only ever called if the last call to get_character was + // successful (i.e. not EOF). This implies that the stream is good and + // that the call to sungetc below is guaranteed to succeed. + is.rdbuf()->sungetc(); + } + + private: + std::istream& is; + std::istream::sentry const ok; +}; +#endif /// input adapter for buffer input class input_buffer_adapter : public input_adapter_protocol diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index b71502749..c0d6d27c9 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -1620,12 +1620,61 @@ struct input_adapter_protocol /// a type to simplify interfaces using input_adapter_t = std::shared_ptr; +/*! +A helper function to skip the UTF-8 byte order mark. + +If a complete BOM has been skipped, or if an incomplete BOM has been detected +and the stream has been successfully rewind to the start of the BOM, returns +goodbit. +If an internal operation fails, returns badbit, and the streambuf should no +longer be used. + +Note: Doesn't handle the eofbit. 
Before doing anything else is.unget() clears +the eofbit. However, some implementations keep the eofbit if is.unget() fails, +others do not. + +Note: The streambuf must be non-null. +*/ +inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb) +{ + using traits_type = std::char_traits; + + assert(sb != nullptr); + + if (sb->sgetc() == 0xEF) + { + sb->sbumpc(); + if (sb->sgetc() == 0xBB) + { + sb->sbumpc(); + if (sb->sgetc() == 0xBF) + { + sb->sbumpc(); + return std::ios_base::goodbit; + } + + if (sb->sungetc() == traits_type::eof()) + { + return std::ios_base::badbit; + } + } + + if (sb->sungetc() == traits_type::eof()) + { + return std::ios_base::badbit; + } + } + + return std::ios_base::goodbit; +} + /*! Input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at beginning of input. Does not support changing the underlying std::streambuf in mid-input. */ +#if 0 class input_stream_adapter : public input_adapter_protocol { public: @@ -1683,6 +1732,103 @@ class input_stream_adapter : public input_adapter_protocol private: std::istream& is; }; +#else +class input_stream_adapter : public input_adapter_protocol +{ + // + // NOTE: + // + // This implementation differs slightly from the reference implementation + // (using the std::istream interface): + // + // From N4659: + // 30.7.4.3 Unformatted input functions + // + // [...] + // If an exception is thrown during input then `ios::badbit` is turned + // on[310] in `*this`'s error state. (Exceptions thrown from + // `basic_ios<>::clear()` are not caught or rethrown.) + // If `(exceptions() & badbit) != 0` then the exception is rethrown. + // + // [310] This is done without causing an `ios::failure` to be thrown. + // + // However, there is no (portable) way to turn on the `badbit` in `is` + // without throwing an exception, so here we don't catch (and possibly) + // rethrow exceptions from streambuf operations. 
+ // If an internal operation throws an exception, the behavior of this + // implementation is therefore slightly different from the reference + // implementation: + // + // If an exception is thrown during input and + // + // - badbit is turned ON in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state. + // + // - badbit is turned OFF in `is.exceptions()`: + // The badbit will **not** be set in `is`'s error state and the + // exception is **not** swallowed. + // + + public: + using traits_type = std::char_traits; + + explicit input_stream_adapter(std::istream& i) + : is(i) + , ok(i, /* noskipws */ true) + { + std::ios_base::iostate state = std::ios_base::goodbit; + if (ok) + { + state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf()); + } + else + { + state |= std::ios_base::failbit; + } + + // Update the stream state. In case skip_byte_order_mark() failed (but + // did not throw an exception), `state` now has the badbit set and the + // call to setstate might throw an ios::failure. Likewise, if the stream + // is "not ok" then the failbit will be set, which might throw an + // exception, too. + is.setstate(state); + } + + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(const input_stream_adapter&) = delete; + + traits_type::int_type get_character() override + { + // Only try to get a character if the stream is good! + if (is.good()) + { + const auto ch = is.rdbuf()->sbumpc(); + if (ch != traits_type::eof()) + { + return ch; + } + + // sbumpc failed. + // No more characters are available. Set eofbit. + is.setstate(std::ios_base::eofbit); + } + + return traits_type::eof(); + } + + void unget_character() override + { + // This method is only ever called if the last call to get_character was + // successful (i.e. not EOF). This implies that the stream is good and + // that the call to sungetc below is guaranteed to succeed. 
+ is.rdbuf()->sungetc(); + } + + private: + std::istream& is; + std::istream::sentry const ok; +}; +#endif /// input adapter for buffer input class input_buffer_adapter : public input_adapter_protocol