#pragma once

#include <algorithm> // min
#include <array> // array
#include <cassert> // assert
#include <cstddef> // size_t
#include <cstring> // strlen
#include <ios> // streamsize, streamoff, streampos
#include <istream> // istream
#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
#include <memory> // shared_ptr, make_shared, addressof
#include <numeric> // accumulate
#include <streambuf> // streambuf
#include <string> // string, char_traits
#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
#include <utility> // pair, declval

// NOTE(review): the target of this include was lost; JSON_LIKELY is the only
// name this header needs from outside the standard library, so the include is
// presumably the project's macro header -- confirm the path. The include is
// probed where the compiler allows it, and a neutral fallback is provided, so
// that this header stays usable either way.
#if defined(__has_include)
    #if __has_include(<nlohmann/detail/macro_scope.hpp>)
        #include <nlohmann/detail/macro_scope.hpp> // JSON_LIKELY
    #endif
#else
    #include <nlohmann/detail/macro_scope.hpp> // JSON_LIKELY
#endif

#ifndef JSON_LIKELY
    #define JSON_LIKELY(x) (x)
#endif

namespace nlohmann
{
namespace detail
{
////////////////////
// input adapters //
////////////////////

/*!
@brief abstract input adapter interface

Produces a stream of std::char_traits<char>::int_type characters from a
std::istream, a buffer, or some other input type. Accepts the return of
exactly one non-EOF character for future input. The int_type characters
returned consist of all valid char values as positive values (typically
unsigned char), plus an EOF value outside that range, specified by the value
of the function std::char_traits<char>::eof(). This value is typically -1, but
could be any arbitrary value which is not a valid char value.
*/
struct input_adapter_protocol
{
    /// get a character [0,255] or std::char_traits<char>::eof().
    virtual std::char_traits<char>::int_type get_character() = 0;
    /// restore the last non-eof() character to input
    virtual void unget_character() = 0;
    virtual ~input_adapter_protocol() = default;
};

/// a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;

/*!
A helper function to skip the UTF-8 byte order mark.

If a complete BOM has been skipped, or if an incomplete BOM has been detected
and the stream has been successfully rewound to the start of the BOM, returns
goodbit.
If an internal operation fails, returns badbit, and the streambuf should no
longer be used.

Note: Doesn't handle the eofbit. Before doing anything else is.unget() clears
the eofbit. However, some implementations keep the eofbit if is.unget() fails,
others do not.

Note: The streambuf must be non-null.

@param[in] sb  stream buffer to inspect; must not be null (asserted)
@return goodbit or badbit as described above
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;

    assert(sb != nullptr);

    if (sb->sgetc() == 0xEF)
    {
        sb->sbumpc();
        if (sb->sgetc() == 0xBB)
        {
            sb->sbumpc();
            if (sb->sgetc() == 0xBF)
            {
                sb->sbumpc();
                return std::ios_base::goodbit;
            }

            // incomplete BOM: put back the second byte (0xBB)
            if (sb->sungetc() == traits_type::eof())
            {
                return std::ios_base::badbit;
            }
        }

        // incomplete BOM: put back the first byte (0xEF)
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}

/*!
Input adapter for a (caching) istream. Ignores a UTF Byte Order Mark at
beginning of input. Does not support changing the underlying std::streambuf
in mid-input.
*/
#if 0
class input_stream_adapter : public input_adapter_protocol
{
  public:
    using traits_type = std::char_traits<char>;

    explicit input_stream_adapter(std::istream& i)
        : is(i)
    {
        // Skip byte order mark
        if (is.peek() == 0xEF)
        {
            is.ignore();
            if (is.peek() == 0xBB)
            {
                is.ignore();
                if (is.peek() == 0xBF)
                {
                    is.ignore();
                    return; // Found a complete BOM.
                }

                is.unget();
            }

            is.unget();
        }
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    traits_type::int_type get_character() override
    {
        // Only try to get a character if the stream is good!
        if (is.good())
        {
            const auto ch = is.peek();
            // If peek() returns EOF, the following call to ignore() will set
            // the failbit, but we do not want to set the failbit here.
            if (ch != traits_type::eof())
            {
                is.ignore();
                return ch;
            }
        }

        return traits_type::eof();
    }

    void unget_character() override
    {
        is.unget();
    }

  private:
    std::istream& is;
};
#else
class input_stream_adapter : public input_adapter_protocol
{
    //
    // NOTE:
    //
    // This implementation differs slightly from the reference implementation
    // (using the std::istream interface):
    //
    // From N4659:
    // 30.7.4.3 Unformatted input functions
    //
    //      [...]
    //      If an exception is thrown during input then `ios::badbit` is turned
    //      on[310] in `*this`'s error state. (Exceptions thrown from
    //      `basic_ios<>::clear()` are not caught or rethrown.)
    //      If `(exceptions() & badbit) != 0` then the exception is rethrown.
    //
    //      [310] This is done without causing an `ios::failure` to be thrown.
    //
    // However, there is no (portable) way to turn on the `badbit` in `is`
    // without throwing an exception, so here we don't catch (and possibly)
    // rethrow exceptions from streambuf operations.
    // If an internal operation throws an exception, the behavior of this
    // implementation is therefore slightly different from the reference
    // implementation:
    //
    // If an exception is thrown during input and
    //
    //  - badbit is turned ON in `is.exceptions()`:
    //      The badbit will **not** be set in `is`'s error state.
    //
    //  - badbit is turned OFF in `is.exceptions()`:
    //      The badbit will **not** be set in `is`'s error state and the
    //      exception is **not** swallowed.
    //

  public:
    using traits_type = std::char_traits<char>;

    /// wrap @a i (held by reference) and skip a leading UTF-8 BOM
    explicit input_stream_adapter(std::istream& i)
        : is(i)
        , ok(i, /* noskipws */ true)
    {
        std::ios_base::iostate state = std::ios_base::goodbit;
        if (ok)
        {
            state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf());
        }
        else
        {
            state |= std::ios_base::failbit;
        }

        // Update the stream state. In case skip_byte_order_mark() failed (but
        // did not throw an exception), `state` now has the badbit set and the
        // call to setstate might throw an ios::failure. Likewise, if the stream
        // is "not ok" then the failbit will be set, which might throw an
        // exception, too.
        is.setstate(state);
    }

    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;

    traits_type::int_type get_character() override
    {
        // Only try to get a character if the stream is good!
        if (is.good())
        {
            const auto ch = is.rdbuf()->sbumpc();
            if (ch != traits_type::eof())
            {
                return ch;
            }

            // sbumpc failed.
            // No more characters are available. Set eofbit.
            is.setstate(std::ios_base::eofbit);
        }

        return traits_type::eof();
    }

    void unget_character() override
    {
        // This method is only ever called if the last call to get_character was
        // successful (i.e. not EOF). This implies that the stream is good and
        // that the call to sungetc below is guaranteed to succeed.
        is.rdbuf()->sungetc();
    }

  private:
    /// the wrapped stream (not owned)
    std::istream& is;
    /// sentry constructed with noskipws; converts to false if the stream was unusable
    std::istream::sentry const ok;
};
#endif

/// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol
{
  public:
    /// read @a l bytes starting at @a b (not copied); a leading UTF-8 BOM is skipped
    input_buffer_adapter(const char* b, const std::size_t l)
        : cursor(b), limit(b + l), start(b)
    {
        // skip byte order mark
        if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF')
        {
            cursor += 3;
        }
    }

    // delete because of pointer members
    input_buffer_adapter(const input_buffer_adapter&) = delete;
    input_buffer_adapter& operator=(const input_buffer_adapter&) = delete;

    std::char_traits<char>::int_type get_character() noexcept override
    {
        if (JSON_LIKELY(cursor < limit))
        {
            return std::char_traits<char>::to_int_type(*(cursor++));
        }

        return std::char_traits<char>::eof();
    }

    void unget_character() noexcept override
    {
        if (JSON_LIKELY(cursor > start))
        {
            --cursor;
        }
    }

  private:
    /// pointer to the current character
    const char* cursor;
    /// pointer past the last character
    const char* limit;
    /// pointer to the first character
    const char* start;
};

/*!
Input adapter that converts a wide string to a stream of UTF-8 bytes.

Elements of size 2 are treated as UTF-16 code units, all other sizes as UTF-32
code points. The string is held by reference and is not copied.
*/
template<typename WideStringType>
class wide_string_input_adapter : public input_adapter_protocol
{
  public:
    wide_string_input_adapter(const WideStringType& w) : str(w) {}

    std::char_traits<char>::int_type get_character() noexcept override
    {
        // unget_character() was called previously: return the last character
        if (next_unget)
        {
            next_unget = false;
            return last_char;
        }

        // check if buffer needs to be filled
        if (utf8_bytes_index == utf8_bytes_filled)
        {
            if (sizeof(typename WideStringType::value_type) == 2)
            {
                fill_buffer_utf16();
            }
            else
            {
                fill_buffer_utf32();
            }

            assert(utf8_bytes_filled > 0);
            assert(utf8_bytes_index == 0);
        }

        // use buffer
        assert(utf8_bytes_filled > 0);
        assert(utf8_bytes_index < utf8_bytes_filled);
        return (last_char = utf8_bytes[utf8_bytes_index++]);
    }

    void unget_character() noexcept override
    {
        next_unget = true;
    }

  private:
    /// refill utf8_bytes from the next UTF-16 code unit(s), or with eof()
    void fill_buffer_utf16()
    {
        utf8_bytes_index = 0;

        if (current_wchar == str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
        }
        else
        {
            // get the current character
            const int wc = static_cast<int>(str[current_wchar++]);

            // UTF-16 to UTF-8 encoding
            if (wc < 0x80)
            {
                utf8_bytes[0] = wc;
                utf8_bytes_filled = 1;
            }
            else if (wc <= 0x7FF)
            {
                utf8_bytes[0] = 0xC0 | ((wc >> 6));
                utf8_bytes[1] = 0x80 | (wc & 0x3F);
                utf8_bytes_filled = 2;
            }
            else if (0xD800 > wc or wc >= 0xE000)
            {
                utf8_bytes[0] = 0xE0 | ((wc >> 12));
                utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F);
                utf8_bytes[2] = 0x80 | (wc & 0x3F);
                utf8_bytes_filled = 3;
            }
            else
            {
                if (current_wchar < str.size())
                {
                    // surrogate range: combine with the following code unit
                    const int wc2 = static_cast<int>(str[current_wchar++]);
                    const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF));
                    utf8_bytes[0] = 0xf0 | (charcode >> 18);
                    utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F);
                    utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F);
                    utf8_bytes[3] = 0x80 | (charcode & 0x3F);
                    utf8_bytes_filled = 4;
                }
                else
                {
                    // unknown character: a surrogate at the very end of the
                    // input. The code unit has already been consumed above, so
                    // current_wchar == str.size() here and must not be advanced
                    // again; otherwise the end-of-input check at the top would
                    // be missed on the next call and str would be read out of
                    // bounds.
                    utf8_bytes[0] = wc;
                    utf8_bytes_filled = 1;
                }
            }
        }
    }

    /// refill utf8_bytes from the next UTF-32 code point, or with eof()
    void fill_buffer_utf32()
    {
        utf8_bytes_index = 0;

        if (current_wchar == str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
        }
        else
        {
            // get the current character
            const int wc = static_cast<int>(str[current_wchar++]);

            // UTF-32 to UTF-8 encoding
            if (wc < 0x80)
            {
                utf8_bytes[0] = wc;
                utf8_bytes_filled = 1;
            }
            else if (wc <= 0x7FF)
            {
                utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F);
                utf8_bytes[1] = 0x80 | (wc & 0x3F);
                utf8_bytes_filled = 2;
            }
            else if (wc <= 0xFFFF)
            {
                utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F);
                utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F);
                utf8_bytes[2] = 0x80 | (wc & 0x3F);
                utf8_bytes_filled = 3;
            }
            else if (wc <= 0x10FFFF)
            {
                utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07);
                utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F);
                utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F);
                utf8_bytes[3] = 0x80 | (wc & 0x3F);
                utf8_bytes_filled = 4;
            }
            else
            {
                // unknown character
                utf8_bytes[0] = wc;
                utf8_bytes_filled = 1;
            }
        }
    }

  private:
    /// the wstring to process
    const WideStringType& str;

    /// index of the current wchar in str
    std::size_t current_wchar = 0;

    /// a buffer for UTF-8 bytes
    std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};

    /// index to the utf8_codes array for the next valid byte
    std::size_t utf8_bytes_index = 0;
    /// number of valid bytes in the utf8_codes array
    std::size_t utf8_bytes_filled = 0;

    /// the last character (returned after unget_character() is called)
    std::char_traits<char>::int_type last_char = 0;
    /// whether get_character() should return last_char
    bool next_unget = false;
};

/// front-end that selects a concrete adapter from the type of its constructor arguments
class input_adapter
{
  public:
    // native support

    /// input adapter for input stream
    input_adapter(std::istream& i)
        : ia(std::make_shared<input_stream_adapter>(i)) {}

    /// input adapter for input stream
    input_adapter(std::istream&& i)
        : ia(std::make_shared<input_stream_adapter>(i)) {}

    /// input adapter for wide string (element size selects UTF-16 or UTF-32)
    input_adapter(const std::wstring& ws)
        : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}

    /// input adapter for UTF-16 string
    input_adapter(const std::u16string& ws)
        : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}

    /// input adapter for UTF-32 string
    input_adapter(const std::u32string& ws)
        : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}

    /// input adapter for buffer
    template<typename CharT,
             typename std::enable_if<
                 std::is_pointer<CharT>::value and
                 std::is_integral<typename std::remove_pointer<CharT>::type>::value and
                 sizeof(typename std::remove_pointer<CharT>::type) == 1,
                 int>::type = 0>
    input_adapter(CharT b, std::size_t l)
        : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}

    // derived support

    /// input adapter for string literal
    template<typename CharT,
             typename std::enable_if<
                 std::is_pointer<CharT>::value and
                 std::is_integral<typename std::remove_pointer<CharT>::type>::value and
                 sizeof(typename std::remove_pointer<CharT>::type) == 1,
                 int>::type = 0>
    input_adapter(CharT b)
        : input_adapter(reinterpret_cast<const char*>(b),
                        std::strlen(reinterpret_cast<const char*>(b))) {}

    /// input adapter for iterator range with contiguous storage
    template<class IteratorType,
             typename std::enable_if<
                 std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
                 int>::type = 0>
    input_adapter(IteratorType first, IteratorType last)
    {
        // assertion to check that the iterator range is indeed contiguous,
        // see http://stackoverflow.com/a/35008842/266378 for more discussion
        assert(std::accumulate(
                   first, last, std::pair<bool, int>(true, 0),
                   [&first](std::pair<bool, int> res, decltype(*first) val)
        {
            res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
            return res;
        }).first);

        // assertion to check that each element is 1 byte long
        static_assert(
            sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1,
            "each element in the iterator range must have the size of 1 byte");

        const auto len = static_cast<size_t>(std::distance(first, last));
        if (JSON_LIKELY(len > 0))
        {
            // there is at least one element: use the address of first
            ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
        }
        else
        {
            // the address of first cannot be used: use nullptr
            ia = std::make_shared<input_buffer_adapter>(nullptr, len);
        }
    }

    /// input adapter for array
    template<class T, std::size_t N>
    input_adapter(T (&array)[N])
        : input_adapter(std::begin(array), std::end(array)) {}

    /// input adapter for contiguous container
    template<class ContiguousContainer, typename
             std::enable_if<not std::is_pointer<ContiguousContainer>::value and
                            std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
                            int>::type = 0>
    input_adapter(const ContiguousContainer& c)
        : input_adapter(std::begin(c), std::end(c)) {}

    /// hand out the selected adapter
    operator input_adapter_t()
    {
        return ia;
    }

  private:
    /// the actual adapter
    input_adapter_t ia = nullptr;
};
} // namespace detail
} // namespace nlohmann