579 lines
18 KiB
C++
579 lines
18 KiB
C++
#pragma once
|
|
|
|
#include <algorithm> // min
|
|
#include <array> // array
|
|
#include <cassert> // assert
|
|
#include <cstddef> // size_t
|
|
#include <cstring> // strlen
|
|
#include <ios> // streamsize, streamoff, streampos
|
|
#include <istream> // istream
|
|
#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
|
|
#include <memory> // shared_ptr, make_shared, addressof
|
|
#include <numeric> // accumulate
|
|
#include <string> // string, char_traits
|
|
#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
|
|
#include <utility> // pair, declval
|
|
|
|
#include <nlohmann/detail/macro_scope.hpp>
|
|
|
|
namespace nlohmann
|
|
{
|
|
namespace detail
|
|
{
|
|
////////////////////
|
|
// input adapters //
|
|
////////////////////
|
|
|
|
/*!
@brief abstract interface implemented by every input adapter

An adapter turns some input source (a std::istream, a memory buffer, or another
input type) into a sequence of std::char_traits<char>::int_type values.

Every valid char is delivered as a positive value (typically the value of the
corresponding unsigned char). The end of the input is signalled by
std::char_traits<char>::eof(), a value outside of that range. It is typically
-1, but may be any value that is not a valid char value.

Exactly one non-EOF character may be handed back to the adapter so that it is
delivered again by the next read.
*/
struct input_adapter_protocol
{
    /// virtual, so that destroying an adapter through this interface is well-defined
    virtual ~input_adapter_protocol() = default;

    /// read the next character: a value in [0,255] or std::char_traits<char>::eof()
    virtual std::char_traits<char>::int_type get_character() = 0;

    /// hand the most recently read non-eof() character back to the input
    virtual void unget_character() = 0;
};
|
|
|
|
/// a type to simplify interfaces
|
|
/// shared-ownership handle to an object implementing input_adapter_protocol
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
|
|
|
|
/*!
@brief skip a UTF-8 byte order mark (bytes EF BB BF) at the current read position

All three bytes are consumed if the complete BOM is present. If only a prefix
of the BOM is found, the bytes consumed so far are put back, so that the read
position ends up at the start of the (incomplete) BOM again.

@param[in] sb  stream buffer to read from; must be non-null

@return std::ios_base::goodbit if a complete BOM has been skipped, if there was
        no BOM at all, or if an incomplete BOM has been detected and the stream
        buffer has been successfully rewound to the start of it;
        std::ios_base::badbit if an internal operation failed, in which case
        the stream buffer should no longer be used

@note The eofbit is not handled here. (A version based on is.unget() would
      clear the eofbit before doing anything else; some implementations keep
      the eofbit if is.unget() fails, others do not.)
*/
inline std::ios_base::iostate skip_byte_order_mark(std::streambuf* sb)
{
    using traits_type = std::char_traits<char>;

    assert(sb != nullptr);

    // the UTF-8 byte order mark
    const traits_type::int_type bom[] = {0xEF, 0xBB, 0xBF};
    const int bom_length = 3;

    // consume BOM bytes for as long as the input keeps matching
    int matched = 0;
    while (matched < bom_length and sb->sgetc() == bom[matched])
    {
        sb->sbumpc();
        ++matched;
    }

    if (matched == bom_length)
    {
        // complete BOM skipped
        return std::ios_base::goodbit;
    }

    // no BOM or an incomplete one: put back every byte consumed above
    for (; matched > 0; --matched)
    {
        if (sb->sungetc() == traits_type::eof())
        {
            return std::ios_base::badbit;
        }
    }

    return std::ios_base::goodbit;
}
|
|
|
|
/*!
|
|
Input adapter for a (caching) istream.
|
|
Ignores a UTF-8 byte order mark (BOM) at the beginning of the input.
|
|
|
|
Does not support changing the underlying std::streambuf in mid-input.
|
|
*/
|
|
#if 0
|
|
class input_stream_adapter : public input_adapter_protocol
|
|
{
|
|
public:
|
|
using traits_type = std::char_traits<char>;
|
|
|
|
explicit input_stream_adapter(std::istream& i)
|
|
: is(i)
|
|
{
|
|
// Skip byte order mark
|
|
if (is.peek() == 0xEF)
|
|
{
|
|
is.ignore();
|
|
if (is.peek() == 0xBB)
|
|
{
|
|
is.ignore();
|
|
if (is.peek() == 0xBF)
|
|
{
|
|
is.ignore();
|
|
return; // Found a complete BOM.
|
|
}
|
|
|
|
is.unget();
|
|
}
|
|
|
|
is.unget();
|
|
}
|
|
}
|
|
|
|
input_stream_adapter(const input_stream_adapter&) = delete;
|
|
input_stream_adapter& operator=(const input_stream_adapter&) = delete;
|
|
|
|
traits_type::int_type get_character() override
|
|
{
|
|
// Only try to get a character if the stream is good!
|
|
if (is.good())
|
|
{
|
|
const auto ch = is.peek();
|
|
// If peek() returns EOF, the following call to ignore() will set
|
|
// the failbit, but we do not want to set the failbit here.
|
|
if (ch != traits_type::eof())
|
|
{
|
|
is.ignore();
|
|
return ch;
|
|
}
|
|
}
|
|
|
|
return traits_type::eof();
|
|
}
|
|
|
|
void unget_character() override
|
|
{
|
|
is.unget();
|
|
}
|
|
|
|
private:
|
|
std::istream& is;
|
|
};
|
|
#else
|
|
class input_stream_adapter : public input_adapter_protocol
|
|
{
|
|
//
|
|
// NOTE:
|
|
//
|
|
// This implementation differs slightly from the reference implementation
|
|
// (using the std::istream interface):
|
|
//
|
|
// From N4659:
|
|
// 30.7.4.3 Unformatted input functions
|
|
//
|
|
// [...]
|
|
// If an exception is thrown during input then `ios::badbit` is turned
|
|
// on[310] in `*this`'s error state. (Exceptions thrown from
|
|
// `basic_ios<>::clear()` are not caught or rethrown.)
|
|
// If `(exceptions() & badbit) != 0` then the exception is rethrown.
|
|
//
|
|
// [310] This is done without causing an `ios::failure` to be thrown.
|
|
//
|
|
// However, there is no (portable) way to turn on the `badbit` in `is`
|
|
// without throwing an exception, so here we don't catch (and possibly)
|
|
// rethrow exceptions from streambuf operations.
|
|
// If an internal operation throws an exception, the behavior of this
|
|
// implementation is therefore slightly different from the reference
|
|
// implementation:
|
|
//
|
|
// If an exception is thrown during input and
|
|
//
|
|
// - badbit is turned ON in `is.exceptions()`:
|
|
// The badbit will **not** be set in `is`'s error state.
|
|
//
|
|
// - badbit is turned OFF in `is.exceptions()`:
|
|
// The badbit will **not** be set in `is`'s error state and the
|
|
// exception is **not** swallowed.
|
|
//
|
|
|
|
public:
|
|
using traits_type = std::char_traits<char>;
|
|
|
|
explicit input_stream_adapter(std::istream& i)
|
|
: is(i)
|
|
, ok(i, /* noskipws */ true)
|
|
{
|
|
std::ios_base::iostate state = std::ios_base::goodbit;
|
|
if (ok)
|
|
{
|
|
state |= nlohmann::detail::skip_byte_order_mark(is.rdbuf());
|
|
}
|
|
else
|
|
{
|
|
state |= std::ios_base::failbit;
|
|
}
|
|
|
|
// Update the stream state. In case skip_byte_order_mark() failed (but
|
|
// did not throw an exception), `state` now has the badbit set and the
|
|
// call to setstate might throw an ios::failure. Likewise, if the stream
|
|
// is "not ok" then the failbit will be set, which might throw an
|
|
// exception, too.
|
|
is.setstate(state);
|
|
}
|
|
|
|
input_stream_adapter(const input_stream_adapter&) = delete;
|
|
input_stream_adapter& operator=(const input_stream_adapter&) = delete;
|
|
|
|
traits_type::int_type get_character() override
|
|
{
|
|
// Only try to get a character if the stream is good!
|
|
if (is.good())
|
|
{
|
|
const auto ch = is.rdbuf()->sbumpc();
|
|
if (ch != traits_type::eof())
|
|
{
|
|
return ch;
|
|
}
|
|
|
|
// sbumpc failed.
|
|
// No more characters are available. Set eofbit.
|
|
is.setstate(std::ios_base::eofbit);
|
|
}
|
|
|
|
return traits_type::eof();
|
|
}
|
|
|
|
void unget_character() override
|
|
{
|
|
// This method is only ever called if the last call to get_character was
|
|
// successful (i.e. not EOF). This implies that the stream is good and
|
|
// that the call to sungetc below is guaranteed to succeed.
|
|
is.rdbuf()->sungetc();
|
|
}
|
|
|
|
private:
|
|
std::istream& is;
|
|
std::istream::sentry const ok;
|
|
};
|
|
#endif
|
|
|
|
/// input adapter for buffer input
|
|
class input_buffer_adapter : public input_adapter_protocol
|
|
{
|
|
public:
|
|
input_buffer_adapter(const char* b, const std::size_t l)
|
|
: cursor(b), limit(b + l), start(b)
|
|
{
|
|
// skip byte order mark
|
|
if (l >= 3 and b[0] == '\xEF' and b[1] == '\xBB' and b[2] == '\xBF')
|
|
{
|
|
cursor += 3;
|
|
}
|
|
}
|
|
|
|
// delete because of pointer members
|
|
input_buffer_adapter(const input_buffer_adapter&) = delete;
|
|
input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
|
|
|
|
std::char_traits<char>::int_type get_character() noexcept override
|
|
{
|
|
if (JSON_LIKELY(cursor < limit))
|
|
{
|
|
return std::char_traits<char>::to_int_type(*(cursor++));
|
|
}
|
|
|
|
return std::char_traits<char>::eof();
|
|
}
|
|
|
|
void unget_character() noexcept override
|
|
{
|
|
if (JSON_LIKELY(cursor > start))
|
|
{
|
|
--cursor;
|
|
}
|
|
}
|
|
|
|
private:
|
|
/// pointer to the current character
|
|
const char* cursor;
|
|
/// pointer past the last character
|
|
const char* limit;
|
|
/// pointer to the first character
|
|
const char* start;
|
|
};
|
|
|
|
template<typename WideStringType>
|
|
class wide_string_input_adapter : public input_adapter_protocol
|
|
{
|
|
public:
|
|
wide_string_input_adapter(const WideStringType& w) : str(w) {}
|
|
|
|
std::char_traits<char>::int_type get_character() noexcept override
|
|
{
|
|
// unget_character() was called previously: return the last character
|
|
if (next_unget)
|
|
{
|
|
next_unget = false;
|
|
return last_char;
|
|
}
|
|
|
|
// check if buffer needs to be filled
|
|
if (utf8_bytes_index == utf8_bytes_filled)
|
|
{
|
|
if (sizeof(typename WideStringType::value_type) == 2)
|
|
{
|
|
fill_buffer_utf16();
|
|
}
|
|
else
|
|
{
|
|
fill_buffer_utf32();
|
|
}
|
|
|
|
assert(utf8_bytes_filled > 0);
|
|
assert(utf8_bytes_index == 0);
|
|
}
|
|
|
|
// use buffer
|
|
assert(utf8_bytes_filled > 0);
|
|
assert(utf8_bytes_index < utf8_bytes_filled);
|
|
return (last_char = utf8_bytes[utf8_bytes_index++]);
|
|
}
|
|
|
|
void unget_character() noexcept override
|
|
{
|
|
next_unget = true;
|
|
}
|
|
|
|
private:
|
|
void fill_buffer_utf16()
|
|
{
|
|
utf8_bytes_index = 0;
|
|
|
|
if (current_wchar == str.size())
|
|
{
|
|
utf8_bytes[0] = std::char_traits<char>::eof();
|
|
utf8_bytes_filled = 1;
|
|
}
|
|
else
|
|
{
|
|
// get the current character
|
|
const int wc = static_cast<int>(str[current_wchar++]);
|
|
|
|
// UTF-16 to UTF-8 encoding
|
|
if (wc < 0x80)
|
|
{
|
|
utf8_bytes[0] = wc;
|
|
utf8_bytes_filled = 1;
|
|
}
|
|
else if (wc <= 0x7FF)
|
|
{
|
|
utf8_bytes[0] = 0xC0 | ((wc >> 6));
|
|
utf8_bytes[1] = 0x80 | (wc & 0x3F);
|
|
utf8_bytes_filled = 2;
|
|
}
|
|
else if (0xD800 > wc or wc >= 0xE000)
|
|
{
|
|
utf8_bytes[0] = 0xE0 | ((wc >> 12));
|
|
utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F);
|
|
utf8_bytes[2] = 0x80 | (wc & 0x3F);
|
|
utf8_bytes_filled = 3;
|
|
}
|
|
else
|
|
{
|
|
if (current_wchar < str.size())
|
|
{
|
|
const int wc2 = static_cast<int>(str[current_wchar++]);
|
|
const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF));
|
|
utf8_bytes[0] = 0xf0 | (charcode >> 18);
|
|
utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F);
|
|
utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F);
|
|
utf8_bytes[3] = 0x80 | (charcode & 0x3F);
|
|
utf8_bytes_filled = 4;
|
|
}
|
|
else
|
|
{
|
|
// unknown character
|
|
++current_wchar;
|
|
utf8_bytes[0] = wc;
|
|
utf8_bytes_filled = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void fill_buffer_utf32()
|
|
{
|
|
utf8_bytes_index = 0;
|
|
|
|
if (current_wchar == str.size())
|
|
{
|
|
utf8_bytes[0] = std::char_traits<char>::eof();
|
|
utf8_bytes_filled = 1;
|
|
}
|
|
else
|
|
{
|
|
// get the current character
|
|
const int wc = static_cast<int>(str[current_wchar++]);
|
|
|
|
// UTF-32 to UTF-8 encoding
|
|
if (wc < 0x80)
|
|
{
|
|
utf8_bytes[0] = wc;
|
|
utf8_bytes_filled = 1;
|
|
}
|
|
else if (wc <= 0x7FF)
|
|
{
|
|
utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F);
|
|
utf8_bytes[1] = 0x80 | (wc & 0x3F);
|
|
utf8_bytes_filled = 2;
|
|
}
|
|
else if (wc <= 0xFFFF)
|
|
{
|
|
utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F);
|
|
utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F);
|
|
utf8_bytes[2] = 0x80 | (wc & 0x3F);
|
|
utf8_bytes_filled = 3;
|
|
}
|
|
else if (wc <= 0x10FFFF)
|
|
{
|
|
utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07);
|
|
utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F);
|
|
utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F);
|
|
utf8_bytes[3] = 0x80 | (wc & 0x3F);
|
|
utf8_bytes_filled = 4;
|
|
}
|
|
else
|
|
{
|
|
// unknown character
|
|
utf8_bytes[0] = wc;
|
|
utf8_bytes_filled = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
/// the wstring to process
|
|
const WideStringType& str;
|
|
|
|
/// index of the current wchar in str
|
|
std::size_t current_wchar = 0;
|
|
|
|
/// a buffer for UTF-8 bytes
|
|
std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
|
|
|
|
/// index to the utf8_codes array for the next valid byte
|
|
std::size_t utf8_bytes_index = 0;
|
|
/// number of valid bytes in the utf8_codes array
|
|
std::size_t utf8_bytes_filled = 0;
|
|
|
|
/// the last character (returned after unget_character() is called)
|
|
std::char_traits<char>::int_type last_char = 0;
|
|
/// whether get_character() should return last_char
|
|
bool next_unget = false;
|
|
};
|
|
|
|
class input_adapter
|
|
{
|
|
public:
|
|
// native support
|
|
|
|
/// input adapter for input stream
|
|
input_adapter(std::istream& i)
|
|
: ia(std::make_shared<input_stream_adapter>(i)) {}
|
|
|
|
/// input adapter for input stream
|
|
input_adapter(std::istream&& i)
|
|
: ia(std::make_shared<input_stream_adapter>(i)) {}
|
|
|
|
input_adapter(const std::wstring& ws)
|
|
: ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}
|
|
|
|
input_adapter(const std::u16string& ws)
|
|
: ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}
|
|
|
|
input_adapter(const std::u32string& ws)
|
|
: ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}
|
|
|
|
/// input adapter for buffer
|
|
template<typename CharT,
|
|
typename std::enable_if<
|
|
std::is_pointer<CharT>::value and
|
|
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
|
|
sizeof(typename std::remove_pointer<CharT>::type) == 1,
|
|
int>::type = 0>
|
|
input_adapter(CharT b, std::size_t l)
|
|
: ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}
|
|
|
|
// derived support
|
|
|
|
/// input adapter for string literal
|
|
template<typename CharT,
|
|
typename std::enable_if<
|
|
std::is_pointer<CharT>::value and
|
|
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
|
|
sizeof(typename std::remove_pointer<CharT>::type) == 1,
|
|
int>::type = 0>
|
|
input_adapter(CharT b)
|
|
: input_adapter(reinterpret_cast<const char*>(b),
|
|
std::strlen(reinterpret_cast<const char*>(b))) {}
|
|
|
|
/// input adapter for iterator range with contiguous storage
|
|
template<class IteratorType,
|
|
typename std::enable_if<
|
|
std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
|
|
int>::type = 0>
|
|
input_adapter(IteratorType first, IteratorType last)
|
|
{
|
|
// assertion to check that the iterator range is indeed contiguous,
|
|
// see http://stackoverflow.com/a/35008842/266378 for more discussion
|
|
assert(std::accumulate(
|
|
first, last, std::pair<bool, int>(true, 0),
|
|
[&first](std::pair<bool, int> res, decltype(*first) val)
|
|
{
|
|
res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
|
|
return res;
|
|
}).first);
|
|
|
|
// assertion to check that each element is 1 byte long
|
|
static_assert(
|
|
sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1,
|
|
"each element in the iterator range must have the size of 1 byte");
|
|
|
|
const auto len = static_cast<size_t>(std::distance(first, last));
|
|
if (JSON_LIKELY(len > 0))
|
|
{
|
|
// there is at least one element: use the address of first
|
|
ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
|
|
}
|
|
else
|
|
{
|
|
// the address of first cannot be used: use nullptr
|
|
ia = std::make_shared<input_buffer_adapter>(nullptr, len);
|
|
}
|
|
}
|
|
|
|
/// input adapter for array
|
|
template<class T, std::size_t N>
|
|
input_adapter(T (&array)[N])
|
|
: input_adapter(std::begin(array), std::end(array)) {}
|
|
|
|
/// input adapter for contiguous container
|
|
template<class ContiguousContainer, typename
|
|
std::enable_if<not std::is_pointer<ContiguousContainer>::value and
|
|
std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
|
|
int>::type = 0>
|
|
input_adapter(const ContiguousContainer& c)
|
|
: input_adapter(std::begin(c), std::end(c)) {}
|
|
|
|
operator input_adapter_t()
|
|
{
|
|
return ia;
|
|
}
|
|
|
|
private:
|
|
/// the actual adapter
|
|
input_adapter_t ia = nullptr;
|
|
};
|
|
}
|
|
}
|