// fmt/test/scan.h

// Formatting library for C++ - scanning API proof of concept
//
// Copyright (c) 2019 - present, Victor Zverovich
// All rights reserved.
//
// For the license information refer to format.h.
#include <array>
#include <cassert>
#include <cerrno>
#include <climits>
#include <cstddef>
#include <cstdio>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>

#include "fmt/format.h"
FMT_BEGIN_NAMESPACE
2023-11-26 20:22:31 +03:00
namespace detail {
2023-12-25 20:05:26 +03:00
// Reports whether `c` is one of the two characters the scanner treats as
// whitespace: a space or a newline.
inline bool is_whitespace(char c) {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
// A minimal optional: holds either a T or nothing.
template <typename T> class optional {
 private:
  // Value-initialized so that an empty optional never carries (or copies) an
  // indeterminate T.
  T value_{};
  bool has_value_ = false;

 public:
  // Constructs an empty optional.
  optional() = default;

  // Constructs an optional holding `value`. Intentionally implicit so that a
  // function returning optional<T> can simply `return value;`.
  optional(T value) : value_(std::move(value)), has_value_(true) {}

  // Returns true iff a value is present.
  explicit operator bool() const { return has_value_; }

  // Returns the contained value; throws std::runtime_error if there is none.
  auto operator*() const -> const T& {
    if (!has_value_) throw std::runtime_error("bad optional access");
    return value_;
  }
};
// A [begin, end) pair of character pointers. Converts to true iff `begin` is
// non-null.
struct maybe_contiguous_range {
  const char* begin;
  const char* end;

  explicit operator bool() const { return begin ? true : false; }
};
2023-11-26 20:22:31 +03:00
class scan_buffer {
private:
const char* ptr_;
2023-12-02 20:34:27 +03:00
const char* end_;
bool contiguous_;
2023-11-26 20:22:31 +03:00
protected:
2023-12-02 20:34:27 +03:00
scan_buffer(const char* ptr, const char* end, bool contiguous)
: ptr_(ptr), end_(end), contiguous_(contiguous) {}
2023-11-26 20:22:31 +03:00
~scan_buffer() = default;
2023-12-25 21:28:50 +03:00
void set(string_view buf) {
ptr_ = buf.begin();
end_ = buf.end();
2023-11-26 20:22:31 +03:00
}
2023-12-25 21:16:55 +03:00
auto ptr() const -> const char* { return ptr_; }
2023-12-24 18:32:27 +03:00
2023-11-26 20:22:31 +03:00
public:
scan_buffer(const scan_buffer&) = delete;
void operator=(const scan_buffer&) = delete;
2023-12-24 18:32:27 +03:00
// Fills the buffer with more input if available.
virtual void consume() = 0;
2023-12-02 20:34:27 +03:00
class iterator {
private:
const char** ptr_;
scan_buffer* buf_; // This could be merged with ptr_.
char value_;
static auto sentinel() -> const char** {
static const char* ptr = nullptr;
return &ptr;
}
friend class scan_buffer;
friend auto operator==(iterator lhs, iterator rhs) -> bool {
return *lhs.ptr_ == *rhs.ptr_;
}
friend auto operator!=(iterator lhs, iterator rhs) -> bool {
return *lhs.ptr_ != *rhs.ptr_;
}
2023-12-25 21:47:45 +03:00
iterator(scan_buffer* buf) : buf_(buf) {
if (buf->ptr_ == buf->end_) {
ptr_ = sentinel();
return;
}
ptr_ = &buf->ptr_;
value_ = *buf->ptr_;
2023-12-02 20:34:27 +03:00
}
2023-12-29 17:25:57 +03:00
friend scan_buffer& get_buffer(iterator it) {
return *it.buf_;
}
2023-12-02 20:34:27 +03:00
public:
iterator() : ptr_(sentinel()), buf_(nullptr) {}
2023-11-26 20:22:31 +03:00
2023-12-02 20:34:27 +03:00
auto operator++() -> iterator& {
if (!buf_->try_consume()) ptr_ = sentinel();
value_ = *buf_->ptr_;
return *this;
}
auto operator++(int) -> iterator {
iterator copy = *this;
++*this;
return copy;
}
auto operator*() const -> char { return value_; }
auto base() const -> const char* { return buf_->ptr_; }
friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
friend void advance(iterator& it, size_t n);
};
friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
return {nullptr, nullptr};
}
friend void advance(iterator& it, size_t n) {
FMT_ASSERT(it.buf_->is_contiguous(), "");
const char*& ptr = it.buf_->ptr_;
ptr += n;
it.value_ = *ptr;
if (ptr == it.buf_->end_) it.ptr_ = iterator::sentinel();
}
2023-11-26 20:22:31 +03:00
2023-12-25 21:28:50 +03:00
auto begin() -> iterator { return this; }
auto end() -> iterator { return {}; }
2023-12-02 20:34:27 +03:00
auto is_contiguous() const -> bool { return contiguous_; }
2023-12-25 21:28:50 +03:00
// Tries consuming a single code unit. Returns true iff there is more input.
2023-12-02 20:34:27 +03:00
auto try_consume() -> bool {
FMT_ASSERT(ptr_ != end_, "");
++ptr_;
2023-12-22 17:35:36 +03:00
if (ptr_ != end_) return true;
consume();
return ptr_ != end_;
2023-11-26 20:22:31 +03:00
}
};
class string_scan_buffer : public scan_buffer {
private:
2023-12-22 17:35:36 +03:00
void consume() override {}
2023-11-26 20:22:31 +03:00
public:
2023-12-02 20:34:27 +03:00
explicit string_scan_buffer(string_view s)
: scan_buffer(s.begin(), s.end(), true) {}
2023-11-26 20:22:31 +03:00
};
2023-12-25 20:22:29 +03:00
#ifdef _WIN32
// Windows CRT counterparts of the stdio locking functions used below.
// They are inline because this is a header: non-inline definitions would be
// duplicated in every translation unit that includes it.
inline void flockfile(FILE* f) { _lock_file(f); }
inline void funlockfile(FILE* f) { _unlock_file(f); }
inline int getc_unlocked(FILE* f) { return _fgetc_nolock(f); }
#endif
// A FILE wrapper. F is FILE defined as a template parameter to make
// system-specific API detection work.
template <typename F> class file_base {
 protected:
  F* file_;

 public:
  // Implicit on purpose: wrappers are created by returning a plain F*.
  file_base(F* file) : file_(file) {}
  operator F*() const { return file_; }

  // Reads a code unit from the stream. Returns EOF at the end of input and
  // reports a stream error via system_error.
  auto get() -> int {
    int result = getc_unlocked(file_);
    if (result == EOF && ferror(file_) != 0)
      FMT_THROW(system_error(errno, FMT_STRING("getc failed")));
    return result;
  }

  // Puts the code unit back into the stream buffer.
  void unget(char c) {
    // ungetc expects the character as an unsigned char value. Passing a plain
    // char would turn the byte 0xFF into EOF where char is signed, and
    // pushing back EOF always fails.
    if (ungetc(static_cast<unsigned char>(c), file_) == EOF)
      FMT_THROW(system_error(errno, FMT_STRING("ungetc failed")));
  }
};
// A FILE wrapper for glibc.
template <typename F> class glibc_file : public file_base<F> {
 public:
  using file_base<F>::file_base;

  // Returns the file's read buffer as a string_view, i.e. the characters
  // between _IO_read_ptr and _IO_read_end.
  auto buffer() const -> string_view {
    return {this->file_->_IO_read_ptr,
            to_unsigned(this->file_->_IO_read_end - this->file_->_IO_read_ptr)};
  }
};
// A FILE wrapper for Apple's libc.
template <typename F> class apple_file : public file_base<F> {
 public:
  using file_base<F>::file_base;

  // Returns the file's read buffer as a string_view: _r characters starting
  // at _p.
  auto buffer() const -> string_view {
    return {reinterpret_cast<char*>(this->file_->_p),
            to_unsigned(this->file_->_r)};
  }
};
// A fallback FILE wrapper. It cannot see the stream's own buffer, so it
// exposes a buffer of at most one character: the one most recently pushed
// back with unget.
template <typename F> class fallback_file : public file_base<F> {
 private:
  // The next unconsumed character in the buffer. Initialized so that copying
  // a wrapper never reads an indeterminate value.
  char next_ = '\0';
  bool has_next_ = false;

 public:
  using file_base<F>::file_base;

  // Returns the one-character buffer, empty unless a character was pushed
  // back since the last get().
  auto buffer() const -> string_view { return {&next_, has_next_ ? 1u : 0u}; }

  // Reads a code unit from the stream and empties the one-character buffer.
  auto get() -> int {
    has_next_ = false;
    return file_base<F>::get();
  }

  // Pushes c back into the stream and remembers it as the buffer content.
  void unget(char c) {
    file_base<F>::unget(c);
    next_ = c;
    has_next_ = true;
  }
};
class file_scan_buffer : public scan_buffer {
private:
2023-12-25 20:22:29 +03:00
template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0)>
static auto get_file(F* f, int) -> glibc_file<F> {
return f;
}
template <typename F, FMT_ENABLE_IF(sizeof(F::_p) != 0)>
static auto get_file(F* f, int) -> apple_file<F> {
return f;
}
static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
2023-12-22 17:35:36 +03:00
2023-12-25 20:22:29 +03:00
decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
2023-12-25 20:00:03 +03:00
2023-12-25 20:05:26 +03:00
// Fills the buffer if it is empty.
2023-12-23 00:39:18 +03:00
void fill() {
2023-12-22 21:50:01 +03:00
string_view buf = file_.buffer();
2023-12-22 17:35:36 +03:00
if (buf.size() == 0) {
2023-12-22 21:50:01 +03:00
int c = file_.get();
// Put the character back since we are only filling the buffer.
if (c != EOF) file_.unget(static_cast<char>(c));
buf = file_.buffer();
2023-12-22 17:35:36 +03:00
}
2023-12-25 21:28:50 +03:00
set(buf);
2023-11-26 20:22:31 +03:00
}
2023-12-22 17:35:36 +03:00
void consume() override {
// Consume the current buffer content.
2023-12-24 18:32:27 +03:00
size_t n = to_unsigned(ptr() - file_.buffer().begin());
for (size_t i = 0; i != n; ++i) file_.get();
2023-12-23 00:39:18 +03:00
fill();
2023-11-26 20:22:31 +03:00
}
public:
explicit file_scan_buffer(FILE* f)
2023-12-02 20:34:27 +03:00
: scan_buffer(nullptr, nullptr, false), file_(f) {
2023-12-25 19:39:14 +03:00
flockfile(f);
2023-12-23 00:39:18 +03:00
fill();
2023-11-26 20:22:31 +03:00
}
2023-12-25 20:05:26 +03:00
~file_scan_buffer() { funlockfile(file_); }
2023-11-26 20:22:31 +03:00
};
} // namespace detail
// The primary scanner template. It cannot be default-constructed, which is
// how a type without a usable scanner is recognized.
template <class T, class Char = char> struct scanner {
  // A deleted default constructor indicates a disabled scanner.
  scanner() = delete;
};
class scan_parse_context {
private:
string_view format_;
public:
using iterator = string_view::iterator;
explicit FMT_CONSTEXPR scan_parse_context(string_view format)
: format_(format) {}
2023-11-25 18:41:04 +03:00
FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }
void advance_to(iterator it) {
2020-05-10 17:25:42 +03:00
format_.remove_prefix(detail::to_unsigned(it - begin()));
}
};
struct scan_context {
private:
2023-11-26 20:22:31 +03:00
detail::scan_buffer& buf_;
public:
2023-12-02 20:34:27 +03:00
using iterator = detail::scan_buffer::iterator;
2023-11-26 20:22:31 +03:00
explicit FMT_CONSTEXPR scan_context(detail::scan_buffer& buf) : buf_(buf) {}
2023-11-26 20:22:31 +03:00
auto begin() const -> iterator { return buf_.begin(); }
auto end() const -> iterator { return buf_.end(); }
2023-12-25 20:05:26 +03:00
void advance_to(iterator) { buf_.consume(); }
};
2020-05-10 17:25:42 +03:00
namespace detail {
// Identifies which kind of destination a scan_arg refers to.
enum class scan_type {
  none_type,         // No argument.
  int_type,          // int
  uint_type,         // unsigned
  long_long_type,    // long long
  ulong_long_type,   // unsigned long long
  string_type,       // std::string
  string_view_type,  // fmt::string_view
  custom_type        // A type handled through custom_scan_arg.
};
// A type-erased argument: an untyped pointer to the destination object plus
// the function that is called to scan into it.
struct custom_scan_arg {
  using scan_function = void (*)(void* arg, scan_parse_context& parse_ctx,
                                 scan_context& ctx);

  void* value;
  scan_function scan;
};
class scan_arg {
public:
scan_type type;
union {
int* int_value;
unsigned* uint_value;
long long* long_long_value;
unsigned long long* ulong_long_value;
std::string* string;
fmt::string_view* string_view;
custom_scan_arg custom;
// TODO: more types
};
2022-09-12 12:01:44 +03:00
FMT_CONSTEXPR scan_arg() : type(scan_type::none_type), int_value(nullptr) {}
FMT_CONSTEXPR scan_arg(int& value)
: type(scan_type::int_type), int_value(&value) {}
FMT_CONSTEXPR scan_arg(unsigned& value)
: type(scan_type::uint_type), uint_value(&value) {}
FMT_CONSTEXPR scan_arg(long long& value)
: type(scan_type::long_long_type), long_long_value(&value) {}
2022-09-12 12:01:44 +03:00
FMT_CONSTEXPR scan_arg(unsigned long long& value)
: type(scan_type::ulong_long_type), ulong_long_value(&value) {}
2022-09-12 12:01:44 +03:00
FMT_CONSTEXPR scan_arg(std::string& value)
: type(scan_type::string_type), string(&value) {}
FMT_CONSTEXPR scan_arg(fmt::string_view& value)
: type(scan_type::string_view_type), string_view(&value) {}
2022-09-12 12:01:44 +03:00
template <typename T>
FMT_CONSTEXPR scan_arg(T& value) : type(scan_type::custom_type) {
custom.value = &value;
custom.scan = scan_custom_arg<T>;
}
private:
template <typename T>
static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
scan_context& ctx) {
2023-11-25 18:41:04 +03:00
auto s = scanner<T>();
parse_ctx.advance_to(s.parse(parse_ctx));
ctx.advance_to(s.scan(*static_cast<T*>(arg), ctx));
}
};
2020-05-10 17:25:42 +03:00
} // namespace detail
struct scan_args {
int size;
2020-05-10 17:25:42 +03:00
const detail::scan_arg* data;
template <size_t N>
2022-09-12 12:01:44 +03:00
FMT_CONSTEXPR scan_args(const std::array<detail::scan_arg, N>& store)
: size(N), data(store.data()) {
static_assert(N < INT_MAX, "too many arguments");
}
};
2020-05-10 17:25:42 +03:00
namespace detail {
struct scan_handler : error_handler {
private:
scan_parse_context parse_ctx_;
scan_context scan_ctx_;
scan_args args_;
int next_arg_id_;
scan_arg arg_;
2023-12-29 17:34:49 +03:00
using iterator = scan_buffer::iterator;
template <typename T = unsigned> auto read_uint(iterator& it) -> optional<T> {
auto end = scan_ctx_.end();
2023-12-25 18:18:23 +03:00
if (it == end) return {};
char c = *it;
2023-12-22 17:35:36 +03:00
if (c < '0' || c > '9') on_error("invalid input");
2023-12-22 21:50:01 +03:00
int num_digits = 0;
T value = 0, prev = 0;
2023-12-25 18:18:23 +03:00
char prev_digit = c;
2023-12-22 17:35:36 +03:00
do {
2023-12-22 21:50:01 +03:00
prev = value;
value = value * 10 + static_cast<unsigned>(c - '0');
2023-12-22 21:50:01 +03:00
prev_digit = c;
2023-12-22 17:35:36 +03:00
c = *++it;
2023-12-22 21:50:01 +03:00
++num_digits;
2023-12-22 17:35:36 +03:00
if (c < '0' || c > '9') break;
} while (it != end);
2023-12-25 20:05:26 +03:00
2023-12-22 21:50:01 +03:00
// Check overflow.
if (num_digits <= std::numeric_limits<int>::digits10) return value;
const unsigned max = to_unsigned((std::numeric_limits<int>::max)());
if (num_digits == std::numeric_limits<int>::digits10 + 1 &&
prev * 10ull + unsigned(prev_digit - '0') <= max) {
return value;
}
throw format_error("number is too big");
}
2023-12-29 17:34:49 +03:00
template <typename T = int> auto read_int(iterator& it) -> optional<T> {
auto end = scan_ctx_.end();
bool negative = it != end && *it == '-';
2023-12-29 17:34:49 +03:00
if (negative) ++it;
if (auto abs_value = read_uint<typename std::make_unsigned<T>::type>(it)) {
2023-12-25 18:18:23 +03:00
auto value = static_cast<T>(*abs_value);
return negative ? -value : value;
}
if (negative) on_error("invalid input");
return {};
}
public:
2023-12-02 20:34:27 +03:00
FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
scan_args args)
2023-11-26 20:22:31 +03:00
: parse_ctx_(format), scan_ctx_(buf), args_(args), next_arg_id_(0) {}
2023-12-02 20:34:27 +03:00
auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }
void on_text(const char* begin, const char* end) {
2023-12-24 18:32:27 +03:00
if (begin == end) return;
2023-12-02 20:34:27 +03:00
auto it = scan_ctx_.begin(), scan_end = scan_ctx_.end();
for (; begin != end; ++begin, ++it) {
if (it == scan_end || *begin != *it) on_error("invalid input");
}
scan_ctx_.advance_to(it);
}
2023-11-25 18:41:04 +03:00
FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }
FMT_CONSTEXPR auto on_arg_id(int id) -> int {
if (id >= args_.size) on_error("argument index out of range");
arg_ = args_.data[id];
2020-06-06 17:13:38 +03:00
return id;
}
2023-11-25 18:41:04 +03:00
FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
2021-06-01 21:51:59 +03:00
if (id.data()) on_error("invalid format");
return 0;
}
2020-06-06 17:13:38 +03:00
void on_replacement_field(int, const char*) {
auto it = scan_ctx_.begin(), end = scan_ctx_.end();
2023-12-24 18:32:27 +03:00
while (it != end && is_whitespace(*it)) ++it;
switch (arg_.type) {
case scan_type::int_type:
2023-12-29 17:34:49 +03:00
if (auto value = read_int(it)) *arg_.int_value = *value;
break;
case scan_type::uint_type:
2023-12-29 17:34:49 +03:00
if (auto value = read_uint(it)) *arg_.uint_value = *value;
break;
case scan_type::long_long_type:
2023-12-29 17:34:49 +03:00
if (auto value = read_int<long long>(it)) *arg_.long_long_value = *value;
break;
case scan_type::ulong_long_type:
2023-12-29 17:34:49 +03:00
if (auto value = read_uint<unsigned long long>(it))
2023-12-25 18:18:23 +03:00
*arg_.ulong_long_value = *value;
break;
case scan_type::string_type:
while (it != end && *it != ' ') arg_.string->push_back(*it++);
break;
case scan_type::string_view_type: {
2023-12-02 20:34:27 +03:00
auto range = to_contiguous(it);
// This could also be checked at compile time in scan.
if (!range) on_error("string_view requires contiguous input");
auto p = range.begin;
while (p != range.end && *p != ' ') ++p;
size_t size = to_unsigned(p - range.begin);
*arg_.string_view = {range.begin, size};
advance(it, size);
break;
}
2019-11-26 20:10:24 +03:00
case scan_type::none_type:
case scan_type::custom_type:
assert(false);
}
2023-12-29 17:34:49 +03:00
scan_ctx_.advance_to(it);
}
2023-11-25 18:41:04 +03:00
auto on_format_specs(int, const char* begin, const char*) -> const char* {
if (arg_.type != scan_type::custom_type) return begin;
parse_ctx_.advance_to(begin);
arg_.custom.scan(arg_.custom.value, parse_ctx_, scan_ctx_);
return parse_ctx_.begin();
}
2023-12-25 19:39:14 +03:00
void on_error(const char* message) {
scan_ctx_.advance_to(scan_ctx_.end());
error_handler::on_error(message);
}
};
2020-05-10 17:25:42 +03:00
} // namespace detail
2023-11-25 18:41:04 +03:00
template <typename... T>
auto make_scan_args(T&... args) -> std::array<detail::scan_arg, sizeof...(T)> {
2019-08-11 02:12:05 +03:00
return {{args...}};
}
2023-11-26 20:22:31 +03:00
// Scans input from buf according to the format string fmt and stores the
// results in args. Declared inline because it is defined in a header and
// would otherwise be duplicated in every translation unit that includes it.
inline void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
  auto h = detail::scan_handler(fmt, buf, args);
  detail::parse_format_string<false>(fmt, h);
}
// Scans values from the string `input` according to the format string `fmt`
// and stores them in `args`. Returns an iterator pointing to the first
// character of `input` that has not been consumed.
template <typename... T>
auto scan(string_view input, string_view fmt, T&... args)
    -> string_view::iterator {
  auto&& buf = detail::string_scan_buffer(input);
  vscan(buf, fmt, make_scan_args(args...));
  // Translate the buffer's read position back into an iterator into input.
  return input.begin() + (buf.begin().base() - input.data());
}
// Scans values from an input range that is not convertible to string_view.
// The scan runs on the buffer obtained from the range's begin iterator via
// get_buffer, and that iterator is returned.
template <typename InputRange, typename... T,
          FMT_ENABLE_IF(!std::is_convertible<InputRange, string_view>::value)>
auto scan(InputRange&& input, string_view fmt, T&... args)
    -> decltype(std::begin(input)) {
  auto first = std::begin(input);
  auto& buffer = get_buffer(first);
  vscan(buffer, fmt, make_scan_args(args...));
  return first;
}
2023-12-25 19:39:14 +03:00
template <typename... T> bool scan(std::FILE* f, string_view fmt, T&... args) {
2023-11-26 20:22:31 +03:00
auto&& buf = detail::file_scan_buffer(f);
vscan(buf, fmt, make_scan_args(args...));
2023-12-25 19:39:14 +03:00
return buf.begin() != buf.end();
2023-11-26 20:22:31 +03:00
}
FMT_END_NAMESPACE