| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669 |
- // Formatting library for C++ - scanning API proof of concept
- //
- // Copyright (c) 2019 - present, Victor Zverovich
- // All rights reserved.
- //
- // For the license information refer to format.h.
- #include <array>
- #include <cassert>
- #include <climits>
- #include <tuple>
- #include "fmt/format-inl.h"
- FMT_BEGIN_NAMESPACE
- namespace detail {
- inline auto is_whitespace(char c) -> bool { return c == ' ' || c == '\n'; }
- // If c is a hex digit returns its numeric value, otherwise -1.
- inline auto to_hex_digit(char c) -> int {
- if (c >= '0' && c <= '9') return c - '0';
- if (c >= 'a' && c <= 'f') return c - 'a' + 10;
- if (c >= 'A' && c <= 'F') return c - 'A' + 10;
- return -1;
- }
- struct maybe_contiguous_range {
- const char* begin;
- const char* end;
- explicit operator bool() const { return begin != nullptr; }
- };
- class scan_buffer {
- private:
- const char* ptr_;
- const char* end_;
- bool contiguous_;
- protected:
- scan_buffer(const char* ptr, const char* end, bool contiguous)
- : ptr_(ptr), end_(end), contiguous_(contiguous) {}
- ~scan_buffer() = default;
- void set(span<const char> buf) {
- ptr_ = buf.data;
- end_ = buf.data + buf.size;
- }
- auto ptr() const -> const char* { return ptr_; }
- public:
- scan_buffer(const scan_buffer&) = delete;
- void operator=(const scan_buffer&) = delete;
- // Fills the buffer with more input if available.
- virtual void consume() = 0;
- class sentinel {};
- class iterator {
- private:
- const char** ptr_;
- scan_buffer* buf_; // This could be merged with ptr_.
- char value_;
- static auto get_sentinel() -> const char** {
- static const char* ptr = nullptr;
- return &ptr;
- }
- friend class scan_buffer;
- friend auto operator==(iterator lhs, sentinel) -> bool {
- return *lhs.ptr_ == nullptr;
- }
- friend auto operator!=(iterator lhs, sentinel) -> bool {
- return *lhs.ptr_ != nullptr;
- }
- iterator(scan_buffer* buf) : buf_(buf) {
- if (buf->ptr_ == buf->end_) {
- ptr_ = get_sentinel();
- return;
- }
- ptr_ = &buf->ptr_;
- value_ = *buf->ptr_;
- }
- friend scan_buffer& get_buffer(iterator it) { return *it.buf_; }
- public:
- iterator() : ptr_(get_sentinel()), buf_(nullptr) {}
- auto operator++() -> iterator& {
- if (!buf_->try_consume()) ptr_ = get_sentinel();
- value_ = *buf_->ptr_;
- return *this;
- }
- auto operator++(int) -> iterator {
- iterator copy = *this;
- ++*this;
- return copy;
- }
- auto operator*() const -> char { return value_; }
- auto base() const -> const char* { return buf_->ptr_; }
- friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
- friend auto advance(iterator it, size_t n) -> iterator;
- };
- friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
- if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
- return {nullptr, nullptr};
- }
- friend auto advance(iterator it, size_t n) -> iterator {
- FMT_ASSERT(it.buf_->is_contiguous(), "");
- const char*& ptr = it.buf_->ptr_;
- ptr += n;
- it.value_ = *ptr;
- if (ptr == it.buf_->end_) it.ptr_ = iterator::get_sentinel();
- return it;
- }
- auto begin() -> iterator { return this; }
- auto end() -> sentinel { return {}; }
- auto is_contiguous() const -> bool { return contiguous_; }
- // Tries consuming a single code unit. Returns true iff there is more input.
- auto try_consume() -> bool {
- FMT_ASSERT(ptr_ != end_, "");
- ++ptr_;
- if (ptr_ != end_) return true;
- consume();
- return ptr_ != end_;
- }
- };
- using scan_iterator = scan_buffer::iterator;
- using scan_sentinel = scan_buffer::sentinel;
- class string_scan_buffer final : public scan_buffer {
- private:
- void consume() override {}
- public:
- explicit string_scan_buffer(string_view s)
- : scan_buffer(s.begin(), s.end(), true) {}
- };
- class file_scan_buffer final : public scan_buffer {
- private:
- template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0 &&
- !FMT_USE_FALLBACK_FILE)>
- static auto get_file(F* f, int) -> glibc_file<F> {
- return f;
- }
- template <typename F,
- FMT_ENABLE_IF(sizeof(F::_p) != 0 && !FMT_USE_FALLBACK_FILE)>
- static auto get_file(F* f, int) -> apple_file<F> {
- return f;
- }
- static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
- decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
- // Fills the buffer if it is empty.
- void fill() {
- span<const char> buf = file_.get_read_buffer();
- if (buf.size == 0) {
- int c = file_.get();
- // Put the character back since we are only filling the buffer.
- if (c != EOF) file_.unget(static_cast<char>(c));
- buf = file_.get_read_buffer();
- }
- set(buf);
- }
- void consume() override {
- // Consume the current buffer content.
- size_t n = to_unsigned(ptr() - file_.get_read_buffer().data);
- for (size_t i = 0; i != n; ++i) file_.get();
- fill();
- }
- public:
- explicit file_scan_buffer(FILE* f)
- : scan_buffer(nullptr, nullptr, false), file_(f) {
- flockfile(f);
- fill();
- }
- ~file_scan_buffer() {
- FILE* f = file_;
- funlockfile(f);
- }
- };
- } // namespace detail
- template <typename T, typename Char = char> struct scanner {
- // A deleted default constructor indicates a disabled scanner.
- scanner() = delete;
- };
- class scan_parse_context {
- private:
- string_view format_;
- public:
- using iterator = string_view::iterator;
- FMT_CONSTEXPR explicit scan_parse_context(string_view format)
- : format_(format) {}
- FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
- FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }
- void advance_to(iterator it) {
- format_.remove_prefix(detail::to_unsigned(it - begin()));
- }
- };
- namespace detail {
- enum class scan_type {
- none_type,
- int_type,
- uint_type,
- long_long_type,
- ulong_long_type,
- string_type,
- string_view_type,
- custom_type
- };
- template <typename Context> struct custom_scan_arg {
- void* value;
- void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx);
- };
- } // namespace detail
- // A scan argument. Context is a template parameter for the compiled API where
- // output can be unbuffered.
- template <typename Context> class basic_scan_arg {
- private:
- using scan_type = detail::scan_type;
- scan_type type_;
- union {
- int* int_value_;
- unsigned* uint_value_;
- long long* long_long_value_;
- unsigned long long* ulong_long_value_;
- std::string* string_;
- string_view* string_view_;
- detail::custom_scan_arg<Context> custom_;
- // TODO: more types
- };
- template <typename T>
- static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
- Context& ctx) {
- auto s = scanner<T>();
- parse_ctx.advance_to(s.parse(parse_ctx));
- ctx.advance_to(s.scan(*static_cast<T*>(arg), ctx));
- }
- public:
- FMT_CONSTEXPR basic_scan_arg()
- : type_(scan_type::none_type), int_value_(nullptr) {}
- FMT_CONSTEXPR basic_scan_arg(int& value)
- : type_(scan_type::int_type), int_value_(&value) {}
- FMT_CONSTEXPR basic_scan_arg(unsigned& value)
- : type_(scan_type::uint_type), uint_value_(&value) {}
- FMT_CONSTEXPR basic_scan_arg(long long& value)
- : type_(scan_type::long_long_type), long_long_value_(&value) {}
- FMT_CONSTEXPR basic_scan_arg(unsigned long long& value)
- : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {}
- FMT_CONSTEXPR basic_scan_arg(std::string& value)
- : type_(scan_type::string_type), string_(&value) {}
- FMT_CONSTEXPR basic_scan_arg(string_view& value)
- : type_(scan_type::string_view_type), string_view_(&value) {}
- template <typename T>
- FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) {
- custom_.value = &value;
- custom_.scan = scan_custom_arg<T>;
- }
- constexpr explicit operator bool() const noexcept {
- return type_ != scan_type::none_type;
- }
- auto type() const -> detail::scan_type { return type_; }
- template <typename Visitor>
- auto visit(Visitor&& vis) -> decltype(vis(monostate())) {
- switch (type_) {
- case scan_type::none_type:
- break;
- case scan_type::int_type:
- return vis(*int_value_);
- case scan_type::uint_type:
- return vis(*uint_value_);
- case scan_type::long_long_type:
- return vis(*long_long_value_);
- case scan_type::ulong_long_type:
- return vis(*ulong_long_value_);
- case scan_type::string_type:
- return vis(*string_);
- case scan_type::string_view_type:
- return vis(*string_view_);
- case scan_type::custom_type:
- break;
- }
- return vis(monostate());
- }
- auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx,
- Context& ctx) const -> bool {
- if (type_ != scan_type::custom_type) return false;
- parse_ctx.advance_to(parse_begin);
- custom_.scan(custom_.value, parse_ctx, ctx);
- return true;
- }
- };
- class scan_context;
- using scan_arg = basic_scan_arg<scan_context>;
- struct scan_args {
- int size;
- const scan_arg* data;
- template <size_t N>
- FMT_CONSTEXPR scan_args(const std::array<scan_arg, N>& store)
- : size(N), data(store.data()) {
- static_assert(N < INT_MAX, "too many arguments");
- }
- };
- class scan_context {
- private:
- detail::scan_buffer& buf_;
- scan_args args_;
- public:
- using iterator = detail::scan_iterator;
- using sentinel = detail::scan_sentinel;
- FMT_CONSTEXPR explicit scan_context(detail::scan_buffer& buf, scan_args args)
- : buf_(buf), args_(args) {}
- FMT_CONSTEXPR auto arg(int id) const -> scan_arg {
- return id < args_.size ? args_.data[id] : scan_arg();
- }
- auto begin() const -> iterator { return buf_.begin(); }
- auto end() const -> sentinel { return {}; }
- void advance_to(iterator) { buf_.consume(); }
- };
- namespace detail {
- const char* parse_scan_specs(const char* begin, const char* end,
- format_specs& specs, scan_type) {
- while (begin != end) {
- switch (to_ascii(*begin)) {
- // TODO: parse more scan format specifiers
- case 'x':
- specs.set_type(presentation_type::hex);
- ++begin;
- break;
- case '}':
- return begin;
- }
- }
- return begin;
- }
- template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
- auto read(scan_iterator it, T& value) -> scan_iterator {
- if (it == scan_sentinel()) return it;
- char c = *it;
- if (c < '0' || c > '9') report_error("invalid input");
- int num_digits = 0;
- T n = 0, prev = 0;
- char prev_digit = c;
- do {
- prev = n;
- n = n * 10 + static_cast<unsigned>(c - '0');
- prev_digit = c;
- c = *++it;
- ++num_digits;
- if (c < '0' || c > '9') break;
- } while (it != scan_sentinel());
- // Check overflow.
- if (num_digits <= std::numeric_limits<int>::digits10) {
- value = n;
- return it;
- }
- unsigned max = to_unsigned((std::numeric_limits<int>::max)());
- if (num_digits == std::numeric_limits<int>::digits10 + 1 &&
- prev * 10ull + unsigned(prev_digit - '0') <= max) {
- value = n;
- } else {
- report_error("number is too big");
- }
- return it;
- }
- template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
- auto read_hex(scan_iterator it, T& value) -> scan_iterator {
- if (it == scan_sentinel()) return it;
- int digit = to_hex_digit(*it);
- if (digit < 0) report_error("invalid input");
- int num_digits = 0;
- T n = 0;
- do {
- n = (n << 4) + static_cast<unsigned>(digit);
- ++num_digits;
- digit = to_hex_digit(*++it);
- if (digit < 0) break;
- } while (it != scan_sentinel());
- // Check overflow.
- if (num_digits <= (std::numeric_limits<T>::digits >> 2))
- value = n;
- else
- report_error("number is too big");
- return it;
- }
- template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
- auto read(scan_iterator it, T& value, const format_specs& specs)
- -> scan_iterator {
- if (specs.type() == presentation_type::hex) return read_hex(it, value);
- return read(it, value);
- }
- template <typename T, FMT_ENABLE_IF(std::is_signed<T>::value)>
- auto read(scan_iterator it, T& value, const format_specs& specs = {})
- -> scan_iterator {
- bool negative = it != scan_sentinel() && *it == '-';
- if (negative) {
- ++it;
- if (it == scan_sentinel()) report_error("invalid input");
- }
- using unsigned_type = typename std::make_unsigned<T>::type;
- unsigned_type abs_value = 0;
- it = read(it, abs_value, specs);
- auto n = static_cast<T>(abs_value);
- value = negative ? -n : n;
- return it;
- }
- auto read(scan_iterator it, std::string& value, const format_specs& = {})
- -> scan_iterator {
- while (it != scan_sentinel() && *it != ' ') value.push_back(*it++);
- return it;
- }
- auto read(scan_iterator it, string_view& value, const format_specs& = {})
- -> scan_iterator {
- auto range = to_contiguous(it);
- // This could also be checked at compile time in scan.
- if (!range) report_error("string_view requires contiguous input");
- auto p = range.begin;
- while (p != range.end && *p != ' ') ++p;
- size_t size = to_unsigned(p - range.begin);
- value = {range.begin, size};
- return advance(it, size);
- }
- auto read(scan_iterator it, monostate, const format_specs& = {})
- -> scan_iterator {
- return it;
- }
- // An argument scanner that uses the default format, e.g. decimal for integers.
- struct default_arg_scanner {
- scan_iterator it;
- template <typename T> FMT_INLINE auto operator()(T&& value) -> scan_iterator {
- return read(it, value);
- }
- };
- // An argument scanner with format specifiers.
- struct arg_scanner {
- scan_iterator it;
- const format_specs& specs;
- template <typename T> auto operator()(T&& value) -> scan_iterator {
- return read(it, value, specs);
- }
- };
- struct scan_handler {
- private:
- scan_parse_context parse_ctx_;
- scan_context scan_ctx_;
- int next_arg_id_;
- using sentinel = scan_buffer::sentinel;
- public:
- FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
- scan_args args)
- : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {}
- auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }
- void on_text(const char* begin, const char* end) {
- if (begin == end) return;
- auto it = scan_ctx_.begin();
- for (; begin != end; ++begin, ++it) {
- if (it == sentinel() || *begin != *it) on_error("invalid input");
- }
- scan_ctx_.advance_to(it);
- }
- FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }
- FMT_CONSTEXPR auto on_arg_id(int id) -> int {
- if (!scan_ctx_.arg(id)) on_error("argument index out of range");
- return id;
- }
- FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
- if (id.data()) on_error("invalid format");
- return 0;
- }
- void on_replacement_field(int arg_id, const char* begin) {
- scan_arg arg = scan_ctx_.arg(arg_id);
- if (arg.scan_custom(begin, parse_ctx_, scan_ctx_)) return;
- auto it = scan_ctx_.begin();
- while (it != sentinel() && is_whitespace(*it)) ++it;
- scan_ctx_.advance_to(arg.visit(default_arg_scanner{it}));
- }
- auto on_format_specs(int arg_id, const char* begin, const char* end) -> const
- char* {
- scan_arg arg = scan_ctx_.arg(arg_id);
- if (arg.scan_custom(begin, parse_ctx_, scan_ctx_))
- return parse_ctx_.begin();
- auto specs = format_specs();
- begin = parse_scan_specs(begin, end, specs, arg.type());
- if (begin == end || *begin != '}') on_error("missing '}' in format string");
- scan_ctx_.advance_to(arg.visit(arg_scanner{scan_ctx_.begin(), specs}));
- return begin;
- }
- FMT_NORETURN void on_error(const char* message) { report_error(message); }
- };
- void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
- auto h = detail::scan_handler(fmt, buf, args);
- detail::parse_format_string(fmt, h);
- }
- template <size_t I, typename... T, FMT_ENABLE_IF(I == sizeof...(T))>
- void make_args(std::array<scan_arg, sizeof...(T)>&, std::tuple<T...>&) {}
- template <size_t I, typename... T, FMT_ENABLE_IF(I < sizeof...(T))>
- void make_args(std::array<scan_arg, sizeof...(T)>& args,
- std::tuple<T...>& values) {
- using element_type = typename std::tuple_element<I, std::tuple<T...>>::type;
- static_assert(std::is_same<remove_cvref_t<element_type>, element_type>::value,
- "");
- args[I] = std::get<I>(values);
- make_args<I + 1>(args, values);
- }
- } // namespace detail
- template <typename Range, typename... T> class scan_data {
- private:
- std::tuple<T...> values_;
- Range range_;
- public:
- scan_data() = default;
- scan_data(T... values) : values_(std::move(values)...) {}
- auto value() const -> decltype(std::get<0>(values_)) {
- return std::get<0>(values_);
- }
- auto values() const -> const std::tuple<T...>& { return values_; }
- auto make_args() -> std::array<scan_arg, sizeof...(T)> {
- auto args = std::array<scan_arg, sizeof...(T)>();
- detail::make_args<0>(args, values_);
- return args;
- }
- auto range() const -> Range { return range_; }
- auto begin() const -> decltype(range_.begin()) { return range_.begin(); }
- auto end() const -> decltype(range_.end()) { return range_.end(); }
- };
- template <typename... T>
- auto make_scan_args(T&... args) -> std::array<scan_arg, sizeof...(T)> {
- return {{args...}};
- }
- class scan_error {};
- // A rudimentary version of std::expected for testing the API shape.
- template <typename T, typename E> class expected {
- private:
- T value_;
- bool has_value_ = true;
- public:
- expected(T value) : value_(std::move(value)) {}
- explicit operator bool() const { return has_value_; }
- auto operator->() const -> const T* { return &value_; }
- auto error() -> E const { return E(); }
- };
- template <typename Range, typename... T>
- using scan_result = expected<scan_data<Range, T...>, scan_error>;
- auto vscan(string_view input, string_view fmt, scan_args args)
- -> string_view::iterator {
- auto&& buf = detail::string_scan_buffer(input);
- detail::vscan(buf, fmt, args);
- return input.begin() + (buf.begin().base() - input.data());
- }
- // Scans the input and stores the results (in)to args.
- template <typename... T>
- auto scan_to(string_view input, string_view fmt, T&... args)
- -> string_view::iterator {
- return vscan(input, fmt, make_scan_args(args...));
- }
- template <typename... T>
- auto scan(string_view input, string_view fmt)
- -> scan_result<string_view, T...> {
- auto data = scan_data<string_view, T...>();
- vscan(input, fmt, data.make_args());
- return data;
- }
- template <typename Range, typename... T,
- FMT_ENABLE_IF(!std::is_convertible<Range, string_view>::value)>
- auto scan_to(Range&& input, string_view fmt, T&... args)
- -> decltype(std::begin(input)) {
- auto it = std::begin(input);
- detail::vscan(get_buffer(it), fmt, make_scan_args(args...));
- return it;
- }
- template <typename... T>
- auto scan_to(FILE* f, string_view fmt, T&... args) -> bool {
- auto&& buf = detail::file_scan_buffer(f);
- detail::vscan(buf, fmt, make_scan_args(args...));
- return buf.begin() != buf.end();
- }
- FMT_END_NAMESPACE
|