From baa84eb810d0f9c63d818258bf472ee233f2ef4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20La=C3=BCgt?= Date: Sun, 10 Nov 2019 22:26:05 +0100 Subject: [PATCH] Add thousand separators on floating number with specifier 'n' --- include/fmt/format-inl.h | 33 ++-- include/fmt/format.h | 317 ++++++++++++++++++++++++++------------- src/format.cc | 4 +- test/locale-test.cc | 15 +- 4 files changed, 244 insertions(+), 125 deletions(-) diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h index d9e37d20..51d477bb 100644 --- a/include/fmt/format-inl.h +++ b/include/fmt/format-inl.h @@ -220,7 +220,7 @@ template FMT_FUNC Char decimal_point_impl(locale_ref loc) { #else template FMT_FUNC std::string internal::grouping_impl(locale_ref) { - return "\03"; + return "\3"; } template FMT_FUNC Char internal::thousands_sep_impl(locale_ref) { @@ -1143,7 +1143,7 @@ bool grisu_format(Double value, buffer& buf, int precision, template char* sprintf_format(Double value, internal::buffer& buf, - sprintf_specs specs) { + sprintf_specs specs, bool& fixed) { // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. FMT_ASSERT(buf.capacity() != 0, "empty buffer"); @@ -1171,6 +1171,7 @@ char* sprintf_format(Double value, internal::buffer& buf, *format_ptr = '\0'; // Format using snprintf. + fixed = true; char* start = nullptr; char* decimal_point_pos = nullptr; for (;;) { @@ -1186,18 +1187,22 @@ char* sprintf_format(Double value, internal::buffer& buf, if (*p == '+' || *p == '-') ++p; if (specs.type != 'a' && specs.type != 'A') { while (p < end && *p >= '0' && *p <= '9') ++p; - if (p < end && *p != 'e' && *p != 'E') { - decimal_point_pos = p; - if (!specs.type) { - // Keep only one trailing zero after the decimal point. - ++p; - if (*p == '0') ++p; - while (p != end && *p >= '1' && *p <= '9') ++p; - char* where = p; - while (p != end && *p == '0') ++p; - if (p == end || *p < '0' || *p > '9') { - if (p != end) std::memmove(where, p, to_unsigned(end - p)); - n -= static_cast(p - where); + if (p < end) { + if (*p == 'e' || *p == 'E') { + fixed = false; + } else { + decimal_point_pos = p; + if (!specs.type) { + // Keep only one trailing zero after the decimal point. + ++p; + if (*p == '0') ++p; + while (p != end && *p >= '1' && *p <= '9') ++p; + char* where = p; + while (p != end && *p == '0') ++p; + if (p == end || *p < '0' || *p > '9') { + if (p != end) std::memmove(where, p, to_unsigned(end - p)); + n -= static_cast(p - where); + } } } } diff --git a/include/fmt/format.h b/include/fmt/format.h index 9c104df1..df3ea1a1 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -850,33 +850,27 @@ template <> inline wchar_t decimal_point(locale_ref loc) { } // Formats a decimal unsigned integer value writing into buffer. -// add_thousands_sep is called after writing each char to add a thousands -// separator if necessary. -template -inline Char* format_decimal(Char* buffer, UInt value, int num_digits, - F add_thousands_sep) { +template +inline char* format_decimal(char* buffer, UInt value, int num_digits) { FMT_ASSERT(num_digits >= 0, "invalid digit count"); buffer += num_digits; - Char* end = buffer; + char* end = buffer; while (value >= 100) { // Integer division is slow so do it for a group of two digits instead // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. auto index = static_cast((value % 100) * 2); value /= 100; - *--buffer = static_cast(data::digits[index + 1]); - add_thousands_sep(buffer); - *--buffer = static_cast(data::digits[index]); - add_thousands_sep(buffer); + *--buffer = data::digits[index + 1]; + *--buffer = data::digits[index]; } if (value < 10) { - *--buffer = static_cast('0' + value); + *--buffer = static_cast('0' + value); return end; } auto index = static_cast(value * 2); - *--buffer = static_cast(data::digits[index + 1]); - add_thousands_sep(buffer); - *--buffer = static_cast(data::digits[index]); + *--buffer = data::digits[index + 1]; + *--buffer = data::digits[index]; return end; } @@ -886,20 +880,93 @@ template constexpr int digits10() noexcept { template <> constexpr int digits10() noexcept { return 38; } template <> constexpr int digits10() noexcept { return 38; } -template -inline Iterator format_decimal(Iterator out, UInt value, int num_digits, - F add_thousands_sep) { +template class thousand_formatter { + const std::string& groups_; + Char sep_; + int num_digits_; + + std::string::const_reverse_iterator group_; + unsigned count_; + int pos_; + size_t size_; + + public: + thousand_formatter(const std::string& groups, Char sep, int num_digits) + : groups_(groups), + sep_(sep), + num_digits_(num_digits), + group_(groups.crbegin()), + count_(0), + pos_(0), + size_(0) { + auto group = groups_.cbegin(); + while (group != groups_.cend() && num_digits > *group && *group > 0 && + *group != max_value()) { + pos_ += *group; + ++size_; + num_digits -= *group; + ++group; + } + if (group == groups.cend()) { + count_ = (num_digits - 1) / groups.back(); + pos_ += count_ * groups.back(); + size_ += count_; + ++count_; + } else { + size_t dist = group - groups_.cbegin(); + group_ += groups.end() - group - (dist == size_ ? 1 : 0); + } + } + + size_t size() const { return size_; } + + template void operator()(char c, It&& it) { + *it++ = static_cast(c); + --num_digits_; + if (num_digits_ != pos_ || !num_digits_) return; + + *it++ = sep_; + if (count_) + --count_; + else + ++group_; + pos_ -= *group_; + } + + template + void operator()(const char* begin, const char* end, It&& it) { + while (begin != end) { + operator()(*begin, it); + ++begin; + } + } +}; + +template class no_thousand_formatter { + public: + size_t size() const { return 0; } + + template void operator()(char c, It&& it) { + *it++ = static_cast(c); + } + template + void operator()(const char* begin, const char* end, It&& it) { + it = copy_str(begin, end, it); + } +}; + +template > +inline Iterator format_decimal( + Iterator out, UInt value, int num_digits, + F add_thousands_sep = no_thousand_formatter{}) { FMT_ASSERT(num_digits >= 0, "invalid digit count"); // Buffer should be large enough to hold all digits (<= digits10 + 1). enum { max_size = digits10() + 1 }; - Char buffer[2 * max_size]; - auto end = format_decimal(buffer, value, num_digits, add_thousands_sep); - return internal::copy_str(buffer, end, out); -} - -template -inline It format_decimal(It out, UInt value, int num_digits) { - return format_decimal(out, value, num_digits, [](Char*) {}); + char buffer[max_size]; + auto end = format_decimal(buffer, value, num_digits); + add_thousands_sep(buffer, end, out); + return out; } template @@ -1078,7 +1145,8 @@ template It write_exponent(int exp, It it) { return it; } -template class grisu_writer { +template > +class grisu_writer { private: // The number is given as v = digits_ * pow(10, exp_). const char* digits_; @@ -1087,6 +1155,7 @@ template class grisu_writer { size_t size_; gen_digits_params params_; Char decimal_point_; + F add_thousands_sep_; template It grisu_prettify(It it) const { // pow(10, full_exp - 1) <= v <= pow(10, full_exp). @@ -1101,8 +1170,13 @@ template class grisu_writer { } if (num_digits_ <= full_exp) { // 1234e7 -> 12340000000[.0+] - it = copy_str(digits_, digits_ + num_digits_, it); - it = std::fill_n(it, full_exp - num_digits_, static_cast('0')); + typename std::decay::type add_thousands_sep(add_thousands_sep_); + add_thousands_sep(digits_, digits_ + num_digits_, it); + int n = full_exp - num_digits_; + while (n > 0) { + add_thousands_sep('0', it); + --n; + } int num_zeros = (std::max)(params_.num_digits - full_exp, 1); if (params_.trailing_zeros) { *it++ = decimal_point_; @@ -1114,7 +1188,8 @@ template class grisu_writer { } } else if (full_exp > 0) { // 1234e-2 -> 12.34[0+] - it = copy_str(digits_, digits_ + full_exp, it); + typename std::decay::type add_thousands_sep(add_thousands_sep_); + add_thousands_sep(digits_, digits_ + full_exp, it); if (!params_.trailing_zeros) { // Remove trailing zeros. int num_digits = num_digits_; @@ -1150,12 +1225,14 @@ template class grisu_writer { public: grisu_writer(const char* digits, int num_digits, int exp, - gen_digits_params params, Char decimal_point) + gen_digits_params params, Char decimal_point, + F add_thousands_sep = no_thousand_formatter{}) : digits_(digits), num_digits_(num_digits), exp_(exp), params_(params), - decimal_point_(decimal_point) { + decimal_point_(decimal_point), + add_thousands_sep_(add_thousands_sep) { int full_exp = num_digits + exp - 1; int precision = params.num_digits > 0 ? params.num_digits : 16; params_.fixed |= full_exp >= -4 && full_exp < precision; @@ -1198,13 +1275,13 @@ struct sprintf_specs { }; template -char* sprintf_format(Double, internal::buffer&, sprintf_specs); +char* sprintf_format(Double, internal::buffer&, sprintf_specs, bool&); template <> inline char* sprintf_format(float value, internal::buffer& buf, - sprintf_specs specs) { + sprintf_specs specs, bool& fixed) { // printf does not have a float format specifier, it only supports double. - return sprintf_format(value, buf, specs); + return sprintf_format(value, buf, specs, fixed); } template @@ -1398,7 +1475,7 @@ template class basic_writer { size_t size() const { return size_; } size_t width() const { return size_; } - template void operator()(It&& it) const { + template void operator()(It&& it) { if (prefix.size() != 0) it = internal::copy_str(prefix.begin(), prefix.end(), it); it = std::fill_n(it, padding, fill); @@ -1469,19 +1546,28 @@ template class basic_writer { } } - struct dec_writer { - unsigned_type abs_value; - int num_digits; + template > class dec_writer { + unsigned_type abs_value_; + int num_digits_; + F add_thousands_sep_; - template void operator()(It&& it) const { - it = internal::format_decimal(it, abs_value, num_digits); + public: + dec_writer(unsigned_type abs_value, int num_digits, + F add_thousands_sep = no_thousand_formatter{}) + : abs_value_(abs_value), + num_digits_(num_digits), + add_thousands_sep_(add_thousands_sep) {} + + template void operator()(It&& it) { + it = internal::format_decimal(it, abs_value_, num_digits_, + add_thousands_sep_); } }; void on_dec() { int num_digits = internal::count_digits(abs_value); writer.write_int(num_digits, get_prefix(), specs, - dec_writer{abs_value, num_digits}); + dec_writer<>(abs_value, num_digits)); } struct hex_writer { @@ -1534,55 +1620,18 @@ template class basic_writer { bin_writer<3>{abs_value, num_digits}); } - enum { sep_size = 1 }; - - struct num_writer { - unsigned_type abs_value; - int size; - const std::string& groups; - char_type sep; - - template void operator()(It&& it) const { - basic_string_view s(&sep, sep_size); - // Index of a decimal digit with the least significant digit having - // index 0. - unsigned digit_index = 0; - std::string::const_iterator group = groups.cbegin(); - it = internal::format_decimal( - it, abs_value, size, - [this, s, &group, &digit_index](char_type*& buffer) { - if (*group <= 0 || ++digit_index % *group != 0 || - *group == max_value()) - return; - if (group + 1 != groups.cend()) { - digit_index = 0; - ++group; - } - buffer -= s.size(); - std::uninitialized_copy(s.data(), s.data() + s.size(), - internal::make_checked(buffer, s.size())); - }); - } - }; - void on_num() { std::string groups = internal::grouping(writer.locale_); if (groups.empty()) return on_dec(); auto sep = internal::thousands_sep(writer.locale_); if (!sep) return on_dec(); int num_digits = internal::count_digits(abs_value); - int size = num_digits; - std::string::const_iterator group = groups.cbegin(); - while (group != groups.cend() && num_digits > *group && *group > 0 && - *group != max_value()) { - size += sep_size; - num_digits -= *group; - ++group; - } - if (group == groups.cend()) - size += sep_size * ((num_digits - 1) / groups.back()); - writer.write_int(size, get_prefix(), specs, - num_writer{abs_value, size, groups, sep}); + thousand_formatter add_thousands_sep{groups, sep, num_digits}; + if (!add_thousands_sep.size()) return on_dec(); + writer.write_int(num_digits + static_cast(add_thousands_sep.size()), + get_prefix(), specs, + dec_writer&>( + abs_value, num_digits, add_thousands_sep)); } FMT_NORETURN void on_error() { @@ -1611,24 +1660,38 @@ template class basic_writer { } }; - struct double_writer { - sign_t sign; - internal::buffer& buffer; - char* decimal_point_pos; - char_type decimal_point; + template > class double_writer { + sign_t sign_; + internal::buffer& buffer_; + char* decimal_point_pos_; + char_type decimal_point_; + F add_thousands_sep_; - size_t size() const { return buffer.size() + (sign ? 1 : 0); } + public: + double_writer(sign_t sign, internal::buffer& buffer, + char* decimal_point_pos, char_type decimal_point, + F add_thousands_sep = no_thousand_formatter{}) + : sign_(sign), + buffer_(buffer), + decimal_point_pos_(decimal_point_pos), + decimal_point_(decimal_point), + add_thousands_sep_(add_thousands_sep) {} + + size_t size() const { + return buffer_.size() + add_thousands_sep_.size() + (sign_ ? 1 : 0); + } size_t width() const { return size(); } template void operator()(It&& it) { - if (sign) *it++ = static_cast(data::signs[sign]); - auto begin = buffer.begin(); - if (decimal_point_pos) { - it = internal::copy_str(begin, decimal_point_pos, it); - *it++ = decimal_point; - begin = decimal_point_pos + 1; + if (sign_) *it++ = static_cast(data::signs[sign_]); + if (decimal_point_pos_) { + add_thousands_sep_(buffer_.begin(), decimal_point_pos_, it); + *it++ = decimal_point_; + it = internal::copy_str(decimal_point_pos_ + 1, + buffer_.end(), it); + } else { + add_thousands_sep_(buffer_.begin(), buffer_.end(), it); } - it = internal::copy_str(begin, buffer.end(), it); } }; @@ -2857,8 +2920,10 @@ void internal::basic_writer::write_fp(T value, (specs.type != 'a' && specs.type != 'A' && specs.type != 'e' && specs.type != 'E') && grisu_format(static_cast(value), buffer, precision, options, exp); + bool fixed = false; char* decimal_point_pos = nullptr; - if (!use_grisu) decimal_point_pos = sprintf_format(value, buffer, specs); + if (!use_grisu) + decimal_point_pos = sprintf_format(value, buffer, specs, fixed); if (handler.as_percentage) { buffer.push_back('%'); @@ -2887,12 +2952,58 @@ void internal::basic_writer::write_fp(T value, params.trailing_zeros = (precision != 0 && (handler.fixed || !specs.type)) || specs.alt; int num_digits = static_cast(buffer.size()); - write_padded(as, grisu_writer(buffer.data(), num_digits, exp, - params, decimal_point)); - } else { - write_padded(as, - double_writer{sign, buffer, decimal_point_pos, decimal_point}); + + if (!handler.use_locale) { + return write_padded( + as, grisu_writer(buffer.data(), num_digits, exp, params, + decimal_point)); + } + std::string groups = internal::grouping(locale_); + if (groups.empty()) { + return write_padded( + as, grisu_writer(buffer.data(), num_digits, exp, params, + decimal_point)); + } + char_type sep = internal::thousands_sep(locale_); + if (!sep) { + return write_padded( + as, grisu_writer(buffer.data(), num_digits, exp, params, + decimal_point)); + } + thousand_formatter add(groups, sep, num_digits + exp); + if (!add.size()) { + return write_padded( + as, grisu_writer(buffer.data(), num_digits, exp, params, + decimal_point)); + } + return write_padded( + as, grisu_writer&>( + buffer.data(), num_digits, exp, params, decimal_point, add)); } + + if (!handler.use_locale || !fixed) { + return write_padded( + as, double_writer<>(sign, buffer, decimal_point_pos, decimal_point)); + } + std::string groups = internal::grouping(locale_); + if (groups.empty()) { + return write_padded( + as, double_writer<>(sign, buffer, decimal_point_pos, decimal_point)); + } + char_type sep = internal::thousands_sep(locale_); + if (!sep) { + return write_padded( + as, double_writer<>(sign, buffer, decimal_point_pos, decimal_point)); + } + int num_digits = static_cast( + decimal_point_pos ? decimal_point_pos - buffer.begin() : buffer.size()); + thousand_formatter add(groups, sep, num_digits); + if (!add.size()) { + return write_padded( + as, double_writer<>(sign, buffer, decimal_point_pos, decimal_point)); + } + write_padded(as, double_writer&>( + sign, buffer, decimal_point_pos, decimal_point, add)); } // Reports a system error without throwing an exception. diff --git a/src/format.cc b/src/format.cc index 053b1349..05b7764a 100644 --- a/src/format.cc +++ b/src/format.cc @@ -37,10 +37,10 @@ template FMT_API format_context::iterator internal::vformat_to( internal::buffer&, string_view, basic_format_args); template FMT_API char* internal::sprintf_format(double, internal::buffer&, - sprintf_specs); + sprintf_specs, bool& fixed); template FMT_API char* internal::sprintf_format(long double, internal::buffer&, - sprintf_specs); + sprintf_specs, bool& fixed); // Explicit instantiations for wchar_t. diff --git a/test/locale-test.cc b/test/locale-test.cc index d1922328..50d9ccaf 100644 --- a/test/locale-test.cc +++ b/test/locale-test.cc @@ -14,7 +14,7 @@ using fmt::internal::max_value; template struct numpunct : std::numpunct { protected: Char do_decimal_point() const FMT_OVERRIDE { return '?'; } - std::string do_grouping() const FMT_OVERRIDE { return "\03"; } + std::string do_grouping() const FMT_OVERRIDE { return "\3"; } Char do_thousands_sep() const FMT_OVERRIDE { return '~'; } }; @@ -28,27 +28,30 @@ template struct no_grouping : std::numpunct { template struct special_grouping : std::numpunct { protected: Char do_decimal_point() const FMT_OVERRIDE { return '.'; } - std::string do_grouping() const FMT_OVERRIDE { return "\03\02"; } + std::string do_grouping() const FMT_OVERRIDE { return "\3\2"; } Char do_thousands_sep() const FMT_OVERRIDE { return ','; } }; template struct small_grouping : std::numpunct { protected: Char do_decimal_point() const FMT_OVERRIDE { return '.'; } - std::string do_grouping() const FMT_OVERRIDE { return "\01"; } + std::string do_grouping() const FMT_OVERRIDE { return "\1"; } Char do_thousands_sep() const FMT_OVERRIDE { return ','; } }; TEST(LocaleTest, DoubleDecimalPoint) { std::locale loc(std::locale(), new numpunct()); - EXPECT_EQ("1?23", fmt::format(loc, "{:n}", 1.23)); + EXPECT_EQ("1~234?56", fmt::format(loc, "{:n}", 1234.56)); + EXPECT_EQ("1~230", fmt::format(loc, "{:n}", 123e1)); + EXPECT_EQ("1?23456e+10", fmt::format(loc, "{:n}", 1.23456e10)); + EXPECT_EQ("0?123456", fmt::format(loc, "{:n}", 123456e-6)); // Test with Grisu disabled. fmt::memory_buffer buf; fmt::internal::writer w(buf, fmt::internal::locale_ref(loc)); auto specs = fmt::format_specs(); specs.type = 'n'; - w.write_fp(1.23, specs); - EXPECT_EQ(fmt::to_string(buf), "1?23"); + w.write_fp(1234.56, specs); + EXPECT_EQ(fmt::to_string(buf), "1~234?56"); } TEST(LocaleTest, Format) {