Implement Grisu2 digit generation
This commit is contained in:
parent
569ac91e0b
commit
f0d0a1ebd7
@ -73,7 +73,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
|
||||
-Wcast-qual -Wformat=2 -Wmissing-include-dirs
|
||||
-Wcast-align -Wnon-virtual-dtor
|
||||
-Wctor-dtor-privacy -Wdisabled-optimization
|
||||
-Winvalid-pch -Wmissing-declarations -Woverloaded-virtual
|
||||
-Winvalid-pch -Woverloaded-virtual
|
||||
-Wno-ctor-dtor-privacy -Wno-dangling-else -Wno-float-equal
|
||||
-Wno-format-nonliteral -Wno-sign-conversion -Wno-shadow)
|
||||
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.6)
|
||||
@ -101,8 +101,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
-Wno-unused-member-function
|
||||
-Wno-format-nonliteral -Wno-missing-noreturn -Wno-undefined-func-template
|
||||
-Wno-shadow -Wno-sign-conversion -Wno-used-but-marked-unused
|
||||
-Wno-covered-switch-default -Wno-missing-variable-declarations
|
||||
-Wno-double-promotion)
|
||||
-Wno-covered-switch-default -Wno-missing-prototypes
|
||||
-Wno-missing-variable-declarations -Wno-double-promotion)
|
||||
|
||||
set(WERROR_FLAG -Werror)
|
||||
|
||||
|
@ -275,11 +275,16 @@ const char basic_data<T>::DIGITS[] =
|
||||
|
||||
template <typename T>
|
||||
const uint32_t basic_data<T>::POWERS_OF_10_32[] = {
|
||||
1, FMT_POWERS_OF_10(1)
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
const uint32_t basic_data<T>::ZERO_OR_POWERS_OF_10_32[] = {
|
||||
0, FMT_POWERS_OF_10(1)
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
const uint64_t basic_data<T>::POWERS_OF_10_64[] = {
|
||||
const uint64_t basic_data<T>::ZERO_OR_POWERS_OF_10_64[] = {
|
||||
0,
|
||||
FMT_POWERS_OF_10(1),
|
||||
FMT_POWERS_OF_10(1000000000ull),
|
||||
@ -361,6 +366,78 @@ FMT_FUNC fp get_cached_power(int min_exponent, int &pow10_exponent) {
|
||||
pow10_exponent = first_dec_exp + index * dec_exp_step;
|
||||
return fp(data::POW10_SIGNIFICANDS[index], data::POW10_EXPONENTS[index]);
|
||||
}
|
||||
|
||||
// Generates output using Grisu2 digit-gen algorithm.
|
||||
FMT_FUNC void grisu2_gen_digits(
|
||||
const fp &scaled_value, const fp &scaled_upper, uint64_t delta,
|
||||
char *buffer, size_t &size, int &dec_exp) {
|
||||
internal::fp one(1ull << -scaled_upper.e, scaled_upper.e);
|
||||
uint32_t hi = static_cast<uint32_t>(scaled_upper.f >> -one.e); // p1 in Grisu
|
||||
uint64_t lo = scaled_upper.f & (one.f - 1); // p2 in Grisu
|
||||
size = 0;
|
||||
auto kappa = count_digits(hi); // TODO: more descriptive name
|
||||
while (kappa > 0) {
|
||||
uint32_t digit = 0;
|
||||
// This optimization by miloyip reduces the number of integer divisions by
|
||||
// one per iteration.
|
||||
switch (kappa) {
|
||||
case 10: digit = hi / 1000000000; hi %= 1000000000; break;
|
||||
case 9: digit = hi / 100000000; hi %= 100000000; break;
|
||||
case 8: digit = hi / 10000000; hi %= 10000000; break;
|
||||
case 7: digit = hi / 1000000; hi %= 1000000; break;
|
||||
case 6: digit = hi / 100000; hi %= 100000; break;
|
||||
case 5: digit = hi / 10000; hi %= 10000; break;
|
||||
case 4: digit = hi / 1000; hi %= 1000; break;
|
||||
case 3: digit = hi / 100; hi %= 100; break;
|
||||
case 2: digit = hi / 10; hi %= 10; break;
|
||||
case 1: digit = hi; hi = 0; break;
|
||||
default:
|
||||
FMT_ASSERT(false, "invalid number of digits");
|
||||
}
|
||||
if (digit != 0 || size != 0)
|
||||
buffer[size++] = '0' + static_cast<char>(digit);
|
||||
--kappa;
|
||||
uint64_t remainder = (static_cast<uint64_t>(hi) << -one.e) + lo;
|
||||
if (remainder <= delta) {
|
||||
dec_exp += kappa;
|
||||
// TODO: use scaled_value
|
||||
(void)scaled_value;
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (;;) {
|
||||
lo *= 10;
|
||||
delta *= 10;
|
||||
char digit = static_cast<char>(lo >> -one.e);
|
||||
if (digit != 0 || size != 0)
|
||||
buffer[size++] = '0' + digit;
|
||||
lo &= one.f - 1;
|
||||
--kappa;
|
||||
if (lo < delta) {
|
||||
dec_exp += kappa;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Formats value using the Grisu2 algorithm. The generated decimal digits are
// stored in buffer and their count in size. The decimal exponent is tracked
// in a local variable only and is not reported to the caller.
FMT_FUNC void grisu2_format(double value, char *buffer, size_t &size) {
  fp binary(value);
  // The boundaries are taken from the value before it is normalized below.
  fp lower_bound, upper_bound;
  binary.compute_boundaries(lower_bound, upper_bound);

  // Pick a cached power of 10 close to 1 / upper_bound.
  const int min_binary_exp = -60;
  int decimal_exp = 0;  // K in Grisu paper.
  const fp cached_pow = get_cached_power(
      min_binary_exp - (upper_bound.e + fp::significand_size), decimal_exp);

  // Scale the value and both boundaries by the same power of 10.
  binary.normalize();
  const fp scaled_value = binary * cached_pow;
  fp scaled_lower = lower_bound * cached_pow;
  fp scaled_upper = upper_bound * cached_pow;

  // Narrow the interval by one ulp on each side.
  scaled_lower.f += 1;
  scaled_upper.f -= 1;
  const uint64_t delta = scaled_upper.f - scaled_lower.f;

  grisu2_gen_digits(
      scaled_value, scaled_upper, delta, buffer, size, decimal_exp);
}
|
||||
} // namespace internal
|
||||
|
||||
#if FMT_USE_WINDOWS_H
|
||||
|
@ -365,6 +365,10 @@ FMT_API fp operator*(fp x, fp y);
|
||||
// (binary) exponent satisfies min_exponent <= c_k.e <= min_exponent + 3.
|
||||
FMT_API fp get_cached_power(int min_exponent, int &pow10_exponent);
|
||||
|
||||
// Formats value using Grisu2 algorithm:
|
||||
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
|
||||
FMT_API void grisu2_format(double value, char *buffer, size_t &size);
|
||||
|
||||
template <typename Allocator>
|
||||
typename Allocator::value_type *allocate(Allocator& alloc, std::size_t n) {
|
||||
#if __cplusplus >= 201103L || FMT_MSC_VER >= 1700
|
||||
@ -952,7 +956,8 @@ struct int_traits {
|
||||
template <typename T = void>
|
||||
struct FMT_API basic_data {
|
||||
static const uint32_t POWERS_OF_10_32[];
|
||||
static const uint64_t POWERS_OF_10_64[];
|
||||
static const uint32_t ZERO_OR_POWERS_OF_10_32[];
|
||||
static const uint64_t ZERO_OR_POWERS_OF_10_64[];
|
||||
static const uint64_t POW10_SIGNIFICANDS[];
|
||||
static const int16_t POW10_EXPONENTS[];
|
||||
static const char DIGITS[];
|
||||
@ -973,7 +978,7 @@ inline unsigned count_digits(uint64_t n) {
|
||||
// Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
|
||||
// and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits.
|
||||
int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12;
|
||||
return to_unsigned(t) - (n < data::POWERS_OF_10_64[t]) + 1;
|
||||
return to_unsigned(t) - (n < data::ZERO_OR_POWERS_OF_10_64[t]) + 1;
|
||||
}
|
||||
#else
|
||||
// Fallback version of count_digits used when __builtin_clz is not available.
|
||||
@ -1043,7 +1048,8 @@ class decimal_formatter {
|
||||
// https://github.com/jeaiii/itoa
|
||||
unsigned n = N - 1;
|
||||
unsigned a = n / 5 * n * 53 / 16;
|
||||
uint64_t t = ((1ULL << (32 + a)) / data::POWERS_OF_10_32[n] + 1 - n / 9);
|
||||
uint64_t t = ((1ULL << (32 + a)) /
|
||||
data::ZERO_OR_POWERS_OF_10_32[n] + 1 - n / 9);
|
||||
t = ((t * u) >> a) + n / 5 * 4;
|
||||
write_pair(0, t >> 32);
|
||||
for (unsigned i = 2; i < N; i += 2) {
|
||||
@ -1075,7 +1081,7 @@ class decimal_formatter_null : public decimal_formatter {
|
||||
// Optional version of count_digits for better performance on 32-bit platforms.
|
||||
inline unsigned count_digits(uint32_t n) {
|
||||
int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12;
|
||||
return to_unsigned(t) - (n < data::POWERS_OF_10_32[t]) + 1;
|
||||
return to_unsigned(t) - (n < data::ZERO_OR_POWERS_OF_10_32[t]) + 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2943,31 +2949,10 @@ void basic_writer<Range>::write_double(T value, const format_specs &spec) {
|
||||
basic_memory_buffer<char_type> buffer;
|
||||
if (internal::const_check(FMT_USE_GRISU && sizeof(T) <= sizeof(double) &&
|
||||
std::numeric_limits<double>::is_iec559)) {
|
||||
internal::fp fp_value(static_cast<double>(value));
|
||||
fp_value.normalize();
|
||||
// Find a cached power of 10 close to 1 / fp_value.
|
||||
int dec_exp = 0;
|
||||
const int min_exp = -60;
|
||||
auto dec_pow = internal::get_cached_power(
|
||||
min_exp - (fp_value.e + internal::fp::significand_size), dec_exp);
|
||||
internal::fp product = fp_value * dec_pow;
|
||||
// Generate output using Grisu digit-gen-mix algorithm.
|
||||
internal::fp one(1ull << -product.e, product.e);
|
||||
uint64_t hi = product.f >> -one.e;
|
||||
uint64_t f = product.f & (one.f - 1);
|
||||
typedef back_insert_range<internal::basic_buffer<char_type>> range;
|
||||
basic_writer<range> w{range(buffer)};
|
||||
w.write(hi);
|
||||
size_t digits = buffer.size();
|
||||
w.write('.');
|
||||
const unsigned max_digits = 18;
|
||||
while (digits++ < max_digits) {
|
||||
f *= 10;
|
||||
w.write(static_cast<char>('0' + (f >> -one.e)));
|
||||
f &= one.f - 1;
|
||||
}
|
||||
w.write('e');
|
||||
w.write(-dec_exp);
|
||||
char buf[100]; // TODO: max size
|
||||
size_t size = 0;
|
||||
internal::grisu2_format(static_cast<double>(value), buf, size);
|
||||
buffer.append(buf, buf + size); // TODO: avoid extra copy
|
||||
} else {
|
||||
format_specs normalized_spec(spec);
|
||||
normalized_spec.type_ = handler.type;
|
||||
|
Loading…
Reference in New Issue
Block a user