From 6003ec3f25243bfb4d392399c5ccfe44f91d5014 Mon Sep 17 00:00:00 2001
From: Victor Zverovich <victor.zverovich@gmail.com>
Date: Sun, 24 Nov 2019 11:43:59 -0800
Subject: [PATCH] Simplify Grisu implementation

---
 include/fmt/format-inl.h | 94 +++++++++++++++++++++-------------------
 test/format-impl-test.cc | 36 ++++++++-------
 2 files changed, 67 insertions(+), 63 deletions(-)
diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h
index 02ac5a5f..3d13b840 100644
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@@ -340,6 +340,14 @@ template <typename T> struct bits {
 class fp;
 template <int SHIFT = 0> fp normalize(fp value);
 
+// Lower (upper) boundary is a value half way between a floating-point value
+// and its predecessor (successor). Boundaries have the same exponent as the
+// value so only significands are stored.
+struct boundaries {
+  uint64_t lower;
+  uint64_t upper;
+};
+
 // A handmade floating-point number f * pow(2, e).
 class fp {
  private:
@@ -417,29 +425,28 @@ class fp {
   // where a boundary is a value half way between the number and its predecessor
   // (lower) or successor (upper). The upper boundary is normalized and lower
   // has the same exponent but may be not normalized.
-  template <typename Double>
-  void assign_with_boundaries(Double d, fp& lower, fp& upper) {
+  template <typename Double> boundaries assign_with_boundaries(Double d) {
     bool is_lower_closer = assign(d);
-    lower = is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1);
+    fp lower =
+        is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1);
     // 1 in normalize accounts for the exponent shift above.
-    upper = normalize<1>(fp((f << 1) + 1, e - 1));
+    fp upper = normalize<1>(fp((f << 1) + 1, e - 1));
     lower.f <<= lower.e - upper.e;
-    lower.e = upper.e;
+    return boundaries{lower.f, upper.f};
   }
 
-  template <typename Double>
-  void assign_float_with_boundaries(Double d, fp& lower, fp& upper) {
+  template <typename Double> boundaries assign_float_with_boundaries(Double d) {
     assign(d);
     constexpr int min_normal_e = std::numeric_limits<float>::min_exponent -
                                  std::numeric_limits<double>::digits;
     significand_type half_ulp = 1 << (std::numeric_limits<double>::digits -
                                       std::numeric_limits<float>::digits - 1);
     if (min_normal_e > e) half_ulp <<= min_normal_e - e;
-    upper = normalize<0>(fp(f + half_ulp, e));
-    lower = fp(
+    fp upper = normalize<0>(fp(f + half_ulp, e));
+    fp lower = fp(
         f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e);
     lower.f <<= lower.e - upper.e;
-    lower.e = upper.e;
+    return boundaries{lower.f, upper.f};
   }
 };
 
@@ -451,28 +458,28 @@ inline fp operator-(fp x, fp y) {
   return {x.f - y.f, x.e};
 }
 
-// Computes an fp number r with r.f = x.f * y.f / pow(2, 64) rounded to nearest
-// with half-up tie breaking, r.e = x.e + y.e + 64. Result may not be
-// normalized.
-FMT_FUNC fp operator*(fp x, fp y) {
-  int exp = x.e + y.e + 64;
+inline uint64_t multiply(uint64_t lhs, uint64_t rhs) {
 #if FMT_USE_INT128
-  auto product = static_cast<__uint128_t>(x.f) * y.f;
+  auto product = static_cast<__uint128_t>(lhs) * rhs;
   auto f = static_cast<uint64_t>(product >> 64);
-  if ((static_cast<uint64_t>(product) & (1ULL << 63)) != 0) ++f;
-  return {f, exp};
+  return (static_cast<uint64_t>(product) & (1ULL << 63)) != 0 ? f + 1 : f;
 #else
   // Multiply 32-bit parts of significands.
   uint64_t mask = (1ULL << 32) - 1;
-  uint64_t a = x.f >> 32, b = x.f & mask;
-  uint64_t c = y.f >> 32, d = y.f & mask;
+  uint64_t a = lhs >> 32, b = lhs & mask;
+  uint64_t c = rhs >> 32, d = rhs & mask;
   uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d;
   // Compute mid 64-bit of result and round.
   uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31);
-  return fp(ac + (ad >> 32) + (bc >> 32) + (mid >> 32), exp);
+  return ac + (ad >> 32) + (bc >> 32) + (mid >> 32);
 #endif
 }
 
+// Computes an fp number r with r.f = x.f * y.f / pow(2, 64) rounded to nearest
+// with half-up tie breaking, r.e = x.e + y.e + 64. Result may not be
+// normalized.
+inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; }
+
 // Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its
 // (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`.
 FMT_FUNC fp get_cached_power(int min_exponent, int& pow10_exponent) {
@@ -1062,8 +1069,7 @@ int format_float(T value, int precision, float_spec spec, buffer<char>& buf) {
   int cached_exp10 = 0;     // K in Grisu.
   if (precision != -1) {
     if (precision > 17) return snprintf_float(value, precision, spec, buf);
-    fp fp_value(value);
-    fp normalized = normalize(fp_value);
+    fp normalized = normalize(fp(value));
     const auto cached_pow = get_cached_power(
         min_exp - (normalized.e + fp::significand_size), cached_exp10);
     normalized = normalized * cached_pow;
@@ -1081,33 +1087,33 @@ int format_float(T value, int precision, float_spec spec, buffer<char>& buf) {
     buf.resize(to_unsigned(num_digits));
   } else {
     fp fp_value;
-    fp lower, upper;  // w^- and w^+ in the Grisu paper.
-    if (spec.binary32)
-      fp_value.assign_float_with_boundaries(value, lower, upper);
-    else
-      fp_value.assign_with_boundaries(value, lower, upper);
-    // Find a cached power of 10 such that multiplying upper by it will bring
+    auto boundaries = spec.binary32
+                          ? fp_value.assign_float_with_boundaries(value)
+                          : fp_value.assign_with_boundaries(value);
+    fp_value = normalize(fp_value);
+    // Find a cached power of 10 such that multiplying value by it will bring
     // the exponent in the range [min_exp, -32].
-    const auto cached_pow = get_cached_power(  // \tilde{c}_{-k} in Grisu.
-        min_exp - (upper.e + fp::significand_size), cached_exp10);
-    fp normalized = normalize(fp_value);
-    normalized = normalized * cached_pow;
-    lower = lower * cached_pow;  // \tilde{M}^- in Grisu.
-    upper = upper * cached_pow;  // \tilde{M}^+ in Grisu.
-    assert(min_exp <= upper.e && upper.e <= -32);
-    auto result = digits::result();
-    --lower.f;  // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}.
-    ++upper.f;  // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}.
+    const fp cached_pow = get_cached_power(
+        min_exp - (fp_value.e + fp::significand_size), cached_exp10);
+    // Multiply value and boundaries by the cached power of 10.
+    fp_value = fp_value * cached_pow;
+    boundaries.lower = multiply(boundaries.lower, cached_pow.f);
+    boundaries.upper = multiply(boundaries.upper, cached_pow.f);
+    assert(min_exp <= fp_value.e && fp_value.e <= -32);
+    --boundaries.lower;  // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}.
+    ++boundaries.upper;  // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}.
     // Numbers outside of (lower, upper) definitely do not round to value.
-    grisu_shortest_handler handler{buf.data(), 0, (upper - normalized).f};
-    result = grisu_gen_digits(upper, upper.f - lower.f, exp, handler);
-    int size = handler.size;
+    grisu_shortest_handler handler{buf.data(), 0,
+                                   boundaries.upper - fp_value.f};
+    auto result =
+        grisu_gen_digits(fp(boundaries.upper, fp_value.e),
+                         boundaries.upper - boundaries.lower, exp, handler);
     if (result == digits::error) {
-      exp = exp + size - cached_exp10 - 1;
+      exp += handler.size - cached_exp10 - 1;
       fallback_format(value, buf, exp);
       return exp;
     }
-    buf.resize(to_unsigned(size));
+    buf.resize(to_unsigned(handler.size));
   }
   return exp - cached_exp10;
 }
diff --git a/test/format-impl-test.cc b/test/format-impl-test.cc
index b192be90..09fe21fe 100644
--- a/test/format-impl-test.cc
+++ b/test/format-impl-test.cc
@@ -189,27 +189,27 @@ template <> void run_double_tests<true>() {
   EXPECT_EQ(fp(1.23), fp(0x13ae147ae147aeu, -52));
 
   // Compute boundaries:
-  fp value, lower, upper;
+  fp value;
   // Normalized & not power of 2 - equidistant boundaries:
-  value.assign_with_boundaries(1.23, lower, upper);
+  auto b = value.assign_with_boundaries(1.23);
   EXPECT_EQ(value, fp(0x0013ae147ae147ae, -52));
-  EXPECT_EQ(lower, fp(0x9d70a3d70a3d6c00, -63));
-  EXPECT_EQ(upper, fp(0x9d70a3d70a3d7400, -63));
+  EXPECT_EQ(b.lower, 0x9d70a3d70a3d6c00);
+  EXPECT_EQ(b.upper, 0x9d70a3d70a3d7400);
   // Normalized power of 2 - lower boundary is closer:
-  value.assign_with_boundaries(1.9807040628566084e+28, lower, upper);  // 2**94
+  b = value.assign_with_boundaries(1.9807040628566084e+28);  // 2**94
   EXPECT_EQ(value, fp(0x0010000000000000, 42));
-  EXPECT_EQ(lower, fp(0x7ffffffffffffe00, 31));
-  EXPECT_EQ(upper, fp(0x8000000000000400, 31));
+  EXPECT_EQ(b.lower, 0x7ffffffffffffe00);
+  EXPECT_EQ(b.upper, 0x8000000000000400);
   // Smallest normalized double - equidistant boundaries:
-  value.assign_with_boundaries(2.2250738585072014e-308, lower, upper);
+  b = value.assign_with_boundaries(2.2250738585072014e-308);
   EXPECT_EQ(value, fp(0x0010000000000000, -1074));
-  EXPECT_EQ(lower, fp(0x7ffffffffffffc00, -1085));
-  EXPECT_EQ(upper, fp(0x8000000000000400, -1085));
+  EXPECT_EQ(b.lower, 0x7ffffffffffffc00);
+  EXPECT_EQ(b.upper, 0x8000000000000400);
   // Subnormal - equidistant boundaries:
-  value.assign_with_boundaries(4.9406564584124654e-324, lower, upper);
+  b = value.assign_with_boundaries(4.9406564584124654e-324);
   EXPECT_EQ(value, fp(0x0000000000000001, -1074));
-  EXPECT_EQ(lower, fp(0x4000000000000000, -1137));
-  EXPECT_EQ(upper, fp(0xc000000000000000, -1137));
+  EXPECT_EQ(b.lower, 0x4000000000000000);
+  EXPECT_EQ(b.upper, 0xc000000000000000);
 }
 
 TEST(FPTest, DoubleTests) {
@@ -243,12 +243,10 @@ TEST(FPTest, ComputeFloatBoundaries) {
     fp vupper = normalize(fp(test.upper));
     vlower.f >>= vupper.e - vlower.e;
     vlower.e = vupper.e;
-    fp value, lower, upper;
-    value.assign_float_with_boundaries(test.x, lower, upper);
-    EXPECT_EQ(vlower.f, lower.f);
-    EXPECT_EQ(vlower.e, lower.e);
-    EXPECT_EQ(vupper.f, upper.f);
-    EXPECT_EQ(vupper.e, upper.e);
+    fp value;
+    auto b = value.assign_float_with_boundaries(test.x);
+    EXPECT_EQ(vlower.f, b.lower);
+    EXPECT_EQ(vupper.f, b.upper);
   }
 }