From 0a363f2b02835e33122fafb9f97a52252a6771b8 Mon Sep 17 00:00:00 2001
From: Nikhil Benesch <nikhil.benesch@gmail.com>
Date: Thu, 21 Jul 2016 18:10:56 -0400
Subject: [PATCH] Allow use of "," format specifier as in Python

f68771a helpfully enabled the "n" typecode for locale-aware printing of
integers, but not for floats. This commit enables the use of "n" for
locale-aware printing of floats, as in Python, and additionally
introduces the "," format specifier, which can be applied to "d", "f",
and "g" (and their upper-case variants, when present) to force
locale-aware grouping.

"," is particularly useful for floats, as "n" will use exponential
notation for large floats, while ",f" never will.

Unlike in Python, there is no distinction between ",d" and "n" for
integers, or ",g" and "n" for floats. (In Python, "," will always output
a literal comma, while "n" is locale-aware.) In fmtlib, both "," and "n"
are locale-aware.
---
 doc/syntax.rst      | 19 ++++++++++++-------
 fmt/format.h        | 31 +++++++++++++++++++++++++++----
 test/format-test.cc | 44 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 80 insertions(+), 14 deletions(-)
diff --git a/doc/syntax.rst b/doc/syntax.rst
index feda3e44..703646f9 100644
--- a/doc/syntax.rst
+++ b/doc/syntax.rst
@@ -74,14 +74,14 @@ although some of the formatting options are only supported by the numeric types.
 The general form of a *standard format specifier* is:
 
 .. productionlist:: sf
-   format_spec: [[`fill`]`align`][`sign`]["#"]["0"][`width`]["." `precision`][`type`]
+   format_spec: [[`fill`]`align`][`sign`]["#"]["0"][`width`][","]["." `precision`][`type`]
    fill: <a character other than '{' or '}'>
    align: "<" | ">" | "=" | "^"
    sign: "+" | "-" | " "
    width: `integer` | "{" `arg_id` "}"
    precision: `integer` | "{" `arg_id` "}"
    type: `int_type` | "c" | "e" | "E" | "f" | "F" | "g" | "G" | "p" | "s"
-   int_type: "b" | "B" | "d" | "o" | "x" | "X"
+   int_type: "b" | "B" | "d" | "n" | "o" | "x" | "X"
 
 The *fill* character can be any character other than '{' or '}'.  The presence
 of a fill character is signaled by the character following it, which must be
@@ -144,11 +144,12 @@ decimal-point character appears in the result of these conversions
 only if a digit follows it. In addition, for ``'g'`` and ``'G'``
 conversions, trailing zeros are not removed from the result.
 
-.. ifconfig:: False
-
-   The ``','`` option signals the use of a comma for a thousands separator.
-   For a locale aware separator, use the ``'n'`` integer presentation type
-   instead.
+The ``","`` option signals the use of a locale-aware thousands
+separator. In many locales, this may not be an actual `,` character.
+This option is supported for types ``'d'``, ``'f'``, ``'g'``, ``'F'``,
+``'G'``, and none. Type ``'n'`` always implies the use of a local-aware
+thousands separator; it is an error to explicitly specify both ``","``
+and ``'n'``.
 
 *width* is a decimal integer defining the minimum field width.  If not
 specified, then the field width will be determined by the content.
@@ -263,6 +264,10 @@ The available presentation types for floating-point values are:
 |         | ``'E'`` if the number gets too large. The                |
 |         | representations of infinity and NaN are uppercased, too. |
 +---------+----------------------------------------------------------+
+| ``'n'`` | Number. This is the same as ``'g'``, except that it uses |
+|         | the current locale setting to insert the appropriate     |
+|         | number separator characters.                             |
++---------+----------------------------------------------------------+
 | none    | The same as ``'g'``.                                     |
 +---------+----------------------------------------------------------+
 
diff --git a/fmt/format.h b/fmt/format.h
index 44bbf047..c7a9cac3 100644
--- a/fmt/format.h
+++ b/fmt/format.h
@@ -1558,7 +1558,8 @@ enum Alignment {
 // Flags.
 enum {
   SIGN_FLAG = 1, PLUS_FLAG = 2, MINUS_FLAG = 4, HASH_FLAG = 8,
-  CHAR_FLAG = 0x10  // Argument has char type - used in error reporting.
+  GROUP_THOUSANDS_FLAG = 0x10,
+  CHAR_FLAG = 0x20  // Argument has char type - used in error reporting.
 };
 
 // An empty format specifier.
@@ -2717,7 +2718,11 @@ void BasicWriter<Char>::write_int(T value, Spec spec) {
     prefix[0] = spec.flag(PLUS_FLAG) ? '+' : ' ';
     ++prefix_size;
   }
-  switch (spec.type()) {
+  char type = spec.type();
+  if (spec.flag(GROUP_THOUSANDS_FLAG)) {
+    type = 'n';
+  }
+  switch (type) {
   case 0: case 'd': {
     unsigned num_digits = internal::count_digits(abs_value);
     CharPtr p = prepare_int_buffer(num_digits, spec, prefix, prefix_size) + 1;
@@ -2799,7 +2804,7 @@ void BasicWriter<Char>::write_double(T value, const FormatSpec &spec) {
   char type = spec.type();
   bool upper = false;
   switch (type) {
-  case 0:
+  case 0: case 'n':
     type = 'g';
     break;
   case 'e': case 'f': case 'g': case 'a':
@@ -2868,13 +2873,15 @@ void BasicWriter<Char>::write_double(T value, const FormatSpec &spec) {
   }
 
   // Build format string.
-  enum { MAX_FORMAT_SIZE = 10}; // longest format: %#-*.*Lg
+  enum { MAX_FORMAT_SIZE = 11}; // longest format: %#'-*.*Lg
   Char format[MAX_FORMAT_SIZE];
   Char *format_ptr = format;
   *format_ptr++ = '%';
   unsigned width_for_sprintf = width;
   if (spec.flag(HASH_FLAG))
     *format_ptr++ = '#';
+  if (spec.flag(GROUP_THOUSANDS_FLAG) || spec.type() == 'n')
+    *format_ptr++ = '\'';
   if (spec.align() == ALIGN_CENTER) {
     width_for_sprintf = 0;
   } else {
@@ -3638,6 +3645,11 @@ const Char *BasicFormatter<Char, ArgFormatter>::format(
       spec.width_ = static_cast<int>(value);
     }
 
+    if (*s == ',') {
+      spec.flags_ |= GROUP_THOUSANDS_FLAG;
+      ++s;
+    }
+
     // Parse precision.
     if (*s == '.') {
       ++s;
@@ -3687,6 +3699,17 @@ const Char *BasicFormatter<Char, ArgFormatter>::format(
     // Parse type.
     if (*s != '}' && *s)
       spec.type_ = static_cast<char>(*s++);
+
+    if (spec.flag(GROUP_THOUSANDS_FLAG)) {
+      switch (spec.type()) {
+      case 0: case 'd': case 'f': case 'F': case 'g': case 'G':
+        break;
+      default:
+        FMT_THROW(FormatError(
+            fmt::format("format specifier ',' not allowed for type '{}'",
+            spec.type())));
+      }
+    }
   }
 
   if (*s++ != '}')
diff --git a/test/format-test.cc b/test/format-test.cc
index da9e57a8..1705fa60 100644
--- a/test/format-test.cc
+++ b/test/format-test.cc
@@ -960,6 +960,21 @@ TEST(FormatterTest, RuntimeWidth) {
   EXPECT_EQ("str         ", format("{0:{1}}", "str", 12));
 }
 
+TEST(FormatterTest, GroupThousandsFlag) {
+  EXPECT_THROW_MSG(format("{0:,x", 42),
+      FormatError, "format specifier ',' not allowed for type 'x'");
+  EXPECT_THROW_MSG(format("{0:,b", 42),
+      FormatError, "format specifier ',' not allowed for type 'b'");
+  EXPECT_THROW_MSG(format("{0:,o", 42),
+      FormatError, "format specifier ',' not allowed for type 'o'");
+  EXPECT_THROW_MSG(format("{0:,n", 42),
+      FormatError, "format specifier ',' not allowed for type 'n'");
+  EXPECT_THROW_MSG(format("{0:,n", 42.3),
+      FormatError, "format specifier ',' not allowed for type 'n'");
+  EXPECT_THROW_MSG(format("{0:,e", 42.3),
+      FormatError, "format specifier ',' not allowed for type 'e'");
+}
+
 TEST(FormatterTest, Precision) {
   char format_str[BUFFER_SIZE];
   safe_sprintf(format_str, "{0:.%u", UINT_MAX);
@@ -1106,7 +1121,7 @@ template <typename T>
 void check_unknown_types(
     const T &value, const char *types, const char *type_name) {
   char format_str[BUFFER_SIZE], message[BUFFER_SIZE];
-  const char *special = ".0123456789}";
+  const char *special = ",.0123456789}";
   for (int i = CHAR_MIN; i <= CHAR_MAX; ++i) {
     char c = static_cast<char>(i);
     if (std::strchr(types, c) || std::strchr(special, c) || !c) continue;
@@ -1229,10 +1244,13 @@ TEST(FormatterTest, FormatIntLocale) {
   lconv lc = {};
   char sep[] = "--";
   lc.thousands_sep = sep;
-  EXPECT_CALL(mock, localeconv()).Times(3).WillRepeatedly(testing::Return(&lc));
+  EXPECT_CALL(mock, localeconv()).Times(6).WillRepeatedly(testing::Return(&lc));
   EXPECT_EQ("123", format("{:n}", 123));
   EXPECT_EQ("1--234", format("{:n}", 1234));
   EXPECT_EQ("1--234--567", format("{:n}", 1234567));
+  EXPECT_EQ("123", format("{:,d}", 123));
+  EXPECT_EQ("1--234", format("{:,d}", 1234));
+  EXPECT_EQ("1--234--567", format("{:,d}", 1234567));
 }
 
 TEST(FormatterTest, FormatFloat) {
@@ -1240,7 +1258,7 @@ TEST(FormatterTest, FormatFloat) {
 }
 
 TEST(FormatterTest, FormatDouble) {
-  check_unknown_types(1.2, "eEfFgGaA", "double");
+  check_unknown_types(1.2, "eEfFgGaAn", "double");
   EXPECT_EQ("0", format("{0:}", 0.0));
   EXPECT_EQ("0.000000", format("{0:f}", 0.0));
   EXPECT_EQ("392.65", format("{0:}", 392.65));
@@ -1260,6 +1278,26 @@ TEST(FormatterTest, FormatDouble) {
   EXPECT_EQ(buffer, format("{:A}", -42.0));
 }
 
+TEST(FormatterTest, FormatDoubleLocale) {
+  // Grouping floating-point numbers uses snprintf internally. glibc's
+  // implementation of snprintf determines the current locale's
+  // thousands separator without calling localeconv, so we can't simply
+  // mock localeconv as in FormatIntLocale--we have to set an actual
+  // locale with a non-empty thousands separator using setlocale.
+  char* oldlocale = std::setlocale(LC_ALL, "");
+  char* sep = localeconv()->thousands_sep;
+  // Ensure this locale has a non-empty thousands separator.
+  EXPECT_GE(strlen(sep), 1);
+  char buffer[100];
+  safe_sprintf(buffer, "3%s141%s592.653589", sep, sep);
+  EXPECT_EQ(buffer, format("{:,f}", 3141592.653589));
+  safe_sprintf(buffer, "3%s141.65", sep);
+  EXPECT_EQ(buffer, format("{:n}", 3141.653589));
+  EXPECT_EQ(buffer, format("{:,g}", 3141.653589));
+  EXPECT_EQ(buffer, format("{:,}", 3141.653589));
+  std::setlocale(LC_ALL, oldlocale);
+}
+
 TEST(FormatterTest, FormatNaN) {
   double nan = std::numeric_limits<double>::quiet_NaN();
   EXPECT_EQ("nan", format("{}", nan));