Optimize alignment parsing

2020-10-21 09:11:10 -07:00 · 2020-10-21 09:11:10 -07:00 · 0ecb3d1829
commit 0ecb3d1829
parent 9755307842
3 changed files with 17 additions and 20 deletions
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@ -2603,24 +2603,17 @@ int snprintf_float(T value, int precision, float_specs specs,
 * error, but it will always advance at least one byte.
 */
 inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
  static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
                                 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
  static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
  static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
  static const int shiftc[] = {0, 18, 12, 6, 0};
  static const int shifte[] = {0, 6, 4, 2, 0};
-  auto s = reinterpret_cast<const unsigned char*>(buf);
+  int len = code_point_length(buf);
-  int len = lengths[s[0] >> 3];
+  const char* next = buf + len;
  // Compute the pointer to the next character early so that the next
  // iteration can start working on the next character. Neither Clang
  // nor GCC figure out this reordering on their own.
  const char* next = buf + len + !len;
  // Assume a four-byte character and load four bytes. Unused bits are
  // shifted out.
  auto s = reinterpret_cast<const unsigned char*>(buf);
  *c = uint32_t(s[0] & masks[len]) << 18;
  *c |= uint32_t(s[1] & 0x3f) << 12;
  *c |= uint32_t(s[2] & 0x3f) << 6;
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@ -1112,7 +1112,7 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits,
 template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
 inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
-  if (auto ptr = to_pointer<Char>(out, num_digits)) {
+  if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
    format_uint<BASE_BITS>(ptr, value, num_digits, upper);
    return out;
  }
@ -2727,12 +2727,16 @@ template <typename SpecHandler, typename Char> struct precision_adapter {
 };
 template <typename Char>
-FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) {
+FMT_CONSTEXPR int code_point_length(const Char* begin) {
-  if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1;
+  if (const_check(sizeof(Char) != 1)) return 1;
-  do {
+  constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    ++begin;
+                              0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
-  } while (begin != end && (*begin & 0xc0) == 0x80);
+  int len = lengths[static_cast<unsigned char>(*begin) >> 3];
-  return begin;
+
  // The table is indexed by the top five bits of the first byte. An entry of
  // 0 (bit patterns 10xxx and 11111) marks a byte that cannot start a UTF-8
  // sequence; `len + !len` maps that 0 to 1, so the result is always 1..4.
  return len + !len;
 }
 // Converts a character to the underlying integral type.
@ -2752,8 +2756,8 @@ FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end,
                                      Handler&& handler) {
  FMT_ASSERT(begin != end, "");
  auto align = align::none;
-  auto p = next_code_point(begin, end);
+  auto p = begin + code_point_length(begin);
-  if (p == end) p = begin;
+  if (p >= end) p = begin;
  for (;;) {
    switch (to_integral(*p)) {
    case '<':
--- a/test/format-test.cc
+++ b/test/format-test.cc
@ -646,7 +646,7 @@ TEST(FormatterTest, Fill) {
  EXPECT_EQ(std::string("\0\0\0*", 4), format(string_view("{:\0>4}", 6), '*'));
  EXPECT_EQ("жж42", format("{0:ж>4}", 42));
  EXPECT_THROW_MSG(format("{:\x80\x80\x80\x80\x80>}", 0), format_error,
-                   "invalid fill");
+                   "missing '}' in format string");
 }
 TEST(FormatterTest, PlusSign) {