Optimize alignment parsing

This commit is contained in:
Victor Zverovich 2020-10-21 09:11:10 -07:00
parent 9755307842
commit 0ecb3d1829
3 changed files with 17 additions and 20 deletions

View File

@ -2603,24 +2603,17 @@ int snprintf_float(T value, int precision, float_specs specs,
* error, but it will always advance at least one byte. * error, but it will always advance at least one byte.
*/ */
inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) { inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
static const int shiftc[] = {0, 18, 12, 6, 0}; static const int shiftc[] = {0, 18, 12, 6, 0};
static const int shifte[] = {0, 6, 4, 2, 0}; static const int shifte[] = {0, 6, 4, 2, 0};
auto s = reinterpret_cast<const unsigned char*>(buf); int len = code_point_length(buf);
int len = lengths[s[0] >> 3]; const char* next = buf + len;
// Compute the pointer to the next character early so that the next
// iteration can start working on the next character. Neither Clang
// nor GCC figure out this reordering on their own.
const char* next = buf + len + !len;
// Assume a four-byte character and load four bytes. Unused bits are // Assume a four-byte character and load four bytes. Unused bits are
// shifted out. // shifted out.
auto s = reinterpret_cast<const unsigned char*>(buf);
*c = uint32_t(s[0] & masks[len]) << 18; *c = uint32_t(s[0] & masks[len]) << 18;
*c |= uint32_t(s[1] & 0x3f) << 12; *c |= uint32_t(s[1] & 0x3f) << 12;
*c |= uint32_t(s[2] & 0x3f) << 6; *c |= uint32_t(s[2] & 0x3f) << 6;

View File

@ -1112,7 +1112,7 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits,
template <unsigned BASE_BITS, typename Char, typename It, typename UInt> template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
if (auto ptr = to_pointer<Char>(out, num_digits)) { if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
format_uint<BASE_BITS>(ptr, value, num_digits, upper); format_uint<BASE_BITS>(ptr, value, num_digits, upper);
return out; return out;
} }
@ -2727,12 +2727,16 @@ template <typename SpecHandler, typename Char> struct precision_adapter {
}; };
// NOTE(review): this span is a side-by-side diff rendering. On each line the
// removed next_code_point() text comes first and the added
// code_point_length() text follows it.
//
// code_point_length() returns the byte length of the UTF-8 sequence that
// starts at `begin`, judged from the lead byte alone via a 32-entry table
// indexed by the top five bits of that byte. For character types that are not
// one byte wide it returns 1 without reading `*begin`.
template <typename Char> template <typename Char>
FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) { FMT_CONSTEXPR int code_point_length(const Char* begin) {
if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1; if (const_check(sizeof(Char) != 1)) return 1;
do { constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
++begin; 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
} while (begin != end && (*begin & 0xc0) == 0x80); int len = lengths[static_cast<unsigned char>(*begin) >> 3];
return begin;
// The table holds 0 for lead bytes that cannot start a sequence.
// `len + !len` turns that 0 into 1 and leaves 1..4 unchanged, so the
// returned length is always at least one byte.
return len + !len;
} }
// Converts a character to the underlying integral type. // Converts a character to the underlying integral type.
@ -2752,8 +2756,8 @@ FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end,
Handler&& handler) { Handler&& handler) {
FMT_ASSERT(begin != end, ""); FMT_ASSERT(begin != end, "");
auto align = align::none; auto align = align::none;
auto p = next_code_point(begin, end); auto p = begin + code_point_length(begin);
if (p == end) p = begin; if (p >= end) p = begin;
for (;;) { for (;;) {
switch (to_integral(*p)) { switch (to_integral(*p)) {
case '<': case '<':

View File

@ -646,7 +646,7 @@ TEST(FormatterTest, Fill) {
EXPECT_EQ(std::string("\0\0\0*", 4), format(string_view("{:\0>4}", 6), '*')); EXPECT_EQ(std::string("\0\0\0*", 4), format(string_view("{:\0>4}", 6), '*'));
EXPECT_EQ("жж42", format("{0:ж>4}", 42)); EXPECT_EQ("жж42", format("{0:ж>4}", 42));
EXPECT_THROW_MSG(format("{:\x80\x80\x80\x80\x80>}", 0), format_error, EXPECT_THROW_MSG(format("{:\x80\x80\x80\x80\x80>}", 0), format_error,
"invalid fill"); "missing '}' in format string");
} }
TEST(FormatterTest, PlusSign) { TEST(FormatterTest, PlusSign) {