diff --git a/fmt/format.h b/fmt/format.h index c6ee44d1..d3e2eb47 100644 --- a/fmt/format.h +++ b/fmt/format.h @@ -40,11 +40,17 @@ #include #include #include +#include // for MB_LEN_MAX #include #include #include #include #include // for std::pair + +#ifdef FMT_WCONV_USE_NOWIDE +# include +#endif + #undef FMT_INCLUDE // The fmt library version in the form major * 10000 + minor * 100 + patch. @@ -349,6 +355,12 @@ typedef __int64 intmax_t; # define FMT_ASSERT(condition, message) assert((condition) && message) #endif +// User option: throw runtime_error if code conversion fails. +// If false, use partially converted string. +#ifndef FMT_THROW_WCONV_ERROR +# define FMT_THROW_WCONV_ERROR 1 +#endif + // __builtin_clz is broken in clang with Microsoft CodeGen: // https://github.com/fmtlib/fmt/issues/519 #ifndef _MSC_VER @@ -960,6 +972,8 @@ class CharTraits : public BasicCharTraits { public: static char convert(char value) { return value; } + typedef wchar_t CharOther; + // Formats a floating-point number. template FMT_API static int format_float(char *buffer, std::size_t size, @@ -981,6 +995,8 @@ class CharTraits : public BasicCharTraits { static wchar_t convert(char value) { return value; } static wchar_t convert(wchar_t value) { return value; } + typedef char CharOther; + template FMT_API static int format_float(wchar_t *buffer, std::size_t size, const wchar_t *format, unsigned width, int precision, T value); @@ -1245,20 +1261,6 @@ struct NamedArgWithType; template struct Null {}; -// A helper class template to enable or disable overloads taking wide -// characters and strings in MakeValue. 
-template -struct WCharHelper { - typedef Null Supported; - typedef T Unsupported; -}; - -template -struct WCharHelper { - typedef T Supported; - typedef Null Unsupported; -}; - typedef char Yes[1]; typedef char No[2]; @@ -1387,24 +1389,6 @@ class MakeValue : public Arg { template MakeValue(T *value); - // The following methods are private to disallow formatting of wide - // characters and strings into narrow strings as in - // fmt::format("{}", L"test"); - // To fix this, use a wide format string: fmt::format(L"{}", L"test"). -#if !FMT_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED) - MakeValue(typename WCharHelper::Unsupported); -#endif - MakeValue(typename WCharHelper::Unsupported); - MakeValue(typename WCharHelper::Unsupported); - MakeValue(typename WCharHelper::Unsupported); -#if FMT_HAS_STRING_VIEW - MakeValue(typename WCharHelper::Unsupported); -#endif -#if FMT_HAS_EXPERIMENTAL_STRING_VIEW - MakeValue(typename WCharHelper::Unsupported); -#endif - MakeValue(typename WCharHelper::Unsupported); - void set_string(StringRef str) { string.value = str.data(); string.size = str.size(); @@ -1471,6 +1455,7 @@ class MakeValue : public Arg { FMT_MAKE_VALUE(signed char, int_value, INT) FMT_MAKE_VALUE(unsigned char, uint_value, UINT) FMT_MAKE_VALUE(char, int_value, CHAR) + FMT_MAKE_VALUE(wchar_t, int_value, CHAR) #if __cplusplus >= 201103L template < @@ -1486,13 +1471,6 @@ class MakeValue : public Arg { static uint64_t type(T) { return Arg::INT; } #endif -#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) - MakeValue(typename WCharHelper::Supported value) { - int_value = value; - } - static uint64_t type(wchar_t) { return Arg::CHAR; } -#endif - #define FMT_MAKE_STR_VALUE(Type, TYPE) \ MakeValue(Type value) { set_string(value); } \ static uint64_t type(Type) { return Arg::TYPE; } @@ -1513,22 +1491,16 @@ class MakeValue : public Arg { FMT_MAKE_STR_VALUE(StringRef, STRING) FMT_MAKE_VALUE_(CStringRef, string.value, CSTRING, value.c_str()) -#define 
FMT_MAKE_WSTR_VALUE(Type, TYPE) \ - MakeValue(typename WCharHelper::Supported value) { \ - set_string(value); \ - } \ - static uint64_t type(Type) { return Arg::TYPE; } - - FMT_MAKE_WSTR_VALUE(wchar_t *, WSTRING) - FMT_MAKE_WSTR_VALUE(const wchar_t *, WSTRING) - FMT_MAKE_WSTR_VALUE(const std::wstring &, WSTRING) + FMT_MAKE_STR_VALUE(wchar_t *, WSTRING) + FMT_MAKE_STR_VALUE(const wchar_t *, WSTRING) + FMT_MAKE_STR_VALUE(const std::wstring &, WSTRING) #if FMT_HAS_STRING_VIEW - FMT_MAKE_WSTR_VALUE(const std::wstring_view &, WSTRING) + FMT_MAKE_STR_VALUE(const std::wstring_view &, WSTRING) #endif #if FMT_HAS_EXPERIMENTAL_STRING_VIEW - FMT_MAKE_WSTR_VALUE(const std::experimental::wstring_view &, WSTRING) + FMT_MAKE_STR_VALUE(const std::experimental::wstring_view &, WSTRING) #endif - FMT_MAKE_WSTR_VALUE(WStringRef, WSTRING) + FMT_MAKE_STR_VALUE(WStringRef, WSTRING) FMT_MAKE_VALUE(void *, pointer, POINTER) FMT_MAKE_VALUE(const void *, pointer, POINTER) @@ -2218,9 +2190,7 @@ class ArgFormatterBase : public ArgVisitor { writer_.write_str(value, spec_); } - using ArgVisitor::visit_wstring; - - void visit_wstring(internal::Arg::StringValue value) { + void visit_wstring(internal::Arg::StringValue value) { writer_.write_str(value, spec_); } @@ -2653,6 +2623,7 @@ class BasicWriter { FMT_DISALLOW_COPY_AND_ASSIGN(BasicWriter); typedef typename internal::CharTraits::CharPtr CharPtr; + typedef typename internal::CharTraits::CharOther CharOther; #if FMT_SECURE_SCL // Returns pointer value. @@ -2725,13 +2696,132 @@ class BasicWriter { void write_str(const internal::Arg::StringValue &str, const Spec &spec); - // This following methods are private to disallow writing wide characters - // and strings to a char stream. If you want to print a wide string as a - // pointer as std::ostream does, cast it to const void*. - // Do not implement! 
- void operator<<(typename internal::WCharHelper::Unsupported); - void operator<<( - typename internal::WCharHelper::Unsupported); + // recode_str() and recode_str_len() are overloaded for Char and CharOther. + // The native Char version is trivial. + + static std::size_t recode_str_len( + const Char* s, + std::size_t isize, bool throw_error = FMT_THROW_WCONV_ERROR) + { + return isize; + } + + template + Iterator recode_str( + const Char *in, + std::size_t isize, Iterator out, bool throw_error = FMT_THROW_WCONV_ERROR) + { + return std::uninitialized_copy(in, in + isize, out); + } + + // Find length in Char code units of CharOther string. + static std::size_t recode_str_len( + const CharOther* in, + std::size_t isize, bool throw_error = FMT_THROW_WCONV_ERROR) + { + return recode_internal(in, isize, CharPtr(), throw_error); + } + + // Convert CharOther string to Char string. + template + Iterator recode_str( + const CharOther* in, + std::size_t isize, Iterator out, bool throw_error = FMT_THROW_WCONV_ERROR) + { + return out + recode_internal(in, isize, out, throw_error); + } + +#ifdef FMT_WCONV_USE_NOWIDE + + // Ignore locale and treat all strings as Unicode. + // char = UTF-8. wchar_t = UTF-16 on Windows/MSVC, UTF-32 elsewhere. + + template + static std::size_t recode_internal(const CharOther *in, std::size_t isize, + CharPtr out, bool throw_error) + { + std::size_t osize = 0; + const CharOther *end = in + isize; + while (in != end) + { + using namespace nowide::utf; + code_point c = utf_traits::decode(in, end); + if (c == illegal || c == incomplete) { + if (throw_error) + FMT_THROW(std::runtime_error("encoding error")); + else if (c == incomplete) + break; + } + if (bWrite) + out = utf_traits::encode(c, out); + else + osize += utf_traits::width(c); + } + return osize; + } + +#else // not FMT_WCONV_USE_NOWIDE + + // Use locale encoding for char and wchar_t. + // Warning: This doesn't handle UTF-8 or UTF-16 on Windows/MSVC. 
+ // Don't use wcsrtombs/mbsrtowcs because input is not null-terminated. + + template + static std::size_t recode_internal(const wchar_t* in, std::size_t isize, + internal::CharTraits::CharPtr out, bool throw_error) + { + std::size_t osize = 0; + std::mbstate_t state = {0}; + char tmp[MB_LEN_MAX]; + while (isize > 0) + { +#if __STDC_WANT_SECURE_LIB__ + std::size_t csize; + wcrtomb_s(&csize, &tmp[0], MB_LEN_MAX, *in, &state); +#else + std::size_t csize = std::wcrtomb(&tmp[0], *in, &state); +#endif + if (static_cast(csize) < 0) { + if (throw_error) + FMT_THROW(std::runtime_error("encoding error")); + else + break; + } + ++in; + --isize; + osize += csize; + if (bWrite) + out = std::uninitialized_copy(&tmp[0], &tmp[csize], out); + } + return osize; + } + + template + static std::size_t recode_internal(const char* in, std::size_t isize, + internal::CharTraits::CharPtr out, bool throw_error) + { + std::size_t osize = 0; + std::mbstate_t state = {0}; + wchar_t tmp; + while (isize > 0) + { + std::size_t csize = std::mbrtowc(&tmp, in, isize, &state); + if (static_cast(csize) < 0) { + if (throw_error) + FMT_THROW(std::runtime_error("encoding error")); + else + break; + } + in += csize; + isize -= csize; + ++osize; + if (bWrite) + *out++ = tmp; + } + return osize; + } + +#endif // FMT_WCONV_USE_NOWIDE // Appends floating-point length specifier to the format string. // The second argument is only used for overload resolution. @@ -2870,15 +2960,13 @@ class BasicWriter { /** Writes a character to the stream. */ - BasicWriter &operator<<(char value) { + BasicWriter &operator<<(Char value) { buffer_.push_back(value); return *this; } - BasicWriter &operator<<( - typename internal::WCharHelper::Supported value) { - buffer_.push_back(value); - return *this; + BasicWriter &operator<<(CharOther value) { + return operator<<(BasicStringRef(&value, 1)); } /** @@ -2886,16 +2974,17 @@ class BasicWriter { Writes *value* to the stream. 
\endrst */ - BasicWriter &operator<<(fmt::BasicStringRef value) { + BasicWriter &operator<<(BasicStringRef value) { const Char *str = value.data(); buffer_.append(str, str + value.size()); return *this; } - BasicWriter &operator<<( - typename internal::WCharHelper::Supported value) { - const char *str = value.data(); - buffer_.append(str, str + value.size()); + BasicWriter &operator<<(BasicStringRef value) { + std::size_t convsize = recode_str_len(value.data(), value.size()); + std::size_t oldsize = buffer_.size(); + buffer_.resize(oldsize + convsize); + recode_str(value.data(), value.size(), internal::make_ptr(&buffer_[oldsize], convsize)); return *this; } @@ -2921,23 +3010,24 @@ class BasicWriter { template template typename BasicWriter::CharPtr BasicWriter::write_str( - const StrChar *s, std::size_t size, const AlignSpec &spec) { + const StrChar *s, std::size_t isize, const AlignSpec &spec) { CharPtr out = CharPtr(); - if (spec.width() > size) { + std::size_t osize = recode_str_len(s, isize); + if (spec.width() > osize) { out = grow_buffer(spec.width()); Char fill = internal::CharTraits::cast(spec.fill()); if (spec.align() == ALIGN_RIGHT) { - std::uninitialized_fill_n(out, spec.width() - size, fill); - out += spec.width() - size; + std::uninitialized_fill_n(out, spec.width() - osize, fill); + out += spec.width() - osize; } else if (spec.align() == ALIGN_CENTER) { - out = fill_padding(out, spec.width(), size, fill); + out = fill_padding(out, spec.width(), osize, fill); } else { - std::uninitialized_fill_n(out + size, spec.width() - size, fill); + std::uninitialized_fill_n(out + osize, spec.width() - osize, fill); } } else { - out = grow_buffer(size); + out = grow_buffer(osize); } - std::uninitialized_copy(s, s + size, out); + recode_str(s, isize, out); return out; } @@ -2945,8 +3035,6 @@ template template void BasicWriter::write_str( const internal::Arg::StringValue &s, const Spec &spec) { - // Check if StrChar is convertible to Char. 
- internal::CharTraits::convert(StrChar()); if (spec.type_ && spec.type_ != 's') internal::report_unknown_type(spec.type_, "string"); const StrChar *str_value = s.value; diff --git a/test/printf-test.cc b/test/printf-test.cc index 81a041d7..1f27270a 100644 --- a/test/printf-test.cc +++ b/test/printf-test.cc @@ -29,6 +29,7 @@ #include #include +//#define FMT_WCONV_USE_NOWIDE #include "fmt/printf.h" #include "fmt/format.h" #include "gtest-extra.h" @@ -509,3 +510,50 @@ TEST(PrintfTest, Writer) { printf(writer, "%d", 42); EXPECT_EQ("42", writer.str()); } + +TEST(PrintfTest, ConvWchar) { +#ifdef _MSC_VER +# ifndef FMT_WCONV_USE_NOWIDE + // FMT_WCONV_USE_NOWIDE is required for this test on Windows. + // UTF-8 is not supported by MSVCRT so we have a compile-time option + // to bypass the CRT and force UTF-8. + return; +# endif +#else // not MSVC + // Select UTF-8 code set. + std::string oldlocale = setlocale(LC_CTYPE, NULL); + std::size_t dotpos = oldlocale.find('.'); + if (dotpos == std::string::npos) + dotpos = oldlocale.size(); + std::string newlocale = oldlocale.substr(0, dotpos) + ".UTF-8"; + EXPECT_EQ(newlocale, setlocale(LC_CTYPE, newlocale.c_str())); +#endif + + std::string s = "\xF0\x9F\x98\x81"; +#if WCHAR_MAX > 0xFFFF + std::wstring w = L"\U0001F601"; +#else + std::wstring w = L"\xD83D\xDE01"; // UTF-16 +#endif + // Ignore the 'l' in '%ls' and use types to do the right thing with strings. + EXPECT_EQ(s, fmt::sprintf( "%s" , s)); + EXPECT_EQ(s, fmt::sprintf( "%s" , w)); + EXPECT_EQ(s, fmt::sprintf( "%ls", w)); + EXPECT_EQ(w, fmt::sprintf(L"%s" , w)); + EXPECT_EQ(w, fmt::sprintf(L"%s" , s)); + EXPECT_EQ(w, fmt::sprintf(L"%ls", s)); + + EXPECT_EQ(s, fmt::format("{}", w)); + EXPECT_EQ(w, fmt::format(L"{}", s)); + + fmt::MemoryWriter out8; + out8 << w; + EXPECT_EQ(s, out8.c_str()); + fmt::WMemoryWriter out16; + out16 << s; + EXPECT_EQ(w, out16.c_str()); + +#ifndef _MSC_VER + setlocale(LC_ALL, oldlocale.c_str()); +#endif +}