diff --git a/test/integration/encoding_test.cpp b/test/integration/encoding_test.cpp
new file mode 100644
index 0000000..d231d76
--- /dev/null
+++ b/test/integration/encoding_test.cpp
@@ -0,0 +1,203 @@
+#include <sstream>
+
+#include "handler_test.h"
+#include "yaml-cpp/yaml.h"  // IWYU pragma: keep
+
+#include "gtest/gtest.h"
+
+using ::testing::_;
+
+namespace YAML {
+namespace {
+// Signature shared by all encoders below: writes the code point `ch` to
+// `stream` in one particular Unicode encoding form.
+typedef void (*EncodingFn)(std::ostream&, int);
+
+// Narrows `ch` to a single char, keeping only its low-order byte.
+inline char Byte(int ch) {
+  return static_cast<char>(
+      static_cast<unsigned char>(static_cast<unsigned int>(ch)));
+}
+
+// Writes `ch` to `stream` as a 1- to 4-byte UTF-8 sequence. Values above
+// 0x1FFFFF match no branch and produce no output.
+void EncodeToUtf8(std::ostream& stream, int ch) {
+  if (ch <= 0x7F) {
+    stream << Byte(ch);
+  } else if (ch <= 0x7FF) {
+    stream << Byte(0xC0 | (ch >> 6));
+    stream << Byte(0x80 | (ch & 0x3F));
+  } else if (ch <= 0xFFFF) {
+    stream << Byte(0xE0 | (ch >> 12));
+    stream << Byte(0x80 | ((ch >> 6) & 0x3F));
+    stream << Byte(0x80 | (ch & 0x3F));
+  } else if (ch <= 0x1FFFFF) {
+    stream << Byte(0xF0 | (ch >> 18));
+    stream << Byte(0x80 | ((ch >> 12) & 0x3F));
+    stream << Byte(0x80 | ((ch >> 6) & 0x3F));
+    stream << Byte(0x80 | (ch & 0x3F));
+  }
+}
+
+// If `ch` is at or above 0x10000, emits it through `encoding` as a UTF-16
+// surrogate pair (high unit first) and returns true. Otherwise writes
+// nothing and returns false so the caller can emit `ch` as a single unit.
+bool SplitUtf16HighChar(std::ostream& stream, EncodingFn encoding, int ch) {
+  int biasedValue = ch - 0x10000;
+  if (biasedValue < 0) {
+    return false;
+  }
+  int high = 0xD800 | (biasedValue >> 10);
+  int low = 0xDC00 | (biasedValue & 0x3FF);
+  encoding(stream, high);
+  encoding(stream, low);
+  return true;
+}
+
+// Writes `ch` as UTF-16 with the low byte of each 16-bit unit first.
+void EncodeToUtf16LE(std::ostream& stream, int ch) {
+  if (!SplitUtf16HighChar(stream, &EncodeToUtf16LE, ch)) {
+    stream << Byte(ch & 0xFF) << Byte(ch >> 8);
+  }
+}
+
+// Writes `ch` as UTF-16 with the high byte of each 16-bit unit first.
+void EncodeToUtf16BE(std::ostream& stream, int ch) {
+  if (!SplitUtf16HighChar(stream, &EncodeToUtf16BE, ch)) {
+    stream << Byte(ch >> 8) << Byte(ch & 0xFF);
+  }
+}
+
+// Writes `ch` as four bytes, least significant byte first (UTF-32LE).
+void EncodeToUtf32LE(std::ostream& stream, int ch) {
+  stream << Byte(ch & 0xFF) << Byte((ch >> 8) & 0xFF) << Byte((ch >> 16) & 0xFF)
+         << Byte((ch >> 24) & 0xFF);
+}
+
+// Writes `ch` as four bytes, most significant byte first (UTF-32BE).
+void EncodeToUtf32BE(std::ostream& stream, int ch) {
+  stream << Byte((ch >> 24) & 0xFF) << Byte((ch >> 16) & 0xFF)
+         << Byte((ch >> 8) & 0xFF) << Byte(ch & 0xFF);
+}
+
+// Fixture that builds a YAML sequence of literal block scalars in a chosen
+// encoding and expects each scalar to be reported in its UTF-8 form.
+class EncodingTest : public HandlerTest {
+ protected:
+  // Fills m_yaml with the test document written through `encoding`; when
+  // `declareEncoding` is true the document starts with U+FEFF (the BOM).
+  void SetUpEncoding(EncodingFn encoding, bool declareEncoding) {
+    if (declareEncoding) {
+      encoding(m_yaml, 0xFEFF);
+    }
+
+    AddEntry(encoding, 0x0021, 0x007E);  // Basic Latin
+    AddEntry(encoding, 0x00A1, 0x00FF);  // Latin-1 Supplement
+    AddEntry(encoding, 0x0660, 0x06FF);  // Arabic (largest contiguous block)
+
+    // CJK unified ideographs (multiple lines)
+    AddEntry(encoding, 0x4E00, 0x4EFF);
+    AddEntry(encoding, 0x4F00, 0x4FFF);
+    AddEntry(encoding, 0x5000, 0x51FF);  // 512 character line
+    AddEntry(encoding, 0x5200, 0x54FF);  // 768 character line
+    AddEntry(encoding, 0x5500, 0x58FF);  // 1024 character line
+
+    AddEntry(encoding, 0x103A0, 0x103C3);  // Old Persian
+
+    m_yaml.seekg(0, std::ios::beg);
+  }
+
+  // Registers the expected handler events (one scalar per entry added by
+  // SetUpEncoding) and then hands the encoded document to Parse().
+  void Run() {
+    EXPECT_CALL(handler, OnDocumentStart(_));
+    EXPECT_CALL(handler, OnSequenceStart(_, "?", 0));
+    for (std::size_t i = 0; i < m_entries.size(); i++) {
+      EXPECT_CALL(handler, OnScalar(_, "!", 0, m_entries[i]));
+    }
+    EXPECT_CALL(handler, OnSequenceEnd());
+    EXPECT_CALL(handler, OnDocumentEnd());
+
+    Parse(m_yaml.str());
+  }
+
+ private:
+  std::stringstream m_yaml;            // document in the encoding under test
+  std::vector<std::string> m_entries;  // expected scalar values, as UTF-8
+
+  // Appends "- |\n  <chars>\n" to m_yaml via `encoding`, where <chars> is
+  // every code point in [startCh, endCh], and records the UTF-8 form of
+  // "<chars>\n" as the expected scalar.
+  void AddEntry(EncodingFn encoding, int startCh, int endCh) {
+    encoding(m_yaml, '-');
+    encoding(m_yaml, ' ');
+    encoding(m_yaml, '|');
+    encoding(m_yaml, '\n');
+    encoding(m_yaml, ' ');
+    encoding(m_yaml, ' ');
+
+    std::stringstream entry;
+    for (int ch = startCh; ch <= endCh; ++ch) {
+      encoding(m_yaml, ch);
+      EncodeToUtf8(entry, ch);
+    }
+    encoding(m_yaml, '\n');
+    EncodeToUtf8(entry, '\n');
+
+    m_entries.push_back(entry.str());
+  }
+};
+
+// The UTF-16 cases carry gtest's DISABLED_ prefix, so they are compiled but
+// not run by default.
+TEST_F(EncodingTest, UTF8_noBOM) {
+  SetUpEncoding(&EncodeToUtf8, false);
+  Run();
+}
+
+TEST_F(EncodingTest, UTF8_BOM) {
+  SetUpEncoding(&EncodeToUtf8, true);
+  Run();
+}
+
+TEST_F(EncodingTest, DISABLED_UTF16LE_noBOM) {
+  SetUpEncoding(&EncodeToUtf16LE, false);
+  Run();
+}
+
+TEST_F(EncodingTest, DISABLED_UTF16LE_BOM) {
+  SetUpEncoding(&EncodeToUtf16LE, true);
+  Run();
+}
+
+TEST_F(EncodingTest, DISABLED_UTF16BE_noBOM) {
+  SetUpEncoding(&EncodeToUtf16BE, false);
+  Run();
+}
+
+TEST_F(EncodingTest, DISABLED_UTF16BE_BOM) {
+  SetUpEncoding(&EncodeToUtf16BE, true);
+  Run();
+}
+
+TEST_F(EncodingTest, UTF32LE_noBOM) {
+  SetUpEncoding(&EncodeToUtf32LE, false);
+  Run();
+}
+
+TEST_F(EncodingTest, UTF32LE_BOM) {
+  SetUpEncoding(&EncodeToUtf32LE, true);
+  Run();
+}
+
+TEST_F(EncodingTest, UTF32BE_noBOM) {
+  SetUpEncoding(&EncodeToUtf32BE, false);
+  Run();
+}
+
+TEST_F(EncodingTest, UTF32BE_BOM) {
+  SetUpEncoding(&EncodeToUtf32BE, true);
+  Run();
+}
+}
+}
diff --git a/test/tests.h b/test/tests.h
new file mode 100644
index 0000000..72c87c1
--- /dev/null
+++ b/test/tests.h
@@ -0,0 +1,60 @@
+#ifndef TESTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define TESTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+namespace Test {
+void RunAll();
+
+namespace Parser {
+// scalar tests
+// Each takes two non-const string references; judging by the parameter names
+// they receive a YAML input and the value it should yield (confirm in the .cpp).
+void SimpleScalar(std::string& inputScalar, std::string& desiredOutput);
+void MultiLineScalar(std::string& inputScalar, std::string& desiredOutput);
+void LiteralScalar(std::string& inputScalar, std::string& desiredOutput);
+void FoldedScalar(std::string& inputScalar, std::string& desiredOutput);
+void ChompedFoldedScalar(std::string& inputScalar, std::string& desiredOutput);
+void ChompedLiteralScalar(std::string& inputScalar, std::string& desiredOutput);
+void FoldedScalarWithIndent(std::string& inputScalar,
+                            std::string& desiredOutput);
+void ColonScalar(std::string& inputScalar, std::string& desiredOutput);
+void QuotedScalar(std::string& inputScalar, std::string& desiredOutput);
+void CommaScalar(std::string& inputScalar, std::string& desiredOutput);
+void DashScalar(std::string& inputScalar, std::string& desiredOutput);
+void URLScalar(std::string& inputScalar, std::string& desiredOutput);
+
+// misc tests
+// Each takes no arguments and returns bool; presumably true means the test
+// passed (confirm against the definitions).
+bool SimpleSeq();
+bool SimpleMap();
+bool FlowSeq();
+bool FlowMap();
+bool FlowMapWithOmittedKey();
+bool FlowMapWithOmittedValue();
+bool FlowMapWithSoloEntry();
+bool FlowMapEndingWithSoloEntry();
+bool QuotedSimpleKeys();
+bool CompressedMapAndSeq();
+bool NullBlockSeqEntry();
+bool NullBlockMapKey();
+bool NullBlockMapValue();
+bool SimpleAlias();
+bool AliasWithNull();
+bool AnchorInSimpleKey();
+bool AliasAsSimpleKey();
+bool ExplicitDoc();
+bool MultipleDocs();
+bool ExplicitEndDoc();
+bool MultipleDocsWithSomeExplicitIndicators();
+}
+}
+
+#endif  // TESTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/test/teststruct.h b/test/teststruct.h
new file mode 100644
index 0000000..60596cf
--- /dev/null
+++ b/test/teststruct.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <string>
+
+// Makes the enclosing function return a failure message naming `cond` when
+// `cond` is false. The enclosing function's return type must therefore be
+// constructible from a string literal (e.g. Test::TEST below).
+#define YAML_ASSERT(cond)                 \
+  do {                                    \
+    if (!(cond))                          \
+      return " Assert failed: " #cond;    \
+  } while (false)
+
+namespace Test {
+// Outcome of one test: `ok` is the pass/fail flag and `error` holds the
+// failure message (empty unless built from a C string).
+struct TEST {
+  TEST() : ok(false) {}
+  // Non-explicit, so a function returning TEST can `return true;` or return
+  // the string literal produced by YAML_ASSERT.
+  TEST(bool ok_) : ok(ok_) {}
+  TEST(const char *error_) : ok(false), error(error_) {}
+
+  bool ok;
+  std::string error;
+};
+}