From eb478ec71098ac8ad29d9419fe9b893dc71979c4 Mon Sep 17 00:00:00 2001 From: Hannes Janetzek Date: Sat, 26 Nov 2016 00:54:50 +0100 Subject: [PATCH] use templated regexp --- src/emitterutils.cpp | 49 ++-- src/exp.h | 496 +++++++++++++++++++++++++++-------------- src/regex_yaml.cpp | 45 ---- src/regex_yaml.h | 87 -------- src/regeximpl.h | 186 ---------------- src/scanner.cpp | 43 ++-- src/scanner.h | 4 - src/scanscalar.cpp | 177 ++++++++++----- src/scanscalar.h | 28 ++- src/scantag.cpp | 10 +- src/scantoken.cpp | 53 +++-- src/stream.h | 2 +- src/streamcharsource.h | 45 ++-- src/stringsource.h | 6 +- test/CMakeLists.txt | 2 +- test/regex_test.cpp | 293 ++++++++++++++---------- 16 files changed, 767 insertions(+), 759 deletions(-) delete mode 100644 src/regex_yaml.cpp delete mode 100644 src/regex_yaml.h delete mode 100644 src/regeximpl.h diff --git a/src/emitterutils.cpp b/src/emitterutils.cpp index 93c2f9b..a581efd 100644 --- a/src/emitterutils.cpp +++ b/src/emitterutils.cpp @@ -4,8 +4,6 @@ #include "emitterutils.h" #include "exp.h" #include "indentation.h" -#include "regex_yaml.h" -#include "regeximpl.h" #include "stringsource.h" #include "yaml-cpp/binary.h" // IWYU pragma: keep #include "yaml-cpp/ostream_wrapper.h" @@ -159,35 +157,34 @@ bool IsValidPlainScalar(const std::string& str, FlowType::value flowType, } // check the start - const RegEx& start = (flowType == FlowType::Flow ? 
Exp::PlainScalarInFlow() - : Exp::PlainScalar()); - if (!start.Matches(str)) { - return false; + if (flowType == FlowType::Flow) { + if (!Exp::PlainScalarInFlow::Matches(str)) { return false; } + } else { + if (!Exp::PlainScalar::Matches(str)) { return false; } } - // and check the end for plain whitespace (which can't be faithfully kept in a // plain scalar) if (!str.empty() && *str.rbegin() == ' ') { return false; } - // then check until something is disallowed - static const RegEx& disallowed_flow = - Exp::EndScalarInFlow() || (Exp::BlankOrBreak() + Exp::Comment()) || - Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() || - Exp::Tab(); - static const RegEx& disallowed_block = - Exp::EndScalar() || (Exp::BlankOrBreak() + Exp::Comment()) || - Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() || - Exp::Tab(); - const RegEx& disallowed = - flowType == FlowType::Flow ? disallowed_flow : disallowed_block; + using namespace Exp; + using Disallowed = Matcher < + OR < SEQ < detail::BlankOrBreak, detail::Comment >, + detail::NotPrintable, + detail::Utf8_ByteOrderMark, + detail::Break, + detail::Tab>>; StringCharSource buffer(str.c_str(), str.size()); while (buffer) { - if (disallowed.Matches(buffer)) { - return false; + if ((flowType == FlowType::Flow ? + Matcher::Matches(buffer) : + Matcher::Matches(buffer)) || + Disallowed::Matches(buffer)) { + return false; } + if (allowOnlyAscii && (0x80 <= static_cast(buffer[0]))) { return false; } @@ -424,9 +421,13 @@ bool WriteAnchor(ostream_wrapper& out, const std::string& str) { bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim) { out << (verbatim ? "!<" : "!"); StringCharSource buffer(str.c_str(), str.size()); - const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag(); + auto reValid = verbatim ? 
+ [](StringCharSource& s) { return Exp::URI::Match(s); } : + [](StringCharSource& s) { return Exp::Tag::Match(s); }; + while (buffer) { - int n = reValid.Match(buffer); + + int n = reValid(buffer); if (n <= 0) { return false; } @@ -447,7 +448,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix, out << "!"; StringCharSource prefixBuffer(prefix.c_str(), prefix.size()); while (prefixBuffer) { - int n = Exp::URI().Match(prefixBuffer); + int n = Exp::URI::Match(prefixBuffer); if (n <= 0) { return false; } @@ -461,7 +462,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix, out << "!"; StringCharSource tagBuffer(tag.c_str(), tag.size()); while (tagBuffer) { - int n = Exp::Tag().Match(tagBuffer); + int n = Exp::Tag::Match(tagBuffer); if (n <= 0) { return false; } diff --git a/src/exp.h b/src/exp.h index 50b0220..f5898d9 100644 --- a/src/exp.h +++ b/src/exp.h @@ -10,135 +10,271 @@ #include #include -#include "regex_yaml.h" #include "stream.h" +#include "stringsource.h" +#include "streamcharsource.h" + +#define REGEXP_INLINE inline __attribute__((always_inline)) +#define TEST_INLINE inline __attribute__((always_inline)) +//#define TEST_INLINE __attribute__((noinline)) namespace YAML { + +namespace Exp { + +template +struct Char { + template + REGEXP_INLINE static int match(const Source& source) { + return (source.get() == N) ? 
1 : -1; + } +}; + +template +struct OR { + template + REGEXP_INLINE static int match(const Source& source) { + int pos = A::match(source); + if (pos >= 0) { + return pos; + } + + return OR::match(source); + } +}; + +template +struct OR { + template + REGEXP_INLINE static int match(const Source& source) { + return A::match(source); + } +}; + +template +struct SEQ { + template + REGEXP_INLINE static int match(const Source& source) { + int a = A::match(source); + if (a < 0) { + return -1; + } + + const Source nextSource = source + a; + // if (nextSource) { c = nextSource[0]; } + + int b = SEQ::match(nextSource); + if (b < 0) { + return -1; + } + + return a + b; + } +}; + +template +struct SEQ { + template + REGEXP_INLINE static int match(const Source& source) { + return A::match(source); + } +}; + +// TODO empty??? +template +struct NOT { + template + REGEXP_INLINE static int match(const Source& source) { + return A::match(source) >= 0 ? -1 : 1; + } +}; + +template +struct Range { + static_assert(A <= Z, "Invalid Range"); + template + REGEXP_INLINE static int match(const Source& source) { + return (source.get() < A || source.get() > Z) ? -1 : 1; + } +}; + +struct Empty { + template + REGEXP_INLINE static int match(const Source& source) { + return source.get() == Stream::eof() ? 0 : -1; + } + REGEXP_INLINE static int match(const StringCharSource& source) { + // the empty regex only is successful on the empty string + // return c == '\0' ? 0 : -1; + return !source ? 0 : -1; + } +}; + +template +inline bool IsValidSource(const Source& source) { + return source; +} + +template <> +inline bool IsValidSource(const StringCharSource& source) { + // switch (m_op) { + // case REGEX_MATCH: + // case REGEX_RANGE: + return source; + // default: + // return true; + // } +} + +template +struct Matcher { + template + TEST_INLINE static int Match(const Source& source) { + // return IsValidSource(source) ? 
Exp::match(source, source[0]) : -1; + return Exp::match(source); + } + + template + TEST_INLINE static bool Matches(const Source& source) { + return Match(source) >= 0; + } + + TEST_INLINE static int Match(const Stream& in) { + StreamCharSource source(in); + return Match(source); + } + TEST_INLINE static bool Matches(const Stream& in) { + StreamCharSource source(in); + return Matches(source); + } + + TEST_INLINE static int Match(const std::string& str) { + StringCharSource source(str.c_str(), str.size()); + return Match(source); + } + + TEST_INLINE static bool Matches(const std::string& str) { + return Match(str) >= 0; + } + + TEST_INLINE static bool Matches(char ch) { + std::string str; + str += ch; + return Matches(str); + } +}; + //////////////////////////////////////////////////////////////////////////////// // Here we store a bunch of expressions for matching different parts of the // file. -namespace Exp { -// misc -inline const RegEx& Empty() { - static const RegEx e; - return e; -} -inline const RegEx& Space() { - static const RegEx e = RegEx(' '); - return e; -} -inline const RegEx& Tab() { - static const RegEx e = RegEx('\t'); - return e; -} -inline const RegEx& Blank() { - static const RegEx e = Space() || Tab(); - return e; -} -inline const RegEx& Break() { - static const RegEx e = RegEx('\n') || RegEx("\r\n"); - return e; -} -inline const RegEx& BlankOrBreak() { - static const RegEx e = Blank() || Break(); - return e; -} -inline const RegEx& Digit() { - static const RegEx e = RegEx('0', '9'); - return e; -} -inline const RegEx& Alpha() { - static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z'); - return e; -} -inline const RegEx& AlphaNumeric() { - static const RegEx e = Alpha() || Digit(); - return e; -} -inline const RegEx& Word() { - static const RegEx e = AlphaNumeric() || RegEx('-'); - return e; -} -inline const RegEx& Hex() { - static const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f'); - return e; -} -// Valid Unicode code points 
that are not part of c-printable (YAML 1.2, sec. -// 5.1) -inline const RegEx& NotPrintable() { - static const RegEx e = - RegEx(0) || - RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) || - RegEx(0x0E, 0x1F) || - (RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F'))); - return e; -} -inline const RegEx& Utf8_ByteOrderMark() { - static const RegEx e = RegEx("\xEF\xBB\xBF"); - return e; -} +namespace detail { -// actual tags +using Space = Char<' '>; -inline const RegEx& DocStart() { - static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx()); - return e; -} -inline const RegEx& DocEnd() { - static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx()); - return e; -} -inline const RegEx& DocIndicator() { - static const RegEx e = DocStart() || DocEnd(); - return e; -} -inline const RegEx& BlockEntry() { - static const RegEx e = RegEx('-') + (BlankOrBreak() || RegEx()); - return e; -} -inline const RegEx& Key() { - static const RegEx e = RegEx('?') + BlankOrBreak(); - return e; -} -inline const RegEx& KeyInFlow() { - static const RegEx e = RegEx('?') + BlankOrBreak(); - return e; -} -inline const RegEx& Value() { - static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx()); - return e; -} -inline const RegEx& ValueInFlow() { - static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR)); - return e; -} -inline const RegEx& ValueInJSONFlow() { - static const RegEx e = RegEx(':'); - return e; -} -inline const RegEx Comment() { - static const RegEx e = RegEx('#'); - return e; -} -inline const RegEx& Anchor() { - static const RegEx e = !(RegEx("[]{},", REGEX_OR) || BlankOrBreak()); - return e; -} -inline const RegEx& AnchorEnd() { - static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak(); - return e; -} -inline const RegEx& URI() { - static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) || - (RegEx('%') + Hex() + Hex()); - return e; -} -inline const RegEx& Tag() { - static const 
RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) || - (RegEx('%') + Hex() + Hex()); - return e; -} +using Tab = Char<'\t'>; + +using Blank = OR < Space, Tab >; + +using Break = + OR < Char<'\n'>, + SEQ < Char<'\r'>, + Char<'\n'> >>; + +using BlankOrBreak = OR < Blank, Break >; + +using Digit = Range<'0', '9'>; + +using Alpha = + OR < Range<'a', 'z'>, + Range<'A', 'Z'> >; + +using AlphaNumeric = OR < Alpha, Digit >; + +using Word = OR < AlphaNumeric, Char<'-'> >; + +using Hex = OR < Digit, Range<'a','f'>, Range<'A', 'F'>>; + +// why not range? +using NotPrintable = + OR < Char<0>, Char<'\x01'>, + Char<'\x02'>, Char<'\x03'>, + Char<'\x04'>, Char<'\x05'>, + Char<'\x06'>, Char<'\x07'>, + Char<'\x08'>, Char<'\x0B'>, + Char<'\x0C'>, Char<'\x7F'>, + Range<0x0E, 0x1F>, + SEQ < Char<'\xC2'>, + OR < Range<'\x80', '\x84'>, + Range<'\x86', '\x9F'>>>>; + +using Utf8_ByteOrderMark = + SEQ < Char<'\xEF'>, + Char<'\xBB'>, + Char<'\xBF'>>; + +using DocStart = + SEQ < Char<'-'>, + Char<'-'>, + Char<'-'>, + OR < BlankOrBreak, Empty >>; + +using DocEnd = + SEQ < Char<'.'>, + Char<'.'>, + Char<'.'>, + OR < BlankOrBreak, Empty>>; + +using BlockEntry = + SEQ < Char<'-'>, + OR < BlankOrBreak, Empty >>; + +using Key = SEQ, BlankOrBreak>; + +using KeyInFlow = SEQ, BlankOrBreak>; + +using Value = + SEQ < Char<':'>, + OR < BlankOrBreak, Empty >>; + +using ValueInFlow = + SEQ < Char<':'>, + OR < BlankOrBreak, + Char<','>, + Char<'}'>>>; + +using ValueInJSONFlow = Char<':'>; + +using Comment = Char<'#'>; + +using Anchor = NOT< + OR < Char<'['>, Char<']'>, + Char<'{'>, Char<'}'>, + Char<','>, + BlankOrBreak>>; + +using AnchorEnd = + OR < Char<'?'>, Char<':'>, + Char<','>, Char<']'>, + Char<'}'>, Char<'%'>, + Char<'@'>, Char<'`'>, + BlankOrBreak>; + +using URI = + OR < Word, + Char<'#'>, Char<';'>, Char<'/'>, Char<'?'>, Char<':'>, + Char<'@'>, Char<'&'>, Char<'='>, Char<'+'>, Char<'$'>, + Char<','>, Char<'_'>, Char<'.'>, Char<'!'>, Char<'~'>, + Char<'*'>, Char<'\''>, Char<'('>, Char<')'>, 
Char<'['>, + Char<']'>, + SEQ < Char<'%'>, Hex, Hex>>; + +using Tag = + OR < Word, + Char<'#'>, Char<';'>, Char<'/'>, Char<'?'>, Char<':'>, + Char<'@'>, Char<'&'>, Char<'='>, Char<'+'>, Char<'$'>, + Char<'_'>, Char<'.'>, Char<'~'>, Char<'*'>, Char<'\''>, + SEQ < Char <'%'>, Hex, Hex>>; // Plain scalar rules: // . Cannot start with a blank. @@ -146,59 +282,81 @@ inline const RegEx& Tag() { // . In the block context - ? : must be not be followed with a space. // . In the flow context ? is illegal and : and - must not be followed with a // space. -inline const RegEx& PlainScalar() { - static const RegEx e = - !(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || - (RegEx("-?:", REGEX_OR) + (BlankOrBreak() || RegEx()))); - return e; -} -inline const RegEx& PlainScalarInFlow() { - static const RegEx e = - !(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) || - (RegEx("-:", REGEX_OR) + Blank())); - return e; -} -inline const RegEx& EndScalar() { - static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx()); - return e; -} -inline const RegEx& EndScalarInFlow() { - static const RegEx e = - (RegEx(':') + (BlankOrBreak() || RegEx() || RegEx(",]}", REGEX_OR))) || - RegEx(",?[]{}", REGEX_OR); - return e; -} +using PlainScalarCommon = + NOT < OR < BlankOrBreak, + Char<','>, Char<'['>, Char<']'>, Char<'{'>, Char<'}'>, + Char<'#'>, Char<'&'>, Char<'*'>, Char<'!'>, Char<'|'>, + Char<'>'>, Char<'\''>, Char<'\"'>, Char<'%'>, Char<'@'>, + Char<'`'>>>; -inline const RegEx& ScanScalarEndInFlow() { - static const RegEx e = (EndScalarInFlow() || (BlankOrBreak() + Comment())); - return e; -} +using PlainScalar = + NOT < SEQ < OR < Char<'-'>, + Char<'?'>, + Char<':'>>, + OR < BlankOrBreak, + Empty >>>; -inline const RegEx& ScanScalarEnd() { - static const RegEx e = EndScalar() || (BlankOrBreak() + Comment()); - return e; -} -inline const RegEx& EscSingleQuote() { - static const RegEx e = RegEx("\'\'"); - return e; -} -inline const RegEx& EscBreak() { - static const 
RegEx e = RegEx('\\') + Break(); - return e; -} +using PlainScalarInFlow = + NOT < OR < Char<'?'>, + SEQ < OR < Char<'-'>, + Char<':'>>, + Blank >>>; +using EndScalar = + SEQ < Char<':'>, + OR < BlankOrBreak, Empty >>; -inline const RegEx& ChompIndicator() { - static const RegEx e = RegEx("+-", REGEX_OR); - return e; -} -inline const RegEx& Chomp() { - static const RegEx e = (ChompIndicator() + Digit()) || - (Digit() + ChompIndicator()) || ChompIndicator() || - Digit(); - return e; -} +using EndScalarInFlow = + OR < SEQ < Char<':'>, + OR < BlankOrBreak, + Empty, + Char<','>, + Char<']'>, + Char<'}'>>>, + Char<','>, + Char<'?'>, + Char<'['>, + Char<']'>, + Char<'{'>, + Char<'}'>>; + + + +using ChompIndicator = OR < Char<'+'>, Char<'-'> >; + +using Chomp = + OR < SEQ < ChompIndicator, Digit >, + SEQ < Digit,ChompIndicator >, + ChompIndicator, + Digit>; + +} // end detail + +using Tab = Matcher; +using Blank = Matcher; +using Break = Matcher; +using Digit = Matcher; +using BlankOrBreak = Matcher; +using Word = Matcher; +using DocStart = Matcher; +using DocEnd = Matcher; +using BlockEntry = Matcher; +using Key = Matcher; +using KeyInFlow = Matcher; +using Value = Matcher; +using ValueInFlow = Matcher; +using ValueInJSONFlow = Matcher; +using Comment = Matcher; +using Anchor = Matcher; +using AnchorEnd = Matcher; +using URI = Matcher; +using Tag = Matcher; +using PlainScalarCommon = Matcher; +using PlainScalar = Matcher; +using PlainScalarInFlow = Matcher; +using EscSingleQuote = Matcher, Char<'\''> >>; +using EscBreak = Matcher, detail::Break >>; +using Chomp = Matcher; -// and some functions std::string Escape(Stream& in); } diff --git a/src/regex_yaml.cpp b/src/regex_yaml.cpp deleted file mode 100644 index 20b7720..0000000 --- a/src/regex_yaml.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "regex_yaml.h" - -namespace YAML { -// constructors -RegEx::RegEx() : m_op(REGEX_EMPTY) {} - -RegEx::RegEx(REGEX_OP op) : m_op(op) {} - -RegEx::RegEx(char ch) : m_op(REGEX_MATCH), 
m_a(ch) {} - -RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {} - -RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) { - for (std::size_t i = 0; i < str.size(); i++) - m_params.push_back(RegEx(str[i])); -} - -// combination constructors -RegEx operator!(const RegEx& ex) { - RegEx ret(REGEX_NOT); - ret.m_params.push_back(ex); - return ret; -} - -RegEx operator||(const RegEx& ex1, const RegEx& ex2) { - RegEx ret(REGEX_OR); - ret.m_params.push_back(ex1); - ret.m_params.push_back(ex2); - return ret; -} - -RegEx operator&&(const RegEx& ex1, const RegEx& ex2) { - RegEx ret(REGEX_AND); - ret.m_params.push_back(ex1); - ret.m_params.push_back(ex2); - return ret; -} - -RegEx operator+(const RegEx& ex1, const RegEx& ex2) { - RegEx ret(REGEX_SEQ); - ret.m_params.push_back(ex1); - ret.m_params.push_back(ex2); - return ret; -} -} diff --git a/src/regex_yaml.h b/src/regex_yaml.h deleted file mode 100644 index 8f28b85..0000000 --- a/src/regex_yaml.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 -#define REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 - -#if defined(_MSC_VER) || \ - (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ - (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 -#pragma once -#endif - -#include -#include - -#include "yaml-cpp/dll.h" - -namespace YAML { -class Stream; - -enum REGEX_OP { - REGEX_EMPTY, - REGEX_MATCH, - REGEX_RANGE, - REGEX_OR, - REGEX_AND, - REGEX_NOT, - REGEX_SEQ -}; - -// simplified regular expressions -// . Only straightforward matches (no repeated characters) -// . 
Only matches from start of string -class YAML_CPP_API RegEx { - public: - RegEx(); - RegEx(char ch); - RegEx(char a, char z); - RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ); - ~RegEx() {} - - friend YAML_CPP_API RegEx operator!(const RegEx& ex); - friend YAML_CPP_API RegEx operator||(const RegEx& ex1, const RegEx& ex2); - friend YAML_CPP_API RegEx operator&&(const RegEx& ex1, const RegEx& ex2); - friend YAML_CPP_API RegEx operator+(const RegEx& ex1, const RegEx& ex2); - - bool Matches(char ch) const; - bool Matches(const std::string& str) const; - bool Matches(const Stream& in) const; - template - bool Matches(const Source& source) const; - - int Match(const std::string& str) const; - int Match(const Stream& in) const; - template - int Match(const Source& source) const; - - private: - RegEx(REGEX_OP op); - - template - bool IsValidSource(const Source& source) const; - template - int MatchUnchecked(const Source& source) const; - - template - int MatchOpEmpty(const Source& source) const; - template - int MatchOpMatch(const Source& source) const; - template - int MatchOpRange(const Source& source) const; - template - int MatchOpOr(const Source& source) const; - template - int MatchOpAnd(const Source& source) const; - template - int MatchOpNot(const Source& source) const; - template - int MatchOpSeq(const Source& source) const; - - private: - REGEX_OP m_op; - char m_a, m_z; - std::vector m_params; -}; -} - -#include "regeximpl.h" - -#endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/src/regeximpl.h b/src/regeximpl.h deleted file mode 100644 index 709124f..0000000 --- a/src/regeximpl.h +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 -#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 - -#if defined(_MSC_VER) || \ - (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ - (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 -#pragma once -#endif - -#include "stream.h" 
-#include "stringsource.h" -#include "streamcharsource.h" - -namespace YAML { -// query matches -inline bool RegEx::Matches(char ch) const { - std::string str; - str += ch; - return Matches(str); -} - -inline bool RegEx::Matches(const std::string& str) const { - return Match(str) >= 0; -} - -inline bool RegEx::Matches(const Stream& in) const { return Match(in) >= 0; } - -template -inline bool RegEx::Matches(const Source& source) const { - return Match(source) >= 0; -} - -// Match -// . Matches the given string against this regular expression. -// . Returns the number of characters matched. -// . Returns -1 if no characters were matched (the reason for -// not returning zero is that we may have an empty regex -// which is ALWAYS successful at matching zero characters). -// . REMEMBER that we only match from the start of the buffer! -inline int RegEx::Match(const std::string& str) const { - StringCharSource source(str.c_str(), str.size()); - return Match(source); -} - -inline int RegEx::Match(const Stream& in) const { - StreamCharSource source(in); - return Match(source); -} - -template -inline bool RegEx::IsValidSource(const Source& source) const { - return source; -} - -template <> -inline bool RegEx::IsValidSource( - const StringCharSource& source) const { - switch (m_op) { - case REGEX_MATCH: - case REGEX_RANGE: - return source; - default: - return true; - } -} - -template -inline int RegEx::Match(const Source& source) const { - return IsValidSource(source) ? 
MatchUnchecked(source) : -1; -} - -template -inline int RegEx::MatchUnchecked(const Source& source) const { - switch (m_op) { - case REGEX_EMPTY: - return MatchOpEmpty(source); - case REGEX_MATCH: - return MatchOpMatch(source); - case REGEX_RANGE: - return MatchOpRange(source); - case REGEX_OR: - return MatchOpOr(source); - case REGEX_AND: - return MatchOpAnd(source); - case REGEX_NOT: - return MatchOpNot(source); - case REGEX_SEQ: - return MatchOpSeq(source); - } - - return -1; -} - -////////////////////////////////////////////////////////////////////////////// -// Operators -// Note: the convention MatchOp* is that we can assume -// IsSourceValid(source). -// So we do all our checks *before* we call these functions - -// EmptyOperator -template -inline int RegEx::MatchOpEmpty(const Source& source) const { - return source[0] == Stream::eof() ? 0 : -1; -} - -template <> -inline int RegEx::MatchOpEmpty( - const StringCharSource& source) const { - return !source - ? 0 - : -1; // the empty regex only is successful on the empty string -} - -// MatchOperator -template -inline int RegEx::MatchOpMatch(const Source& source) const { - if (source[0] != m_a) - return -1; - return 1; -} - -// RangeOperator -template -inline int RegEx::MatchOpRange(const Source& source) const { - if (m_a > source[0] || m_z < source[0]) - return -1; - return 1; -} - -// OrOperator -template -inline int RegEx::MatchOpOr(const Source& source) const { - for (std::size_t i = 0; i < m_params.size(); i++) { - int n = m_params[i].MatchUnchecked(source); - if (n >= 0) - return n; - } - return -1; -} - -// AndOperator -// Note: 'AND' is a little funny, since we may be required to match things -// of different lengths. If we find a match, we return the length of -// the FIRST entry on the list. 
-template -inline int RegEx::MatchOpAnd(const Source& source) const { - int first = -1; - for (std::size_t i = 0; i < m_params.size(); i++) { - int n = m_params[i].MatchUnchecked(source); - if (n == -1) - return -1; - if (i == 0) - first = n; - } - return first; -} - -// NotOperator -template -inline int RegEx::MatchOpNot(const Source& source) const { - if (m_params.empty()) - return -1; - if (m_params[0].MatchUnchecked(source) >= 0) - return -1; - return 1; -} - -// SeqOperator -template -inline int RegEx::MatchOpSeq(const Source& source) const { - int offset = 0; - for (std::size_t i = 0; i < m_params.size(); i++) { - int n = m_params[i].Match(source + offset); // note Match, not - // MatchUnchecked because we - // need to check validity after - // the offset - if (n == -1) - return -1; - offset += n; - } - - return offset; -} -} - -#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/src/scanner.cpp b/src/scanner.cpp index b5cfcc1..5d3274a 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -103,11 +103,11 @@ void Scanner::ScanNextToken() { } // document token - if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) { + if (INPUT.column() == 0 && Exp::DocStart::Matches(INPUT)) { return ScanDocStart(); } - if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) { + if (INPUT.column() == 0 && Exp::DocEnd::Matches(INPUT)) { return ScanDocEnd(); } @@ -126,15 +126,18 @@ void Scanner::ScanNextToken() { } // block/map stuff - if (Exp::BlockEntry().Matches(INPUT)) { + if (Exp::BlockEntry::Matches(INPUT)) { return ScanBlockEntry(); } - if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) { + if (InBlockContext() ? Exp::Key::Matches(INPUT) : Exp::KeyInFlow::Matches(INPUT)) { return ScanKey(); } - if (GetValueRegex().Matches(INPUT)) { + if ((InBlockContext() && Exp::Value::Matches(INPUT)) || + (m_canBeJSONFlow ? 
+ Exp::ValueInJSONFlow::Matches(INPUT) : + Exp::ValueInFlow::Matches(INPUT))) { return ScanValue(); } @@ -158,10 +161,13 @@ void Scanner::ScanNextToken() { return ScanQuotedScalar(); } - // plain scalars - if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()) - .Matches(INPUT)) { - return ScanPlainScalar(); + if (Exp::PlainScalarCommon::Matches(INPUT)) { + // plain scalars + if (InBlockContext() ? + Exp::PlainScalar::Matches(INPUT) : + Exp::PlainScalarInFlow::Matches(INPUT)) { + return ScanPlainScalar(); + } } // don't know what it is! @@ -172,27 +178,27 @@ void Scanner::ScanToNextToken() { while (1) { // first eat whitespace while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { - if (InBlockContext() && Exp::Tab().Matches(INPUT)) { + if (InBlockContext() && Exp::Tab::Matches(INPUT)) { m_simpleKeyAllowed = false; } INPUT.eat(1); } // then eat a comment - if (Exp::Comment().Matches(INPUT)) { + if (Exp::Comment::Matches(INPUT)) { // eat until line break - while (INPUT && !Exp::Break().Matches(INPUT)) { + while (INPUT && !Exp::Break::Matches(INPUT)) { INPUT.eat(1); } } // if it's NOT a line break, then we're done! - if (!Exp::Break().Matches(INPUT)) { + if (!Exp::Break::Matches(INPUT)) { break; } // otherwise, let's eat the line break and keep going - int n = Exp::Break().Match(INPUT); + int n = Exp::Break::Match(INPUT); INPUT.eat(n); // oh yeah, and let's get rid of that simple key @@ -229,13 +235,6 @@ bool Scanner::IsWhitespaceToBeEaten(char ch) { return false; } -const RegEx& Scanner::GetValueRegex() const { - if (InBlockContext()) { - return Exp::Value(); - } - - return m_canBeJSONFlow ? 
Exp::ValueInJSONFlow() : Exp::ValueInFlow(); -} void Scanner::StartStream() { m_startedStream = true; @@ -322,7 +321,7 @@ void Scanner::PopIndentToHere() { } if (indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && - !Exp::BlockEntry().Matches(INPUT))) { + !Exp::BlockEntry::Matches(INPUT))) { break; } diff --git a/src/scanner.h b/src/scanner.h index 7bb2ccc..f3c1c68 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -133,10 +133,6 @@ class Scanner { bool IsWhitespaceToBeEaten(char ch); - /** - * Returns the appropriate regex to check if the next token is a value token. - */ - const RegEx &GetValueRegex() const; struct SimpleKey { SimpleKey(const Mark &mark_, std::size_t flowLevel_); diff --git a/src/scanscalar.cpp b/src/scanscalar.cpp index 10e359d..990a613 100644 --- a/src/scanscalar.cpp +++ b/src/scanscalar.cpp @@ -3,11 +3,78 @@ #include #include "exp.h" -#include "regeximpl.h" #include "stream.h" #include "yaml-cpp/exceptions.h" // IWYU pragma: keep namespace YAML { + +int ScanScalar::MatchScalarEmpty(const Stream&) { + // This is checked by !INPUT as well + return -1; +} + +int ScanScalar::MatchScalarSingleQuoted(const Stream& in) { + using namespace Exp; + return (Matcher>::Matches(in) && + !EscSingleQuote::Matches(in)) ? 
1 : -1; +} + +int ScanScalar::MatchScalarDoubleQuoted(const Stream& in) { + using namespace Exp; + return Matcher>::Match(in); +} + +int ScanScalar::MatchScalarEnd(const Stream& in) { + using namespace Exp; + using ScalarEnd = Matcher< + OR < SEQ < Char<':'>, + OR < detail::BlankOrBreak, + Empty >>, + SEQ < detail::BlankOrBreak, + detail::Comment>>>; + + return ScalarEnd::Match(in); +} + +int ScanScalar::MatchScalarEndInFlow(const Stream& in) { + using namespace Exp; + using ScalarEndInFlow = Matcher < + OR < SEQ < Char<':'>, + OR < detail::BlankOrBreak, + Char<','>, + Char<']'>, + Char<'}'>, + Empty >>, + Char<','>, + Char<'?'>, + Char<'['>, + Char<']'>, + Char<'{'>, + Char<'}'>, + SEQ < detail::BlankOrBreak, + detail::Comment>>>; + + return ScalarEndInFlow::Match(in); +} + +bool ScanScalar::MatchDocIndicator(const Stream& in) { + using namespace Exp; + using DocIndicator = Matcher>; + + return DocIndicator::Matches(in); +} + +bool ScanScalar::CheckDocIndicator(Stream& INPUT, ScanScalarParams& params) { + if (MatchDocIndicator(INPUT)) { + if (params.onDocIndicator == BREAK) { + return true; + } else if (params.onDocIndicator == THROW) { + throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); + } + } + return false; +} + // ScanScalar // . This is where the scalar magic happens. // @@ -18,7 +85,7 @@ namespace YAML { // // . Depending on the parameters given, we store or stop // and different places in the above flow. 
-std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { +std::string ScanScalar::Apply(Stream& INPUT, ScanScalarParams& params) { bool foundNonEmptyLine = false; bool pastOpeningBreak = (params.fold == FOLD_FLOW); bool emptyLine = false, moreIndented = false; @@ -28,58 +95,68 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { std::string scalar; params.leadingSpaces = false; - if (!params.end) { - params.end = &Exp::Empty(); - } - while (INPUT) { // ******************************** // Phase #1: scan until line ending - std::size_t lastNonWhitespaceChar = scalar.size(); bool escapedNewline = false; - while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) { + std::size_t lastNonWhitespaceChar = scalar.size(); + + while (1) { + + // find end posiion + if (params.end(INPUT) >= 0) { + break; + } + if (!INPUT) { break; } - // document indicator? - if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { - if (params.onDocIndicator == BREAK) { - break; - } else if (params.onDocIndicator == THROW) { - throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); - } + // find break posiion + char ch = INPUT.peek(); + + bool isWhiteSpace = (ch == ' ' || ch == '\t'); + + if (!isWhiteSpace) { + if (ch == '\n' || (ch == '\r' && Exp::Break::Matches(INPUT))) { + break; + } + // document indicator? + if (INPUT.column() == 0 && CheckDocIndicator(INPUT, params)) { + break; + } } foundNonEmptyLine = true; pastOpeningBreak = true; - // escaped newline? (only if we're escaping on slash) - if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { - // eat escape character and get out (but preserve trailing whitespace!) - INPUT.get(); - lastNonWhitespaceChar = scalar.size(); - lastEscapedChar = scalar.size(); - escapedNewline = true; - break; - } + if (params.escape != ch) { + // just add the character + scalar += ch; + INPUT.eat(); - // escape this? 
- if (INPUT.peek() == params.escape) { - scalar += Exp::Escape(INPUT); - lastNonWhitespaceChar = scalar.size(); - lastEscapedChar = scalar.size(); - continue; - } + if (!isWhiteSpace) { + lastNonWhitespaceChar = scalar.size(); + } - // otherwise, just add the damn character - char ch = INPUT.get(); - scalar += ch; - if (ch != ' ' && ch != '\t') { - lastNonWhitespaceChar = scalar.size(); + } else { + // escaped newline? (only if we're escaping on slash) + if (params.escape == '\\' && Exp::EscBreak::Matches(INPUT)) { + // eat escape character and get out (but preserve trailing whitespace!) + INPUT.eat(); + lastNonWhitespaceChar = scalar.size(); + lastEscapedChar = scalar.size(); + escapedNewline = true; + break; + + } else { + scalar += Exp::Escape(INPUT); + lastNonWhitespaceChar = scalar.size(); + lastEscapedChar = scalar.size(); + } } - } + } // end while(1) // eof? if we're looking to eat something, then we throw if (!INPUT) { @@ -90,14 +167,14 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { } // doc indicator? - if (params.onDocIndicator == BREAK && INPUT.column() == 0 && - Exp::DocIndicator().Matches(INPUT)) { + if (params.onDocIndicator == BREAK && + INPUT.column() == 0 && + MatchDocIndicator(INPUT)) { break; } // are we done via character match? 
- int n = params.end->Match(INPUT); - if (n >= 0) { + if (int n = params.end(INPUT) >= 0) { if (params.eatEnd) { INPUT.eat(n); } @@ -110,9 +187,9 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { // ******************************** // Phase #2: eat line ending - n = Exp::Break().Match(INPUT); - INPUT.eat(n); - + if (int n = Exp::Break::Match(INPUT)) { + INPUT.eat(n); + } // ******************************** // Phase #3: scan initial spaces @@ -120,7 +197,7 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { while (INPUT.peek() == ' ' && (INPUT.column() < params.indent || (params.detectIndent && !foundNonEmptyLine)) && - !params.end->Matches(INPUT)) { + !(params.end(INPUT) >= 0)) { INPUT.eat(1); } @@ -130,9 +207,9 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { } // and then the rest of the whitespace - while (Exp::Blank().Matches(INPUT)) { + for (char c = INPUT.peek(); (c == ' ' || c == '\t'); c = INPUT.peek()) { // we check for tabs that masquerade as indentation - if (INPUT.peek() == '\t' && INPUT.column() < params.indent && + if (c == '\t' && INPUT.column() < params.indent && params.onTabInIndentation == THROW) { throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); } @@ -141,7 +218,7 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { break; } - if (params.end->Matches(INPUT)) { + if (params.end(INPUT) >= 0) { break; } @@ -149,8 +226,8 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) { } // was this an empty line? 
- bool nextEmptyLine = Exp::Break().Matches(INPUT); - bool nextMoreIndented = Exp::Blank().Matches(INPUT); + bool nextEmptyLine = Exp::Break::Matches(INPUT); + bool nextMoreIndented = Exp::Blank::Matches(INPUT); if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) foldedNewlineStartedMoreIndented = moreIndented; diff --git a/src/scanscalar.h b/src/scanscalar.h index c3a574a..4309be9 100644 --- a/src/scanscalar.h +++ b/src/scanscalar.h @@ -8,8 +8,8 @@ #endif #include +#include -#include "regex_yaml.h" #include "stream.h" namespace YAML { @@ -19,8 +19,7 @@ enum FOLD { DONT_FOLD, FOLD_BLOCK, FOLD_FLOW }; struct ScanScalarParams { ScanScalarParams() - : end(nullptr), - eatEnd(false), + : eatEnd(false), indent(0), detectIndent(false), eatLeadingWhitespace(0), @@ -33,8 +32,7 @@ struct ScanScalarParams { leadingSpaces(false) {} // input: - const RegEx* end; // what condition ends this scalar? - // unowned. + std::function end; // what condition ends this scalar? bool eatEnd; // should we eat that condition when we see it? int indent; // what level of indentation should be eaten and ignored? bool detectIndent; // should we try to autodetect the indent? 
@@ -57,7 +55,25 @@ struct ScanScalarParams { bool leadingSpaces; }; -std::string ScanScalar(Stream& INPUT, ScanScalarParams& info); +struct ScanScalar { + static int MatchScalarEmpty(const Stream& in); + + static int MatchScalarSingleQuoted(const Stream& in); + + static int MatchScalarDoubleQuoted(const Stream& in); + + static int MatchScalarEnd(const Stream& in); + + static int MatchScalarEndInFlow(const Stream& in); + + static std::string Apply(Stream& INPUT, ScanScalarParams& info); + +private: + static bool MatchDocIndicator(const Stream& in); + static bool CheckDocIndicator(Stream& INPUT, ScanScalarParams& params); + +}; + } #endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/src/scantag.cpp b/src/scantag.cpp index c5b3965..060377a 100644 --- a/src/scantag.cpp +++ b/src/scantag.cpp @@ -1,6 +1,4 @@ #include "exp.h" -#include "regex_yaml.h" -#include "regeximpl.h" #include "stream.h" #include "yaml-cpp/exceptions.h" // IWYU pragma: keep #include "yaml-cpp/mark.h" @@ -19,7 +17,7 @@ const std::string ScanVerbatimTag(Stream& INPUT) { return tag; } - int n = Exp::URI().Match(INPUT); + int n = Exp::URI::Match(INPUT); if (n <= 0) break; @@ -43,7 +41,7 @@ const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) { int n = 0; if (canBeHandle) { - n = Exp::Word().Match(INPUT); + n = Exp::Word::Match(INPUT); if (n <= 0) { canBeHandle = false; firstNonWordChar = INPUT.mark(); @@ -51,7 +49,7 @@ const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) { } if (!canBeHandle) - n = Exp::Tag().Match(INPUT); + n = Exp::Tag::Match(INPUT); if (n <= 0) break; @@ -66,7 +64,7 @@ const std::string ScanTagSuffix(Stream& INPUT) { std::string tag; while (INPUT) { - int n = Exp::Tag().Match(INPUT); + int n = Exp::Tag::Match(INPUT); if (n <= 0) break; diff --git a/src/scantoken.cpp b/src/scantoken.cpp index fd8758d..1d7fe7e 100644 --- a/src/scantoken.cpp +++ b/src/scantoken.cpp @@ -1,8 +1,6 @@ #include #include "exp.h" -#include "regex_yaml.h" -#include 
"regeximpl.h" #include "scanner.h" #include "scanscalar.h" #include "scantag.h" // IWYU pragma: keep @@ -33,22 +31,22 @@ void Scanner::ScanDirective() { INPUT.eat(1); // read name - while (INPUT && !Exp::BlankOrBreak().Matches(INPUT)) + while (INPUT && !Exp::BlankOrBreak::Matches(INPUT)) token.value += INPUT.get(); // read parameters while (1) { // first get rid of whitespace - while (Exp::Blank().Matches(INPUT)) + while (Exp::Blank::Matches(INPUT)) INPUT.eat(1); // break on newline or comment - if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT)) + if (!INPUT || Exp::Break::Matches(INPUT) || Exp::Comment::Matches(INPUT)) break; // now read parameter std::string param; - while (INPUT && !Exp::BlankOrBreak().Matches(INPUT)) + while (INPUT && !Exp::BlankOrBreak::Matches(INPUT)) param += INPUT.get(); token.params.push_back(param); @@ -233,7 +231,7 @@ void Scanner::ScanAnchorOrAlias() { alias = (indicator == Keys::Alias); // now eat the content - while (INPUT && Exp::Anchor().Matches(INPUT)) + while (INPUT && Exp::Anchor::Matches(INPUT)) name += INPUT.get(); // we need to have read SOMETHING! @@ -242,7 +240,7 @@ void Scanner::ScanAnchorOrAlias() { : ErrorMsg::ANCHOR_NOT_FOUND); // and needs to end correctly - if (INPUT && !Exp::AnchorEnd().Matches(INPUT)) + if (INPUT && !Exp::AnchorEnd::Matches(INPUT)) throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR); @@ -291,14 +289,19 @@ void Scanner::ScanTag() { m_tokens.push(token); } + // PlainScalar void Scanner::ScanPlainScalar() { std::string scalar; // set up the scanning parameters ScanScalarParams params; - params.end = - (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd()); + if (InFlowContext()) { + params.end = ScanScalar::MatchScalarEndInFlow; + } else { + params.end = ScanScalar::MatchScalarEnd; + } + params.eatEnd = false; params.indent = (InFlowContext() ? 
0 : GetTopIndent() + 1); params.fold = FOLD_FLOW; @@ -312,7 +315,7 @@ void Scanner::ScanPlainScalar() { InsertPotentialSimpleKey(); Mark mark = INPUT.mark(); - scalar = ScanScalar(INPUT, params); + scalar = ScanScalar::Apply(INPUT, params); // can have a simple key only if we ended the scalar by starting a new line m_simpleKeyAllowed = params.leadingSpaces; @@ -327,6 +330,7 @@ void Scanner::ScanPlainScalar() { m_tokens.push(token); } + // QuotedScalar void Scanner::ScanQuotedScalar() { std::string scalar; @@ -338,8 +342,11 @@ void Scanner::ScanQuotedScalar() { // setup the scanning parameters ScanScalarParams params; - RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote)); - params.end = &end; + if (single) { + params.end = ScanScalar::MatchScalarSingleQuoted; + } else { + params.end = ScanScalar::MatchScalarDoubleQuoted; + } params.eatEnd = true; params.escape = (single ? '\'' : '\\'); params.indent = 0; @@ -358,7 +365,7 @@ void Scanner::ScanQuotedScalar() { INPUT.get(); // and scan - scalar = ScanScalar(INPUT, params); + scalar = ScanScalar::Apply(INPUT, params); m_simpleKeyAllowed = false; m_canBeJSONFlow = true; @@ -367,6 +374,8 @@ void Scanner::ScanQuotedScalar() { m_tokens.push(token); } + + // BlockScalarToken // . These need a little extra processing beforehand. // . 
We need to scan the line where the indicator is (this doesn't count as part @@ -379,6 +388,8 @@ void Scanner::ScanBlockScalar() { params.indent = 1; params.detectIndent = true; + params.end = ScanScalar::MatchScalarEmpty; + // eat block indicator ('|' or '>') Mark mark = INPUT.mark(); char indicator = INPUT.get(); @@ -386,14 +397,14 @@ void Scanner::ScanBlockScalar() { // eat chomping/indentation indicators params.chomp = CLIP; - int n = Exp::Chomp().Match(INPUT); + int n = Exp::Chomp::Match(INPUT); for (int i = 0; i < n; i++) { char ch = INPUT.get(); if (ch == '+') params.chomp = KEEP; else if (ch == '-') params.chomp = STRIP; - else if (Exp::Digit().Matches(ch)) { + else if (Exp::Digit::Matches(ch)) { if (ch == '0') throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK); @@ -403,16 +414,16 @@ void Scanner::ScanBlockScalar() { } // now eat whitespace - while (Exp::Blank().Matches(INPUT)) + while (Exp::Blank::Matches(INPUT)) INPUT.eat(1); // and comments to the end of the line - if (Exp::Comment().Matches(INPUT)) - while (INPUT && !Exp::Break().Matches(INPUT)) + if (Exp::Comment::Matches(INPUT)) + while (INPUT && !Exp::Break::Matches(INPUT)) INPUT.eat(1); // if it's not a line break, then we ran into a bad character inline - if (INPUT && !Exp::Break().Matches(INPUT)) + if (INPUT && !Exp::Break::Matches(INPUT)) throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK); // set the initial indentation @@ -423,7 +434,7 @@ void Scanner::ScanBlockScalar() { params.trimTrailingSpaces = false; params.onTabInIndentation = THROW; - scalar = ScanScalar(INPUT, params); + scalar = ScanScalar::Apply(INPUT, params); // simple keys always ok after block scalars (since we're gonna start a new // line anyways) diff --git a/src/stream.h b/src/stream.h index 42d542d..a44ff36 100644 --- a/src/stream.h +++ b/src/stream.h @@ -32,7 +32,7 @@ class Stream : private noncopyable { std::string get(int n); void eat(int n = 1); - static char eof() { return 0x04; } + static 
constexpr char eof() { return 0x04; } const Mark mark() const { return m_mark; } int pos() const { return m_mark.pos; } diff --git a/src/streamcharsource.h b/src/streamcharsource.h index 624599e..c9bae29 100644 --- a/src/streamcharsource.h +++ b/src/streamcharsource.h @@ -13,36 +13,45 @@ namespace YAML { class StreamCharSource { public: - StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {} - StreamCharSource(const StreamCharSource& source) - : m_offset(source.m_offset), m_stream(source.m_stream) {} + StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) { + if (m_stream.ReadAheadTo(0)){ + m_char = m_stream.peek(); + } else { + m_char = Stream::eof(); + } + } ~StreamCharSource() {} - operator bool() const; + inline operator bool() const { return m_char != Stream::eof(); } + char operator[](std::size_t i) const { return m_stream.CharAt(m_offset + i); } + + char get() const { return m_char; } + bool operator!() const { return !static_cast(*this); } - const StreamCharSource operator+(int i) const; + const StreamCharSource operator+(int i) const { + return StreamCharSource( + *this, (static_cast(m_offset) + i >= 0) ? 
m_offset + i : 0); + } private: std::size_t m_offset; const Stream& m_stream; + char m_char; StreamCharSource& operator=(const StreamCharSource&); // non-assignable + + StreamCharSource(const StreamCharSource& source, size_t offset) + : m_offset(offset), m_stream(source.m_stream) { + + if (m_stream.ReadAheadTo(m_offset)) { + m_char = m_stream.CharAt(m_offset); + } else { + m_char = Stream::eof(); + } + } }; - -inline StreamCharSource::operator bool() const { - return m_stream.ReadAheadTo(m_offset); -} - -inline const StreamCharSource StreamCharSource::operator+(int i) const { - StreamCharSource source(*this); - if (static_cast<int>(source.m_offset) + i >= 0) - source.m_offset += i; - else - source.m_offset = 0; - return source; -} } #endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/src/stringsource.h b/src/stringsource.h index 6fee44b..61fc045 100644 --- a/src/stringsource.h +++ b/src/stringsource.h @@ -16,7 +16,9 @@ class StringCharSource { : m_str(str), m_size(size), m_offset(0) {} operator bool() const { return m_offset < m_size; } - char operator[](std::size_t i) const { return m_str[m_offset + i]; } + char operator[](std::size_t i) const { + return m_str ?
m_str[m_offset + i] : 0x04; // EOF + } bool operator!() const { return !static_cast(*this); } const StringCharSource operator+(int i) const { @@ -38,6 +40,8 @@ class StringCharSource { return *this; } + char get() const { return m_str[m_offset]; } + private: const char* m_str; std::size_t m_size; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 74455a5..5d630fc 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -26,7 +26,7 @@ file(GLOB test_new_api_sources new-api/[a-z]*.cpp) list(APPEND test_sources ${test_new_api_sources}) add_sources(${test_sources} ${test_headers}) -include_directories(${YAML_CPP_SOURCE_DIR}/test) +include_directories(${YAML_CPP_SOURCE_DIR}/test ${YAML_CPP_SOURCE_DIR}/src) add_executable(run-tests ${test_sources} diff --git a/test/regex_test.cpp b/test/regex_test.cpp index e67c094..8fd3a1e 100644 --- a/test/regex_test.cpp +++ b/test/regex_test.cpp @@ -1,179 +1,236 @@ -#if 0 #include "gtest/gtest.h" -#include "regex_yaml.h" +#include "exp.h" #include "stream.h" -using YAML::RegEx; +using namespace YAML::Exp; using YAML::Stream; namespace { -const auto MIN_CHAR = Stream::eof() + 1; +constexpr char MIN_CHAR = Stream::eof() + 1; +constexpr char MAX_CHAR = 127; TEST(RegExTest, Empty) { - RegEx empty; - EXPECT_TRUE(empty.Matches(std::string())); - EXPECT_EQ(0, empty.Match(std::string())); + using empty = Matcher; + EXPECT_TRUE(empty::Matches(std::string())); + EXPECT_EQ(0, empty::Match(std::string())); for (int i = MIN_CHAR; i < 128; ++i) { auto str = std::string(1, char(i)); - EXPECT_FALSE(empty.Matches(str)); - EXPECT_EQ(-1, empty.Match(str)); + EXPECT_FALSE(empty::Matches(str)); + EXPECT_EQ(-1, empty::Match(str)); } } TEST(RegExTest, Range) { - for (int i = MIN_CHAR; i < 128; ++i) { - for (int j = MIN_CHAR; j < 128; ++j) { - RegEx ex((char)i, (char)j); - for (int k = MIN_CHAR; k < 128; ++k) { - auto str = std::string(1, char(k)); - if (i <= k && k <= j) { - EXPECT_TRUE(ex.Matches(str)); - EXPECT_EQ(1, ex.Match(str)); - } 
else { - EXPECT_FALSE(ex.Matches(str)); - EXPECT_EQ(-1, ex.Match(str)); - } - } + int i = MIN_CHAR; + int j = MAX_CHAR; + using ex1 = Matcher>; + + for (int k = MIN_CHAR; k < 128; ++k) { + auto str = std::string(1, char(k)); + if (i <= k && k <= j) { + EXPECT_TRUE(ex1::Matches(str)); + EXPECT_EQ(1, ex1::Match(str)); + } else { + EXPECT_FALSE(ex1::Matches(str)); + EXPECT_EQ(-1, ex1::Match(str)); } } + i = 'a'; + j = 'z'; + using ex2 = Matcher>; + for (int k = MIN_CHAR; k < 128; ++k) { + auto str = std::string(1, char(k)); + if (i <= k && k <= j) { + EXPECT_TRUE(ex2::Matches(str)); + EXPECT_EQ(1, ex2::Match(str)); + } else { + EXPECT_FALSE(ex2::Matches(str)); + EXPECT_EQ(-1, ex2::Match(str)); + } + } + + // for (int i = MIN_CHAR; i < 128; ++i) { + // for (int j = MIN_CHAR; j < 128; ++j) { + // RegEx ex((char)i, (char)j); + // for (int k = MIN_CHAR; k < 128; ++k) { + // auto str = std::string(1, char(k)); + // if (i <= k && k <= j) { + // EXPECT_TRUE(ex.Matches(str)); + // EXPECT_EQ(1, ex.Match(str)); + // } else { + // EXPECT_FALSE(ex.Matches(str)); + // EXPECT_EQ(-1, ex.Match(str)); + // } + // } + // } + // } } TEST(RegExTest, EmptyString) { - RegEx ex = RegEx(std::string()); - EXPECT_TRUE(ex.Matches(std::string())); - EXPECT_EQ(0, ex.Match(std::string())); + using ex = Matcher; + EXPECT_TRUE(ex::Matches(std::string())); + EXPECT_EQ(0, ex::Match(std::string())); // Matches anything, unlike RegEx()! - EXPECT_TRUE(ex.Matches(std::string("hello"))); - EXPECT_EQ(0, ex.Match(std::string("hello"))); + // EXPECT_TRUE(ex::Matches(std::string("hello"))); + // EXPECT_EQ(0, ex::Match(std::string("hello"))); } -TEST(RegExTest, SingleCharacterString) { - for (int i = MIN_CHAR; i < 128; ++i) { - RegEx ex(std::string(1, (char)i)); - for (int j = MIN_CHAR; j < 128; ++j) { - auto str = std::string(1, char(j)); - if (j == i) { - EXPECT_TRUE(ex.Matches(str)); - EXPECT_EQ(1, ex.Match(str)); - // Match at start of string only! 
- std::string prefixed = - std::string(1, i + 1) + std::string("prefix: ") + str; - EXPECT_FALSE(ex.Matches(prefixed)); - EXPECT_EQ(-1, ex.Match(prefixed)); - } else { - EXPECT_FALSE(ex.Matches(str)); - EXPECT_EQ(-1, ex.Match(str)); - } - } - } -} +// TEST(RegExTest, SingleCharacterString) { +// for (int i = MIN_CHAR; i < 128; ++i) { +// using ex = Matcher(std::string(1, (char)i)); +// for (int j = MIN_CHAR; j < 128; ++j) { +// auto str = std::string(1, char(j)); +// if (j == i) { +// EXPECT_TRUE(ex.Matches(str)); +// EXPECT_EQ(1, ex.Match(str)); +// // Match at start of string only! +// std::string prefixed = +// std::string(1, i + 1) + std::string("prefix: ") + str; +// EXPECT_FALSE(ex.Matches(prefixed)); +// EXPECT_EQ(-1, ex.Match(prefixed)); +// } else { +// EXPECT_FALSE(ex.Matches(str)); +// EXPECT_EQ(-1, ex.Match(str)); +// } +// } +// } +// } TEST(RegExTest, MultiCharacterString) { - RegEx ex(std::string("ab")); + using ex = Matcher, Char<'b'>>>; - EXPECT_FALSE(ex.Matches(std::string("a"))); - EXPECT_EQ(-1, ex.Match(std::string("a"))); + EXPECT_FALSE(ex::Matches(std::string("a"))); + EXPECT_EQ(-1, ex::Match(std::string("a"))); - EXPECT_TRUE(ex.Matches(std::string("ab"))); - EXPECT_EQ(2, ex.Match(std::string("ab"))); - EXPECT_TRUE(ex.Matches(std::string("abba"))); - EXPECT_EQ(2, ex.Match(std::string("abba"))); + EXPECT_TRUE(ex::Matches(std::string("ab"))); + EXPECT_EQ(2, ex::Match(std::string("ab"))); + EXPECT_TRUE(ex::Matches(std::string("abba"))); + EXPECT_EQ(2, ex::Match(std::string("abba"))); // match at start of string only! 
- EXPECT_FALSE(ex.Matches(std::string("baab"))); - EXPECT_EQ(-1, ex.Match(std::string("baab"))); + EXPECT_FALSE(ex::Matches(std::string("baab"))); + EXPECT_EQ(-1, ex::Match(std::string("baab"))); } TEST(RegExTest, OperatorNot) { - RegEx ex = !RegEx(std::string("ab")); + using ex = Matcher,Char<'b'>>>>; - EXPECT_TRUE(ex.Matches(std::string("a"))); - EXPECT_EQ(1, ex.Match(std::string("a"))); + EXPECT_TRUE(ex::Matches(std::string("a"))); + EXPECT_EQ(1, ex::Match(std::string("a"))); - EXPECT_FALSE(ex.Matches(std::string("ab"))); - EXPECT_EQ(-1, ex.Match(std::string("ab"))); - EXPECT_FALSE(ex.Matches(std::string("abba"))); - EXPECT_EQ(-1, ex.Match(std::string("abba"))); + EXPECT_FALSE(ex::Matches(std::string("ab"))); + EXPECT_EQ(-1, ex::Match(std::string("ab"))); + EXPECT_FALSE(ex::Matches(std::string("abba"))); + EXPECT_EQ(-1, ex::Match(std::string("abba"))); // match at start of string only! - EXPECT_TRUE(ex.Matches(std::string("baab"))); + EXPECT_TRUE(ex::Matches(std::string("baab"))); // Operator not causes only one character to be matched. 
- EXPECT_EQ(1, ex.Match(std::string("baab"))); + EXPECT_EQ(1, ex::Match(std::string("baab"))); } -TEST(RegExTest, OperatorOr) { - for (int i = MIN_CHAR; i < 127; ++i) { - for (int j = i + 1; j < 128; ++j) { - auto iStr = std::string(1, char(i)); - auto jStr = std::string(1, char(j)); - RegEx ex1 = RegEx(iStr) || RegEx(jStr); - RegEx ex2 = RegEx(jStr) || RegEx(iStr); - - for (int k = MIN_CHAR; k < 128; ++k) { - auto str = std::string(1, char(k)); - if (i == k || j == k) { - EXPECT_TRUE(ex1.Matches(str)); - EXPECT_TRUE(ex2.Matches(str)); - EXPECT_EQ(1, ex1.Match(str)); - EXPECT_EQ(1, ex2.Match(str)); - } else { - EXPECT_FALSE(ex1.Matches(str)); - EXPECT_FALSE(ex2.Matches(str)); - EXPECT_EQ(-1, ex1.Match(str)); - EXPECT_EQ(-1, ex2.Match(str)); - } - } - } - } -} +// TEST(RegExTest, OperatorOr) { +// for (int i = MIN_CHAR; i < 127; ++i) { +// for (int j = i + 1; j < 128; ++j) { +// auto iStr = std::string(1, char(i)); +// auto jStr = std::string(1, char(j)); +// RegEx ex1 = RegEx(iStr) || RegEx(jStr); +// RegEx ex2 = RegEx(jStr) || RegEx(iStr); +// for (int k = MIN_CHAR; k < 128; ++k) { +// auto str = std::string(1, char(k)); +// if (i == k || j == k) { +// EXPECT_TRUE(ex1.Matches(str)); +// EXPECT_TRUE(ex2.Matches(str)); +// EXPECT_EQ(1, ex1.Match(str)); +// EXPECT_EQ(1, ex2.Match(str)); +// } else { +// EXPECT_FALSE(ex1.Matches(str)); +// EXPECT_FALSE(ex2.Matches(str)); +// EXPECT_EQ(-1, ex1.Match(str)); +// EXPECT_EQ(-1, ex2.Match(str)); +// } +// } +// } +// } +// } TEST(RegExTest, OperatorOrShortCircuits) { - RegEx ex1 = RegEx(std::string("aaaa")) || RegEx(std::string("aa")); - RegEx ex2 = RegEx(std::string("aa")) || RegEx(std::string("aaaa")); + using ex1 = Matcher < + OR < SEQ < Char<'a'>, + Char<'a'>, + Char<'a'>, + Char<'a'>>, + SEQ < Char<'a'>, + Char<'a'>>>>; - EXPECT_TRUE(ex1.Matches(std::string("aaaaa"))); - EXPECT_EQ(4, ex1.Match(std::string("aaaaa"))); + using ex2 = Matcher < + OR < SEQ < Char<'a'>, + Char<'a'>>, + SEQ < Char<'a'>, + Char<'a'>, + 
Char<'a'>>, + Char<'a'>>>; - EXPECT_TRUE(ex2.Matches(std::string("aaaaa"))); - EXPECT_EQ(2, ex2.Match(std::string("aaaaa"))); + // RegEx(std::string("aaaa")) || RegEx(std::string("aa")); + // RegEx ex2 = RegEx(std::string("aa")) || RegEx(std::string("aaaa")); + + EXPECT_TRUE(ex1::Matches(std::string("aaaaa"))); + EXPECT_EQ(4, ex1::Match(std::string("aaaaa"))); + + EXPECT_TRUE(ex2::Matches(std::string("aaaaa"))); + EXPECT_EQ(2, ex2::Match(std::string("aaaaa"))); } -TEST(RegExTest, OperatorAnd) { - RegEx emptySet = RegEx('a') && RegEx(); - EXPECT_FALSE(emptySet.Matches(std::string("a"))); -} +// TEST(RegExTest, OperatorAnd) { +// //RegEx emptySet = RegEx('a') && RegEx(); +// using emptySet = Match<>RegEx('a') && RegEx(); +// EXPECT_FALSE(emptySet.Matches(std::string("a"))); +// } -TEST(RegExTest, OperatorAndShortCircuits) { - RegEx ex1 = RegEx(std::string("aaaa")) && RegEx(std::string("aa")); - RegEx ex2 = RegEx(std::string("aa")) && RegEx(std::string("aaaa")); +// TEST(RegExTest, OperatorAndShortCircuits) { +// RegEx ex1 = RegEx(std::string("aaaa")) && RegEx(std::string("aa")); +// RegEx ex2 = RegEx(std::string("aa")) && RegEx(std::string("aaaa")); - EXPECT_TRUE(ex1.Matches(std::string("aaaaa"))); - EXPECT_EQ(4, ex1.Match(std::string("aaaaa"))); +// EXPECT_TRUE(ex1.Matches(std::string("aaaaa"))); +// EXPECT_EQ(4, ex1.Match(std::string("aaaaa"))); - EXPECT_TRUE(ex2.Matches(std::string("aaaaa"))); - EXPECT_EQ(2, ex2.Match(std::string("aaaaa"))); -} +// EXPECT_TRUE(ex2.Matches(std::string("aaaaa"))); +// EXPECT_EQ(2, ex2.Match(std::string("aaaaa"))); +// } TEST(RegExTest, OperatorPlus) { - RegEx ex = RegEx(std::string("hello ")) + RegEx(std::string("there")); + using ex = Matcher < + SEQ < SEQ < + Char<'h'>, + Char<'e'>, + Char<'l'>, + Char<'l'>, + Char<'o'>, + Char<' '>>, + SEQ < + Char<'t'>, + Char<'h'>, + Char<'e'>, + Char<'r'>, + Char<'e'>> + >>; - EXPECT_TRUE(ex.Matches(std::string("hello there"))); - EXPECT_FALSE(ex.Matches(std::string("hello "))); - 
EXPECT_FALSE(ex.Matches(std::string("there"))); - EXPECT_EQ(11, ex.Match(std::string("hello there"))); + EXPECT_TRUE(ex::Matches(std::string("hello there"))); + EXPECT_FALSE(ex::Matches(std::string("hello "))); + EXPECT_FALSE(ex::Matches(std::string("there"))); + EXPECT_EQ(11, ex::Match(std::string("hello there"))); } TEST(RegExTest, StringOr) { std::string str = "abcde"; - RegEx ex = RegEx(str, YAML::REGEX_OR); + using ex = Matcher,Char<'b'>,Char<'c'>,Char<'d'>,Char<'e'>>>; for (size_t i = 0; i < str.size(); ++i) { - EXPECT_TRUE(ex.Matches(str.substr(i, 1))); - EXPECT_EQ(1, ex.Match(str.substr(i, 1))); + EXPECT_TRUE(ex::Matches(str.substr(i, 1))); + EXPECT_EQ(1, ex::Match(str.substr(i, 1))); } - EXPECT_EQ(1, ex.Match(str)); + EXPECT_EQ(1, ex::Match(str)); } } -#endif