use templated regexp

This commit is contained in:
Hannes Janetzek 2016-11-26 00:54:50 +01:00
parent 2899bdae62
commit eb478ec710
16 changed files with 767 additions and 759 deletions

View File

@ -4,8 +4,6 @@
#include "emitterutils.h"
#include "exp.h"
#include "indentation.h"
#include "regex_yaml.h"
#include "regeximpl.h"
#include "stringsource.h"
#include "yaml-cpp/binary.h" // IWYU pragma: keep
#include "yaml-cpp/ostream_wrapper.h"
@ -159,35 +157,34 @@ bool IsValidPlainScalar(const std::string& str, FlowType::value flowType,
}
// check the start
const RegEx& start = (flowType == FlowType::Flow ? Exp::PlainScalarInFlow()
: Exp::PlainScalar());
if (!start.Matches(str)) {
return false;
if (flowType == FlowType::Flow) {
if (!Exp::PlainScalarInFlow::Matches(str)) { return false; }
} else {
if (!Exp::PlainScalar::Matches(str)) { return false; }
}
// and check the end for plain whitespace (which can't be faithfully kept in a
// plain scalar)
if (!str.empty() && *str.rbegin() == ' ') {
return false;
}
// then check until something is disallowed
static const RegEx& disallowed_flow =
Exp::EndScalarInFlow() || (Exp::BlankOrBreak() + Exp::Comment()) ||
Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() ||
Exp::Tab();
static const RegEx& disallowed_block =
Exp::EndScalar() || (Exp::BlankOrBreak() + Exp::Comment()) ||
Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() ||
Exp::Tab();
const RegEx& disallowed =
flowType == FlowType::Flow ? disallowed_flow : disallowed_block;
using namespace Exp;
using Disallowed = Matcher <
OR < SEQ < detail::BlankOrBreak, detail::Comment >,
detail::NotPrintable,
detail::Utf8_ByteOrderMark,
detail::Break,
detail::Tab>>;
StringCharSource buffer(str.c_str(), str.size());
while (buffer) {
if (disallowed.Matches(buffer)) {
return false;
if ((flowType == FlowType::Flow ?
Matcher<detail::EndScalarInFlow>::Matches(buffer) :
Matcher<detail::EndScalar>::Matches(buffer)) ||
Disallowed::Matches(buffer)) {
return false;
}
if (allowOnlyAscii && (0x80 <= static_cast<unsigned char>(buffer[0]))) {
return false;
}
@ -424,9 +421,13 @@ bool WriteAnchor(ostream_wrapper& out, const std::string& str) {
bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim) {
out << (verbatim ? "!<" : "!");
StringCharSource buffer(str.c_str(), str.size());
const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
auto reValid = verbatim ?
[](StringCharSource& s) { return Exp::URI::Match(s); } :
[](StringCharSource& s) { return Exp::Tag::Match(s); };
while (buffer) {
int n = reValid.Match(buffer);
int n = reValid(buffer);
if (n <= 0) {
return false;
}
@ -447,7 +448,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
out << "!";
StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
while (prefixBuffer) {
int n = Exp::URI().Match(prefixBuffer);
int n = Exp::URI::Match(prefixBuffer);
if (n <= 0) {
return false;
}
@ -461,7 +462,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
out << "!";
StringCharSource tagBuffer(tag.c_str(), tag.size());
while (tagBuffer) {
int n = Exp::Tag().Match(tagBuffer);
int n = Exp::Tag::Match(tagBuffer);
if (n <= 0) {
return false;
}

496
src/exp.h
View File

@ -10,135 +10,271 @@
#include <ios>
#include <string>
#include "regex_yaml.h"
#include "stream.h"
#include "stringsource.h"
#include "streamcharsource.h"
#define REGEXP_INLINE inline __attribute__((always_inline))
#define TEST_INLINE inline __attribute__((always_inline))
//#define TEST_INLINE __attribute__((noinline))
namespace YAML {
namespace Exp {
// Single-character matcher.
//
// match() compares the character returned by source.get() with the template
// argument N and returns 1 (one character matched) when they are equal and
// -1 otherwise.
template <char N>
struct Char {
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    if (source.get() == N) {
      return 1;
    }
    return -1;
  }
};
// Ordered alternation.
//
// The alternatives are tried from left to right against the same source.
// The result of the first alternative that reports a match (a value >= 0)
// is returned; if none of the earlier ones match, the result of the last
// alternative is returned unchanged.
template <typename A, typename... B>
struct OR {
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    const int first = A::match(source);
    return first >= 0 ? first : OR<B...>::match(source);
  }
};

// Recursion terminator: a single alternative is matched as-is.
template <typename A>
struct OR<A> {
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    return A::match(source);
  }
};
// Concatenation.
//
// Matches A at the current position, then the remaining elements at the
// position advanced by the number of characters A matched (source + n).
// Returns the total number of characters matched, or -1 as soon as any
// element fails.
template <typename A, typename... B>
struct SEQ {
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    const int head = A::match(source);
    if (head < 0) {
      return -1;
    }

    const Source rest = source + head;
    const int tail = SEQ<B...>::match(rest);
    return tail < 0 ? -1 : head + tail;
  }
};

// Recursion terminator: a one-element sequence is just that element.
template <typename A>
struct SEQ<A> {
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    return A::match(source);
  }
};
// TODO empty???
// Negation.
//
// Reports a one-character match (1) whenever A does NOT match at the current
// position, and a failure (-1) whenever A does match. The length A would
// have matched is ignored.
template <typename A>
struct NOT {
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    if (A::match(source) >= 0) {
      return -1;
    }
    return 1;
  }
};
// Inclusive character range [A, Z].
//
// match() returns 1 when the character returned by source.get() lies inside
// the range (both ends included) and -1 otherwise. An inverted range is
// rejected at compile time.
template <char A, char Z>
struct Range {
  static_assert(A <= Z, "Invalid Range");

  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    const bool inside = (source.get() >= A && source.get() <= Z);
    return inside ? 1 : -1;
  }
};
// The empty expression: matches zero characters.
//
// Returns 0 on success and -1 on failure.
struct Empty {
  // Generic sources: succeeds only when the current character equals
  // Stream::eof().
  template <typename Source>
  REGEXP_INLINE static int match(const Source& source) {
    if (source.get() == Stream::eof()) {
      return 0;
    }
    return -1;
  }

  // String sources: the empty regex only is successful on the empty string,
  // i.e. when the source itself tests false.
  REGEXP_INLINE static int match(const StringCharSource& source) {
    if (!source) {
      return 0;
    }
    return -1;
  }
};
// Reports whether `source` is usable, using the source's own conversion to
// bool.
template <typename Source>
inline bool IsValidSource(const Source& source) {
  const bool valid = source;
  return valid;
}

// StringCharSource variant. It currently performs the same check as the
// generic version (the source's conversion to bool).
template <>
inline bool IsValidSource<StringCharSource>(const StringCharSource& source) {
  const bool valid = source;
  return valid;
}
template <typename Exp>
struct Matcher {
template <typename Source>
TEST_INLINE static int Match(const Source& source) {
// return IsValidSource(source) ? Exp::match(source, source[0]) : -1;
return Exp::match(source);
}
template <typename Source>
TEST_INLINE static bool Matches(const Source& source) {
return Match(source) >= 0;
}
TEST_INLINE static int Match(const Stream& in) {
StreamCharSource source(in);
return Match(source);
}
TEST_INLINE static bool Matches(const Stream& in) {
StreamCharSource source(in);
return Matches(source);
}
TEST_INLINE static int Match(const std::string& str) {
StringCharSource source(str.c_str(), str.size());
return Match(source);
}
TEST_INLINE static bool Matches(const std::string& str) {
return Match(str) >= 0;
}
TEST_INLINE static bool Matches(char ch) {
std::string str;
str += ch;
return Matches(str);
}
};
////////////////////////////////////////////////////////////////////////////////
// Here we store a bunch of expressions for matching different parts of the
// file.
namespace Exp {
// misc
inline const RegEx& Empty() {
static const RegEx e;
return e;
}
inline const RegEx& Space() {
static const RegEx e = RegEx(' ');
return e;
}
inline const RegEx& Tab() {
static const RegEx e = RegEx('\t');
return e;
}
inline const RegEx& Blank() {
static const RegEx e = Space() || Tab();
return e;
}
inline const RegEx& Break() {
static const RegEx e = RegEx('\n') || RegEx("\r\n");
return e;
}
inline const RegEx& BlankOrBreak() {
static const RegEx e = Blank() || Break();
return e;
}
inline const RegEx& Digit() {
static const RegEx e = RegEx('0', '9');
return e;
}
inline const RegEx& Alpha() {
static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z');
return e;
}
inline const RegEx& AlphaNumeric() {
static const RegEx e = Alpha() || Digit();
return e;
}
inline const RegEx& Word() {
static const RegEx e = AlphaNumeric() || RegEx('-');
return e;
}
inline const RegEx& Hex() {
static const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f');
return e;
}
// Valid Unicode code points that are not part of c-printable (YAML 1.2, sec.
// 5.1)
inline const RegEx& NotPrintable() {
static const RegEx e =
RegEx(0) ||
RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) ||
RegEx(0x0E, 0x1F) ||
(RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F')));
return e;
}
inline const RegEx& Utf8_ByteOrderMark() {
static const RegEx e = RegEx("\xEF\xBB\xBF");
return e;
}
namespace detail {
// actual tags
using Space = Char<' '>;
inline const RegEx& DocStart() {
static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& DocEnd() {
static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& DocIndicator() {
static const RegEx e = DocStart() || DocEnd();
return e;
}
inline const RegEx& BlockEntry() {
static const RegEx e = RegEx('-') + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& Key() {
static const RegEx e = RegEx('?') + BlankOrBreak();
return e;
}
inline const RegEx& KeyInFlow() {
static const RegEx e = RegEx('?') + BlankOrBreak();
return e;
}
inline const RegEx& Value() {
static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& ValueInFlow() {
static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR));
return e;
}
inline const RegEx& ValueInJSONFlow() {
static const RegEx e = RegEx(':');
return e;
}
inline const RegEx Comment() {
static const RegEx e = RegEx('#');
return e;
}
inline const RegEx& Anchor() {
static const RegEx e = !(RegEx("[]{},", REGEX_OR) || BlankOrBreak());
return e;
}
inline const RegEx& AnchorEnd() {
static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak();
return e;
}
inline const RegEx& URI() {
static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) ||
(RegEx('%') + Hex() + Hex());
return e;
}
inline const RegEx& Tag() {
static const RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) ||
(RegEx('%') + Hex() + Hex());
return e;
}
using Tab = Char<'\t'>;
using Blank = OR < Space, Tab >;
using Break =
OR < Char<'\n'>,
SEQ < Char<'\r'>,
Char<'\n'> >>;
using BlankOrBreak = OR < Blank, Break >;
using Digit = Range<'0', '9'>;
using Alpha =
OR < Range<'a', 'z'>,
Range<'A', 'Z'> >;
using AlphaNumeric = OR < Alpha, Digit >;
using Word = OR < AlphaNumeric, Char<'-'> >;
using Hex = OR < Digit, Range<'a','f'>, Range<'A', 'F'>>;
// why not range?
using NotPrintable =
OR < Char<0>, Char<'\x01'>,
Char<'\x02'>, Char<'\x03'>,
Char<'\x04'>, Char<'\x05'>,
Char<'\x06'>, Char<'\x07'>,
Char<'\x08'>, Char<'\x0B'>,
Char<'\x0C'>, Char<'\x7F'>,
Range<0x0E, 0x1F>,
SEQ < Char<'\xC2'>,
OR < Range<'\x80', '\x84'>,
Range<'\x86', '\x9F'>>>>;
using Utf8_ByteOrderMark =
SEQ < Char<'\xEF'>,
Char<'\xBB'>,
Char<'\xBF'>>;
using DocStart =
SEQ < Char<'-'>,
Char<'-'>,
Char<'-'>,
OR < BlankOrBreak, Empty >>;
using DocEnd =
SEQ < Char<'.'>,
Char<'.'>,
Char<'.'>,
OR < BlankOrBreak, Empty>>;
using BlockEntry =
SEQ < Char<'-'>,
OR < BlankOrBreak, Empty >>;
using Key = SEQ<Char<'?'>, BlankOrBreak>;
using KeyInFlow = SEQ<Char<'?'>, BlankOrBreak>;
using Value =
SEQ < Char<':'>,
OR < BlankOrBreak, Empty >>;
using ValueInFlow =
SEQ < Char<':'>,
OR < BlankOrBreak,
Char<','>,
Char<'}'>>>;
using ValueInJSONFlow = Char<':'>;
using Comment = Char<'#'>;
using Anchor = NOT<
OR < Char<'['>, Char<']'>,
Char<'{'>, Char<'}'>,
Char<','>,
BlankOrBreak>>;
using AnchorEnd =
OR < Char<'?'>, Char<':'>,
Char<','>, Char<']'>,
Char<'}'>, Char<'%'>,
Char<'@'>, Char<'`'>,
BlankOrBreak>;
using URI =
OR < Word,
Char<'#'>, Char<';'>, Char<'/'>, Char<'?'>, Char<':'>,
Char<'@'>, Char<'&'>, Char<'='>, Char<'+'>, Char<'$'>,
Char<','>, Char<'_'>, Char<'.'>, Char<'!'>, Char<'~'>,
Char<'*'>, Char<'\''>, Char<'('>, Char<')'>, Char<'['>,
Char<']'>,
SEQ < Char<'%'>, Hex, Hex>>;
using Tag =
OR < Word,
Char<'#'>, Char<';'>, Char<'/'>, Char<'?'>, Char<':'>,
Char<'@'>, Char<'&'>, Char<'='>, Char<'+'>, Char<'$'>,
Char<'_'>, Char<'.'>, Char<'~'>, Char<'*'>, Char<'\''>,
SEQ < Char <'%'>, Hex, Hex>>;
// Plain scalar rules:
// . Cannot start with a blank.
@ -146,59 +282,81 @@ inline const RegEx& Tag() {
// . In the block context - ? : must be not be followed with a space.
// . In the flow context ? is illegal and : and - must not be followed with a
// space.
inline const RegEx& PlainScalar() {
static const RegEx e =
!(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) ||
(RegEx("-?:", REGEX_OR) + (BlankOrBreak() || RegEx())));
return e;
}
inline const RegEx& PlainScalarInFlow() {
static const RegEx e =
!(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) ||
(RegEx("-:", REGEX_OR) + Blank()));
return e;
}
inline const RegEx& EndScalar() {
static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& EndScalarInFlow() {
static const RegEx e =
(RegEx(':') + (BlankOrBreak() || RegEx() || RegEx(",]}", REGEX_OR))) ||
RegEx(",?[]{}", REGEX_OR);
return e;
}
using PlainScalarCommon =
NOT < OR < BlankOrBreak,
Char<','>, Char<'['>, Char<']'>, Char<'{'>, Char<'}'>,
Char<'#'>, Char<'&'>, Char<'*'>, Char<'!'>, Char<'|'>,
Char<'>'>, Char<'\''>, Char<'\"'>, Char<'%'>, Char<'@'>,
Char<'`'>>>;
inline const RegEx& ScanScalarEndInFlow() {
static const RegEx e = (EndScalarInFlow() || (BlankOrBreak() + Comment()));
return e;
}
using PlainScalar =
NOT < SEQ < OR < Char<'-'>,
Char<'?'>,
Char<':'>>,
OR < BlankOrBreak,
Empty >>>;
inline const RegEx& ScanScalarEnd() {
static const RegEx e = EndScalar() || (BlankOrBreak() + Comment());
return e;
}
inline const RegEx& EscSingleQuote() {
static const RegEx e = RegEx("\'\'");
return e;
}
inline const RegEx& EscBreak() {
static const RegEx e = RegEx('\\') + Break();
return e;
}
using PlainScalarInFlow =
NOT < OR < Char<'?'>,
SEQ < OR < Char<'-'>,
Char<':'>>,
Blank >>>;
using EndScalar =
SEQ < Char<':'>,
OR < BlankOrBreak, Empty >>;
inline const RegEx& ChompIndicator() {
static const RegEx e = RegEx("+-", REGEX_OR);
return e;
}
inline const RegEx& Chomp() {
static const RegEx e = (ChompIndicator() + Digit()) ||
(Digit() + ChompIndicator()) || ChompIndicator() ||
Digit();
return e;
}
using EndScalarInFlow =
OR < SEQ < Char<':'>,
OR < BlankOrBreak,
Empty,
Char<','>,
Char<']'>,
Char<'}'>>>,
Char<','>,
Char<'?'>,
Char<'['>,
Char<']'>,
Char<'{'>,
Char<'}'>>;
using ChompIndicator = OR < Char<'+'>, Char<'-'> >;
using Chomp =
OR < SEQ < ChompIndicator, Digit >,
SEQ < Digit,ChompIndicator >,
ChompIndicator,
Digit>;
} // end detail
using Tab = Matcher<detail::Tab>;
using Blank = Matcher<detail::Blank>;
using Break = Matcher<detail::Break>;
using Digit = Matcher<detail::Digit>;
using BlankOrBreak = Matcher<detail::BlankOrBreak>;
using Word = Matcher<detail::Word>;
using DocStart = Matcher<detail::DocStart>;
using DocEnd = Matcher<detail::DocEnd>;
using BlockEntry = Matcher<detail::BlockEntry>;
using Key = Matcher<detail::Key>;
using KeyInFlow = Matcher<detail::KeyInFlow>;
using Value = Matcher<detail::Value>;
using ValueInFlow = Matcher<detail::ValueInFlow>;
using ValueInJSONFlow = Matcher<detail::ValueInJSONFlow>;
using Comment = Matcher<detail::Comment>;
using Anchor = Matcher<detail::Anchor>;
using AnchorEnd = Matcher<detail::AnchorEnd>;
using URI = Matcher<detail::URI>;
using Tag = Matcher<detail::Tag>;
using PlainScalarCommon = Matcher<detail::PlainScalarCommon>;
using PlainScalar = Matcher<detail::PlainScalar>;
using PlainScalarInFlow = Matcher<detail::PlainScalarInFlow>;
using EscSingleQuote = Matcher<SEQ < Char<'\''>, Char<'\''> >>;
using EscBreak = Matcher<SEQ < Char<'\\'>, detail::Break >>;
using Chomp = Matcher<detail::Chomp>;
// and some functions
std::string Escape(Stream& in);
}

View File

@ -1,45 +0,0 @@
#include "regex_yaml.h"
namespace YAML {
// constructors
//
// Every constructor initializes all data members. m_a and m_z are only
// meaningful for REGEX_MATCH / REGEX_RANGE, but they are zeroed for the other
// operators so that copying a RegEx never reads an indeterminate value.

// The empty expression (REGEX_EMPTY).
RegEx::RegEx() : m_op(REGEX_EMPTY), m_a(0), m_z(0), m_params() {}

// Bare operator node without operands; operands are appended to m_params by
// the combination operators.
RegEx::RegEx(REGEX_OP op) : m_op(op), m_a(0), m_z(0), m_params() {}

// Matches exactly the character `ch` (REGEX_MATCH).
RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch), m_z(0), m_params() {}

// Matches any character in the inclusive range [a, z] (REGEX_RANGE).
RegEx::RegEx(char a, char z)
    : m_op(REGEX_RANGE), m_a(a), m_z(z), m_params() {}

// Builds an `op` node whose operands are the individual characters of `str`,
// in order (one REGEX_MATCH child per character).
RegEx::RegEx(const std::string& str, REGEX_OP op)
    : m_op(op), m_a(0), m_z(0), m_params() {
  // The operand count is known up front: allocate once.
  m_params.reserve(str.size());
  for (std::size_t i = 0; i < str.size(); i++) {
    m_params.push_back(RegEx(str[i]));
  }
}
// combination constructors
//
// Each operator builds a new operator node and stores copies of its
// operand(s) in m_params, in left-to-right order.

// Negation: a REGEX_NOT node with `ex` as its only operand.
RegEx operator!(const RegEx& ex) {
  RegEx negated(REGEX_NOT);
  negated.m_params.push_back(ex);
  return negated;
}

// Alternation: a REGEX_OR node over (ex1, ex2).
RegEx operator||(const RegEx& ex1, const RegEx& ex2) {
  RegEx either(REGEX_OR);
  either.m_params.push_back(ex1);
  either.m_params.push_back(ex2);
  return either;
}

// Conjunction: a REGEX_AND node over (ex1, ex2).
RegEx operator&&(const RegEx& ex1, const RegEx& ex2) {
  RegEx both(REGEX_AND);
  both.m_params.push_back(ex1);
  both.m_params.push_back(ex2);
  return both;
}

// Concatenation: a REGEX_SEQ node over (ex1, ex2).
RegEx operator+(const RegEx& ex1, const RegEx& ex2) {
  RegEx sequence(REGEX_SEQ);
  sequence.m_params.push_back(ex1);
  sequence.m_params.push_back(ex2);
  return sequence;
}
}

View File

@ -1,87 +0,0 @@
#ifndef REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif
#include <string>
#include <vector>
#include "yaml-cpp/dll.h"
namespace YAML {
class Stream;
// Operation performed by a RegEx node. RegEx::MatchUnchecked dispatches on
// this value to the corresponding MatchOp* member.
enum REGEX_OP {
REGEX_EMPTY,  // matches zero characters (see MatchOpEmpty)
REGEX_MATCH,  // matches the single character m_a
REGEX_RANGE,  // matches one character in the inclusive range [m_a, m_z]
REGEX_OR,     // first operand that matches wins
REGEX_AND,    // all operands must match; length of the first is returned
REGEX_NOT,    // matches one character when the operand does not match
REGEX_SEQ     // operands matched one after another
};
// simplified regular expressions
// . Only straightforward matches (no repeated characters)
// . Only matches from start of string
// A node of a small expression tree: an operation (m_op), up to two
// characters (m_a, m_z) and a list of operand expressions (m_params).
class YAML_CPP_API RegEx {
public:
// The empty expression (REGEX_EMPTY).
RegEx();
// Matches exactly `ch` (REGEX_MATCH).
RegEx(char ch);
// Matches one character in the range [a, z] (REGEX_RANGE).
RegEx(char a, char z);
// Applies `op` to the individual characters of `str`; by default the
// characters must appear in sequence.
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
~RegEx() {}
// Combination operators: each builds a new node (NOT / OR / AND / SEQ) that
// holds copies of its operands.
friend YAML_CPP_API RegEx operator!(const RegEx& ex);
friend YAML_CPP_API RegEx operator||(const RegEx& ex1, const RegEx& ex2);
friend YAML_CPP_API RegEx operator&&(const RegEx& ex1, const RegEx& ex2);
friend YAML_CPP_API RegEx operator+(const RegEx& ex1, const RegEx& ex2);
// Matches*: true when the corresponding Match call returns a value >= 0.
bool Matches(char ch) const;
bool Matches(const std::string& str) const;
bool Matches(const Stream& in) const;
template <typename Source>
bool Matches(const Source& source) const;
// Match*: number of characters matched from the start of the input, or -1
// when the expression does not match.
int Match(const std::string& str) const;
int Match(const Stream& in) const;
template <typename Source>
int Match(const Source& source) const;
private:
// Creates an operator node without operands; used by the combination
// operators above.
RegEx(REGEX_OP op);
// Validity check performed by Match() before MatchUnchecked() is called.
template <typename Source>
bool IsValidSource(const Source& source) const;
// Dispatches on m_op to one of the MatchOp* members below, without checking
// the source first.
template <typename Source>
int MatchUnchecked(const Source& source) const;
// One implementation per REGEX_OP value.
template <typename Source>
int MatchOpEmpty(const Source& source) const;
template <typename Source>
int MatchOpMatch(const Source& source) const;
template <typename Source>
int MatchOpRange(const Source& source) const;
template <typename Source>
int MatchOpOr(const Source& source) const;
template <typename Source>
int MatchOpAnd(const Source& source) const;
template <typename Source>
int MatchOpNot(const Source& source) const;
template <typename Source>
int MatchOpSeq(const Source& source) const;
private:
// Operation of this node.
REGEX_OP m_op;
// Character for REGEX_MATCH (m_a) and range bounds for REGEX_RANGE
// (m_a..m_z).
char m_a, m_z;
// Operands of REGEX_OR / REGEX_AND / REGEX_NOT / REGEX_SEQ nodes.
std::vector<RegEx> m_params;
};
}
#include "regeximpl.h"
#endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66

View File

@ -1,186 +0,0 @@
#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif
#include "stream.h"
#include "stringsource.h"
#include "streamcharsource.h"
namespace YAML {
// query matches
// Single character: tested as a one-character string.
inline bool RegEx::Matches(char ch) const {
  return Matches(std::string(1, ch));
}

// The remaining overloads all report whether the corresponding Match()
// overload returned a non-negative length.
inline bool RegEx::Matches(const std::string& str) const {
  return !(Match(str) < 0);
}

inline bool RegEx::Matches(const Stream& in) const {
  return !(Match(in) < 0);
}

template <typename Source>
inline bool RegEx::Matches(const Source& source) const {
  return !(Match(source) < 0);
}
// Match
// . Matches the given string against this regular expression.
// . Returns the number of characters matched.
// . Returns -1 if no characters were matched (the reason for
// not returning zero is that we may have an empty regex
// which is ALWAYS successful at matching zero characters).
// . REMEMBER that we only match from the start of the buffer!
inline int RegEx::Match(const std::string& str) const {
StringCharSource source(str.c_str(), str.size());
return Match(source);
}
inline int RegEx::Match(const Stream& in) const {
StreamCharSource source(in);
return Match(source);
}
// Generic sources are valid exactly when they convert to true.
template <typename Source>
inline bool RegEx::IsValidSource(const Source& source) const {
  const bool valid = source;
  return valid;
}

// String sources: only the single-character operators (REGEX_MATCH and
// REGEX_RANGE) require the source to test true; every other operator is
// always allowed to run.
template <>
inline bool RegEx::IsValidSource<StringCharSource>(
    const StringCharSource& source) const {
  const bool needsCharacter = (m_op == REGEX_MATCH || m_op == REGEX_RANGE);
  if (!needsCharacter) {
    return true;
  }
  return source;
}
// Generic entry point: checks the source first and only then runs the
// expression. An invalid source yields -1 (no match).
template <typename Source>
inline int RegEx::Match(const Source& source) const {
  if (!IsValidSource(source)) {
    return -1;
  }
  return MatchUnchecked(source);
}
// Runs the operation selected by m_op without validating the source.
// Returns what the selected MatchOp* member returns, or -1 when m_op holds
// none of the known REGEX_OP values.
template <typename Source>
inline int RegEx::MatchUnchecked(const Source& source) const {
  if (m_op == REGEX_EMPTY) {
    return MatchOpEmpty(source);
  }
  if (m_op == REGEX_MATCH) {
    return MatchOpMatch(source);
  }
  if (m_op == REGEX_RANGE) {
    return MatchOpRange(source);
  }
  if (m_op == REGEX_OR) {
    return MatchOpOr(source);
  }
  if (m_op == REGEX_AND) {
    return MatchOpAnd(source);
  }
  if (m_op == REGEX_NOT) {
    return MatchOpNot(source);
  }
  if (m_op == REGEX_SEQ) {
    return MatchOpSeq(source);
  }
  return -1;
}
//////////////////////////////////////////////////////////////////////////////
// Operators
// Note: the convention MatchOp*<Source> is that we can assume
// IsSourceValid(source).
// So we do all our checks *before* we call these functions
// EmptyOperator
// Generic sources: matches zero characters when the current character is
// Stream::eof(), fails otherwise.
template <typename Source>
inline int RegEx::MatchOpEmpty(const Source& source) const {
  if (source[0] == Stream::eof()) {
    return 0;
  }
  return -1;
}

// String sources: the empty regex only is successful on the empty string,
// i.e. when the source tests false.
template <>
inline int RegEx::MatchOpEmpty<StringCharSource>(
    const StringCharSource& source) const {
  if (!source) {
    return 0;
  }
  return -1;
}
// MatchOperator
// One character matched (1) when the current character is m_a, -1 otherwise.
template <typename Source>
inline int RegEx::MatchOpMatch(const Source& source) const {
  return (source[0] != m_a) ? -1 : 1;
}
// RangeOperator
// One character matched (1) when the current character lies in the inclusive
// range [m_a, m_z], -1 otherwise.
template <typename Source>
inline int RegEx::MatchOpRange(const Source& source) const {
  const bool outside = (m_a > source[0] || m_z < source[0]);
  return outside ? -1 : 1;
}
// OrOperator
template <typename Source>
inline int RegEx::MatchOpOr(const Source& source) const {
for (std::size_t i = 0; i < m_params.size(); i++) {
int n = m_params[i].MatchUnchecked(source);
if (n >= 0)
return n;
}
return -1;
}
// AndOperator
// Note: 'AND' is a little funny, since we may be required to match things
// of different lengths. If we find a match, we return the length of
// the FIRST entry on the list.
template <typename Source>
inline int RegEx::MatchOpAnd(const Source& source) const {
int first = -1;
for (std::size_t i = 0; i < m_params.size(); i++) {
int n = m_params[i].MatchUnchecked(source);
if (n == -1)
return -1;
if (i == 0)
first = n;
}
return first;
}
// NotOperator
// Succeeds with a one-character match when the (first) operand does not
// match. Fails when there is no operand or when the operand matches.
template <typename Source>
inline int RegEx::MatchOpNot(const Source& source) const {
  const bool operandMatches =
      !m_params.empty() && m_params[0].MatchUnchecked(source) >= 0;
  if (m_params.empty() || operandMatches) {
    return -1;
  }
  return 1;
}
// SeqOperator
template <typename Source>
inline int RegEx::MatchOpSeq(const Source& source) const {
int offset = 0;
for (std::size_t i = 0; i < m_params.size(); i++) {
int n = m_params[i].Match(source + offset); // note Match, not
// MatchUnchecked because we
// need to check validity after
// the offset
if (n == -1)
return -1;
offset += n;
}
return offset;
}
}
#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66

View File

@ -103,11 +103,11 @@ void Scanner::ScanNextToken() {
}
// document token
if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
if (INPUT.column() == 0 && Exp::DocStart::Matches(INPUT)) {
return ScanDocStart();
}
if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
if (INPUT.column() == 0 && Exp::DocEnd::Matches(INPUT)) {
return ScanDocEnd();
}
@ -126,15 +126,18 @@ void Scanner::ScanNextToken() {
}
// block/map stuff
if (Exp::BlockEntry().Matches(INPUT)) {
if (Exp::BlockEntry::Matches(INPUT)) {
return ScanBlockEntry();
}
if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
if (InBlockContext() ? Exp::Key::Matches(INPUT) : Exp::KeyInFlow::Matches(INPUT)) {
return ScanKey();
}
if (GetValueRegex().Matches(INPUT)) {
if ((InBlockContext() && Exp::Value::Matches(INPUT)) ||
(m_canBeJSONFlow ?
Exp::ValueInJSONFlow::Matches(INPUT) :
Exp::ValueInFlow::Matches(INPUT))) {
return ScanValue();
}
@ -158,10 +161,13 @@ void Scanner::ScanNextToken() {
return ScanQuotedScalar();
}
// plain scalars
if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
.Matches(INPUT)) {
return ScanPlainScalar();
if (Exp::PlainScalarCommon::Matches(INPUT)) {
// plain scalars
if (InBlockContext() ?
Exp::PlainScalar::Matches(INPUT) :
Exp::PlainScalarInFlow::Matches(INPUT)) {
return ScanPlainScalar();
}
}
// don't know what it is!
@ -172,27 +178,27 @@ void Scanner::ScanToNextToken() {
while (1) {
// first eat whitespace
while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
if (InBlockContext() && Exp::Tab::Matches(INPUT)) {
m_simpleKeyAllowed = false;
}
INPUT.eat(1);
}
// then eat a comment
if (Exp::Comment().Matches(INPUT)) {
if (Exp::Comment::Matches(INPUT)) {
// eat until line break
while (INPUT && !Exp::Break().Matches(INPUT)) {
while (INPUT && !Exp::Break::Matches(INPUT)) {
INPUT.eat(1);
}
}
// if it's NOT a line break, then we're done!
if (!Exp::Break().Matches(INPUT)) {
if (!Exp::Break::Matches(INPUT)) {
break;
}
// otherwise, let's eat the line break and keep going
int n = Exp::Break().Match(INPUT);
int n = Exp::Break::Match(INPUT);
INPUT.eat(n);
// oh yeah, and let's get rid of that simple key
@ -229,13 +235,6 @@ bool Scanner::IsWhitespaceToBeEaten(char ch) {
return false;
}
const RegEx& Scanner::GetValueRegex() const {
if (InBlockContext()) {
return Exp::Value();
}
return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
}
void Scanner::StartStream() {
m_startedStream = true;
@ -322,7 +321,7 @@ void Scanner::PopIndentToHere() {
}
if (indent.column == INPUT.column() &&
!(indent.type == IndentMarker::SEQ &&
!Exp::BlockEntry().Matches(INPUT))) {
!Exp::BlockEntry::Matches(INPUT))) {
break;
}

View File

@ -133,10 +133,6 @@ class Scanner {
bool IsWhitespaceToBeEaten(char ch);
/**
* Returns the appropriate regex to check if the next token is a value token.
*/
const RegEx &GetValueRegex() const;
struct SimpleKey {
SimpleKey(const Mark &mark_, std::size_t flowLevel_);

View File

@ -3,11 +3,78 @@
#include <algorithm>
#include "exp.h"
#include "regeximpl.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
namespace YAML {
// End-condition callback that never reports a match: it ignores the stream
// and always returns -1.
int ScanScalar::MatchScalarEmpty(const Stream&) {
  // This is checked by !INPUT as well
  const int noMatch = -1;
  return noMatch;
}
int ScanScalar::MatchScalarSingleQuoted(const Stream& in) {
using namespace Exp;
return (Matcher<Char<'\''>>::Matches(in) &&
!EscSingleQuote::Matches(in)) ? 1 : -1;
}
int ScanScalar::MatchScalarDoubleQuoted(const Stream& in) {
using namespace Exp;
return Matcher<Char<'\"'>>::Match(in);
}
int ScanScalar::MatchScalarEnd(const Stream& in) {
using namespace Exp;
using ScalarEnd = Matcher<
OR < SEQ < Char<':'>,
OR < detail::BlankOrBreak,
Empty >>,
SEQ < detail::BlankOrBreak,
detail::Comment>>>;
return ScalarEnd::Match(in);
}
int ScanScalar::MatchScalarEndInFlow(const Stream& in) {
using namespace Exp;
using ScalarEndInFlow = Matcher <
OR < SEQ < Char<':'>,
OR < detail::BlankOrBreak,
Char<','>,
Char<']'>,
Char<'}'>,
Empty >>,
Char<','>,
Char<'?'>,
Char<'['>,
Char<']'>,
Char<'{'>,
Char<'}'>,
SEQ < detail::BlankOrBreak,
detail::Comment>>>;
return ScalarEndInFlow::Match(in);
}
bool ScanScalar::MatchDocIndicator(const Stream& in) {
using namespace Exp;
using DocIndicator = Matcher<OR <detail::DocStart, detail::DocEnd>>;
return DocIndicator::Matches(in);
}
// Handles a document indicator found while scanning a scalar.
//
// Returns true when an indicator is present and params.onDocIndicator is
// BREAK. Returns false when no indicator is present, or when one is present
// but the policy is neither BREAK nor THROW.
//
// Throws ParserException (ErrorMsg::DOC_IN_SCALAR, at the current mark) when
// an indicator is present and params.onDocIndicator is THROW.
bool ScanScalar::CheckDocIndicator(Stream& INPUT, ScanScalarParams& params) {
  if (!MatchDocIndicator(INPUT)) {
    return false;
  }
  if (params.onDocIndicator == BREAK) {
    return true;
  }
  if (params.onDocIndicator == THROW) {
    throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
  }
  return false;
}
// ScanScalar
// . This is where the scalar magic happens.
//
@ -18,7 +85,7 @@ namespace YAML {
//
// . Depending on the parameters given, we store or stop
// at different places in the above flow.
std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
std::string ScanScalar::Apply(Stream& INPUT, ScanScalarParams& params) {
bool foundNonEmptyLine = false;
bool pastOpeningBreak = (params.fold == FOLD_FLOW);
bool emptyLine = false, moreIndented = false;
@ -28,58 +95,68 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
std::string scalar;
params.leadingSpaces = false;
if (!params.end) {
params.end = &Exp::Empty();
}
while (INPUT) {
// ********************************
// Phase #1: scan until line ending
std::size_t lastNonWhitespaceChar = scalar.size();
bool escapedNewline = false;
while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
std::size_t lastNonWhitespaceChar = scalar.size();
while (1) {
// find end position
if (params.end(INPUT) >= 0) {
break;
}
if (!INPUT) {
break;
}
// document indicator?
if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
if (params.onDocIndicator == BREAK) {
break;
} else if (params.onDocIndicator == THROW) {
throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
}
// find break position
char ch = INPUT.peek();
bool isWhiteSpace = (ch == ' ' || ch == '\t');
if (!isWhiteSpace) {
if (ch == '\n' || (ch == '\r' && Exp::Break::Matches(INPUT))) {
break;
}
// document indicator?
if (INPUT.column() == 0 && CheckDocIndicator(INPUT, params)) {
break;
}
}
foundNonEmptyLine = true;
pastOpeningBreak = true;
// escaped newline? (only if we're escaping on slash)
if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
// eat escape character and get out (but preserve trailing whitespace!)
INPUT.get();
lastNonWhitespaceChar = scalar.size();
lastEscapedChar = scalar.size();
escapedNewline = true;
break;
}
if (params.escape != ch) {
// just add the character
scalar += ch;
INPUT.eat();
// escape this?
if (INPUT.peek() == params.escape) {
scalar += Exp::Escape(INPUT);
lastNonWhitespaceChar = scalar.size();
lastEscapedChar = scalar.size();
continue;
}
if (!isWhiteSpace) {
lastNonWhitespaceChar = scalar.size();
}
// otherwise, just add the damn character
char ch = INPUT.get();
scalar += ch;
if (ch != ' ' && ch != '\t') {
lastNonWhitespaceChar = scalar.size();
} else {
// escaped newline? (only if we're escaping on slash)
if (params.escape == '\\' && Exp::EscBreak::Matches(INPUT)) {
// eat escape character and get out (but preserve trailing whitespace!)
INPUT.eat();
lastNonWhitespaceChar = scalar.size();
lastEscapedChar = scalar.size();
escapedNewline = true;
break;
} else {
scalar += Exp::Escape(INPUT);
lastNonWhitespaceChar = scalar.size();
lastEscapedChar = scalar.size();
}
}
}
} // end while(1)
// eof? if we're looking to eat something, then we throw
if (!INPUT) {
@ -90,14 +167,14 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
}
// doc indicator?
if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
Exp::DocIndicator().Matches(INPUT)) {
if (params.onDocIndicator == BREAK &&
INPUT.column() == 0 &&
MatchDocIndicator(INPUT)) {
break;
}
// are we done via character match?
int n = params.end->Match(INPUT);
if (n >= 0) {
if (int n = params.end(INPUT) >= 0) {
if (params.eatEnd) {
INPUT.eat(n);
}
@ -110,9 +187,9 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
// ********************************
// Phase #2: eat line ending
n = Exp::Break().Match(INPUT);
INPUT.eat(n);
if (int n = Exp::Break::Match(INPUT)) {
INPUT.eat(n);
}
// ********************************
// Phase #3: scan initial spaces
@ -120,7 +197,7 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
while (INPUT.peek() == ' ' &&
(INPUT.column() < params.indent ||
(params.detectIndent && !foundNonEmptyLine)) &&
!params.end->Matches(INPUT)) {
!(params.end(INPUT) >= 0)) {
INPUT.eat(1);
}
@ -130,9 +207,9 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
}
// and then the rest of the whitespace
while (Exp::Blank().Matches(INPUT)) {
for (char c = INPUT.peek(); (c == ' ' || c == '\t'); c = INPUT.peek()) {
// we check for tabs that masquerade as indentation
if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
if (c == '\t' && INPUT.column() < params.indent &&
params.onTabInIndentation == THROW) {
throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
}
@ -141,7 +218,7 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
break;
}
if (params.end->Matches(INPUT)) {
if (params.end(INPUT) >= 0) {
break;
}
@ -149,8 +226,8 @@ std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
}
// was this an empty line?
bool nextEmptyLine = Exp::Break().Matches(INPUT);
bool nextMoreIndented = Exp::Blank().Matches(INPUT);
bool nextEmptyLine = Exp::Break::Matches(INPUT);
bool nextMoreIndented = Exp::Blank::Matches(INPUT);
if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
foldedNewlineStartedMoreIndented = moreIndented;

View File

@ -8,8 +8,8 @@
#endif
#include <string>
#include <functional>
#include "regex_yaml.h"
#include "stream.h"
namespace YAML {
@ -19,8 +19,7 @@ enum FOLD { DONT_FOLD, FOLD_BLOCK, FOLD_FLOW };
struct ScanScalarParams {
ScanScalarParams()
: end(nullptr),
eatEnd(false),
: eatEnd(false),
indent(0),
detectIndent(false),
eatLeadingWhitespace(0),
@ -33,8 +32,7 @@ struct ScanScalarParams {
leadingSpaces(false) {}
// input:
const RegEx* end; // what condition ends this scalar?
// unowned.
std::function<int(const Stream& in)> end; // what condition ends this scalar?
bool eatEnd; // should we eat that condition when we see it?
int indent; // what level of indentation should be eaten and ignored?
bool detectIndent; // should we try to autodetect the indent?
@ -57,7 +55,25 @@ struct ScanScalarParams {
bool leadingSpaces;
};
std::string ScanScalar(Stream& INPUT, ScanScalarParams& info);
// Groups the scalar-scanning entry point with the end-condition matchers the
// scanner plugs into ScanScalarParams::end. Every Match* function has the
// signature int(const Stream&) required by that member; callers treat a
// result >= 0 as "the end condition matches here".
struct ScanScalar {
// End condition installed by Scanner::ScanBlockScalar.
static int MatchScalarEmpty(const Stream& in);
// End conditions installed by Scanner::ScanQuotedScalar for single- and
// double-quoted scalars respectively.
static int MatchScalarSingleQuoted(const Stream& in);
static int MatchScalarDoubleQuoted(const Stream& in);
// End conditions installed by Scanner::ScanPlainScalar: the first outside a
// flow context, the second inside one.
static int MatchScalarEnd(const Stream& in);
static int MatchScalarEndInFlow(const Stream& in);
// Scans a scalar from INPUT according to `info` and returns its text.
// Called by the scanner as ScanScalar::Apply(INPUT, params).
static std::string Apply(Stream& INPUT, ScanScalarParams& info);
private:
// Used by the scan loop, at column 0 with onDocIndicator == BREAK, to decide
// whether to stop scanning.
static bool MatchDocIndicator(const Stream& in);
// NOTE(review): no call site is visible in this chunk; presumably applies the
// params.onDocIndicator policy — confirm against the implementation.
static bool CheckDocIndicator(Stream& INPUT, ScanScalarParams& params);
};
}
#endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66

View File

@ -1,6 +1,4 @@
#include "exp.h"
#include "regex_yaml.h"
#include "regeximpl.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
#include "yaml-cpp/mark.h"
@ -19,7 +17,7 @@ const std::string ScanVerbatimTag(Stream& INPUT) {
return tag;
}
int n = Exp::URI().Match(INPUT);
int n = Exp::URI::Match(INPUT);
if (n <= 0)
break;
@ -43,7 +41,7 @@ const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) {
int n = 0;
if (canBeHandle) {
n = Exp::Word().Match(INPUT);
n = Exp::Word::Match(INPUT);
if (n <= 0) {
canBeHandle = false;
firstNonWordChar = INPUT.mark();
@ -51,7 +49,7 @@ const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) {
}
if (!canBeHandle)
n = Exp::Tag().Match(INPUT);
n = Exp::Tag::Match(INPUT);
if (n <= 0)
break;
@ -66,7 +64,7 @@ const std::string ScanTagSuffix(Stream& INPUT) {
std::string tag;
while (INPUT) {
int n = Exp::Tag().Match(INPUT);
int n = Exp::Tag::Match(INPUT);
if (n <= 0)
break;

View File

@ -1,8 +1,6 @@
#include <sstream>
#include "exp.h"
#include "regex_yaml.h"
#include "regeximpl.h"
#include "scanner.h"
#include "scanscalar.h"
#include "scantag.h" // IWYU pragma: keep
@ -33,22 +31,22 @@ void Scanner::ScanDirective() {
INPUT.eat(1);
// read name
while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
while (INPUT && !Exp::BlankOrBreak::Matches(INPUT))
token.value += INPUT.get();
// read parameters
while (1) {
// first get rid of whitespace
while (Exp::Blank().Matches(INPUT))
while (Exp::Blank::Matches(INPUT))
INPUT.eat(1);
// break on newline or comment
if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
if (!INPUT || Exp::Break::Matches(INPUT) || Exp::Comment::Matches(INPUT))
break;
// now read parameter
std::string param;
while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
while (INPUT && !Exp::BlankOrBreak::Matches(INPUT))
param += INPUT.get();
token.params.push_back(param);
@ -233,7 +231,7 @@ void Scanner::ScanAnchorOrAlias() {
alias = (indicator == Keys::Alias);
// now eat the content
while (INPUT && Exp::Anchor().Matches(INPUT))
while (INPUT && Exp::Anchor::Matches(INPUT))
name += INPUT.get();
// we need to have read SOMETHING!
@ -242,7 +240,7 @@ void Scanner::ScanAnchorOrAlias() {
: ErrorMsg::ANCHOR_NOT_FOUND);
// and needs to end correctly
if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
if (INPUT && !Exp::AnchorEnd::Matches(INPUT))
throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
: ErrorMsg::CHAR_IN_ANCHOR);
@ -291,14 +289,19 @@ void Scanner::ScanTag() {
m_tokens.push(token);
}
// PlainScalar
void Scanner::ScanPlainScalar() {
std::string scalar;
// set up the scanning parameters
ScanScalarParams params;
params.end =
(InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
if (InFlowContext()) {
params.end = ScanScalar::MatchScalarEndInFlow;
} else {
params.end = ScanScalar::MatchScalarEnd;
}
params.eatEnd = false;
params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
params.fold = FOLD_FLOW;
@ -312,7 +315,7 @@ void Scanner::ScanPlainScalar() {
InsertPotentialSimpleKey();
Mark mark = INPUT.mark();
scalar = ScanScalar(INPUT, params);
scalar = ScanScalar::Apply(INPUT, params);
// can have a simple key only if we ended the scalar by starting a new line
m_simpleKeyAllowed = params.leadingSpaces;
@ -327,6 +330,7 @@ void Scanner::ScanPlainScalar() {
m_tokens.push(token);
}
// QuotedScalar
void Scanner::ScanQuotedScalar() {
std::string scalar;
@ -338,8 +342,11 @@ void Scanner::ScanQuotedScalar() {
// setup the scanning parameters
ScanScalarParams params;
RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
params.end = &end;
if (single) {
params.end = ScanScalar::MatchScalarSingleQuoted;
} else {
params.end = ScanScalar::MatchScalarDoubleQuoted;
}
params.eatEnd = true;
params.escape = (single ? '\'' : '\\');
params.indent = 0;
@ -358,7 +365,7 @@ void Scanner::ScanQuotedScalar() {
INPUT.get();
// and scan
scalar = ScanScalar(INPUT, params);
scalar = ScanScalar::Apply(INPUT, params);
m_simpleKeyAllowed = false;
m_canBeJSONFlow = true;
@ -367,6 +374,8 @@ void Scanner::ScanQuotedScalar() {
m_tokens.push(token);
}
// BlockScalarToken
// . These need a little extra processing beforehand.
// . We need to scan the line where the indicator is (this doesn't count as part
@ -379,6 +388,8 @@ void Scanner::ScanBlockScalar() {
params.indent = 1;
params.detectIndent = true;
params.end = ScanScalar::MatchScalarEmpty;
// eat block indicator ('|' or '>')
Mark mark = INPUT.mark();
char indicator = INPUT.get();
@ -386,14 +397,14 @@ void Scanner::ScanBlockScalar() {
// eat chomping/indentation indicators
params.chomp = CLIP;
int n = Exp::Chomp().Match(INPUT);
int n = Exp::Chomp::Match(INPUT);
for (int i = 0; i < n; i++) {
char ch = INPUT.get();
if (ch == '+')
params.chomp = KEEP;
else if (ch == '-')
params.chomp = STRIP;
else if (Exp::Digit().Matches(ch)) {
else if (Exp::Digit::Matches(ch)) {
if (ch == '0')
throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
@ -403,16 +414,16 @@ void Scanner::ScanBlockScalar() {
}
// now eat whitespace
while (Exp::Blank().Matches(INPUT))
while (Exp::Blank::Matches(INPUT))
INPUT.eat(1);
// and comments to the end of the line
if (Exp::Comment().Matches(INPUT))
while (INPUT && !Exp::Break().Matches(INPUT))
if (Exp::Comment::Matches(INPUT))
while (INPUT && !Exp::Break::Matches(INPUT))
INPUT.eat(1);
// if it's not a line break, then we ran into a bad character inline
if (INPUT && !Exp::Break().Matches(INPUT))
if (INPUT && !Exp::Break::Matches(INPUT))
throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
// set the initial indentation
@ -423,7 +434,7 @@ void Scanner::ScanBlockScalar() {
params.trimTrailingSpaces = false;
params.onTabInIndentation = THROW;
scalar = ScanScalar(INPUT, params);
scalar = ScanScalar::Apply(INPUT, params);
// simple keys always ok after block scalars (since we're gonna start a new
// line anyways)

View File

@ -32,7 +32,7 @@ class Stream : private noncopyable {
std::string get(int n);
void eat(int n = 1);
static char eof() { return 0x04; }
static constexpr char eof() { return 0x04; }
const Mark mark() const { return m_mark; }
int pos() const { return m_mark.pos; }

View File

@ -13,36 +13,45 @@
namespace YAML {
class StreamCharSource {
public:
StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {}
StreamCharSource(const StreamCharSource& source)
: m_offset(source.m_offset), m_stream(source.m_stream) {}
StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {
if (m_stream.ReadAheadTo(0)){
m_char = m_stream.peek();
} else {
m_char = Stream::eof();
}
}
~StreamCharSource() {}
operator bool() const;
inline operator bool() const { return m_char != Stream::eof(); }
char operator[](std::size_t i) const { return m_stream.CharAt(m_offset + i); }
char get() const { return m_char; }
bool operator!() const { return !static_cast<bool>(*this); }
const StreamCharSource operator+(int i) const;
// Returns a source over the same stream moved by `i` characters relative to
// this one. A move that would make the offset negative is clamped to
// offset 0.
const StreamCharSource operator+(int i) const {
  // Advance by i (not by a fixed 1), matching the previous out-of-line
  // implementation that did `m_offset += i`.
  const int shifted = static_cast<int>(m_offset) + i;
  return StreamCharSource(
      *this, shifted >= 0 ? static_cast<std::size_t>(shifted) : 0);
}
private:
std::size_t m_offset;
const Stream& m_stream;
char m_char;
StreamCharSource& operator=(const StreamCharSource&); // non-assignable
// Builds a source over the same stream as `source`, positioned at `offset`.
// The character at that offset is cached; when ReadAheadTo(offset) reports
// false the cached character is Stream::eof().
StreamCharSource(const StreamCharSource& source, size_t offset)
    : m_offset(offset),
      m_stream(source.m_stream),
      m_char(source.m_stream.ReadAheadTo(offset)
                 ? source.m_stream.CharAt(offset)
                 : Stream::eof()) {}
};
inline StreamCharSource::operator bool() const {
return m_stream.ReadAheadTo(m_offset);
}
inline const StreamCharSource StreamCharSource::operator+(int i) const {
StreamCharSource source(*this);
if (static_cast<int>(source.m_offset) + i >= 0)
source.m_offset += i;
else
source.m_offset = 0;
return source;
}
}
#endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66

View File

@ -16,7 +16,9 @@ class StringCharSource {
: m_str(str), m_size(size), m_offset(0) {}
operator bool() const { return m_offset < m_size; }
char operator[](std::size_t i) const { return m_str[m_offset + i]; }
// Returns the character `i` positions past the current offset, or 0x04 (EOF)
// when there is no underlying buffer (m_str is null).
// NOTE(review): only a null m_str is guarded; m_offset + i is not compared
// against m_size, so an index past the end of the buffer is not caught here.
// Confirm that callers test operator bool() first, or that a bounds check
// was intended.
char operator[](std::size_t i) const {
return m_str ? m_str[m_offset + i] : 0x04; // EOF
}
bool operator!() const { return !static_cast<bool>(*this); }
const StringCharSource operator+(int i) const {
@ -38,6 +40,8 @@ class StringCharSource {
return *this;
}
char get() const { return m_str[m_offset]; }
private:
const char* m_str;
std::size_t m_size;

View File

@ -26,7 +26,7 @@ file(GLOB test_new_api_sources new-api/[a-z]*.cpp)
list(APPEND test_sources ${test_new_api_sources})
add_sources(${test_sources} ${test_headers})
include_directories(${YAML_CPP_SOURCE_DIR}/test)
include_directories(${YAML_CPP_SOURCE_DIR}/test ${YAML_CPP_SOURCE_DIR}/src)
add_executable(run-tests
${test_sources}

View File

@ -1,179 +1,236 @@
#if 0
#include "gtest/gtest.h"
#include "regex_yaml.h"
#include "exp.h"
#include "stream.h"
using YAML::RegEx;
using namespace YAML::Exp;
using YAML::Stream;
namespace {
const auto MIN_CHAR = Stream::eof() + 1;
constexpr char MIN_CHAR = Stream::eof() + 1;
constexpr char MAX_CHAR = 127;
TEST(RegExTest, Empty) {
RegEx empty;
EXPECT_TRUE(empty.Matches(std::string()));
EXPECT_EQ(0, empty.Match(std::string()));
using empty = Matcher<Empty>;
EXPECT_TRUE(empty::Matches(std::string()));
EXPECT_EQ(0, empty::Match(std::string()));
for (int i = MIN_CHAR; i < 128; ++i) {
auto str = std::string(1, char(i));
EXPECT_FALSE(empty.Matches(str));
EXPECT_EQ(-1, empty.Match(str));
EXPECT_FALSE(empty::Matches(str));
EXPECT_EQ(-1, empty::Match(str));
}
}
TEST(RegExTest, Range) {
for (int i = MIN_CHAR; i < 128; ++i) {
for (int j = MIN_CHAR; j < 128; ++j) {
RegEx ex((char)i, (char)j);
for (int k = MIN_CHAR; k < 128; ++k) {
auto str = std::string(1, char(k));
if (i <= k && k <= j) {
EXPECT_TRUE(ex.Matches(str));
EXPECT_EQ(1, ex.Match(str));
} else {
EXPECT_FALSE(ex.Matches(str));
EXPECT_EQ(-1, ex.Match(str));
}
}
int i = MIN_CHAR;
int j = MAX_CHAR;
using ex1 = Matcher<Range<MIN_CHAR, MAX_CHAR>>;
for (int k = MIN_CHAR; k < 128; ++k) {
auto str = std::string(1, char(k));
if (i <= k && k <= j) {
EXPECT_TRUE(ex1::Matches(str));
EXPECT_EQ(1, ex1::Match(str));
} else {
EXPECT_FALSE(ex1::Matches(str));
EXPECT_EQ(-1, ex1::Match(str));
}
}
i = 'a';
j = 'z';
using ex2 = Matcher<Range<'a', 'z'>>;
for (int k = MIN_CHAR; k < 128; ++k) {
auto str = std::string(1, char(k));
if (i <= k && k <= j) {
EXPECT_TRUE(ex2::Matches(str));
EXPECT_EQ(1, ex2::Match(str));
} else {
EXPECT_FALSE(ex2::Matches(str));
EXPECT_EQ(-1, ex2::Match(str));
}
}
// for (int i = MIN_CHAR; i < 128; ++i) {
// for (int j = MIN_CHAR; j < 128; ++j) {
// RegEx ex((char)i, (char)j);
// for (int k = MIN_CHAR; k < 128; ++k) {
// auto str = std::string(1, char(k));
// if (i <= k && k <= j) {
// EXPECT_TRUE(ex.Matches(str));
// EXPECT_EQ(1, ex.Match(str));
// } else {
// EXPECT_FALSE(ex.Matches(str));
// EXPECT_EQ(-1, ex.Match(str));
// }
// }
// }
// }
}
TEST(RegExTest, EmptyString) {
RegEx ex = RegEx(std::string());
EXPECT_TRUE(ex.Matches(std::string()));
EXPECT_EQ(0, ex.Match(std::string()));
using ex = Matcher<Empty>;
EXPECT_TRUE(ex::Matches(std::string()));
EXPECT_EQ(0, ex::Match(std::string()));
// Matches anything, unlike RegEx()!
EXPECT_TRUE(ex.Matches(std::string("hello")));
EXPECT_EQ(0, ex.Match(std::string("hello")));
// EXPECT_TRUE(ex::Matches(std::string("hello")));
// EXPECT_EQ(0, ex::Match(std::string("hello")));
}
TEST(RegExTest, SingleCharacterString) {
for (int i = MIN_CHAR; i < 128; ++i) {
RegEx ex(std::string(1, (char)i));
for (int j = MIN_CHAR; j < 128; ++j) {
auto str = std::string(1, char(j));
if (j == i) {
EXPECT_TRUE(ex.Matches(str));
EXPECT_EQ(1, ex.Match(str));
// Match at start of string only!
std::string prefixed =
std::string(1, i + 1) + std::string("prefix: ") + str;
EXPECT_FALSE(ex.Matches(prefixed));
EXPECT_EQ(-1, ex.Match(prefixed));
} else {
EXPECT_FALSE(ex.Matches(str));
EXPECT_EQ(-1, ex.Match(str));
}
}
}
}
// TEST(RegExTest, SingleCharacterString) {
// for (int i = MIN_CHAR; i < 128; ++i) {
// using ex = Matcher<Char>(std::string(1, (char)i));
// for (int j = MIN_CHAR; j < 128; ++j) {
// auto str = std::string(1, char(j));
// if (j == i) {
// EXPECT_TRUE(ex.Matches(str));
// EXPECT_EQ(1, ex.Match(str));
// // Match at start of string only!
// std::string prefixed =
// std::string(1, i + 1) + std::string("prefix: ") + str;
// EXPECT_FALSE(ex.Matches(prefixed));
// EXPECT_EQ(-1, ex.Match(prefixed));
// } else {
// EXPECT_FALSE(ex.Matches(str));
// EXPECT_EQ(-1, ex.Match(str));
// }
// }
// }
// }
TEST(RegExTest, MultiCharacterString) {
RegEx ex(std::string("ab"));
using ex = Matcher<SEQ<Char<'a'>, Char<'b'>>>;
EXPECT_FALSE(ex.Matches(std::string("a")));
EXPECT_EQ(-1, ex.Match(std::string("a")));
EXPECT_FALSE(ex::Matches(std::string("a")));
EXPECT_EQ(-1, ex::Match(std::string("a")));
EXPECT_TRUE(ex.Matches(std::string("ab")));
EXPECT_EQ(2, ex.Match(std::string("ab")));
EXPECT_TRUE(ex.Matches(std::string("abba")));
EXPECT_EQ(2, ex.Match(std::string("abba")));
EXPECT_TRUE(ex::Matches(std::string("ab")));
EXPECT_EQ(2, ex::Match(std::string("ab")));
EXPECT_TRUE(ex::Matches(std::string("abba")));
EXPECT_EQ(2, ex::Match(std::string("abba")));
// match at start of string only!
EXPECT_FALSE(ex.Matches(std::string("baab")));
EXPECT_EQ(-1, ex.Match(std::string("baab")));
EXPECT_FALSE(ex::Matches(std::string("baab")));
EXPECT_EQ(-1, ex::Match(std::string("baab")));
}
TEST(RegExTest, OperatorNot) {
RegEx ex = !RegEx(std::string("ab"));
using ex = Matcher<NOT<SEQ<Char<'a'>,Char<'b'>>>>;
EXPECT_TRUE(ex.Matches(std::string("a")));
EXPECT_EQ(1, ex.Match(std::string("a")));
EXPECT_TRUE(ex::Matches(std::string("a")));
EXPECT_EQ(1, ex::Match(std::string("a")));
EXPECT_FALSE(ex.Matches(std::string("ab")));
EXPECT_EQ(-1, ex.Match(std::string("ab")));
EXPECT_FALSE(ex.Matches(std::string("abba")));
EXPECT_EQ(-1, ex.Match(std::string("abba")));
EXPECT_FALSE(ex::Matches(std::string("ab")));
EXPECT_EQ(-1, ex::Match(std::string("ab")));
EXPECT_FALSE(ex::Matches(std::string("abba")));
EXPECT_EQ(-1, ex::Match(std::string("abba")));
// match at start of string only!
EXPECT_TRUE(ex.Matches(std::string("baab")));
EXPECT_TRUE(ex::Matches(std::string("baab")));
// Operator not causes only one character to be matched.
EXPECT_EQ(1, ex.Match(std::string("baab")));
EXPECT_EQ(1, ex::Match(std::string("baab")));
}
TEST(RegExTest, OperatorOr) {
for (int i = MIN_CHAR; i < 127; ++i) {
for (int j = i + 1; j < 128; ++j) {
auto iStr = std::string(1, char(i));
auto jStr = std::string(1, char(j));
RegEx ex1 = RegEx(iStr) || RegEx(jStr);
RegEx ex2 = RegEx(jStr) || RegEx(iStr);
for (int k = MIN_CHAR; k < 128; ++k) {
auto str = std::string(1, char(k));
if (i == k || j == k) {
EXPECT_TRUE(ex1.Matches(str));
EXPECT_TRUE(ex2.Matches(str));
EXPECT_EQ(1, ex1.Match(str));
EXPECT_EQ(1, ex2.Match(str));
} else {
EXPECT_FALSE(ex1.Matches(str));
EXPECT_FALSE(ex2.Matches(str));
EXPECT_EQ(-1, ex1.Match(str));
EXPECT_EQ(-1, ex2.Match(str));
}
}
}
}
}
// TEST(RegExTest, OperatorOr) {
// for (int i = MIN_CHAR; i < 127; ++i) {
// for (int j = i + 1; j < 128; ++j) {
// auto iStr = std::string(1, char(i));
// auto jStr = std::string(1, char(j));
// RegEx ex1 = RegEx(iStr) || RegEx(jStr);
// RegEx ex2 = RegEx(jStr) || RegEx(iStr);
// for (int k = MIN_CHAR; k < 128; ++k) {
// auto str = std::string(1, char(k));
// if (i == k || j == k) {
// EXPECT_TRUE(ex1.Matches(str));
// EXPECT_TRUE(ex2.Matches(str));
// EXPECT_EQ(1, ex1.Match(str));
// EXPECT_EQ(1, ex2.Match(str));
// } else {
// EXPECT_FALSE(ex1.Matches(str));
// EXPECT_FALSE(ex2.Matches(str));
// EXPECT_EQ(-1, ex1.Match(str));
// EXPECT_EQ(-1, ex2.Match(str));
// }
// }
// }
// }
// }
TEST(RegExTest, OperatorOrShortCircuits) {
RegEx ex1 = RegEx(std::string("aaaa")) || RegEx(std::string("aa"));
RegEx ex2 = RegEx(std::string("aa")) || RegEx(std::string("aaaa"));
using ex1 = Matcher <
OR < SEQ < Char<'a'>,
Char<'a'>,
Char<'a'>,
Char<'a'>>,
SEQ < Char<'a'>,
Char<'a'>>>>;
EXPECT_TRUE(ex1.Matches(std::string("aaaaa")));
EXPECT_EQ(4, ex1.Match(std::string("aaaaa")));
using ex2 = Matcher <
OR < SEQ < Char<'a'>,
Char<'a'>>,
SEQ < Char<'a'>,
Char<'a'>,
Char<'a'>>,
Char<'a'>>>;
EXPECT_TRUE(ex2.Matches(std::string("aaaaa")));
EXPECT_EQ(2, ex2.Match(std::string("aaaaa")));
// RegEx(std::string("aaaa")) || RegEx(std::string("aa"));
// RegEx ex2 = RegEx(std::string("aa")) || RegEx(std::string("aaaa"));
EXPECT_TRUE(ex1::Matches(std::string("aaaaa")));
EXPECT_EQ(4, ex1::Match(std::string("aaaaa")));
EXPECT_TRUE(ex2::Matches(std::string("aaaaa")));
EXPECT_EQ(2, ex2::Match(std::string("aaaaa")));
}
TEST(RegExTest, OperatorAnd) {
RegEx emptySet = RegEx('a') && RegEx();
EXPECT_FALSE(emptySet.Matches(std::string("a")));
}
// TEST(RegExTest, OperatorAnd) {
// //RegEx emptySet = RegEx('a') && RegEx();
// using emptySet = Match<>RegEx('a') && RegEx();
// EXPECT_FALSE(emptySet.Matches(std::string("a")));
// }
TEST(RegExTest, OperatorAndShortCircuits) {
RegEx ex1 = RegEx(std::string("aaaa")) && RegEx(std::string("aa"));
RegEx ex2 = RegEx(std::string("aa")) && RegEx(std::string("aaaa"));
// TEST(RegExTest, OperatorAndShortCircuits) {
// RegEx ex1 = RegEx(std::string("aaaa")) && RegEx(std::string("aa"));
// RegEx ex2 = RegEx(std::string("aa")) && RegEx(std::string("aaaa"));
EXPECT_TRUE(ex1.Matches(std::string("aaaaa")));
EXPECT_EQ(4, ex1.Match(std::string("aaaaa")));
// EXPECT_TRUE(ex1.Matches(std::string("aaaaa")));
// EXPECT_EQ(4, ex1.Match(std::string("aaaaa")));
EXPECT_TRUE(ex2.Matches(std::string("aaaaa")));
EXPECT_EQ(2, ex2.Match(std::string("aaaaa")));
}
// EXPECT_TRUE(ex2.Matches(std::string("aaaaa")));
// EXPECT_EQ(2, ex2.Match(std::string("aaaaa")));
// }
TEST(RegExTest, OperatorPlus) {
RegEx ex = RegEx(std::string("hello ")) + RegEx(std::string("there"));
using ex = Matcher <
SEQ < SEQ <
Char<'h'>,
Char<'e'>,
Char<'l'>,
Char<'l'>,
Char<'o'>,
Char<' '>>,
SEQ <
Char<'t'>,
Char<'h'>,
Char<'e'>,
Char<'r'>,
Char<'e'>>
>>;
EXPECT_TRUE(ex.Matches(std::string("hello there")));
EXPECT_FALSE(ex.Matches(std::string("hello ")));
EXPECT_FALSE(ex.Matches(std::string("there")));
EXPECT_EQ(11, ex.Match(std::string("hello there")));
EXPECT_TRUE(ex::Matches(std::string("hello there")));
EXPECT_FALSE(ex::Matches(std::string("hello ")));
EXPECT_FALSE(ex::Matches(std::string("there")));
EXPECT_EQ(11, ex::Match(std::string("hello there")));
}
TEST(RegExTest, StringOr) {
std::string str = "abcde";
RegEx ex = RegEx(str, YAML::REGEX_OR);
using ex = Matcher<OR<Char<'a'>,Char<'b'>,Char<'c'>,Char<'d'>,Char<'e'>>>;
for (size_t i = 0; i < str.size(); ++i) {
EXPECT_TRUE(ex.Matches(str.substr(i, 1)));
EXPECT_EQ(1, ex.Match(str.substr(i, 1)));
EXPECT_TRUE(ex::Matches(str.substr(i, 1)));
EXPECT_EQ(1, ex::Match(str.substr(i, 1)));
}
EXPECT_EQ(1, ex.Match(str));
EXPECT_EQ(1, ex::Match(str));
}
}
#endif