From de29068110e3aee790661ca070073d9b60354c93 Mon Sep 17 00:00:00 2001 From: Jesse Beder Date: Fri, 27 Jun 2008 19:07:30 +0000 Subject: [PATCH] Added stream input to the regular expressions, greatly simplifying the usage (in particular, we no longer have to specify the number of characters to be checked). --- regex.cpp | 214 +++++++++++++++++++++++++++++++++++++++++----------- regex.h | 45 ++++++++++- scanner.cpp | 56 +++++--------- scanner.h | 2 - 4 files changed, 232 insertions(+), 85 deletions(-) diff --git a/regex.cpp b/regex.cpp index a2907f5..f7830b7 100644 --- a/regex.cpp +++ b/regex.cpp @@ -2,30 +2,60 @@ namespace YAML { - RegEx::RegEx(REGEX_OP op): m_op(op) + RegEx::RegEx(REGEX_OP op): m_op(op), m_pOp(0) { + SetOp(); } - RegEx::RegEx(): m_op(REGEX_EMPTY) + RegEx::RegEx(const RegEx& rhs): m_pOp(0) { + m_op = rhs.m_op; + m_a = rhs.m_a; + m_z = rhs.m_z; + m_params = rhs.m_params; + + SetOp(); } - RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_a(ch) + RegEx::RegEx(): m_op(REGEX_EMPTY), m_pOp(0) { + SetOp(); } - RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_a(a), m_z(z) + RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_pOp(0), m_a(ch) { + SetOp(); } - RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op) + RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_pOp(0), m_a(a), m_z(z) + { + SetOp(); + } + + RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0) { for(unsigned i=0;i= 0; } + bool RegEx::Matches(std::istream& in) const + { + return Match(in) >= 0; + } + // Match // . Matches the given string against this regular expression. // . Returns the number of characters matched. @@ -49,44 +84,36 @@ namespace YAML // but that of course matches zero characters). int RegEx::Match(const std::string& str) const { - switch(m_op) { - case REGEX_EMPTY: - if(str.empty()) - return 0; - return -1; - case REGEX_MATCH: - if(str.empty() || str[0] != m_a) - return -1; - return 1; - case REGEX_RANGE: - if(str.empty() || m_a > str[0] || m_z < str[0]) - return -1; - return 1; - case REGEX_NOT: - if(m_params.empty()) - return false; - if(m_params[0].Match(str) >= 0) - return -1; - return 1; - case REGEX_OR: - for(unsigned i=0;i= 0) - return n; - } - return -1; - case REGEX_SEQ: - int offset = 0; - for(unsigned i=0;iMatch(str, *this); + + //case REGEX_EMPTY: + // if(str.empty()) + // return 0; + // return -1; + } + + // Match + // . The stream version does the same thing as the string version; + // REMEMBER that we only match from the start of the stream! + // . Note: the istream is not a const reference, but we guarantee + // that the pointer will be in the same spot, and we'll clear its + // flags before we end. + int RegEx::Match(std::istream& in) const + { + if(!m_pOp) + return -1; + + int pos = in.tellg(); + int ret = m_pOp->Match(in, *this); + + // reset input stream! + in.clear(); + in.seekg(pos); + + return ret; } RegEx operator ! (const RegEx& ex) @@ -111,4 +138,107 @@ namespace YAML ret.m_params.push_back(ex2); return ret; } + + ////////////////////////////////////////////////////////////////////////////// + // Operators + + // MatchOperator + int RegEx::MatchOperator::Match(const std::string& str, const RegEx& regex) const + { + if(str.empty() || str[0] != regex.m_a) + return -1; + return 1; + } + + + int RegEx::MatchOperator::Match(std::istream& in, const RegEx& regex) const + { + if(!in || in.peek() != regex.m_a) + return -1; + return 1; + } + + // RangeOperator + int RegEx::RangeOperator::Match(const std::string& str, const RegEx& regex) const + { + if(str.empty() || regex.m_a > str[0] || regex.m_z < str[0]) + return -1; + return 1; + } + + int RegEx::RangeOperator::Match(std::istream& in, const RegEx& regex) const + { + if(!in || regex.m_a > in.peek() || regex.m_z < in.peek()) + return -1; + return 1; + } + + // OrOperator + int RegEx::OrOperator::Match(const std::string& str, const RegEx& regex) const + { + for(unsigned i=0;i= 0) + return n; + } + return -1; + } + + int RegEx::OrOperator::Match(std::istream& in, const RegEx& regex) const + { + for(unsigned i=0;i= 0) + return n; + } + return -1; + } + + // NotOperator + int RegEx::NotOperator::Match(const std::string& str, const RegEx& regex) const + { + if(regex.m_params.empty()) + return -1; + if(regex.m_params[0].Match(str) >= 0) + return -1; + return 1; + } + + int RegEx::NotOperator::Match(std::istream& in, const RegEx& regex) const + { + if(regex.m_params.empty()) + return -1; + if(regex.m_params[0].Match(in) >= 0) + return -1; + return 1; + } + + // SeqOperator + int RegEx::SeqOperator::Match(const std::string& str, const RegEx& regex) const + { + int offset = 0; + for(unsigned i=0;i #include +#include namespace YAML { @@ -10,17 +11,55 @@ namespace YAML // simplified regular expressions // . Only straightforward matches (no repeated characters) // . Only matches from start of string - class RegEx { + class RegEx + { + private: + struct Operator { + virtual ~Operator() {} + virtual int Match(const std::string& str, const RegEx& regex) const = 0; + virtual int Match(std::istream& in, const RegEx& regex) const = 0; + }; + + struct MatchOperator: public Operator { + virtual int Match(const std::string& str, const RegEx& regex) const; + virtual int Match(std::istream& in, const RegEx& regex) const; + }; + + struct RangeOperator: public Operator { + virtual int Match(const std::string& str, const RegEx& regex) const; + virtual int Match(std::istream& in, const RegEx& regex) const; + }; + + struct OrOperator: public Operator { + virtual int Match(const std::string& str, const RegEx& regex) const; + virtual int Match(std::istream& in, const RegEx& regex) const; + }; + + struct NotOperator: public Operator { + virtual int Match(const std::string& str, const RegEx& regex) const; + virtual int Match(std::istream& in, const RegEx& regex) const; + }; + + struct SeqOperator: public Operator { + virtual int Match(const std::string& str, const RegEx& regex) const; + virtual int Match(std::istream& in, const RegEx& regex) const; + }; + public: + friend struct Operator; + RegEx(); RegEx(char ch); RegEx(char a, char z); - RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ); + RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ); + RegEx(const RegEx& rhs); ~RegEx(); bool Matches(char ch) const; bool Matches(const std::string& str) const; + bool Matches(std::istream& in) const; int Match(const std::string& str) const; + int Match(std::istream& in) const; friend RegEx operator ! (const RegEx& ex); friend RegEx operator || (const RegEx& ex1, const RegEx& ex2); @@ -28,9 +67,11 @@ namespace YAML private: RegEx(REGEX_OP op); + void SetOp(); private: REGEX_OP m_op; + Operator *m_pOp; char m_a, m_z; std::vector m_params; }; diff --git a/scanner.cpp b/scanner.cpp index e1495f5..f656d32 100644 --- a/scanner.cpp +++ b/scanner.cpp @@ -48,22 +48,6 @@ namespace YAML } } - // Peek - // . Peeks at the next 'n' characters and returns them in a string. - std::string Scanner::Peek(int n) - { - std::string ret; - - int pos = INPUT.tellg(); - for(int i=0;i 0) - return Exp::KeyInFlow.Matches(next); - return Exp::Key.Matches(next); + return Exp::KeyInFlow.Matches(INPUT); + return Exp::Key.Matches(INPUT); } // IsValue bool Scanner::IsValue() { - std::string next = Peek(2); if(m_flowLevel > 0) - return Exp::ValueInFlow.Matches(next); - return Exp::Value.Matches(next); + return Exp::ValueInFlow.Matches(INPUT); + return Exp::Value.Matches(INPUT); } // IsPlainScalar - // . Rules: bool Scanner::IsPlainScalar() { - std::string next = Peek(2); if(m_flowLevel > 0) - return Exp::PlainScalarInFlow.Matches(next); - return Exp::PlainScalar.Matches(next); + return Exp::PlainScalarInFlow.Matches(INPUT); + return Exp::PlainScalar.Matches(INPUT); } /////////////////////////////////////////////////////////////////////// @@ -368,21 +348,19 @@ namespace YAML break; // comment - if(Exp::Comment.Matches(INPUT.peek())) + if(Exp::Comment.Matches(INPUT)) break; // first eat non-blanks - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT.peek())) { - std::string next = Peek(2); - + while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { // illegal colon in flow context - if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(next)) + if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) throw IllegalScalar(); // characters that might end the scalar - if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(next)) + if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) break; - if(m_flowLevel == 0 && Exp::EndScalar.Matches(next)) + if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) break; if(leadingBlanks) { @@ -409,12 +387,12 @@ namespace YAML } // did we hit a non-blank character that ended us? - if(!Exp::BlankOrBreak.Matches(INPUT.peek())) + if(!Exp::BlankOrBreak.Matches(INPUT)) break; // now eat blanks - while(INPUT && Exp::BlankOrBreak.Matches(INPUT.peek())) { - if(Exp::Blank.Matches(INPUT.peek())) { + while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { + if(Exp::Blank.Matches(INPUT)) { if(leadingBlanks && m_column <= m_indents.top()) throw IllegalTabInScalar(); diff --git a/scanner.h b/scanner.h index 94cd88a..4a91e50 100644 --- a/scanner.h +++ b/scanner.h @@ -71,8 +71,6 @@ namespace YAML private: char GetChar(); void Eat(int n = 1); - std::string Peek(int n); - void EatLineBreak(); bool IsWhitespaceToBeEaten(char ch);