yaml-cpp/src/singledocparser.cpp
Antoine Beaupré e951e9fb0b fix stack overflow in HandleNode() (CVE-2017-5950)
simply set a hardcoded recursion limit to 2000 (inspired by Python's)
to avoid infinitely recursing into arbitrary data structures

assert() the depth. unsure if this is the right approach, but given
that HandleNode() is "void", I am not sure how else to return an
error. the problem with this approach of course is that it will still
crash the caller, unless they have proper exception handling in place.

Closes: #459
2020-04-09 10:28:49 +01:00

431 lines
12 KiB
C++

#include <algorithm>
#include <cstdio>
#include <sstream>
#include "collectionstack.h" // IWYU pragma: keep
#include "scanner.h"
#include "singledocparser.h"
#include "tag.h"
#include "token.h"
#include "yaml-cpp/emitterstyle.h"
#include "yaml-cpp/eventhandler.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
#include "yaml-cpp/mark.h"
#include "yaml-cpp/null.h"
namespace YAML {
// SingleDocParser
// . Sets up a parser for one document: m_scanner and m_directives are
//   initialized from the arguments, a new CollectionStack is allocated, and
//   the anchor table starts empty with the anchor counter at 0.
SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives)
: m_scanner(scanner),
m_directives(directives),
m_pCollectionStack(new CollectionStack),
m_anchors{},
m_curAnchor(0) {}
// Destructor is defaulted here, in the implementation file.
SingleDocParser::~SingleDocParser() = default;
// HandleDocument
// . Parses the next document from the scanner and reports it to eventHandler,
//   bracketed by OnDocumentStart / OnDocumentEnd.
// . Throws a ParserException on error.
void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
  // preconditions: at least one token is available and no anchor has been
  // handed out yet
  assert(!m_scanner.empty());
  assert(!m_curAnchor);

  eventHandler.OnDocumentStart(m_scanner.peek().mark);

  // skip an explicit document-start token, if there is one
  if (m_scanner.peek().type == Token::DOC_START) {
    m_scanner.pop();
  }

  // the document body is a single (possibly nested) node
  HandleNode(eventHandler);

  eventHandler.OnDocumentEnd();

  // swallow every document-end token that follows
  for (;;) {
    if (m_scanner.empty() || m_scanner.peek().type != Token::DOC_END) {
      break;
    }
    m_scanner.pop();
  }
}
// HandleNode
// . Parses one node (alias, scalar, sequence or map) and reports it to
//   eventHandler. Collections are parsed by HandleSequence / HandleMap, which
//   call back into this function for their children.
// . Throws a ParserException when the nesting depth would reach depth_limit.
//   The check is a real runtime check (not an assert) so that it still
//   protects builds compiled with NDEBUG, and so callers can catch it like any
//   other parse error.
void SingleDocParser::HandleNode(EventHandler& eventHandler) {
  if (!(depth < depth_limit)) {
    const Mark where =
        m_scanner.empty() ? m_scanner.mark() : m_scanner.peek().mark;
    throw ParserException(where, "exceeded maximum nesting depth");
  }

  // Track the depth for exactly the lifetime of this call. The counter must be
  // restored on every exit path (there are many early returns below, plus
  // exceptions); otherwise it would count the total number of nodes seen
  // instead of how deeply they are nested.
  using DepthCounter = decltype(depth);
  struct DepthScope {
    explicit DepthScope(DepthCounter& counter) : m_counter(counter) {
      ++m_counter;
    }
    ~DepthScope() { --m_counter; }
    DepthScope(const DepthScope&) = delete;
    DepthScope& operator=(const DepthScope&) = delete;

    DepthCounter& m_counter;
  };
  const DepthScope depthScope(depth);

  // an empty node *is* a possibility
  if (m_scanner.empty()) {
    eventHandler.OnNull(m_scanner.mark(), NullAnchor);
    return;
  }

  // save location
  Mark mark = m_scanner.peek().mark;

  // special case: a value node by itself must be a map, with no header
  if (m_scanner.peek().type == Token::VALUE) {
    eventHandler.OnMapStart(mark, "?", NullAnchor, EmitterStyle::Default);
    HandleMap(eventHandler);
    eventHandler.OnMapEnd();
    return;
  }

  // special case: an alias node
  if (m_scanner.peek().type == Token::ALIAS) {
    eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value));
    m_scanner.pop();
    return;
  }

  std::string tag;
  std::string anchor_name;
  anchor_t anchor;
  ParseProperties(tag, anchor, anchor_name);

  if (!anchor_name.empty())
    eventHandler.OnAnchor(mark, anchor_name);

  // after parsing properties, an empty node is again a possibility
  if (m_scanner.empty()) {
    eventHandler.OnNull(mark, anchor);
    return;
  }

  // NOTE: this reference is only valid until the token is popped; every branch
  // below reads it before popping or recursing.
  const Token& token = m_scanner.peek();

  // NOTE(review): the tag is not consulted here, so a null-looking plain
  // scalar is reported as null even when it carries an explicit tag — confirm
  // that this is intended.
  if (token.type == Token::PLAIN_SCALAR && IsNullString(token.value)) {
    eventHandler.OnNull(mark, anchor);
    m_scanner.pop();
    return;
  }

  // add non-specific tags
  if (tag.empty())
    tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?");

  // now split based on what kind of node we should be
  switch (token.type) {
    case Token::PLAIN_SCALAR:
    case Token::NON_PLAIN_SCALAR:
      eventHandler.OnScalar(mark, tag, anchor, token.value);
      m_scanner.pop();
      return;
    case Token::FLOW_SEQ_START:
      eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Flow);
      HandleSequence(eventHandler);
      eventHandler.OnSequenceEnd();
      return;
    case Token::BLOCK_SEQ_START:
      eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Block);
      HandleSequence(eventHandler);
      eventHandler.OnSequenceEnd();
      return;
    case Token::FLOW_MAP_START:
      eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
      HandleMap(eventHandler);
      eventHandler.OnMapEnd();
      return;
    case Token::BLOCK_MAP_START:
      eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Block);
      HandleMap(eventHandler);
      eventHandler.OnMapEnd();
      return;
    case Token::KEY:
      // compact maps can only go in a flow sequence
      if (m_pCollectionStack->GetCurCollectionType() ==
          CollectionType::FlowSeq) {
        eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
        HandleMap(eventHandler);
        eventHandler.OnMapEnd();
        return;
      }
      break;
    default:
      break;
  }

  // nothing that starts a node: report an empty node, as null when no tag was
  // given and as an empty scalar otherwise
  if (tag == "?")
    eventHandler.OnNull(mark, anchor);
  else
    eventHandler.OnScalar(mark, tag, anchor, "");
}
// HandleSequence
// . Dispatches to the block or flow sequence parser depending on the token at
//   the front of the scanner; any other token is ignored.
void SingleDocParser::HandleSequence(EventHandler& eventHandler) {
  const auto startType = m_scanner.peek().type;

  if (startType == Token::BLOCK_SEQ_START) {
    HandleBlockSequence(eventHandler);
  } else if (startType == Token::FLOW_SEQ_START) {
    HandleFlowSequence(eventHandler);
  }
}
// HandleBlockSequence
// . Parses the entries of a block sequence, from its start token through its
//   end token.
// . Throws a ParserException if the tokens run out or an unexpected token
//   appears where an entry or the sequence end should be.
void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
  // consume the sequence-start token
  m_scanner.pop();
  m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq);

  for (;;) {
    if (m_scanner.empty()) {
      throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ);
    }

    // copy the token: it is still needed after being popped
    const Token entry = m_scanner.peek();
    const bool atEnd = entry.type == Token::BLOCK_SEQ_END;
    if (!atEnd && entry.type != Token::BLOCK_ENTRY) {
      throw ParserException(entry.mark, ErrorMsg::END_OF_SEQ);
    }

    m_scanner.pop();
    if (atEnd) {
      break;
    }

    // an entry followed directly by another entry (or by the end of the
    // sequence) has no content, so it is reported as null
    if (!m_scanner.empty()) {
      const Token& following = m_scanner.peek();
      switch (following.type) {
        case Token::BLOCK_ENTRY:
        case Token::BLOCK_SEQ_END:
          eventHandler.OnNull(following.mark, NullAnchor);
          continue;
        default:
          break;
      }
    }

    HandleNode(eventHandler);
  }

  m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq);
}
// HandleFlowSequence
// . Parses the entries of a flow sequence, from its start token through its
//   end token.
// . Throws a ParserException if the tokens run out or an entry is followed by
//   something other than a separator or the sequence end.
void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) {
  // running out of tokens anywhere inside the sequence is an error
  const auto requireToken = [this]() {
    if (m_scanner.empty())
      throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
  };

  // consume the sequence-start token
  m_scanner.pop();
  m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq);

  for (;;) {
    requireToken();

    // end of the sequence?
    if (m_scanner.peek().type == Token::FLOW_SEQ_END) {
      m_scanner.pop();
      break;
    }

    // otherwise there is an entry to read
    HandleNode(eventHandler);

    requireToken();

    // after an entry comes either a separator (consumed here) or the sequence
    // end (left in place for the check at the top of the loop); anything else
    // is a bad node
    const Token& separator = m_scanner.peek();
    switch (separator.type) {
      case Token::FLOW_ENTRY:
        m_scanner.pop();
        break;
      case Token::FLOW_SEQ_END:
        break;
      default:
        throw ParserException(separator.mark, ErrorMsg::END_OF_SEQ_FLOW);
    }
  }

  m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq);
}
// HandleMap
// . Dispatches to the matching map parser depending on the token at the front
//   of the scanner; any other token is ignored.
void SingleDocParser::HandleMap(EventHandler& eventHandler) {
  const auto startType = m_scanner.peek().type;

  if (startType == Token::BLOCK_MAP_START) {
    HandleBlockMap(eventHandler);
  } else if (startType == Token::FLOW_MAP_START) {
    HandleFlowMap(eventHandler);
  } else if (startType == Token::KEY) {
    HandleCompactMap(eventHandler);
  } else if (startType == Token::VALUE) {
    HandleCompactMapWithNoKey(eventHandler);
  }
}
// HandleBlockMap
// . Parses the key/value pairs of a block map, from its start token through
//   its end token. A missing key or missing value is reported as null.
// . Throws a ParserException if the tokens run out or an unexpected token
//   appears where a key, a value or the map end should be.
void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) {
  // consume the map-start token
  m_scanner.pop();
  m_pCollectionStack->PushCollectionType(CollectionType::BlockMap);

  for (;;) {
    if (m_scanner.empty()) {
      throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP);
    }

    // copy the token: its mark is still needed after it has been popped
    const Token head = m_scanner.peek();
    switch (head.type) {
      case Token::KEY:
      case Token::VALUE:
      case Token::BLOCK_MAP_END:
        break;
      default:
        throw ParserException(head.mark, ErrorMsg::END_OF_MAP);
    }

    if (head.type == Token::BLOCK_MAP_END) {
      m_scanner.pop();
      break;
    }

    // key: absent when the pair starts directly with a value token
    if (head.type != Token::KEY) {
      eventHandler.OnNull(head.mark, NullAnchor);
    } else {
      m_scanner.pop();
      HandleNode(eventHandler);
    }

    // value: optional
    const bool hasValue =
        !m_scanner.empty() && m_scanner.peek().type == Token::VALUE;
    if (hasValue) {
      m_scanner.pop();
      HandleNode(eventHandler);
    } else {
      eventHandler.OnNull(head.mark, NullAnchor);
    }
  }

  m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
}
// HandleFlowMap
// . Parses the key/value pairs of a flow map, from its start token through its
//   end token. A missing key or missing value is reported as null.
// . Throws a ParserException if the tokens run out or a pair is followed by
//   something other than a separator or the map end.
void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) {
  // running out of tokens anywhere inside the map is an error
  const auto requireToken = [this]() {
    if (m_scanner.empty())
      throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
  };

  // consume the map-start token
  m_scanner.pop();
  m_pCollectionStack->PushCollectionType(CollectionType::FlowMap);

  for (;;) {
    requireToken();

    // remember what the pair starts with before anything is popped
    const Token& head = m_scanner.peek();
    const auto headType = head.type;
    const Mark pairMark = head.mark;

    // end of the map?
    if (headType == Token::FLOW_MAP_END) {
      m_scanner.pop();
      break;
    }

    // key: absent unless the pair starts with a key token
    if (headType != Token::KEY) {
      eventHandler.OnNull(pairMark, NullAnchor);
    } else {
      m_scanner.pop();
      HandleNode(eventHandler);
    }

    // value: optional
    const bool hasValue =
        !m_scanner.empty() && m_scanner.peek().type == Token::VALUE;
    if (hasValue) {
      m_scanner.pop();
      HandleNode(eventHandler);
    } else {
      eventHandler.OnNull(pairMark, NullAnchor);
    }

    requireToken();

    // after a pair comes either a separator (consumed here) or the map end
    // (left in place for the check at the top of the loop); anything else is a
    // bad node
    const Token& separator = m_scanner.peek();
    switch (separator.type) {
      case Token::FLOW_ENTRY:
        m_scanner.pop();
        break;
      case Token::FLOW_MAP_END:
        break;
      default:
        throw ParserException(separator.mark, ErrorMsg::END_OF_MAP_FLOW);
    }
  }

  m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
}
// HandleCompactMap
// . Single "key: value" pair in a flow sequence
// . The value is optional; when it is missing a null is reported in its place.
void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) {
  m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);

  // remember where the pair starts, then consume the key token and parse the
  // key itself
  const Mark pairMark = m_scanner.peek().mark;
  m_scanner.pop();
  HandleNode(eventHandler);

  // value: optional
  const bool hasValue =
      !m_scanner.empty() && m_scanner.peek().type == Token::VALUE;
  if (!hasValue) {
    eventHandler.OnNull(pairMark, NullAnchor);
  } else {
    m_scanner.pop();
    HandleNode(eventHandler);
  }

  m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
}
// HandleCompactMapWithNoKey
// . Single ": value" pair in a flow sequence
// . The missing key is reported as null, located at the value token.
void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) {
  m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);

  // the key is absent: report a null at the position of the value token
  const Mark valueMark = m_scanner.peek().mark;
  eventHandler.OnNull(valueMark, NullAnchor);

  // consume the value token and parse the value itself
  m_scanner.pop();
  HandleNode(eventHandler);

  m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
}
// ParseProperties
// . Consumes the run of tag and anchor tokens at the front of the scanner.
// . All three outputs are reset first, so they come back empty / NullAnchor
//   when no such token is present.
void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor,
                                      std::string& anchor_name) {
  tag.clear();
  anchor_name.clear();
  anchor = NullAnchor;

  while (!m_scanner.empty()) {
    const auto kind = m_scanner.peek().type;
    if (kind == Token::TAG) {
      ParseTag(tag);
    } else if (kind == Token::ANCHOR) {
      ParseAnchor(anchor, anchor_name);
    } else {
      // first token that is not a property: done
      return;
    }
  }
}
void SingleDocParser::ParseTag(std::string& tag) {
Token& token = m_scanner.peek();
if (!tag.empty())
throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS);
Tag tagInfo(token);
tag = tagInfo.Translate(m_directives);
m_scanner.pop();
}
// ParseAnchor
// . Records the name of the anchor token at the front of the scanner,
//   registers it to obtain its id, then consumes the token.
// . Throws a ParserException if anchor has already been set.
void SingleDocParser::ParseAnchor(anchor_t& anchor, std::string& anchor_name) {
  Token& anchorToken = m_scanner.peek();

  if (anchor) {
    throw ParserException(anchorToken.mark, ErrorMsg::MULTIPLE_ANCHORS);
  }

  const std::string& name = anchorToken.value;
  anchor_name = name;
  anchor = RegisterAnchor(name);

  m_scanner.pop();
}
// RegisterAnchor
// . Assigns the next anchor id to name, replacing any id previously stored
//   under the same name, and returns it.
// . An empty name is not registered; NullAnchor is returned instead.
anchor_t SingleDocParser::RegisterAnchor(const std::string& name) {
  if (name.empty()) {
    return NullAnchor;
  }

  const anchor_t id = ++m_curAnchor;
  m_anchors[name] = id;
  return id;
}
// LookupAnchor
// . Returns the id registered for the anchor called name.
// . Throws a ParserException (located at mark) if no such anchor is known.
anchor_t SingleDocParser::LookupAnchor(const Mark& mark,
                                       const std::string& name) const {
  const auto found = m_anchors.find(name);
  if (found != m_anchors.end()) {
    return found->second;
  }
  throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR);
}
} // namespace YAML