fixed lexer issue that required a null byte at the end of contiguous storage containers #290

Niels 2016-08-23 22:38:05 +02:00
parent a79d634ccb
commit 1d66ab9f7a
4 changed files with 82 additions and 44 deletions
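In practical terms, contiguous input no longer needs a trailing '\0' sentinel; the lexer now pads its own line buffer. A minimal sketch of the now-supported call, mirroring the updated tests below (the include path for the single header is an assumption):

#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

#include "json.hpp"  // nlohmann/json single header (assumed include path)

using nlohmann::json;

int main()
{
    // no trailing '\0' needed anymore: the lexer pads its internal
    // line buffer itself when reading from contiguous storage
    std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
    assert(json::parse(std::begin(v), std::end(v)) == json(true));
}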

View File

@@ -8538,6 +8538,13 @@ basic_json_parser_63:
     incremented without leaving the limits of the line buffer. Note re2c
     decides when to call this function.
 
+    If the lexer reads from contiguous storage, there is no trailing null
+    byte. Therefore, this function must make sure to add these padding
+    null bytes.
+
+    If the lexer reads from an input stream, this function reads the next
+    line of the input.
+
     @pre
         p p p p p p u u u u u x . . . . . .
         ^           ^       ^   ^
@@ -8553,26 +8560,38 @@ basic_json_parser_63:
     */
     void fill_line_buffer()
     {
-        // no stream is used or end of file is reached
-        if (m_stream == nullptr or not * m_stream)
-        {
-            return;
-        }
-
         // number of processed characters (p)
        const auto offset_start = m_start - m_content;
         // offset for m_marker wrt. to m_start
-        const auto offset_marker = m_marker - m_start;
+        const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
         // number of unprocessed characters (u)
         const auto offset_cursor = m_cursor - m_start;
 
-        // delete processed characters from line buffer
-        m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-        // read next line from input stream
-        std::string line;
-        std::getline(*m_stream, line);
-        // add line with newline symbol to the line buffer
-        m_line_buffer += "\n" + line;
+        // no stream is used or end of file is reached
+        if (m_stream == nullptr or not * m_stream)
+        {
+            // copy unprocessed characters to line buffer
+            m_line_buffer.clear();
+            for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
+            {
+                m_line_buffer.append(1, static_cast<const char>(*m_cursor));
+            }
+
+            // append 5 characters (size of longest keyword "false") to
+            // make sure that there is sufficient space between m_cursor
+            // and m_limit
+            m_line_buffer.append(5, '\0');
+        }
+        else
+        {
+            // delete processed characters from line buffer
+            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+            // read next line from input stream
+            std::string line;
+            std::getline(*m_stream, line);
+            // add line with newline symbol to the line buffer
+            m_line_buffer += "\n" + line;
+        }
 
         // set pointers
         m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -8580,7 +8599,7 @@ basic_json_parser_63:
         m_start = m_content;
         m_marker = m_start + offset_marker;
         m_cursor = m_start + offset_cursor;
-        m_limit = m_start + m_line_buffer.size() - 1;
+        m_limit = m_start + m_line_buffer.size();
     }
 
     /// return string representation of last read token
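The magic number 5 in the new branch is the length of the longest keyword the scanner may still have to match ("false"), so re2c can always look that far ahead without running past m_limit. A standalone sketch of the same padding idea, using hypothetical names rather than the library's internals:

#include <cassert>
#include <string>

// hypothetical standalone illustration of the padding scheme used in
// fill_line_buffer(): copy the unprocessed range [start, limit) and
// append 5 null bytes, the length of the longest keyword ("false")
std::string fill_from_range(const char* start, const char* limit)
{
    std::string line_buffer(start, limit);  // unprocessed characters (u)
    line_buffer.append(5, '\0');            // padding between cursor and limit
    return line_buffer;
}

int main()
{
    const char input[] = {'t', 'r', 'u', 'e'};
    const std::string buf = fill_from_range(input, input + sizeof(input));
    assert(buf.size() == 4 + 5);   // 4 payload bytes plus 5 padding bytes
    assert(buf.back() == '\0');
}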

View File

@@ -7835,6 +7835,13 @@ class basic_json
     incremented without leaving the limits of the line buffer. Note re2c
     decides when to call this function.
 
+    If the lexer reads from contiguous storage, there is no trailing null
+    byte. Therefore, this function must make sure to add these padding
+    null bytes.
+
+    If the lexer reads from an input stream, this function reads the next
+    line of the input.
+
     @pre
         p p p p p p u u u u u x . . . . . .
         ^           ^       ^   ^
@@ -7850,26 +7857,38 @@ class basic_json
     */
     void fill_line_buffer()
     {
-        // no stream is used or end of file is reached
-        if (m_stream == nullptr or not * m_stream)
-        {
-            return;
-        }
-
         // number of processed characters (p)
         const auto offset_start = m_start - m_content;
         // offset for m_marker wrt. to m_start
-        const auto offset_marker = m_marker - m_start;
+        const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start;
         // number of unprocessed characters (u)
         const auto offset_cursor = m_cursor - m_start;
 
-        // delete processed characters from line buffer
-        m_line_buffer.erase(0, static_cast<size_t>(offset_start));
-        // read next line from input stream
-        std::string line;
-        std::getline(*m_stream, line);
-        // add line with newline symbol to the line buffer
-        m_line_buffer += "\n" + line;
+        // no stream is used or end of file is reached
+        if (m_stream == nullptr or not * m_stream)
+        {
+            // copy unprocessed characters to line buffer
+            m_line_buffer.clear();
+            for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor)
+            {
+                m_line_buffer.append(1, static_cast<const char>(*m_cursor));
+            }
+
+            // append 5 characters (size of longest keyword "false") to
+            // make sure that there is sufficient space between m_cursor
+            // and m_limit
+            m_line_buffer.append(5, '\0');
+        }
+        else
+        {
+            // delete processed characters from line buffer
+            m_line_buffer.erase(0, static_cast<size_t>(offset_start));
+            // read next line from input stream
+            std::string line;
+            std::getline(*m_stream, line);
+            // add line with newline symbol to the line buffer
+            m_line_buffer += "\n" + line;
+        }
 
         // set pointers
         m_content = reinterpret_cast<const lexer_char_t*>(m_line_buffer.c_str());
@@ -7877,7 +7896,7 @@ class basic_json
         m_start = m_content;
         m_marker = m_start + offset_marker;
         m_cursor = m_start + offset_cursor;
-        m_limit = m_start + m_line_buffer.size() - 1;
+        m_limit = m_start + m_line_buffer.size();
     }
 
     /// return string representation of last read token

View File

@@ -761,19 +761,19 @@ TEST_CASE("parser class")
    {
        SECTION("from std::vector")
        {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
        }
 
        SECTION("from std::array")
        {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
            CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
        }
 
        SECTION("from array")
        {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
            CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
        }
 
@@ -790,13 +790,13 @@ TEST_CASE("parser class")
        SECTION("from std::initializer_list")
        {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
        }
 
        SECTION("from std::valarray")
        {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parser(std::begin(v), std::end(v)).parse() == json(true));
        }
 
    }

View File

@@ -86,19 +86,19 @@ TEST_CASE("deserialization")
    {
        SECTION("from std::vector")
        {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(v) == json(true));
        }
 
        SECTION("from std::array")
        {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
            CHECK(json::parse(v) == json(true));
        }
 
        SECTION("from array")
        {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(v) == json(true));
        }
 
@@ -110,7 +110,7 @@ TEST_CASE("deserialization")
 
        SECTION("from std::initializer_list")
        {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(v) == json(true));
        }
 
@@ -125,19 +125,19 @@ TEST_CASE("deserialization")
    {
        SECTION("from std::vector")
        {
-            std::vector<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::vector<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
        }
 
        SECTION("from std::array")
        {
-            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e', '\0'} };
+            std::array<uint8_t, 5> v { {'t', 'r', 'u', 'e'} };
            CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
        }
 
        SECTION("from array")
        {
-            uint8_t v[] = {'t', 'r', 'u', 'e', '\0'};
+            uint8_t v[] = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
        }
 
@@ -149,13 +149,13 @@ TEST_CASE("deserialization")
        SECTION("from std::initializer_list")
        {
-            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::initializer_list<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
        }
 
        SECTION("from std::valarray")
        {
-            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e', '\0'};
+            std::valarray<uint8_t> v = {'t', 'r', 'u', 'e'};
            CHECK(json::parse(std::begin(v), std::end(v)) == json(true));
        }
 
    }