Added input_position struct to record char, line, and column

Added line and column number to parse error
This commit is contained in:
efp 2018-08-23 10:31:06 -06:00
parent 359f98d140
commit 349b19b84f
2 changed files with 61 additions and 22 deletions

View File

@ -12,6 +12,19 @@ namespace detail
// exceptions //
////////////////
/*!
@brief position of the lexer within its input

Bundles the three counters the lexer maintains while reading so that they can
be handed to parse_error as a single value.

The constructor is constexpr (making this a literal type) so that the struct
can be returned from constexpr accessors such as lexer::get_position().
*/
struct input_position
{
    /// total number of characters read so far
    std::size_t chars_read;
    /// number of newlines read so far (zero-based; parse_error adds 1 when
    /// formatting the line number)
    std::size_t lines_read;
    /// number of characters read since the last newline (reset to 0 when a
    /// newline is read)
    std::size_t chars_read_this_line;

    /*!
    @param[in] chars            total number of characters read
    @param[in] lines            number of newlines read
    @param[in] chars_this_line  number of characters read on the current line
    */
    constexpr input_position(std::size_t chars,
                             std::size_t lines,
                             std::size_t chars_this_line) noexcept
        : chars_read(chars),
          lines_read(lines),
          chars_read_this_line(chars_this_line)
    {}
};
/*!
@brief general exception of the @ref basic_json class
@ -119,12 +132,21 @@ class parse_error : public exception
@param[in] what_arg the explanatory string
@return parse_error object
*/
static parse_error create(int id_, std::size_t byte_, const std::string& what_arg)
static parse_error create(int id_, const input_position pos_, const std::string& what_arg)
{
std::string w = exception::name("parse_error", id_) + "parse error" +
(byte_ != 0 ? (" at " + std::to_string(byte_)) : "") +
" at line: " + std::to_string(pos_.lines_read + 1) +
" col: " + std::to_string(pos_.chars_read_this_line) +
" : " + what_arg;
return parse_error(id_, pos_, w.c_str());
}
static parse_error create(int id_, size_t bytes_, const std::string& what_arg)
{
std::string w = exception::name("parse_error", id_) + "parse error" +
" at char " + std::to_string(bytes_) +
": " + what_arg;
return parse_error(id_, byte_, w.c_str());
return parse_error(id_, bytes_, w.c_str());
}
/*!
@ -136,11 +158,18 @@ class parse_error : public exception
n+1 is the index of the terminating null byte or the end of file.
This also holds true when reading a byte vector (CBOR or MessagePack).
*/
const std::size_t byte;
input_position position ;
private:
parse_error(int id_, std::size_t byte_, const char* what_arg)
: exception(id_, what_arg), byte(byte_) {}
parse_error(int id_, const input_position& pos_, const char* what_arg)
: exception(id_, what_arg),
position( pos_ )
{}
parse_error(int id_, const size_t bytes_, const char* what_arg)
: exception(id_, what_arg),
position( bytes_, 0, 0)
{}
};
/*!

View File

@ -92,13 +92,11 @@ class lexer
return "end of input";
case token_type::literal_or_value:
return "'[', '{', or a literal";
// LCOV_EXCL_START
default: // catch non-enum values
return "unknown token";
// LCOV_EXCL_STOP
return "unknown token"; // LCOV_EXCL_LINE
}
}
explicit lexer(detail::input_adapter_t&& adapter)
: ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}
@ -747,13 +745,11 @@ class lexer
goto scan_number_any1;
}
// LCOV_EXCL_START
default:
{
// all other characters are rejected outside scan_number()
assert(false);
assert(false); // LCOV_EXCL_LINE
}
// LCOV_EXCL_STOP
}
scan_number_minus:
@ -1082,7 +1078,8 @@ scan_number_done:
*/
std::char_traits<char>::int_type get()
{
++chars_read;
++position.chars_read;
++position.chars_read_this_line;
if (next_unget)
{
// just reset the next_unget variable and work with current
@ -1097,6 +1094,13 @@ scan_number_done:
{
token_string.push_back(std::char_traits<char>::to_char_type(current));
}
if ( current == '\n' )
{
++position.lines_read ;
position.chars_read_this_line = 0 ;
}
return current;
}
@ -1111,12 +1115,18 @@ scan_number_done:
void unget()
{
    // Push the current character back: the next get() will return `current`
    // again instead of reading from the input, so every counter that get()
    // advanced for this character has to be rolled back here.
    next_unget = true;
    --position.chars_read;

    if (position.chars_read_this_line == 0)
    {
        // get() resets the column to 0 only right after reading a newline, so
        // column 0 means the character being pushed back is that newline.
        // Undo its line count (the repeated get() will count it again). The
        // column on the previous line is not tracked and get() resets it to 0
        // for a newline anyway, so it is left alone rather than decremented
        // below zero.
        if (position.lines_read > 0)
        {
            --position.lines_read;
        }
    }
    else
    {
        // ordinary character: just step the column back by one
        --position.chars_read_this_line;
    }

    // EOF is never stored in token_string, so there is nothing to remove for it
    if (JSON_LIKELY(current != std::char_traits<char>::eof()))
    {
        assert(token_string.size() != 0);
        token_string.pop_back();
    }
}
/// add a character to token_buffer
@ -1159,9 +1169,9 @@ scan_number_done:
/////////////////////
/// return position of last read token
constexpr std::size_t get_position() const noexcept
constexpr input_position get_position() const noexcept
{
return chars_read;
return position ;
}
/// return the last read token (for errors only). Will never contain EOF
@ -1177,7 +1187,7 @@ scan_number_done:
{
// escape control characters
char cs[9];
snprintf(cs, 9, "<U+%.4X>", static_cast<unsigned char>(c));
snprintf(cs, 9, "<U+%.4hhX>", static_cast<unsigned char>(c));
result += cs;
}
else
@ -1231,7 +1241,7 @@ scan_number_done:
token_type scan()
{
// initially, skip the BOM
if (chars_read == 0 and not skip_bom())
if (position.chars_read == 0 and not skip_bom())
{
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
return token_type::parse_error;
@ -1309,9 +1319,9 @@ scan_number_done:
/// whether the next get() call should just return current
bool next_unget = false;
/// the number of characters read
std::size_t chars_read = 0;
/// the current location in the input (defined in exceptions.hpp)
input_position position {0,0,0} ;
/// raw input token string (for error messages)
std::vector<char> token_string {};