Added input_position struct to record char, line, and column

Added line and column number to parse error
This commit is contained in:
efp 2018-08-23 10:31:06 -06:00
parent 359f98d140
commit 349b19b84f
2 changed files with 61 additions and 22 deletions

View File

@ -12,6 +12,19 @@ namespace detail
// exceptions // // exceptions //
//////////////// ////////////////
/*!
@brief position in the input, recorded as three running counters

The constructor is constexpr (and noexcept) so that the struct is a literal
type; this is required for it to be usable as the return type of constexpr
functions and in constant expressions.
*/
struct input_position
{
    /// total number of characters read
    std::size_t chars_read;
    /// number of lines read
    std::size_t lines_read;
    /// number of characters read in the current line
    std::size_t chars_read_this_line;

    /*!
    @param[in] chars            total number of characters read
    @param[in] lines            number of lines read
    @param[in] chars_this_line  number of characters read in the current line
    */
    constexpr input_position(std::size_t chars, std::size_t lines, std::size_t chars_this_line) noexcept
        : chars_read(chars),
          lines_read(lines),
          chars_read_this_line(chars_this_line)
    {}
};
/*! /*!
@brief general exception of the @ref basic_json class @brief general exception of the @ref basic_json class
@ -119,12 +132,21 @@ class parse_error : public exception
@param[in] what_arg the explanatory string @param[in] what_arg the explanatory string
@return parse_error object @return parse_error object
*/ */
static parse_error create(int id_, const input_position& pos_, const std::string& what_arg)
{
    // pos_ is taken by const reference, matching the private constructor
    // it is forwarded to. The reported line is lines_read + 1, i.e. lines
    // are shown 1-based.
    std::string w = exception::name("parse_error", id_) + "parse error" +
                    " at line: " + std::to_string(pos_.lines_read + 1) +
                    " col: " + std::to_string(pos_.chars_read_this_line) +
                    " : " + what_arg;
    return parse_error(id_, pos_, w.c_str());
}
/*!
@brief create a parse error exception from a plain byte index
@param[in] id_       the id of the exception
@param[in] bytes_    byte index of the parse error; if 0, no position is
                     added to the message
@param[in] what_arg  the explanatory string
@return parse_error object
*/
static parse_error create(int id_, std::size_t bytes_, const std::string& what_arg)
{
    // only mention a position when one is actually known (bytes_ != 0);
    // otherwise the message would claim a bogus "at char 0"
    std::string w = exception::name("parse_error", id_) + "parse error" +
                    (bytes_ != 0 ? (" at char " + std::to_string(bytes_)) : "") +
                    ": " + what_arg;
    return parse_error(id_, bytes_, w.c_str());
}
/*! /*!
@ -136,11 +158,18 @@ class parse_error : public exception
n+1 is the index of the terminating null byte or the end of file. n+1 is the index of the terminating null byte or the end of file.
This also holds true when reading a byte vector (CBOR or MessagePack). This also holds true when reading a byte vector (CBOR or MessagePack).
*/ */
const std::size_t byte;

/// position of the parse error as character, line and column counters;
/// when the error was created from a plain byte index, only chars_read is
/// set and the line/column counters are 0
input_position position;

private:
/// construct from a full input position; byte mirrors pos_.chars_read so
/// that code reading the byte index keeps working
parse_error(int id_, const input_position& pos_, const char* what_arg)
    : exception(id_, what_arg),
      byte(pos_.chars_read),
      position(pos_)
{}

/// construct from a plain byte index; no line/column information available
parse_error(int id_, std::size_t bytes_, const char* what_arg)
    : exception(id_, what_arg),
      byte(bytes_),
      position(bytes_, 0, 0)
{}
}; };
/*! /*!

View File

@ -92,13 +92,11 @@ class lexer
return "end of input"; return "end of input";
case token_type::literal_or_value: case token_type::literal_or_value:
return "'[', '{', or a literal"; return "'[', '{', or a literal";
// LCOV_EXCL_START
default: // catch non-enum values default: // catch non-enum values
return "unknown token"; return "unknown token"; // LCOV_EXCL_LINE
// LCOV_EXCL_STOP
} }
} }
/// construct a lexer: the input adapter is moved into `ia`, and the decimal
/// point character is obtained once via get_decimal_point()
explicit lexer(detail::input_adapter_t&& adapter)
    : ia(std::move(adapter)),
      decimal_point_char(get_decimal_point())
{}
@ -747,13 +745,11 @@ class lexer
goto scan_number_any1; goto scan_number_any1;
} }
// LCOV_EXCL_START
default: default:
{ {
// all other characters are rejected outside scan_number() // all other characters are rejected outside scan_number()
assert(false); assert(false); // LCOV_EXCL_LINE
} }
// LCOV_EXCL_STOP
} }
scan_number_minus: scan_number_minus:
@ -1082,7 +1078,8 @@ scan_number_done:
*/ */
std::char_traits<char>::int_type get() std::char_traits<char>::int_type get()
{ {
++chars_read; ++position.chars_read;
++position.chars_read_this_line;
if (next_unget) if (next_unget)
{ {
// just reset the next_unget variable and work with current // just reset the next_unget variable and work with current
@ -1097,6 +1094,13 @@ scan_number_done:
{ {
token_string.push_back(std::char_traits<char>::to_char_type(current)); token_string.push_back(std::char_traits<char>::to_char_type(current));
} }
if ( current == '\n' )
{
++position.lines_read ;
position.chars_read_this_line = 0 ;
}
return current; return current;
} }
@ -1111,12 +1115,18 @@ scan_number_done:
void unget() void unget()
{ {
next_unget = true; next_unget = true;
--chars_read; --position.chars_read;
--position.chars_read_this_line;
if (JSON_LIKELY(current != std::char_traits<char>::eof())) if (JSON_LIKELY(current != std::char_traits<char>::eof()))
{ {
assert(token_string.size() != 0); assert(token_string.size() != 0);
token_string.pop_back(); token_string.pop_back();
} }
if ( (position.lines_read != 0 ) && (position.chars_read_this_line == 0) )
{
// chars_read_this_line will be invalid, but reset the next get()
--position.lines_read ;
}
} }
/// add a character to token_buffer /// add a character to token_buffer
@ -1159,9 +1169,9 @@ scan_number_done:
///////////////////// /////////////////////
/// return position of last read token /// return position of last read token
constexpr std::size_t get_position() const noexcept constexpr input_position get_position() const noexcept
{ {
return chars_read; return position ;
} }
/// return the last read token (for errors only). Will never contain EOF /// return the last read token (for errors only). Will never contain EOF
@ -1177,7 +1187,7 @@ scan_number_done:
{ {
// escape control characters // escape control characters
char cs[9]; char cs[9];
snprintf(cs, 9, "<U+%.4X>", static_cast<unsigned char>(c)); snprintf(cs, 9, "<U+%.4hhX>", static_cast<unsigned char>(c));
result += cs; result += cs;
} }
else else
@ -1231,7 +1241,7 @@ scan_number_done:
token_type scan() token_type scan()
{ {
// initially, skip the BOM // initially, skip the BOM
if (chars_read == 0 and not skip_bom()) if (position.chars_read == 0 and not skip_bom())
{ {
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
return token_type::parse_error; return token_type::parse_error;
@ -1309,9 +1319,9 @@ scan_number_done:
/// whether the next get() call should just return current /// whether the next get() call should just return current
bool next_unget = false; bool next_unget = false;
/// the number of characters read /// the current location in the input (defined in exceptions.hpp)
std::size_t chars_read = 0; input_position position {0,0,0} ;
/// raw input token string (for error messages) /// raw input token string (for error messages)
std::vector<char> token_string {}; std::vector<char> token_string {};