Added input_position struct to record char, line, and column
Added line and column number to parse error
This commit is contained in:
parent
359f98d140
commit
349b19b84f
@ -12,6 +12,19 @@ namespace detail
|
|||||||
// exceptions //
|
// exceptions //
|
||||||
////////////////
|
////////////////
|
||||||
|
|
||||||
|
/*!
@brief position inside the input that is being parsed

Bundles the three counters the lexer maintains while consuming input so that
error messages can report a line and column in addition to the raw offset.

The constructor is constexpr so that the struct is a literal type and can be
returned from constexpr accessors.
*/
struct input_position
{
    /// total number of characters read so far
    std::size_t chars_read;
    /// number of newline characters read so far (i.e. zero-based line index)
    std::size_t lines_read;
    /// number of characters read since the last newline
    std::size_t chars_read_this_line;

    /*!
    @param[in] chars            total number of characters read
    @param[in] lines            number of newlines read
    @param[in] chars_this_line  number of characters read on the current line
    */
    constexpr input_position(std::size_t chars, std::size_t lines, std::size_t chars_this_line) noexcept
        : chars_read(chars),
          lines_read(lines),
          chars_read_this_line(chars_this_line)
    {}
};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@brief general exception of the @ref basic_json class
|
@brief general exception of the @ref basic_json class
|
||||||
|
|
||||||
@ -119,12 +132,21 @@ class parse_error : public exception
|
|||||||
@param[in] what_arg the explanatory string
|
@param[in] what_arg the explanatory string
|
||||||
@return parse_error object
|
@return parse_error object
|
||||||
*/
|
*/
|
||||||
static parse_error create(int id_, const input_position& pos_, const std::string& what_arg)
{
    // lines_read counts newlines seen so far, so add 1 to report a
    // one-based line number
    std::string w = exception::name("parse_error", id_) + "parse error" +
                    " at line: " + std::to_string(pos_.lines_read + 1) +
                    " col: " + std::to_string(pos_.chars_read_this_line) +
                    " : " + what_arg;
    return parse_error(id_, pos_, w.c_str());
}

/*!
@brief create a parse error exception from a plain byte offset
@param[in] id_       the id of the exception
@param[in] bytes_    the byte index where the error occurred (or 0 if the
                     position cannot be determined)
@param[in] what_arg  the explanatory string
@return parse_error object
*/
static parse_error create(int id_, std::size_t bytes_, const std::string& what_arg)
{
    // an offset of 0 means "position unknown": leave the location out of the
    // message instead of reporting a misleading "at char 0"
    std::string w = exception::name("parse_error", id_) + "parse error" +
                    (bytes_ != 0 ? (" at char " + std::to_string(bytes_)) : "") +
                    ": " + what_arg;
    return parse_error(id_, bytes_, w.c_str());
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -136,11 +158,18 @@ class parse_error : public exception
|
|||||||
n+1 is the index of the terminating null byte or the end of file.
|
n+1 is the index of the terminating null byte or the end of file.
|
||||||
This also holds true when reading a byte vector (CBOR or MessagePack).
|
This also holds true when reading a byte vector (CBOR or MessagePack).
|
||||||
*/
|
*/
|
||||||
const std::size_t byte;
|
input_position position ;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
parse_error(int id_, std::size_t byte_, const char* what_arg)
|
parse_error(int id_, const input_position& pos_, const char* what_arg)
|
||||||
: exception(id_, what_arg), byte(byte_) {}
|
: exception(id_, what_arg),
|
||||||
|
position( pos_ )
|
||||||
|
{}
|
||||||
|
|
||||||
|
parse_error(int id_, const size_t bytes_, const char* what_arg)
|
||||||
|
: exception(id_, what_arg),
|
||||||
|
position( bytes_, 0, 0)
|
||||||
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
@ -92,13 +92,11 @@ class lexer
|
|||||||
return "end of input";
|
return "end of input";
|
||||||
case token_type::literal_or_value:
|
case token_type::literal_or_value:
|
||||||
return "'[', '{', or a literal";
|
return "'[', '{', or a literal";
|
||||||
// LCOV_EXCL_START
|
|
||||||
default: // catch non-enum values
|
default: // catch non-enum values
|
||||||
return "unknown token";
|
return "unknown token"; // LCOV_EXCL_LINE
|
||||||
// LCOV_EXCL_STOP
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit lexer(detail::input_adapter_t&& adapter)
|
explicit lexer(detail::input_adapter_t&& adapter)
|
||||||
: ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}
|
: ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}
|
||||||
|
|
||||||
@ -747,13 +745,11 @@ class lexer
|
|||||||
goto scan_number_any1;
|
goto scan_number_any1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// LCOV_EXCL_START
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
// all other characters are rejected outside scan_number()
|
// all other characters are rejected outside scan_number()
|
||||||
assert(false);
|
assert(false); // LCOV_EXCL_LINE
|
||||||
}
|
}
|
||||||
// LCOV_EXCL_STOP
|
|
||||||
}
|
}
|
||||||
|
|
||||||
scan_number_minus:
|
scan_number_minus:
|
||||||
@ -1082,7 +1078,8 @@ scan_number_done:
|
|||||||
*/
|
*/
|
||||||
std::char_traits<char>::int_type get()
|
std::char_traits<char>::int_type get()
|
||||||
{
|
{
|
||||||
++chars_read;
|
++position.chars_read;
|
||||||
|
++position.chars_read_this_line;
|
||||||
if (next_unget)
|
if (next_unget)
|
||||||
{
|
{
|
||||||
// just reset the next_unget variable and work with current
|
// just reset the next_unget variable and work with current
|
||||||
@ -1097,6 +1094,13 @@ scan_number_done:
|
|||||||
{
|
{
|
||||||
token_string.push_back(std::char_traits<char>::to_char_type(current));
|
token_string.push_back(std::char_traits<char>::to_char_type(current));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( current == '\n' )
|
||||||
|
{
|
||||||
|
++position.lines_read ;
|
||||||
|
position.chars_read_this_line = 0 ;
|
||||||
|
}
|
||||||
|
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1111,12 +1115,18 @@ scan_number_done:
|
|||||||
void unget()
|
void unget()
|
||||||
{
|
{
|
||||||
next_unget = true;
|
next_unget = true;
|
||||||
--chars_read;
|
--position.chars_read;
|
||||||
|
--position.chars_read_this_line;
|
||||||
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
|
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
|
||||||
{
|
{
|
||||||
assert(token_string.size() != 0);
|
assert(token_string.size() != 0);
|
||||||
token_string.pop_back();
|
token_string.pop_back();
|
||||||
}
|
}
|
||||||
|
if ( (position.lines_read != 0 ) && (position.chars_read_this_line == 0) )
|
||||||
|
{
|
||||||
|
// chars_read_this_line will be invalid, but reset the next get()
|
||||||
|
--position.lines_read ;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// add a character to token_buffer
|
/// add a character to token_buffer
|
||||||
@ -1159,9 +1169,9 @@ scan_number_done:
|
|||||||
/////////////////////
|
/////////////////////
|
||||||
|
|
||||||
/// return position of last read token
|
/// return position of last read token
|
||||||
constexpr std::size_t get_position() const noexcept
|
constexpr input_position get_position() const noexcept
|
||||||
{
|
{
|
||||||
return chars_read;
|
return position ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// return the last read token (for errors only). Will never contain EOF
|
/// return the last read token (for errors only). Will never contain EOF
|
||||||
@ -1177,7 +1187,7 @@ scan_number_done:
|
|||||||
{
|
{
|
||||||
// escape control characters
|
// escape control characters
|
||||||
char cs[9];
|
char cs[9];
|
||||||
snprintf(cs, 9, "<U+%.4X>", static_cast<unsigned char>(c));
|
snprintf(cs, 9, "<U+%.4hhX>", static_cast<unsigned char>(c));
|
||||||
result += cs;
|
result += cs;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1231,7 +1241,7 @@ scan_number_done:
|
|||||||
token_type scan()
|
token_type scan()
|
||||||
{
|
{
|
||||||
// initially, skip the BOM
|
// initially, skip the BOM
|
||||||
if (chars_read == 0 and not skip_bom())
|
if (position.chars_read == 0 and not skip_bom())
|
||||||
{
|
{
|
||||||
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
|
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
|
||||||
return token_type::parse_error;
|
return token_type::parse_error;
|
||||||
@ -1309,9 +1319,9 @@ scan_number_done:
|
|||||||
/// whether the next get() call should just return current
|
/// whether the next get() call should just return current
|
||||||
bool next_unget = false;
|
bool next_unget = false;
|
||||||
|
|
||||||
/// the number of characters read
|
/// the current location in the input (defined in exceptions.hpp)
|
||||||
std::size_t chars_read = 0;
|
input_position position {0,0,0} ;
|
||||||
|
|
||||||
/// raw input token string (for error messages)
|
/// raw input token string (for error messages)
|
||||||
std::vector<char> token_string {};
|
std::vector<char> token_string {};
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user