From 349b19b84faa4861bafd870499ff83cfae387013 Mon Sep 17 00:00:00 2001 From: efp Date: Thu, 23 Aug 2018 10:31:06 -0600 Subject: [PATCH] Added input_position struct to record char, line, and column Added line and column number to parse error --- include/nlohmann/detail/exceptions.hpp | 41 ++++++++++++++++++++---- include/nlohmann/detail/input/lexer.hpp | 42 +++++++++++++++---------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index b73d7b1f9..895642147 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -12,6 +12,19 @@ namespace detail // exceptions // //////////////// +//! struct for positions +struct input_position { + std::size_t chars_read ; + std::size_t lines_read ; + std::size_t chars_read_this_line ; + + input_position(size_t chars, size_t lines, size_t chars_this_line) : + chars_read(chars), + lines_read(lines), + chars_read_this_line(chars_this_line) + {} +} ; + /*! @brief general exception of the @ref basic_json class @@ -119,12 +132,21 @@ class parse_error : public exception @param[in] what_arg the explanatory string @return parse_error object */ - static parse_error create(int id_, std::size_t byte_, const std::string& what_arg) + static parse_error create(int id_, const input_position pos_, const std::string& what_arg) { std::string w = exception::name("parse_error", id_) + "parse error" + - (byte_ != 0 ? (" at " + std::to_string(byte_)) : "") + + " at line: " + std::to_string(pos_.lines_read + 1) + + " col: " + std::to_string(pos_.chars_read_this_line) + + " : " + what_arg; + return parse_error(id_, pos_, w.c_str()); + } + + static parse_error create(int id_, size_t bytes_, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id_) + "parse error" + + " at char " + std::to_string(bytes_) + ": " + what_arg; - return parse_error(id_, byte_, w.c_str()); + return parse_error(id_, bytes_, w.c_str()); } /*! @@ -136,11 +158,18 @@ class parse_error : public exception n+1 is the index of the terminating null byte or the end of file. This also holds true when reading a byte vector (CBOR or MessagePack). */ - const std::size_t byte; + input_position position ; private: - parse_error(int id_, std::size_t byte_, const char* what_arg) - : exception(id_, what_arg), byte(byte_) {} + parse_error(int id_, const input_position& pos_, const char* what_arg) + : exception(id_, what_arg), + position( pos_ ) + {} + + parse_error(int id_, const size_t bytes_, const char* what_arg) + : exception(id_, what_arg), + position( bytes_, 0, 0) + {} }; /*! diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 44165ff06..3368e1f31 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -92,13 +92,11 @@ class lexer return "end of input"; case token_type::literal_or_value: return "'[', '{', or a literal"; - // LCOV_EXCL_START default: // catch non-enum values - return "unknown token"; - // LCOV_EXCL_STOP + return "unknown token"; // LCOV_EXCL_LINE } } - + explicit lexer(detail::input_adapter_t&& adapter) : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} @@ -747,13 +745,11 @@ class lexer goto scan_number_any1; } - // LCOV_EXCL_START default: { // all other characters are rejected outside scan_number() - assert(false); + assert(false); // LCOV_EXCL_LINE } - // LCOV_EXCL_STOP } scan_number_minus: @@ -1082,7 +1078,8 @@ scan_number_done: */ std::char_traits::int_type get() { - ++chars_read; + ++position.chars_read; + ++position.chars_read_this_line; if (next_unget) { // just reset the next_unget variable and work with current @@ -1097,6 +1094,13 @@ scan_number_done: { token_string.push_back(std::char_traits::to_char_type(current)); } + + if ( current == '\n' ) + { + ++position.lines_read ; + position.chars_read_this_line = 0 ; + } + return current; } @@ -1111,12 +1115,18 @@ scan_number_done: void unget() { next_unget = true; - --chars_read; + --position.chars_read; + --position.chars_read_this_line; if (JSON_LIKELY(current != std::char_traits::eof())) { assert(token_string.size() != 0); token_string.pop_back(); } + if ( (position.lines_read != 0 ) && (position.chars_read_this_line == 0) ) + { + // chars_read_this_line will be invalid, but reset the next get() + --position.lines_read ; + } } /// add a character to token_buffer @@ -1159,9 +1169,9 @@ scan_number_done: ///////////////////// /// return position of last read token - constexpr std::size_t get_position() const noexcept + constexpr input_position get_position() const noexcept { - return chars_read; + return position ; } /// return the last read token (for errors only). Will never contain EOF @@ -1177,7 +1187,7 @@ scan_number_done: { // escape control characters char cs[9]; - snprintf(cs, 9, "", static_cast(c)); + snprintf(cs, 9, "", static_cast(c)); result += cs; } else @@ -1231,7 +1241,7 @@ scan_number_done: token_type scan() { // initially, skip the BOM - if (chars_read == 0 and not skip_bom()) + if (position.chars_read == 0 and not skip_bom()) { error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; return token_type::parse_error; @@ -1309,9 +1319,9 @@ scan_number_done: /// whether the next get() call should just return current bool next_unget = false; - /// the number of characters read - std::size_t chars_read = 0; - + /// the current location in the input (defined in exceptions.hpp) + input_position position {0,0,0} ; + /// raw input token string (for error messages) std::vector token_string {};