Added input_position struct to record char, line, and column

Added line and column number to parse error
2018-08-23 10:31:06 -06:00 · 2018-08-23 10:31:06 -06:00 · 349b19b84f
commit 349b19b84f
parent 359f98d140
2 changed files with 61 additions and 22 deletions
--- a/include/nlohmann/detail/exceptions.hpp
+++ b/include/nlohmann/detail/exceptions.hpp
@ -12,6 +12,19 @@ namespace detail
 // exceptions //
 ////////////////

+//! position in the input: total characters read, newlines read, and column on the current line
+struct input_position {
+    std::size_t chars_read;            ///< total number of characters read
+    std::size_t lines_read;            ///< number of newlines read (0 while on the first line)
+    std::size_t chars_read_this_line;  ///< characters read since the last newline
+
+    /// constexpr keeps this a literal type, so constexpr accessors may return it by value
+    constexpr input_position(std::size_t chars, std::size_t lines, std::size_t chars_this_line) noexcept
+        : chars_read(chars), lines_read(lines), chars_read_this_line(chars_this_line)
+    {}
+};
+
+
 /*!
@brief general exception of the @ref basic_json class

@ -119,12 +132,21 @@ class parse_error : public exception
    @param[in] what_arg  the explanatory string
    @return parse_error object
    */
-    static parse_error create(int id_, std::size_t byte_, const std::string& what_arg)
+    static parse_error create(int id_, const input_position& pos_, const std::string& what_arg)
    {
        std::string w = exception::name("parse_error", id_) + "parse error" +
-                        (byte_ != 0 ? (" at " + std::to_string(byte_)) : "") +
+                        " at line: " + std::to_string(pos_.lines_read + 1) +  // lines_read counts newlines, so +1 is the 1-based line
+                        " col: " + std::to_string(pos_.chars_read_this_line) +
+                        " : " + what_arg;
+        return parse_error(id_, pos_, w.c_str());
+    }
+
+    static parse_error create(int id_, std::size_t bytes_, const std::string& what_arg)
+    {   // byte-offset overload: an offset of 0 means "no position known" and is left out of the message
+        std::string w = exception::name("parse_error", id_) + "parse error" +
+                        (bytes_ != 0 ? (" at char " + std::to_string(bytes_)) : "") +
                        ": " + what_arg;
-        return parse_error(id_, byte_, w.c_str());
+        return parse_error(id_, bytes_, w.c_str());
    }

    /*!
@ -136,11 +158,18 @@ class parse_error : public exception
          n+1 is the index of the terminating null byte or the end of file.
          This also holds true when reading a byte vector (CBOR or MessagePack).
    */
-    const std::size_t byte;
+    input_position position;  // NOTE(review): replaces the public `byte` member; code reading `e.byte` must switch to `position.chars_read`

  private:
-    parse_error(int id_, std::size_t byte_, const char* what_arg)
-        : exception(id_, what_arg), byte(byte_) {}
+    parse_error(int id_, const input_position& pos_, const char* what_arg)
+        : exception(id_, what_arg),
+          position(pos_)
+    {}
+
+    parse_error(int id_, std::size_t bytes_, const char* what_arg)
+        : exception(id_, what_arg),
+          position(bytes_, 0, 0)  // only the offset is known here: line and column stay 0
+    {}
 };

 /*!
--- a/include/nlohmann/detail/input/lexer.hpp
+++ b/include/nlohmann/detail/input/lexer.hpp
@ -92,13 +92,11 @@ class lexer
                return "end of input";
            case token_type::literal_or_value:
                return "'[', '{', or a literal";
-            // LCOV_EXCL_START
            default: // catch non-enum values
-                return "unknown token";
-                // LCOV_EXCL_STOP
+                return "unknown token"; // LCOV_EXCL_LINE
        }
    }
-
+    
    explicit lexer(detail::input_adapter_t&& adapter)
        : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}

@ -747,13 +745,11 @@ class lexer
                goto scan_number_any1;
            }

-            // LCOV_EXCL_START
            default:
            {
                // all other characters are rejected outside scan_number()
-                assert(false);
+                assert(false); // LCOV_EXCL_LINE
            }
-                // LCOV_EXCL_STOP
        }

 scan_number_minus:
@ -1082,7 +1078,8 @@ scan_number_done:
    */
    std::char_traits<char>::int_type get()
    {
-        ++chars_read;
+        ++position.chars_read;
+        ++position.chars_read_this_line;
        if (next_unget)
        {
            // just reset the next_unget variable and work with current
@ -1097,6 +1094,13 @@ scan_number_done:
        {
            token_string.push_back(std::char_traits<char>::to_char_type(current));
        }
+        
+        if ( current == '\n' )
+        {
+            ++position.lines_read ;
+            position.chars_read_this_line = 0 ;
+        }
+        
        return current;
    }

@ -1111,12 +1115,18 @@ scan_number_done:
    void unget()
    {
        next_unget = true;
-        --chars_read;
+        --position.chars_read;
+        // When the column is already 0, the character being pushed back is the newline
+        // that reset it in get(): undo that line increment (get() counts the newline
+        // again when it replays it) instead of wrapping the unsigned column counter.
+        if (position.chars_read_this_line != 0) { --position.chars_read_this_line; }
+        else if (position.lines_read != 0) { --position.lines_read; }
+
        if (JSON_LIKELY(current != std::char_traits<char>::eof()))
        {
            assert(token_string.size() != 0);
            token_string.pop_back();
        }
    }

    /// add a character to token_buffer
@ -1159,9 +1169,9 @@ scan_number_done:
    /////////////////////

    /// return position of last read token
-    constexpr std::size_t get_position() const noexcept
+    constexpr input_position get_position() const noexcept
    {
-        return chars_read;
+        return position ; // copy of the lexer's running char/line/column counters
    }

    /// return the last read token (for errors only).  Will never contain EOF
@ -1177,7 +1187,7 @@ scan_number_done:
            {
                // escape control characters
                char cs[9];
-                snprintf(cs, 9, "<U+%.4X>", static_cast<unsigned char>(c));
+                snprintf(cs, 9, "<U+%.4hhX>", static_cast<unsigned char>(c));
                result += cs;
            }
            else
@ -1231,7 +1241,7 @@ scan_number_done:
    token_type scan()
    {
        // initially, skip the BOM
-        if (chars_read == 0 and not skip_bom())
+        if (position.chars_read == 0 and not skip_bom())
        {
            error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
            return token_type::parse_error;
@ -1309,9 +1319,9 @@ scan_number_done:
    /// whether the next get() call should just return current
    bool next_unget = false;

-    /// the number of characters read
-    std::size_t chars_read = 0;
-
+    /// the current location in the input (defined in exceptions.hpp)
+    input_position position {0,0,0} ;
+    
    /// raw input token string (for error messages)
    std::vector<char> token_string {};