Use nlohmann::position_t instead of lexer for detailed position information when using a sax parser

This commit is contained in:
barcode 2022-12-23 14:32:04 +01:00 committed by Raphael Grimm
parent 22f56995fb
commit bda3e4b7bc
15 changed files with 348 additions and 162 deletions

View File

@ -9,14 +9,13 @@ using json = nlohmann::json;
// allows us to store metadata and add custom methods to each node
struct token_start_stop
{
nlohmann::detail::position_t start{};
nlohmann::detail::position_t stop{};
nlohmann::position_t start{};
nlohmann::position_t stop{};
std::string start_pos_str() const
{
return "{l=" + std::to_string(start.lines_read) + ":c="
//the lexer is already one char ahead (e.g. the opening { of an object )
+ std::to_string(start.chars_read_current_line - 1) + "}";
+ std::to_string(start.chars_read_current_line) + "}";
}
std::string stop_pos_str() const
{
@ -68,16 +67,14 @@ class sax_with_token_start_stop_metadata
, start_stop{}
{}
template<class T1, class T2>
void next_token_start(const nlohmann::detail::lexer<T1, T2>& lex)
void next_token_start(const nlohmann::position_t& p)
{
start_stop.start = lex.get_position();
start_stop.start = p;
}
template<class T1, class T2>
void next_token_end(const nlohmann::detail::lexer<T1, T2>& lex)
void next_token_end(const nlohmann::position_t& p)
{
start_stop.stop = lex.get_position();
start_stop.stop = p;
}
bool null()

View File

@ -7,31 +7,23 @@ There are two possible signatures for this method:
```cpp
void next_token_end(std::size_t pos);
```
This version is called with the byte position after the next element ends. This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md).
This version is called with the byte position after the next element ends.
This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md).
2.
```cpp
template<class BasicJsonType, class InputAdapterType>
void next_token_end(const nlohmann::detail::lexer<BasicJsonType, InputAdapterType>& lex)
void next_token_end(const nlohmann::position_t& p)
```
This version is called with the lexer after the last character of the next element was parsed. The lexer can provide additional information about the current parse context. This version only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence.
## Template parameters
1.
(none)
2.
`BasicJsonType`
: a specialization of `basic_json` used by the lexer. (Leave this as a template parameter)
`InputAdapterType`
: The input adapter used by the lexer. (Leave this as a template parameter)
This version is called with the [detailed parser position information](../position_t/index.md) after the last character of the next element was parsed.
This version is only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence over the first version.
## Parameters
1.
`pos` (in)
: Byte position one past the last byte of the next element.
2.
`lex` (in)
: Lexer after the last char of the next element was parsed.
`p` (in)
: [Detailed parser position information](../position_t/index.md) after the last char of the next element was parsed.
## Notes
@ -57,7 +49,8 @@ It is recommended, but not required, to also implement [next_token_start](next_t
??? example
The example below shows a SAX parser using the second version of this method and storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
The example below shows a SAX parser using the second version of this method and
storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
```cpp
--8<-- "examples/sax_parse_with_src_location_in_json.cpp"

View File

@ -7,31 +7,23 @@ There are two possible signatures for this method:
```cpp
void next_token_start(std::size_t pos);
```
This version is called with the byte position where the next element starts. This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md).
This version is called with the byte position where the next element starts.
This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md).
2.
```cpp
template<class BasicJsonType, class InputAdapterType>
void next_token_start(const nlohmann::detail::lexer<BasicJsonType, InputAdapterType>& lex)
void next_token_start(const nlohmann::position_t& p)
```
This version is called with the lexer after the first character of the next element was parsed. The lexer can provide additional information about the current parse context. This version only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence.
## Template parameters
1.
(none)
2.
`BasicJsonType`
: a specialization of `basic_json` used by the lexer. (Leave this as a template parameter)
`InputAdapterType`
: The input adapter used by the lexer. (Leave this as a template parameter)
This version is called with [detailed parser position information](../position_t/index.md).
This version is only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence over the first version.
## Parameters
1.
`pos` (in)
: Byte position where the next element starts.
2.
`lex` (in)
: Lexer after the first char of the next element was parsed.
`p` (in)
: [Detailed parser position information](../position_t/index.md) at the start of the next element, i.e. before its first char was read.
## Notes
@ -57,7 +49,8 @@ It is recommended, but not required, to also implement [next_token_end](next_tok
??? example
The example below shows a SAX parser using the second version of this method and storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
The example below shows a SAX parser using the second version of this method and
storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
```cpp
--8<-- "examples/sax_parse_with_src_location_in_json.cpp"

View File

@ -0,0 +1,28 @@
# <small>nlohmann::position_t::</small>chars_read_current_line
```cpp
std::size_t chars_read_current_line;
```
The number of characters read in the current line.
## Examples
??? example
The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and
storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
```cpp
--8<-- "examples/sax_parse_with_src_location_in_json.cpp"
```
Output:
```json
--8<-- "examples/sax_parse_with_src_location_in_json.output"
```
## Version history
- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???.

View File

@ -0,0 +1,28 @@
# <small>nlohmann::position_t::</small>chars_read_total
```cpp
std::size_t chars_read_total;
```
The total number of characters read.
## Examples
??? example
The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and
storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
```cpp
--8<-- "examples/sax_parse_with_src_location_in_json.cpp"
```
Output:
```json
--8<-- "examples/sax_parse_with_src_location_in_json.output"
```
## Version history
- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???.

View File

@ -0,0 +1,23 @@
# <small>nlohmann::</small>position_t
```cpp
struct position_t;
```
This type represents the parser's position when parsing a JSON string.
This position can be retrieved when using a [sax parser](../json_sax/index.md) with the format `nlohmann::json::input_format_t::json`
and implementing [next_token_start](../json_sax/next_token_start.md) or [next_token_end](../json_sax/next_token_end.md).
## Member functions
- [**operator size_t**](operator_size_t.md) - returns the value of [chars_read_total](chars_read_total.md).
## Member variables
- [**chars_read_total**](chars_read_total.md) - The total number of characters read.
- [**lines_read**](lines_read.md) - The number of lines read.
- [**chars_read_current_line**](chars_read_current_line.md) - The number of characters read in the current line.
## Version history
- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???.

View File

@ -0,0 +1,28 @@
# <small>nlohmann::position_t::</small>lines_read
```cpp
std::size_t lines_read;
```
The number of lines read.
## Examples
??? example
The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and
storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
```cpp
--8<-- "examples/sax_parse_with_src_location_in_json.cpp"
```
Output:
```json
--8<-- "examples/sax_parse_with_src_location_in_json.output"
```
## Version history
- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???.

View File

@ -0,0 +1,28 @@
# <small>nlohmann::position_t::</small>operator size_t
```cpp
constexpr operator size_t() const;
```
Returns the value of [chars_read_total](chars_read_total.md).
## Examples
??? example
The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and
storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point.
```cpp
--8<-- "examples/sax_parse_with_src_location_in_json.cpp"
```
Output:
```json
--8<-- "examples/sax_parse_with_src_location_in_json.output"
```
## Version history
- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???.

View File

@ -67,6 +67,30 @@ To implement your own SAX handler, proceed as follows:
Note the `sax_parse` function only returns a `#!cpp bool` indicating the result of the last executed SAX event. It does not return `json` value - it is up to you to decide what to do with the SAX events. Furthermore, no exceptions are thrown in case of a parse error - it is up to you what to do with the exception object passed to your `parse_error` implementation. Internally, the SAX interface is used for the DOM parser (class `json_sax_dom_parser`) as well as the acceptor (`json_sax_acceptor`), see file `json_sax.hpp`.
## Element position information
The position of a parsed element can be retrieved by implementing the optional methods [next_token_start](../../api/json_sax/next_token_start.md) and [next_token_end](../../api/json_sax/next_token_end.md).
These methods will be called with the parser position before any of the other methods are called and can be used to retrieve the half-open bounds (`[start, end)`) of a parsed element.
These methods come in two flavors:
1.
```cpp
void next_token_start(std::size_t pos);
void next_token_end(std::size_t pos);
```
This flavor is called with the byte positions of each element and is available for any `nlohmann::json::input_format_t` passed to `nlohmann::json::sax_parse`.
2.
```cpp
void next_token_start(const nlohmann::position_t& p);
void next_token_end(const nlohmann::position_t& p);
```
This flavor is called with the [detailed parser position information](../../api/position_t/index.md) of each element and is only available if `nlohmann::json::sax_parse` is called with `nlohmann::json::input_format_t::json`.
Furthermore, this flavor takes precedence over the first flavor.
Depending on the required information it is possible for the SAX parser to implement all four or only one or none of these methods.
## See also
- [json_sax](../../api/json_sax/index.md) - documentation of the SAX interface

View File

@ -250,6 +250,8 @@ nav:
- 'start_array': api/json_sax/start_array.md
- 'start_object': api/json_sax/start_object.md
- 'string': api/json_sax/string.md
- 'next_token_start' : api/json_sax/next_token_start.md
- 'next_token_end' : api/json_sax/next_token_end.md
- 'operator<<(basic_json)': api/operator_ltlt.md
- 'operator<<(json_pointer)': api/operator_ltlt.md
- 'operator>>(basic_json)': api/operator_gtgt.md
@ -257,6 +259,12 @@ nav:
- 'operator""_json_pointer': api/operator_literal_json_pointer.md
- 'ordered_json': api/ordered_json.md
- 'ordered_map': api/ordered_map.md
- position_t:
- 'Overview': api/position_t/index.md
- 'operator size_t': api/position_t/operator_size_t.md
- 'chars_read_total': api/position_t/chars_read_total.md
- 'lines_read': api/position_t/lines_read.md
- 'chars_read_current_line': api/position_t/chars_read_current_line.md
- macros:
- 'Overview': api/macros/index.md
- 'JSON_ASSERT': api/macros/json_assert.md

View File

@ -13,9 +13,6 @@
#include <nlohmann/detail/abi_macros.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
/// struct to capture the start position of the current token
struct position_t
{
@ -32,6 +29,4 @@ struct position_t
return chars_read_total;
}
};
} // namespace detail
NLOHMANN_JSON_NAMESPACE_END

View File

@ -50,30 +50,30 @@ struct sax_call_next_token_end_pos_direct
template <typename DirectCaller, typename SAX, typename LexOrPos>
struct sax_call_function
{
// is the parameter a lexer or a position
static constexpr bool no_lexer = std::is_same<LexOrPos, std::size_t>::value;
// is the parameter a lexer or a byte position
static constexpr bool called_with_byte_pos = std::is_same<LexOrPos, std::size_t>::value;
template<typename SAX2, typename...Ts2>
using call_t = decltype(DirectCaller::call(std::declval<SAX2*>(), std::declval<Ts2>()...));
//the sax parser supports calls with a position
static constexpr bool detected_call_with_pos =
static constexpr bool detected_call_with_byte_pos =
is_detected_exact<void, call_t, SAX, std::size_t>::value;
//the sax parser supports calls with a lexer
static constexpr bool detected_call_with_lex =
!no_lexer &&
is_detected_exact<void, call_t, SAX, const LexOrPos>::value;
static constexpr bool detected_call_with_lex_pos =
!called_with_byte_pos &&
is_detected_exact<void, call_t, SAX, const position_t >::value;
//there either has to be a version accepting a lexer or a position
static constexpr bool valid = detected_call_with_pos || detected_call_with_lex;
static constexpr bool valid = detected_call_with_byte_pos || detected_call_with_lex_pos;
//called with pos and pos is method supported -> pass data on
//called with byte pos and byte pos is method supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_pos
valid &&
detected_call_with_byte_pos
>::type
call(SaxT* sax, std::size_t pos)
{
@ -84,46 +84,70 @@ struct sax_call_function
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::valid
!valid
>::type
call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {}
//called with lex and lex method is supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex);
}
// called with lex and only pos method is supported -> call with position from lexer
//called with lex and lex pos method is supported -> call with position from lexer
// the start pos in the lexer is last read char -> chars_read_total-1
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
valid &&
!called_with_byte_pos &&
detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_start_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total - 1);
JSON_ASSERT(lex.get_position().chars_read_total > 0);
JSON_ASSERT(lex.get_position().chars_read_current_line > 0);
//the lexer has already read the first char of the current element -> fix this
auto pos_copy = lex.get_position();
--pos_copy.chars_read_total;
--pos_copy.chars_read_current_line;
DirectCaller::call(sax, pos_copy);
}
// called with lex and only pos method is supported -> call with position from lexer
//called with lex and lex pos method is supported -> pass data on
// the one past end pos in the lexer is the current index -> chars_read_total
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
valid &&
!called_with_byte_pos &&
detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_end_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position());
}
// called with lex and only byte pos method is supported -> call with byte position from lexer
// the start pos in the lexer is last read char -> chars_read_total-1
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
valid &&
!called_with_byte_pos &&
!detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_start_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
JSON_ASSERT(lex.get_position().chars_read_total > 0);
DirectCaller::call(sax, lex.get_position().chars_read_total - 1);
}
// called with lex and only byte pos method is supported -> call with byte position from lexer
// the one past end pos in the lexer is the current index -> chars_read_total
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
valid &&
!called_with_byte_pos &&
!detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_end_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)

View File

@ -3015,9 +3015,6 @@ NLOHMANN_JSON_NAMESPACE_END
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
/// struct to capture the start position of the current token
struct position_t
{
@ -3034,8 +3031,6 @@ struct position_t
return chars_read_total;
}
};
} // namespace detail
NLOHMANN_JSON_NAMESPACE_END
// #include <nlohmann/detail/macro_scope.hpp>
@ -9002,30 +8997,30 @@ struct sax_call_next_token_end_pos_direct
template <typename DirectCaller, typename SAX, typename LexOrPos>
struct sax_call_function
{
// is the parameter a lexer or a position
static constexpr bool no_lexer = std::is_same<LexOrPos, std::size_t>::value;
// is the parameter a lexer or a byte position
static constexpr bool called_with_byte_pos = std::is_same<LexOrPos, std::size_t>::value;
template<typename SAX2, typename...Ts2>
using call_t = decltype(DirectCaller::call(std::declval<SAX2*>(), std::declval<Ts2>()...));
//the sax parser supports calls with a position
static constexpr bool detected_call_with_pos =
static constexpr bool detected_call_with_byte_pos =
is_detected_exact<void, call_t, SAX, std::size_t>::value;
//the sax parser supports calls with a lexer
static constexpr bool detected_call_with_lex =
!no_lexer &&
is_detected_exact<void, call_t, SAX, const LexOrPos>::value;
static constexpr bool detected_call_with_lex_pos =
!called_with_byte_pos &&
is_detected_exact<void, call_t, SAX, const position_t >::value;
//there either has to be a version accepting a lexer or a position
static constexpr bool valid = detected_call_with_pos || detected_call_with_lex;
static constexpr bool valid = detected_call_with_byte_pos || detected_call_with_lex_pos;
//called with pos and pos is method supported -> pass data on
//called with byte pos and byte pos is method supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_pos
valid &&
detected_call_with_byte_pos
>::type
call(SaxT* sax, std::size_t pos)
{
@ -9036,46 +9031,70 @@ struct sax_call_function
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::valid
!valid
>::type
call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {}
//called with lex and lex method is supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex);
}
// called with lex and only pos method is supported -> call with position from lexer
//called with lex and lex pos method is supported -> call with position from lexer
// the start pos in the lexer is last read char -> chars_read_total-1
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
valid &&
!called_with_byte_pos &&
detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_start_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total - 1);
JSON_ASSERT(lex.get_position().chars_read_total > 0);
JSON_ASSERT(lex.get_position().chars_read_current_line > 0);
//the lexer has already read the first char of the current element -> fix this
auto pos_copy = lex.get_position();
--pos_copy.chars_read_total;
--pos_copy.chars_read_current_line;
DirectCaller::call(sax, pos_copy);
}
// called with lex and only pos method is supported -> call with position from lexer
//called with lex and lex pos method is supported -> pass data on
// the one past end pos in the lexer is the current index -> chars_read_total
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
valid &&
!called_with_byte_pos &&
detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_end_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position());
}
// called with lex and only byte pos method is supported -> call with byte position from lexer
// the start pos in the lexer is last read char -> chars_read_total-1
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
valid &&
!called_with_byte_pos &&
!detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_start_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
JSON_ASSERT(lex.get_position().chars_read_total > 0);
DirectCaller::call(sax, lex.get_position().chars_read_total - 1);
}
// called with lex and only byte pos method is supported -> call with byte position from lexer
// the one past end pos in the lexer is the current index -> chars_read_total
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
valid &&
!called_with_byte_pos &&
!detected_call_with_lex_pos &&
std::is_same<DirectCaller, sax_call_next_token_end_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)

View File

@ -109,10 +109,10 @@ std::ostream& operator<<(std::ostream& out, const std::set<element_info_t>& v)
return out;
}
template<bool LexCallImpossible, bool WithPos, bool WithLex>
template<bool LexCallImpossible, bool WithBytePos, bool WithLexPos>
struct Sax
{
static constexpr bool has_callback = WithPos || (WithLex && !LexCallImpossible);
static constexpr bool has_callback = WithBytePos || (WithLexPos && !LexCallImpossible);
using json = nlohmann::json;
enum class last_call_t
@ -167,32 +167,32 @@ struct Sax
last_call = last_call_t::end_pos;
}
template<bool Act = WithPos>
template<bool Act = WithBytePos>
typename std::enable_if<Act>::type next_token_start(std::size_t pos)
{
check_start(pos);
CHECK((!WithLex || LexCallImpossible));
CHECK((!WithLexPos || LexCallImpossible));
}
template < class LexT, bool Act = WithLex && !std::is_same<LexT, std::size_t>::value >
typename std::enable_if<Act>::type next_token_start(const LexT& lex)
template < bool Act = WithLexPos >
typename std::enable_if<Act>::type next_token_start(const nlohmann::position_t& p)
{
check_start(lex.get_position().chars_read_total - 1);
CHECK(WithLex);
check_start(p.chars_read_total);
CHECK(WithLexPos);
}
template<bool Act = WithPos>
template<bool Act = WithBytePos>
typename std::enable_if<Act>::type next_token_end(std::size_t pos)
{
check_end(pos);
CHECK((!WithLex || LexCallImpossible));
CHECK((!WithLexPos || LexCallImpossible));
}
template < class LexT, bool Act = WithLex && !std::is_same<LexT, std::size_t>::value >
typename std::enable_if<Act>::type next_token_end(const LexT& lex)
template < bool Act = WithLexPos >
typename std::enable_if<Act>::type next_token_end(const nlohmann::position_t& p)
{
check_end(lex.get_position().chars_read_total);
CHECK(WithLex);
check_end(p.chars_read_total);
CHECK(WithLexPos);
}
bool null()
@ -303,11 +303,11 @@ struct Sax
}
};
template<bool WithPosV, bool WithLexV>
template<bool WithBytePosV, bool WithLexPosV>
struct Opt
{
static constexpr bool WithPos = WithPosV;
static constexpr bool WithLex = WithLexV;
static constexpr bool WithBytePos = WithBytePosV;
static constexpr bool WithLexPos = WithLexPosV;
};
using OptNone = Opt<false, false>;
@ -318,10 +318,10 @@ using OptBoth = Opt<true, true>;
//test basic functionality
TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
{
const bool with_pos = T::WithPos;
const bool with_lex = T::WithLex;
const bool with_pos = T::WithBytePos;
const bool with_lex = T::WithLexPos;
INFO("WithPos " << with_pos << ", WithLex " << with_lex);
INFO("WithBytePos " << with_pos << ", WithLexPos " << with_lex);
//element count 0 1 2 3 4 5 6 7 8 9 10
//index 10s place 0 1 2 3 4 5
//index 1s place 012345678901234567890123456789012345678901234567890123
@ -351,7 +351,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
reconstructed += s;
skip(s.size());
};
Sax</*LexCallImpossible*/ false, T::WithPos, T::WithLex> sax;
Sax</*LexCallImpossible*/ false, T::WithBytePos, T::WithLexPos> sax;
sax.pos_start_object.emplace(elementFromStr("{"));
skipFromStr(" ");
sax.pos_key.emplace(elementFromStr(R"("array")"));
@ -384,7 +384,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
{
const auto j = nlohmann::json::parse(str);
const auto bin = nlohmann::json::to_bson(j);
Sax</*LexCallImpossible*/ true, T::WithPos, T::WithLex> sax;
Sax</*LexCallImpossible*/ true, T::WithBytePos, T::WithLexPos> sax;
sax.pos_start_object.emplace(element(4)); //4 bytes size
skip(1); //one byte type array
sax.pos_key.emplace(element(6)); //6 key (array\0)
@ -414,7 +414,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
{
const auto j = nlohmann::json::parse(str);
const auto bin = nlohmann::json::to_cbor(j);
Sax</*LexCallImpossible*/ true, T::WithPos, T::WithLex> sax;
Sax</*LexCallImpossible*/ true, T::WithBytePos, T::WithLexPos> sax;
sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type)
sax.pos_key.emplace(element(6)); //1 byte type + 5 bytes string (array) (size implicit)
sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type)
@ -437,7 +437,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
{
const auto j = nlohmann::json::parse(str);
const auto bin = nlohmann::json::to_msgpack(j);
Sax</*LexCallImpossible*/ true, T::WithPos, T::WithLex> sax;
Sax</*LexCallImpossible*/ true, T::WithBytePos, T::WithLexPos> sax;
sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size
sax.pos_key.emplace(element(6)); //1 byte type + 5 bytes string (array) (size implicit)
sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type)
@ -460,7 +460,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
{
const auto j = nlohmann::json::parse(str);
const auto bin = nlohmann::json::to_ubjson(j);
Sax</*LexCallImpossible*/ true, T::WithPos, T::WithLex> sax;
Sax</*LexCallImpossible*/ true, T::WithBytePos, T::WithLexPos> sax;
sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size
sax.pos_key.emplace(element(7)); //1 byte type + 6 bytes string (array\0)
sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type)
@ -483,7 +483,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth)
{
const auto j = nlohmann::json::parse(str);
const auto bin = nlohmann::json::to_bjdata(j);
Sax</*LexCallImpossible*/ true, T::WithPos, T::WithLex> sax;
Sax</*LexCallImpossible*/ true, T::WithBytePos, T::WithLexPos> sax;
sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size
sax.pos_key.emplace(element(7)); //1 byte type + 6 bytes string (array\0)
sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type)

View File

@ -35,17 +35,17 @@ SOFTWARE.
#include <nlohmann/json.hpp>
//prototype to make -Wmissing-prototypes happy
std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p);
std::ostream& operator<<(std::ostream& out, const nlohmann::position_t& p);
//test json parser with detailed line / col information as metadata
struct token_start_stop
{
nlohmann::detail::position_t start{};
nlohmann::detail::position_t stop{};
nlohmann::position_t start{};
nlohmann::position_t stop{};
};
std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p)
std::ostream& operator<<(std::ostream& out, const nlohmann::position_t& p)
{
out << p.chars_read_total << '(' << p.lines_read << ':' << p.chars_read_current_line << ')';
return out;
@ -90,16 +90,14 @@ class sax_with_token_start_stop_metadata
, start_stop{}
{}
template<class T1, class T2>
void next_token_start(const nlohmann::detail::lexer<T1, T2>& lex)
void next_token_start(const nlohmann::position_t& p)
{
start_stop.start = lex.get_position();
start_stop.start = p;
}
template<class T1, class T2>
void next_token_end(const nlohmann::detail::lexer<T1, T2>& lex)
void next_token_end(const nlohmann::position_t& p)
{
start_stop.stop = lex.get_position();
start_stop.stop = p;
}
bool null()
@ -294,38 +292,38 @@ TEST_CASE("parse-json-with-position-info")
sax_with_token_start_stop_metadata sax{j};
CHECK(nlohmann::json::sax_parse(str, &sax, nlohmann::json::input_format_t::json));
CHECK(j.start.lines_read == 0);
CHECK(j.start.chars_read_current_line == 1);
CHECK(j.start.chars_read_current_line == 0);
CHECK(j["array"].start.lines_read == 1);
CHECK(j["array"].start.chars_read_current_line == 13);
CHECK(j["array"].start.chars_read_current_line == 12);
CHECK(j["array"][0].start.lines_read == 2);
CHECK(j["array"][0].start.chars_read_current_line == 5);
CHECK(j["array"][0].start.chars_read_current_line == 4);
CHECK(j["array"][0].stop.lines_read == 2);
CHECK(j["array"][0].stop.chars_read_current_line == 15);
CHECK(j["array"][1].start.lines_read == 3);
CHECK(j["array"][1].start.chars_read_current_line == 5);
CHECK(j["array"][1].start.chars_read_current_line == 4);
CHECK(j["array"][1].stop.lines_read == 3);
CHECK(j["array"][1].stop.chars_read_current_line == 6);
CHECK(j["array"][2].start.lines_read == 4);
CHECK(j["array"][2].start.chars_read_current_line == 5);
CHECK(j["array"][2].start.chars_read_current_line == 4);
CHECK(j["array"][2].stop.lines_read == 4);
CHECK(j["array"][2].stop.chars_read_current_line == 8);
CHECK(j["array"][3].start.lines_read == 5);
CHECK(j["array"][3].start.chars_read_current_line == 5);
CHECK(j["array"][3].start.chars_read_current_line == 4);
CHECK(j["array"][3].stop.lines_read == 5);
CHECK(j["array"][3].stop.chars_read_current_line == 7);
CHECK(j["array"][4].start.lines_read == 6); //starts directly after last value....
CHECK(j["array"][4].start.chars_read_current_line == 5);
CHECK(j["array"][4].start.chars_read_current_line == 4);
CHECK(j["array"][4].stop.lines_read == 6);
CHECK(j["array"][4].stop.chars_read_current_line == 8);
CHECK(j["array"][5].start.lines_read == 7);
CHECK(j["array"][5].start.chars_read_current_line == 5);
CHECK(j["array"][5].start.chars_read_current_line == 4);
CHECK(j["array"][5].stop.lines_read == 7);
CHECK(j["array"][5].stop.chars_read_current_line == 9);