Extend sax parser to optionally accept position information for parsed tokens

This commit is contained in:
Raphael Grimm 2021-11-20 14:22:56 +01:00 committed by Raphael Grimm
parent 5d2754306d
commit bd9cdcd99c
7 changed files with 2933 additions and 102 deletions

View File

@ -168,8 +168,9 @@ class binary_reader
bool parse_bson_internal()
{
std::int32_t document_size{};
detail::sax_call_next_token_start_pos(sax, chars_read);
get_number<std::int32_t, true>(input_format_t::bson, document_size);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
{
return false;
@ -180,6 +181,7 @@ class binary_reader
return false;
}
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_object();
}
@ -277,6 +279,7 @@ class binary_reader
case 0x01: // double
{
double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(number));
return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
@ -284,7 +287,10 @@ class binary_reader
{
std::int32_t len{};
string_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
detail::sax_call_next_token_start_pos(sax, chars_read);
const bool result_get = get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(value);
}
case 0x03: // object
@ -301,28 +307,35 @@ class binary_reader
{
std::int32_t len{};
binary_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
detail::sax_call_next_token_start_pos(sax, chars_read);
const bool result_get = get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(value);
}
case 0x08: // boolean
{
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + 1);
return sax->boolean(get() != 0);
}
case 0x0A: // null
{
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->null();
}
case 0x10: // int32
{
std::int32_t value{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value));
return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
}
case 0x12: // int64
{
std::int64_t value{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value));
return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
}
@ -361,15 +374,23 @@ class binary_reader
}
const std::size_t element_type_parse_position = chars_read;
if (!is_array)
{
detail::sax_call_next_token_start_pos(sax, chars_read);
}
if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
{
return false;
}
if (!is_array && !sax->key(key))
if (!is_array)
{
detail::sax_call_next_token_end_pos(sax, chars_read);
if (!sax->key(key))
{
return false;
}
}
if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
{
@ -390,6 +411,7 @@ class binary_reader
bool parse_bson_array()
{
std::int32_t document_size{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(std::int32_t));
get_number<std::int32_t, true>(input_format_t::bson, document_size);
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
@ -402,6 +424,7 @@ class binary_reader
return false;
}
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_array();
}
@ -451,29 +474,34 @@ class binary_reader
case 0x15:
case 0x16:
case 0x17:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_unsigned(static_cast<number_unsigned_t>(current));
case 0x18: // Unsigned integer (one-byte uint8_t follows)
{
std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
}
case 0x19: // Unsigned integer (two-byte uint16_t follows)
{
std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
}
case 0x1A: // Unsigned integer (four-byte uint32_t follows)
{
std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
}
case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
{
std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
}
@ -502,29 +530,34 @@ class binary_reader
case 0x35:
case 0x36:
case 0x37:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
case 0x38: // Negative integer (one-byte uint8_t follows)
{
std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
}
case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
{
std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
}
case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
{
std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
}
case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
{
std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
- static_cast<number_integer_t>(number));
}
@ -561,7 +594,10 @@ class binary_reader
case 0x5F: // Binary data (indefinite length)
{
binary_t b;
return get_cbor_binary(b) && sax->binary(b);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_binary(b);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(b);
}
// UTF-8 string (0x00..0x17 bytes follow)
@ -596,7 +632,10 @@ class binary_reader
case 0x7F: // UTF-8 string (indefinite length)
{
string_t s;
return get_cbor_string(s) && sax->string(s);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_string(s);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(s);
}
// array (0x00..0x17 data items follow)
@ -624,35 +663,51 @@ class binary_reader
case 0x95:
case 0x96:
case 0x97:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_array(
conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
case 0x98: // array (one-byte uint8_t for n follows)
{
std::uint8_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
}
case 0x99: // array (two-byte uint16_t for n follow)
{
std::uint16_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
}
case 0x9A: // array (four-byte uint32_t for n follow)
{
std::uint32_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
}
case 0x9B: // array (eight-byte uint64_t for n follow)
{
std::uint64_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
}
case 0x9F: // array (indefinite length)
{
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_array(static_cast<std::size_t>(-1), tag_handler);
}
// map (0x00..0x17 pairs of data items follow)
case 0xA0:
@ -679,33 +734,47 @@ class binary_reader
case 0xB5:
case 0xB6:
case 0xB7:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
case 0xB8: // map (one-byte uint8_t for n follows)
{
std::uint8_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
}
case 0xB9: // map (two-byte uint16_t for n follow)
{
std::uint16_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
}
case 0xBA: // map (four-byte uint32_t for n follow)
{
std::uint32_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
}
case 0xBB: // map (eight-byte uint64_t for n follow)
{
std::uint64_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
}
case 0xBF: // map (indefinite length)
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_object(static_cast<std::size_t>(-1), tag_handler);
case 0xC6: // tagged item
@ -810,7 +879,10 @@ class binary_reader
return parse_cbor_internal(true, tag_handler);
}
get();
return get_cbor_binary(b) && sax->binary(b);
detail::sax_call_next_token_start_pos(sax, chars_read);
const bool result_get = get_cbor_binary(b);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(b);
}
default: // LCOV_EXCL_LINE
@ -820,16 +892,20 @@ class binary_reader
}
case 0xF4: // false
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(false);
case 0xF5: // true
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(true);
case 0xF6: // null
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->null();
case 0xF9: // Half-Precision Float (two-byte IEEE 754)
{
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const auto byte1_raw = get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
{
@ -871,6 +947,7 @@ class binary_reader
return std::ldexp(mant + 1024, exp - 25);
}
}();
detail::sax_call_next_token_end_pos(sax, chars_read);
return sax->number_float((half & 0x8000u) != 0
? static_cast<number_float_t>(-val)
: static_cast<number_float_t>(val), "");
@ -879,12 +956,14 @@ class binary_reader
case 0xFA: // Single-Precision Float (four-byte IEEE 754)
{
float number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
{
double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
@ -1128,6 +1207,7 @@ class binary_reader
}
}
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_array();
}
@ -1153,7 +1233,10 @@ class binary_reader
for (std::size_t i = 0; i < len; ++i)
{
get();
if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{
return false;
}
@ -1169,7 +1252,10 @@ class binary_reader
{
while (get() != 0xFF)
{
if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{
return false;
}
@ -1183,6 +1269,7 @@ class binary_reader
}
}
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_object();
}
@ -1330,6 +1417,7 @@ class binary_reader
case 0x7D:
case 0x7E:
case 0x7F:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap
@ -1349,6 +1437,7 @@ class binary_reader
case 0x8D:
case 0x8E:
case 0x8F:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_msgpack_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
// fixarray
@ -1368,6 +1457,7 @@ class binary_reader
case 0x9D:
case 0x9E:
case 0x9F:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_msgpack_array(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
// fixstr
@ -1408,16 +1498,22 @@ class binary_reader
case 0xDB: // str 32
{
string_t s;
return get_msgpack_string(s) && sax->string(s);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_msgpack_string(s);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(s);
}
case 0xC0: // nil
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->null();
case 0xC2: // false
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(false);
case 0xC3: // true
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(true);
case 0xC4: // bin 8
@ -1433,90 +1529,107 @@ class binary_reader
case 0xD8: // fixext 16
{
binary_t b;
return get_msgpack_binary(b) && sax->binary(b);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_msgpack_binary(b);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(b);
}
case 0xCA: // float 32
{
float number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCB: // float 64
{
double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
case 0xCC: // uint 8
{
std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
}
case 0xCD: // uint 16
{
std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
}
case 0xCE: // uint 32
{
std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
}
case 0xCF: // uint 64
{
std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
}
case 0xD0: // int 8
{
std::int8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
}
case 0xD1: // int 16
{
std::int16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
}
case 0xD2: // int 32
{
std::int32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
}
case 0xD3: // int 64
{
std::int64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
}
case 0xDC: // array 16
{
std::uint16_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
}
case 0xDD: // array 32
{
std::uint32_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast<std::size_t>(len));
}
case 0xDE: // map 16
{
std::uint16_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
}
case 0xDF: // map 32
{
std::uint32_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast<std::size_t>(len));
}
@ -1553,6 +1666,7 @@ class binary_reader
case 0xFD:
case 0xFE:
case 0xFF:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_integer(static_cast<std::int8_t>(current));
default: // anything else
@ -1783,6 +1897,7 @@ class binary_reader
}
}
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_array();
}
@ -1801,7 +1916,10 @@ class binary_reader
for (std::size_t i = 0; i < len; ++i)
{
get();
if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_msgpack_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{
return false;
}
@ -1813,6 +1931,7 @@ class binary_reader
key.clear();
}
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_object();
}
@ -2175,7 +2294,6 @@ class binary_reader
return true;
}
}
string_t key = "_ArraySize_";
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size())))
{
@ -2236,7 +2354,6 @@ class binary_reader
bool is_ndarray = false;
get_ignore_noop();
if (current == '$')
{
result.second = get(); // must not ignore 'N', because 'N' maybe the type
@ -2265,7 +2382,9 @@ class binary_reader
exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
}
// detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool is_error = get_ubjson_size_value(result.first, is_ndarray);
//detail::sax_call_next_token_end_pos(sax, chars_read);
if (input_format == input_format_t::bjdata && is_ndarray)
{
if (inside_ndarray)
@ -2280,7 +2399,9 @@ class binary_reader
if (current == '#')
{
// detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool is_error = get_ubjson_size_value(result.first, is_ndarray);
// detail::sax_call_next_token_end_pos(sax, chars_read);
if (input_format == input_format_t::bjdata && is_ndarray)
{
return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
@ -2289,6 +2410,7 @@ class binary_reader
return is_error;
}
// detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read - 1);
return true;
}
@ -2304,40 +2426,47 @@ class binary_reader
return unexpect_eof(input_format, "value");
case 'T': // true
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(true);
case 'F': // false
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(false);
case 'Z': // null
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->null();
case 'U':
{
std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'i':
{
std::int8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number);
}
case 'I':
{
std::int16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number);
}
case 'l':
{
std::int32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number);
}
case 'L':
{
std::int64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number);
}
@ -2348,6 +2477,7 @@ class binary_reader
break;
}
std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number);
}
@ -2358,6 +2488,7 @@ class binary_reader
break;
}
std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number);
}
@ -2368,11 +2499,13 @@ class binary_reader
break;
}
std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number);
}
case 'h':
{
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
if (input_format != input_format_t::bjdata)
{
break;
@ -2418,25 +2551,30 @@ class binary_reader
return std::ldexp(mant + 1024, exp - 25);
}
}();
detail::sax_call_next_token_end_pos(sax, chars_read);
return sax->number_float((half & 0x8000u) != 0
? static_cast<number_float_t>(-val)
: static_cast<number_float_t>(val), "");
: static_cast<number_float_t>(val),
"");
}
case 'd':
{
float number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
case 'D':
{
double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
case 'H':
{
// call to detail::sax_call_next_token_start_end_pos inside of the method
return get_ubjson_high_precision_number();
}
@ -2454,19 +2592,25 @@ class binary_reader
exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr));
}
string_t s(1, static_cast<typename string_t::value_type>(current));
detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read);
return sax->string(s);
}
case 'S': // string
{
string_t s;
return get_ubjson_string(s) && sax->string(s);
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(s);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(s);
}
case '[': // array
// call to detail::sax_call_next_token_start_end_pos inside of the method
return get_ubjson_array();
case '{': // object
// call to detail::sax_call_next_token_start_end_pos inside of the method
return get_ubjson_object();
default: // anything else
@ -2481,6 +2625,7 @@ class binary_reader
*/
bool get_ubjson_array()
{
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
std::pair<std::size_t, char_int_type> size_and_type;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{
@ -2505,6 +2650,7 @@ class binary_reader
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}
detail::sax_call_next_token_end_pos(sax, chars_read);
string_t type = it->second; // sax->string() takes a reference
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
{
@ -2516,6 +2662,7 @@ class binary_reader
size_and_type.second = 'U';
}
detail::sax_call_next_token_start_end_pos(sax, chars_read);
key = "_ArrayData_";
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) ))
{
@ -2524,17 +2671,20 @@ class binary_reader
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
// call to detail::sax_call_next_token_start_end_pos inside of the method
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
{
return false;
}
}
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return (sax->end_array() && sax->end_object());
}
if (size_and_type.first != npos)
{
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
{
return false;
@ -2546,6 +2696,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
// call to detail::sax_call_next_token_start_end_pos inside of the method
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
{
return false;
@ -2557,6 +2708,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
// call to detail::sax_call_next_token_start_end_pos inside of the method
if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
{
return false;
@ -2566,6 +2718,7 @@ class binary_reader
}
else
{
detail::sax_call_next_token_end_pos(sax, chars_read - 1);
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
{
return false;
@ -2581,6 +2734,7 @@ class binary_reader
}
}
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_array();
}
@ -2589,6 +2743,7 @@ class binary_reader
*/
bool get_ubjson_object()
{
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
std::pair<std::size_t, char_int_type> size_and_type;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{
@ -2606,6 +2761,7 @@ class binary_reader
string_t key;
if (size_and_type.first != npos)
{
detail::sax_call_next_token_end_pos(sax, chars_read - 1);
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
{
return false;
@ -2615,7 +2771,10 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{
return false;
}
@ -2630,7 +2789,10 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{
return false;
}
@ -2644,6 +2806,7 @@ class binary_reader
}
else
{
detail::sax_call_next_token_end_pos(sax, chars_read - 1);
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
{
return false;
@ -2651,7 +2814,10 @@ class binary_reader
while (current != '}')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(key, false);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{
return false;
}
@ -2664,6 +2830,7 @@ class binary_reader
}
}
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_object();
}
@ -2672,6 +2839,7 @@ class binary_reader
bool get_ubjson_high_precision_number()
{
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
// get size of following number string
std::size_t size{};
bool no_ndarray = true;
@ -2692,6 +2860,7 @@ class binary_reader
}
number_vector.push_back(static_cast<char>(current));
}
detail::sax_call_next_token_end_pos(sax, chars_read);
// parse number string
using ia_type = decltype(detail::input_adapter(number_vector));
@ -2889,6 +3058,7 @@ class binary_reader
{
if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
{
detail::sax_call_next_token_end_pos(sax, chars_read);
return sax->parse_error(chars_read, "<end of file>",
parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
}

View File

@ -1506,13 +1506,13 @@ scan_number_done:
while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
}
token_type scan()
bool scan_start()
{
// initially, skip the BOM
if (position.chars_read_total == 0 && !skip_bom())
{
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
return token_type::parse_error;
return false;
}
// read next character and ignore whitespace
@ -1523,13 +1523,17 @@ scan_number_done:
{
if (!scan_comment())
{
return token_type::parse_error;
return false;
}
// skip following whitespace
skip_whitespace();
}
return true;
}
token_type scan_end()
{
switch (current)
{
// structural characters
@ -1593,6 +1597,10 @@ scan_number_done:
return token_type::parse_error;
}
}
/*!
@brief scan the next token in one step

Convenience wrapper that chains the two scanning phases: scan_start()
runs first (BOM check and skipping up to the first character of the
token); only if it succeeds is scan_end() invoked to classify the token
beginning at the current character.

@return token_type::parse_error if scan_start() reports failure,
        otherwise whatever scan_end() returns
*/
token_type scan()
{
    // the preparation phase failed -> nothing sensible to classify
    if (!scan_start())
    {
        return token_type::parse_error;
    }

    return scan_end();
}
private:
/// input adapter

View File

@ -76,8 +76,6 @@ class parser
, m_lexer(std::move(adapter), skip_comments)
, allow_exceptions(allow_exceptions_)
{
// read first token
get_token();
}
/*!
@ -98,7 +96,7 @@ class parser
sax_parse_internal(&sdp);
// in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input))
if (strict && (get_token(&sdp) != token_type::end_of_input))
{
sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
@ -126,7 +124,7 @@ class parser
sax_parse_internal(&sdp);
// in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input))
if (strict && (get_token(&sdp) != token_type::end_of_input))
{
sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
@ -164,7 +162,7 @@ class parser
const bool result = sax_parse_internal(sax);
// strict mode: next byte must be EOF
if (result && strict && (get_token() != token_type::end_of_input))
if (result && strict && (get_token(sax) != token_type::end_of_input))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
@ -185,6 +183,8 @@ class parser
// value to avoid a goto (see comment where set to true)
bool skip_to_state_evaluation = false;
// read first token
get_token(sax);
while (true)
{
if (!skip_to_state_evaluation)
@ -200,7 +200,7 @@ class parser
}
// closing } -> we are done
if (get_token() == token_type::end_object)
if (get_token(sax) == token_type::end_object)
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
{
@ -222,7 +222,7 @@ class parser
}
// parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
@ -233,7 +233,7 @@ class parser
states.push_back(false);
// parse values
get_token();
get_token(sax);
continue;
}
@ -245,7 +245,7 @@ class parser
}
// closing ] -> we are done
if (get_token() == token_type::end_array)
if (get_token(sax) == token_type::end_array)
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
{
@ -372,10 +372,10 @@ class parser
if (states.back()) // array
{
// comma -> next value
if (get_token() == token_type::value_separator)
if (get_token(sax) == token_type::value_separator)
{
// parse a new value
get_token();
get_token(sax);
continue;
}
@ -405,10 +405,10 @@ class parser
// states.back() is false -> object
// comma -> next value
if (get_token() == token_type::value_separator)
if (get_token(sax) == token_type::value_separator)
{
// parse key
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::value_string))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
@ -421,7 +421,7 @@ class parser
}
// parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
@ -429,7 +429,7 @@ class parser
}
// parse values
get_token();
get_token(sax);
continue;
}
@ -457,10 +457,19 @@ class parser
}
}
/// Get the next token from the lexer and pass position info to sax (if it is
/// accepted).
///
/// The scan is split in two so the handler can be told where the token begins
/// and ends: scan_start() runs first, then the start position is reported,
/// then scan_end() classifies the token, then the end position is reported.
///
/// @param[in] sax  SAX handler to notify about the token's start / end position
/// @return the scanned token type (also stored in last_token);
///         token_type::parse_error if scan_start() fails, in which case no
///         position is reported
template<class SAX>
token_type get_token(SAX* sax)
{
    if (!m_lexer.scan_start())
    {
        last_token = token_type::parse_error;
        return token_type::parse_error;
    }
    detail::sax_call_next_token_start_pos(sax, m_lexer);
    last_token = m_lexer.scan_end();
    detail::sax_call_next_token_end_pos(sax, m_lexer);
    return last_token;
}
std::string exception_message(const token_type expected, const std::string& context)

View File

@ -19,6 +19,151 @@
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
// helper struct to call sax->next_token_start
// (we want this functionality as a type to ease passing it as template argument)
//
// call() only participates in overload resolution if
// sax->next_token_start(ts...) is a valid expression; its return type is the
// return type of that member call and the result is forwarded to the caller.
struct sax_call_next_token_start_pos_direct
{
    template<typename SAX, typename...Ts>
    static auto call(SAX* sax, Ts&& ...ts)
    -> decltype(sax->next_token_start(std::forward<Ts>(ts)...))
    {
        // returning the expression is valid for void as well and avoids flowing
        // off the end of the function if the member returns a value
        return sax->next_token_start(std::forward<Ts>(ts)...);
    }
};
// helper struct to call sax->next_token_end
// (we want this functionality as a type to ease passing it as template argument)
//
// call() only participates in overload resolution if
// sax->next_token_end(ts...) is a valid expression; its return type is the
// return type of that member call and the result is forwarded to the caller.
struct sax_call_next_token_end_pos_direct
{
    template<typename SAX, typename...Ts>
    static auto call(SAX* sax, Ts&& ...ts)
    -> decltype(sax->next_token_end(std::forward<Ts>(ts)...))
    {
        // returning the expression is valid for void as well and avoids flowing
        // off the end of the function if the member returns a value
        return sax->next_token_end(std::forward<Ts>(ts)...);
    }
};
// Dispatches a position notification (next_token_start or next_token_end) to a
// SAX handler and drops the call if the handler does not provide a matching
// member function.
//
// Template parameters:
// - DirectCaller: one of sax_call_next_token_{start,end}_pos_direct; determines
//   which member function is called
// - SAX: type of the SAX handler
// - LexOrPos: type of the argument used at the call site; std::size_t is
//   treated as a plain position, every other type is treated as a lexer
//
// All call() overloads take a defaulted template parameter SaxT that is
// constrained to be SAX. It makes the enable_if conditions depend on a template
// parameter of the function itself, so disabled overloads are removed by SFINAE
// instead of causing a hard error.
template <typename DirectCaller, typename SAX, typename LexOrPos>
struct sax_call_function
{
// true if the call site passes a position (std::size_t) and not a lexer
static constexpr bool no_lexer = std::is_same<LexOrPos, std::size_t>::value;
// result type of DirectCaller::call(SAX2*, Ts2...); only well-formed if the
// handler has a member function accepting these arguments
template<typename SAX2, typename...Ts2>
using call_t = decltype(DirectCaller::call(std::declval<SAX2*>(), std::declval<Ts2>()...));
// the sax parser supports calls with a position
// (NOTE(review): is_detected_exact<void, ...> presumably mirrors
// std::experimental::is_detected_exact, i.e. the call has to yield exactly void)
static constexpr bool detected_call_with_pos =
is_detected_exact<void, call_t, SAX, std::size_t>::value;
// the sax parser supports calls with a lexer
// (never true if LexOrPos is std::size_t)
static constexpr bool detected_call_with_lex =
!no_lexer &&
is_detected_exact<void, call_t, SAX, const LexOrPos>::value;
// there either has to be a version accepting a lexer or a position,
// otherwise every call is dropped
static constexpr bool valid = detected_call_with_pos || detected_call_with_lex;
// called with a position and the position version is supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_pos
>::type
call(SaxT* sax, std::size_t pos)
{
DirectCaller::call(sax, pos);
}
// the sax parser has no version of the method -> drop the call (no-op)
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::valid
>::type
call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {}
// called with a lexer and the lexer version is supported -> pass the lexer on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex);
}
// called with a lexer for next_token_start, but only the position version is
// supported -> call with the position taken from the lexer
// the start pos in the lexer is the last read char -> chars_read_total - 1
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
std::is_same<DirectCaller, sax_call_next_token_start_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total - 1);
}
// called with a lexer for next_token_end, but only the position version is
// supported -> call with the position taken from the lexer
// the one past end pos in the lexer is the current index -> chars_read_total
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
std::is_same<DirectCaller, sax_call_next_token_end_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total);
}
};
//set the element start pos of a sax parser by calling any version of sax->next_token_start (if available)
template<class SAX, class LexOrPos>
void sax_call_next_token_start_pos(SAX* sax, const LexOrPos& lexOrPos)
{
using call_t = sax_call_function<sax_call_next_token_start_pos_direct, SAX, LexOrPos>;
call_t::call(sax, lexOrPos);
}
//set the element end pos of a sax parser by calling any version of sax->next_token_end (if available)
template<class SAX, class LexOrPos>
void sax_call_next_token_end_pos(SAX* sax, const LexOrPos& lexOrPos)
{
using call_t = sax_call_function<sax_call_next_token_end_pos_direct, SAX, LexOrPos>;
call_t::call(sax, lexOrPos);
}
// Report both the start and the end position of the next token to a sax parser
// by calling any version of sax->next_token_start and sax->next_token_end
// (if available). The start is always reported before the end.
// lexOrPos1 describes the start, lexOrPos2 the end of the token.
template<class SAX, class LexOrPos1, class LexOrPos2>
void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos1& lexOrPos1, const LexOrPos2& lexOrPos2)
{
    sax_call_next_token_start_pos(sax, lexOrPos1);
    sax_call_next_token_end_pos(sax, lexOrPos2);
}

// Same as above, but the start and the end position are both taken from the
// same argument.
template<class SAX, class LexOrPos>
void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos& lexOrPos)
{
    sax_call_next_token_start_end_pos(sax, lexOrPos, lexOrPos);
}
// type of the expression t.null() for an lvalue t of type T;
// ill-formed if T has no callable member null()
template<typename T>
using null_function_t = decltype(std::declval<T&>().null());

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,337 @@
/*
__ _____ _____ _____
__| | __| | | | JSON for Modern C++ (test suite)
| | |__ | | | | | | version 3.10.2
|_____|_____|_____|_|___| https://github.com/nlohmann/json
Licensed under the MIT License <http://opensource.org/licenses/MIT>.
SPDX-License-Identifier: MIT
Copyright (c) 2013-2019 Niels Lohmann <http://nlohmann.me>.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <iostream>
#include <string>
#include "doctest_compatibility.h"
#include <nlohmann/json.hpp>
// forward declaration of the stream operator defined below
// (a prior prototype keeps -Wmissing-prototypes quiet)
std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p);
// Metadata attached to every JSON value in this test: the lexer positions
// (total chars / line / column) recorded at the start and at the end of the
// token the value was parsed from.
struct token_start_stop
{
    nlohmann::detail::position_t start = {};
    nlohmann::detail::position_t stop = {};
};
// print a position as "<chars_read_total>(<lines_read>:<chars_read_current_line>)"
std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p)
{
    return out << p.chars_read_total << '(' << p.lines_read << ':' << p.chars_read_current_line << ')';
}
// basic_json specialization used by this test.
// The last template argument, token_start_stop, supplies the start / stop
// members that the SAX handler below reads and writes on every json value.
// NOTE(review): the remaining arguments look like the library's usual defaults
// (std::map objects, std::vector arrays, std::string, ...) -- confirm against
// the basic_json declaration.
using json_with_token_start_stop =
nlohmann::basic_json <
std::map,
std::vector,
std::string,
bool,
std::int64_t,
std::uint64_t,
double,
std::allocator,
nlohmann::adl_serializer,
std::vector<std::uint8_t>,
token_start_stop >;
//adapted from detail::json_sax_dom_parser
class sax_with_token_start_stop_metadata
{
public:
using json = json_with_token_start_stop;
using number_integer_t = typename json::number_integer_t;
using number_unsigned_t = typename json::number_unsigned_t;
using number_float_t = typename json::number_float_t;
using string_t = typename json::string_t;
using binary_t = typename json::binary_t;
/*!
@param[in,out] r reference to a JSON value that is manipulated while
parsing
@param[in] allow_exceptions_ whether parse errors yield exceptions
*/
explicit sax_with_token_start_stop_metadata(json& r, const bool allow_exceptions_ = true)
: root(r)
, ref_stack{}
, object_element{nullptr}
, errored{false}
, allow_exceptions(allow_exceptions_)
, start_stop{}
{}
template<class T1, class T2>
void next_token_start(const nlohmann::detail::lexer<T1, T2>& lex)
{
start_stop.start = lex.get_position();
}
template<class T1, class T2>
void next_token_end(const nlohmann::detail::lexer<T1, T2>& lex)
{
start_stop.stop = lex.get_position();
}
bool null()
{
handle_value(nullptr);
return true;
}
bool boolean(bool val)
{
handle_value(val);
return true;
}
bool number_integer(number_integer_t val)
{
handle_value(val);
return true;
}
bool number_unsigned(number_unsigned_t val)
{
handle_value(val);
return true;
}
bool number_float(number_float_t val, const string_t& /*unused*/)
{
handle_value(val);
return true;
}
bool string(string_t& val)
{
handle_value(val);
return true;
}
bool binary(binary_t& val)
{
handle_value(std::move(val));
return true;
}
bool start_object(std::size_t len)
{
ref_stack.push_back(handle_value(json::value_t::object));
ref_stack.back()->start = start_stop.start;
if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
{
throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive object size: ", std::to_string(len)), ref_stack.back());
}
return true;
}
bool key(string_t& val)
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_object());
// add null at given key and store the reference for later
object_element = &(*ref_stack.back())[val];
return true;
}
bool end_object()
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_object());
ref_stack.back()->stop = start_stop.stop;
ref_stack.pop_back();
return true;
}
bool start_array(std::size_t len)
{
ref_stack.push_back(handle_value(json::value_t::array));
ref_stack.back()->start = start_stop.start;
if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
{
throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive array size: ", std::to_string(len)), ref_stack.back());
}
return true;
}
bool end_array()
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_array());
ref_stack.back()->stop = start_stop.stop;
ref_stack.pop_back();
return true;
}
template<class Exception>
bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const Exception& ex)
{
errored = true;
static_cast<void>(ex);
if (allow_exceptions)
{
throw ex;
}
return false;
}
constexpr bool is_errored() const
{
return errored;
}
private:
/*!
@invariant If the ref stack is empty, then the passed value will be the new
root.
@invariant If the ref stack contains a value, then it is an array or an
object to which we can add elements
*/
template<typename Value>
json*
handle_value(Value&& v)
{
if (ref_stack.empty())
{
root = json(std::forward<Value>(v));
root.start = start_stop.start;
root.stop = start_stop.stop;
return &root;
}
assert(ref_stack.back()->is_array() || ref_stack.back()->is_object());
if (ref_stack.back()->is_array())
{
auto& array_element = ref_stack.back()->emplace_back(std::forward<Value>(v));
array_element.start = start_stop.start;
array_element.stop = start_stop.stop;
return &array_element;
}
assert(ref_stack.back()->is_object());
assert(object_element);
*object_element = json(std::forward<Value>(v));
object_element->start = start_stop.start;
object_element->stop = start_stop.stop;
return object_element;
}
/// the parsed JSON value
json& root;
/// stack to model hierarchy of values
std::vector<json*> ref_stack{};
/// helper to hold the reference for the next object element
json* object_element = nullptr;
/// whether a syntax error occurred
bool errored = false;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
/// start / stop information for the current token
token_start_stop start_stop{};
};
// Parses a small hand-formatted document with a position-aware SAX handler and
// checks the start / stop positions stored for every value.
//
// The whitespace inside the raw string literals is significant: the column
// expectations below require the "array" key and the closing ']' to be
// indented by two spaces and the array elements by four. The expected columns
// are 1-based: start is the column of a token's first character, stop the
// column of its last character.
TEST_CASE("parse-json-with-position-info")
{
    const std::string str =
        /*line 0*/ R"({)"
        "\n"
        /*line 1*/ R"(  "array" : [)"
        "\n"
        /*line 2*/ R"(    14294967296,)"
        "\n"
        /*line 3*/ R"(    -1,)"
        "\n"
        /*line 4*/ R"(    true,)"
        "\n"
        /*line 5*/ R"(    4.2,)"
        "\n"
        /*line 6*/ R"(    null,)"
        "\n"
        /*line 7*/ R"(    "str")"
        "\n"
        /*line 8*/ R"(  ])"
        "\n"
        /*line 9*/ R"(})";
    json_with_token_start_stop j;
    sax_with_token_start_stop_metadata sax{j};
    CHECK(nlohmann::json::sax_parse(str, &sax, nlohmann::json::input_format_t::json));

    // root object: starts at '{' on line 0
    CHECK(j.start.lines_read == 0);
    CHECK(j.start.chars_read_current_line == 1);

    // array: starts at '[' on line 1
    CHECK(j["array"].start.lines_read == 1);
    CHECK(j["array"].start.chars_read_current_line == 13);

    // 14294967296
    CHECK(j["array"][0].start.lines_read == 2);
    CHECK(j["array"][0].start.chars_read_current_line == 5);
    CHECK(j["array"][0].stop.lines_read == 2);
    CHECK(j["array"][0].stop.chars_read_current_line == 15);

    // -1
    CHECK(j["array"][1].start.lines_read == 3);
    CHECK(j["array"][1].start.chars_read_current_line == 5);
    CHECK(j["array"][1].stop.lines_read == 3);
    CHECK(j["array"][1].stop.chars_read_current_line == 6);

    // true
    CHECK(j["array"][2].start.lines_read == 4);
    CHECK(j["array"][2].start.chars_read_current_line == 5);
    CHECK(j["array"][2].stop.lines_read == 4);
    CHECK(j["array"][2].stop.chars_read_current_line == 8);

    // 4.2
    CHECK(j["array"][3].start.lines_read == 5);
    CHECK(j["array"][3].start.chars_read_current_line == 5);
    CHECK(j["array"][3].stop.lines_read == 5);
    CHECK(j["array"][3].stop.chars_read_current_line == 7);

    // null: the start is the first character of the token itself, not the
    // position right after the previous value
    CHECK(j["array"][4].start.lines_read == 6);
    CHECK(j["array"][4].start.chars_read_current_line == 5);
    CHECK(j["array"][4].stop.lines_read == 6);
    CHECK(j["array"][4].stop.chars_read_current_line == 8);

    // "str" (positions include the quotes)
    CHECK(j["array"][5].start.lines_read == 7);
    CHECK(j["array"][5].start.chars_read_current_line == 5);
    CHECK(j["array"][5].stop.lines_read == 7);
    CHECK(j["array"][5].stop.chars_read_current_line == 9);

    // array: ends at ']' on line 8
    CHECK(j["array"].stop.lines_read == 8);
    CHECK(j["array"].stop.chars_read_current_line == 3);

    // root object: ends at '}' on line 9
    CHECK(j.stop.lines_read == 9);
    CHECK(j.stop.chars_read_current_line == 1);
}