Extend sax parser to optionally accept position information for parsed tokens

This commit is contained in:
Raphael Grimm 2021-11-20 14:22:56 +01:00 committed by Raphael Grimm
parent 5d2754306d
commit bd9cdcd99c
7 changed files with 2933 additions and 102 deletions

View File

@ -168,8 +168,9 @@ class binary_reader
bool parse_bson_internal() bool parse_bson_internal()
{ {
std::int32_t document_size{}; std::int32_t document_size{};
detail::sax_call_next_token_start_pos(sax, chars_read);
get_number<std::int32_t, true>(input_format_t::bson, document_size); get_number<std::int32_t, true>(input_format_t::bson, document_size);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
{ {
return false; return false;
@ -180,6 +181,7 @@ class binary_reader
return false; return false;
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_object(); return sax->end_object();
} }
@ -277,6 +279,7 @@ class binary_reader
case 0x01: // double case 0x01: // double
{ {
double number{}; double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(number));
return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
@ -284,7 +287,10 @@ class binary_reader
{ {
std::int32_t len{}; std::int32_t len{};
string_t value; string_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value); detail::sax_call_next_token_start_pos(sax, chars_read);
const bool result_get = get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(value);
} }
case 0x03: // object case 0x03: // object
@ -301,28 +307,35 @@ class binary_reader
{ {
std::int32_t len{}; std::int32_t len{};
binary_t value; binary_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value); detail::sax_call_next_token_start_pos(sax, chars_read);
const bool result_get = get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(value);
} }
case 0x08: // boolean case 0x08: // boolean
{ {
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + 1);
return sax->boolean(get() != 0); return sax->boolean(get() != 0);
} }
case 0x0A: // null case 0x0A: // null
{ {
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->null(); return sax->null();
} }
case 0x10: // int32 case 0x10: // int32
{ {
std::int32_t value{}; std::int32_t value{};
return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value); detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value));
return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
} }
case 0x12: // int64 case 0x12: // int64
{ {
std::int64_t value{}; std::int64_t value{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value));
return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value); return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
} }
@ -361,14 +374,22 @@ class binary_reader
} }
const std::size_t element_type_parse_position = chars_read; const std::size_t element_type_parse_position = chars_read;
if (!is_array)
{
detail::sax_call_next_token_start_pos(sax, chars_read);
}
if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key))) if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
{ {
return false; return false;
} }
if (!is_array && !sax->key(key)) if (!is_array)
{ {
return false; detail::sax_call_next_token_end_pos(sax, chars_read);
if (!sax->key(key))
{
return false;
}
} }
if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position))) if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
@ -390,6 +411,7 @@ class binary_reader
bool parse_bson_array() bool parse_bson_array()
{ {
std::int32_t document_size{}; std::int32_t document_size{};
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(std::int32_t));
get_number<std::int32_t, true>(input_format_t::bson, document_size); get_number<std::int32_t, true>(input_format_t::bson, document_size);
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
@ -402,6 +424,7 @@ class binary_reader
return false; return false;
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_array(); return sax->end_array();
} }
@ -451,29 +474,34 @@ class binary_reader
case 0x15: case 0x15:
case 0x16: case 0x16:
case 0x17: case 0x17:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_unsigned(static_cast<number_unsigned_t>(current)); return sax->number_unsigned(static_cast<number_unsigned_t>(current));
case 0x18: // Unsigned integer (one-byte uint8_t follows) case 0x18: // Unsigned integer (one-byte uint8_t follows)
{ {
std::uint8_t number{}; std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
} }
case 0x19: // Unsigned integer (two-byte uint16_t follows) case 0x19: // Unsigned integer (two-byte uint16_t follows)
{ {
std::uint16_t number{}; std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
} }
case 0x1A: // Unsigned integer (four-byte uint32_t follows) case 0x1A: // Unsigned integer (four-byte uint32_t follows)
{ {
std::uint32_t number{}; std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
} }
case 0x1B: // Unsigned integer (eight-byte uint64_t follows) case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
{ {
std::uint64_t number{}; std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
} }
@ -502,29 +530,34 @@ class binary_reader
case 0x35: case 0x35:
case 0x36: case 0x36:
case 0x37: case 0x37:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current)); return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
case 0x38: // Negative integer (one-byte uint8_t follows) case 0x38: // Negative integer (one-byte uint8_t follows)
{ {
std::uint8_t number{}; std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
} }
case 0x39: // Negative integer -1-n (two-byte uint16_t follows) case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
{ {
std::uint16_t number{}; std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
} }
case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
{ {
std::uint32_t number{}; std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
} }
case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
{ {
std::uint64_t number{}; std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
- static_cast<number_integer_t>(number)); - static_cast<number_integer_t>(number));
} }
@ -561,7 +594,10 @@ class binary_reader
case 0x5F: // Binary data (indefinite length) case 0x5F: // Binary data (indefinite length)
{ {
binary_t b; binary_t b;
return get_cbor_binary(b) && sax->binary(b); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_binary(b);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(b);
} }
// UTF-8 string (0x00..0x17 bytes follow) // UTF-8 string (0x00..0x17 bytes follow)
@ -596,7 +632,10 @@ class binary_reader
case 0x7F: // UTF-8 string (indefinite length) case 0x7F: // UTF-8 string (indefinite length)
{ {
string_t s; string_t s;
return get_cbor_string(s) && sax->string(s); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_string(s);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(s);
} }
// array (0x00..0x17 data items follow) // array (0x00..0x17 data items follow)
@ -624,35 +663,51 @@ class binary_reader
case 0x95: case 0x95:
case 0x96: case 0x96:
case 0x97: case 0x97:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_array( return get_cbor_array(
conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler); conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
case 0x98: // array (one-byte uint8_t for n follows) case 0x98: // array (one-byte uint8_t for n follows)
{ {
std::uint8_t len{}; std::uint8_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
} }
case 0x99: // array (two-byte uint16_t for n follow) case 0x99: // array (two-byte uint16_t for n follow)
{ {
std::uint16_t len{}; std::uint16_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
} }
case 0x9A: // array (four-byte uint32_t for n follow) case 0x9A: // array (four-byte uint32_t for n follow)
{ {
std::uint32_t len{}; std::uint32_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
} }
case 0x9B: // array (eight-byte uint64_t for n follow) case 0x9B: // array (eight-byte uint64_t for n follow)
{ {
std::uint64_t len{}; std::uint64_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
} }
case 0x9F: // array (indefinite length) case 0x9F: // array (indefinite length)
{
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_array(static_cast<std::size_t>(-1), tag_handler); return get_cbor_array(static_cast<std::size_t>(-1), tag_handler);
}
// map (0x00..0x17 pairs of data items follow) // map (0x00..0x17 pairs of data items follow)
case 0xA0: case 0xA0:
@ -679,33 +734,47 @@ class binary_reader
case 0xB5: case 0xB5:
case 0xB6: case 0xB6:
case 0xB7: case 0xB7:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler); return get_cbor_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
case 0xB8: // map (one-byte uint8_t for n follows) case 0xB8: // map (one-byte uint8_t for n follows)
{ {
std::uint8_t len{}; std::uint8_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
} }
case 0xB9: // map (two-byte uint16_t for n follow) case 0xB9: // map (two-byte uint16_t for n follow)
{ {
std::uint16_t len{}; std::uint16_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
} }
case 0xBA: // map (four-byte uint32_t for n follow) case 0xBA: // map (four-byte uint32_t for n follow)
{ {
std::uint32_t len{}; std::uint32_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
} }
case 0xBB: // map (eight-byte uint64_t for n follow) case 0xBB: // map (eight-byte uint64_t for n follow)
{ {
std::uint64_t len{}; std::uint64_t len{};
return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_number(input_format_t::cbor, len);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
} }
case 0xBF: // map (indefinite length) case 0xBF: // map (indefinite length)
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_cbor_object(static_cast<std::size_t>(-1), tag_handler); return get_cbor_object(static_cast<std::size_t>(-1), tag_handler);
case 0xC6: // tagged item case 0xC6: // tagged item
@ -810,7 +879,10 @@ class binary_reader
return parse_cbor_internal(true, tag_handler); return parse_cbor_internal(true, tag_handler);
} }
get(); get();
return get_cbor_binary(b) && sax->binary(b); detail::sax_call_next_token_start_pos(sax, chars_read);
const bool result_get = get_cbor_binary(b);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(b);
} }
default: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE
@ -820,16 +892,20 @@ class binary_reader
} }
case 0xF4: // false case 0xF4: // false
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(false); return sax->boolean(false);
case 0xF5: // true case 0xF5: // true
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(true); return sax->boolean(true);
case 0xF6: // null case 0xF6: // null
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->null(); return sax->null();
case 0xF9: // Half-Precision Float (two-byte IEEE 754) case 0xF9: // Half-Precision Float (two-byte IEEE 754)
{ {
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const auto byte1_raw = get(); const auto byte1_raw = get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
{ {
@ -871,6 +947,7 @@ class binary_reader
return std::ldexp(mant + 1024, exp - 25); return std::ldexp(mant + 1024, exp - 25);
} }
}(); }();
detail::sax_call_next_token_end_pos(sax, chars_read);
return sax->number_float((half & 0x8000u) != 0 return sax->number_float((half & 0x8000u) != 0
? static_cast<number_float_t>(-val) ? static_cast<number_float_t>(-val)
: static_cast<number_float_t>(val), ""); : static_cast<number_float_t>(val), "");
@ -879,12 +956,14 @@ class binary_reader
case 0xFA: // Single-Precision Float (four-byte IEEE 754) case 0xFA: // Single-Precision Float (four-byte IEEE 754)
{ {
float number{}; float number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
case 0xFB: // Double-Precision Float (eight-byte IEEE 754) case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
{ {
double number{}; double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
@ -1128,6 +1207,7 @@ class binary_reader
} }
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_array(); return sax->end_array();
} }
@ -1153,7 +1233,10 @@ class binary_reader
for (std::size_t i = 0; i < len; ++i) for (std::size_t i = 0; i < len; ++i)
{ {
get(); get();
if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{ {
return false; return false;
} }
@ -1169,7 +1252,10 @@ class binary_reader
{ {
while (get() != 0xFF) while (get() != 0xFF)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_cbor_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{ {
return false; return false;
} }
@ -1183,6 +1269,7 @@ class binary_reader
} }
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_object(); return sax->end_object();
} }
@ -1330,6 +1417,7 @@ class binary_reader
case 0x7D: case 0x7D:
case 0x7E: case 0x7E:
case 0x7F: case 0x7F:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_unsigned(static_cast<number_unsigned_t>(current)); return sax->number_unsigned(static_cast<number_unsigned_t>(current));
// fixmap // fixmap
@ -1349,6 +1437,7 @@ class binary_reader
case 0x8D: case 0x8D:
case 0x8E: case 0x8E:
case 0x8F: case 0x8F:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_msgpack_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); return get_msgpack_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
// fixarray // fixarray
@ -1368,6 +1457,7 @@ class binary_reader
case 0x9D: case 0x9D:
case 0x9E: case 0x9E:
case 0x9F: case 0x9F:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return get_msgpack_array(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); return get_msgpack_array(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
// fixstr // fixstr
@ -1408,16 +1498,22 @@ class binary_reader
case 0xDB: // str 32 case 0xDB: // str 32
{ {
string_t s; string_t s;
return get_msgpack_string(s) && sax->string(s); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_msgpack_string(s);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(s);
} }
case 0xC0: // nil case 0xC0: // nil
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->null(); return sax->null();
case 0xC2: // false case 0xC2: // false
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(false); return sax->boolean(false);
case 0xC3: // true case 0xC3: // true
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(true); return sax->boolean(true);
case 0xC4: // bin 8 case 0xC4: // bin 8
@ -1433,90 +1529,107 @@ class binary_reader
case 0xD8: // fixext 16 case 0xD8: // fixext 16
{ {
binary_t b; binary_t b;
return get_msgpack_binary(b) && sax->binary(b); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_msgpack_binary(b);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->binary(b);
} }
case 0xCA: // float 32 case 0xCA: // float 32
{ {
float number{}; float number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
case 0xCB: // float 64 case 0xCB: // float 64
{ {
double number{}; double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
case 0xCC: // uint 8 case 0xCC: // uint 8
{ {
std::uint8_t number{}; std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
} }
case 0xCD: // uint 16 case 0xCD: // uint 16
{ {
std::uint16_t number{}; std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
} }
case 0xCE: // uint 32 case 0xCE: // uint 32
{ {
std::uint32_t number{}; std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
} }
case 0xCF: // uint 64 case 0xCF: // uint 64
{ {
std::uint64_t number{}; std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
} }
case 0xD0: // int 8 case 0xD0: // int 8
{ {
std::int8_t number{}; std::int8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number); return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
} }
case 0xD1: // int 16 case 0xD1: // int 16
{ {
std::int16_t number{}; std::int16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number); return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
} }
case 0xD2: // int 32 case 0xD2: // int 32
{ {
std::int32_t number{}; std::int32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number); return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
} }
case 0xD3: // int 64 case 0xD3: // int 64
{ {
std::int64_t number{}; std::int64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format_t::msgpack, number) && sax->number_integer(number); return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
} }
case 0xDC: // array 16 case 0xDC: // array 16
{ {
std::uint16_t len{}; std::uint16_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
} }
case 0xDD: // array 32 case 0xDD: // array 32
{ {
std::uint32_t len{}; std::uint32_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast<std::size_t>(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast<std::size_t>(len));
} }
case 0xDE: // map 16 case 0xDE: // map 16
{ {
std::uint16_t len{}; std::uint16_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
} }
case 0xDF: // map 32 case 0xDF: // map 32
{ {
std::uint32_t len{}; std::uint32_t len{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len));
return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast<std::size_t>(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast<std::size_t>(len));
} }
@ -1553,6 +1666,7 @@ class binary_reader
case 0xFD: case 0xFD:
case 0xFE: case 0xFE:
case 0xFF: case 0xFF:
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->number_integer(static_cast<std::int8_t>(current)); return sax->number_integer(static_cast<std::int8_t>(current));
default: // anything else default: // anything else
@ -1783,6 +1897,7 @@ class binary_reader
} }
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_array(); return sax->end_array();
} }
@ -1801,7 +1916,10 @@ class binary_reader
for (std::size_t i = 0; i < len; ++i) for (std::size_t i = 0; i < len; ++i)
{ {
get(); get();
if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key))) detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_msgpack_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{ {
return false; return false;
} }
@ -1813,6 +1931,7 @@ class binary_reader
key.clear(); key.clear();
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return sax->end_object(); return sax->end_object();
} }
@ -2175,7 +2294,6 @@ class binary_reader
return true; return true;
} }
} }
string_t key = "_ArraySize_"; string_t key = "_ArraySize_";
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size()))) if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size())))
{ {
@ -2236,7 +2354,6 @@ class binary_reader
bool is_ndarray = false; bool is_ndarray = false;
get_ignore_noop(); get_ignore_noop();
if (current == '$') if (current == '$')
{ {
result.second = get(); // must not ignore 'N', because 'N' maybe the type result.second = get(); // must not ignore 'N', because 'N' maybe the type
@ -2265,7 +2382,9 @@ class binary_reader
exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
} }
// detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool is_error = get_ubjson_size_value(result.first, is_ndarray); const bool is_error = get_ubjson_size_value(result.first, is_ndarray);
//detail::sax_call_next_token_end_pos(sax, chars_read);
if (input_format == input_format_t::bjdata && is_ndarray) if (input_format == input_format_t::bjdata && is_ndarray)
{ {
if (inside_ndarray) if (inside_ndarray)
@ -2280,7 +2399,9 @@ class binary_reader
if (current == '#') if (current == '#')
{ {
// detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool is_error = get_ubjson_size_value(result.first, is_ndarray); const bool is_error = get_ubjson_size_value(result.first, is_ndarray);
// detail::sax_call_next_token_end_pos(sax, chars_read);
if (input_format == input_format_t::bjdata && is_ndarray) if (input_format == input_format_t::bjdata && is_ndarray)
{ {
return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read, return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
@ -2289,6 +2410,7 @@ class binary_reader
return is_error; return is_error;
} }
// detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read - 1);
return true; return true;
} }
@ -2304,40 +2426,47 @@ class binary_reader
return unexpect_eof(input_format, "value"); return unexpect_eof(input_format, "value");
case 'T': // true case 'T': // true
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(true); return sax->boolean(true);
case 'F': // false case 'F': // false
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->boolean(false); return sax->boolean(false);
case 'Z': // null case 'Z': // null
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->null(); return sax->null();
case 'U': case 'U':
{ {
std::uint8_t number{}; std::uint8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number); return get_number(input_format, number) && sax->number_unsigned(number);
} }
case 'i': case 'i':
{ {
std::int8_t number{}; std::int8_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number); return get_number(input_format, number) && sax->number_integer(number);
} }
case 'I': case 'I':
{ {
std::int16_t number{}; std::int16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number); return get_number(input_format, number) && sax->number_integer(number);
} }
case 'l': case 'l':
{ {
std::int32_t number{}; std::int32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number); return get_number(input_format, number) && sax->number_integer(number);
} }
case 'L': case 'L':
{ {
std::int64_t number{}; std::int64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_integer(number); return get_number(input_format, number) && sax->number_integer(number);
} }
@ -2348,6 +2477,7 @@ class binary_reader
break; break;
} }
std::uint16_t number{}; std::uint16_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number); return get_number(input_format, number) && sax->number_unsigned(number);
} }
@ -2358,6 +2488,7 @@ class binary_reader
break; break;
} }
std::uint32_t number{}; std::uint32_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number); return get_number(input_format, number) && sax->number_unsigned(number);
} }
@ -2368,11 +2499,13 @@ class binary_reader
break; break;
} }
std::uint64_t number{}; std::uint64_t number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_unsigned(number); return get_number(input_format, number) && sax->number_unsigned(number);
} }
case 'h': case 'h':
{ {
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
if (input_format != input_format_t::bjdata) if (input_format != input_format_t::bjdata)
{ {
break; break;
@ -2418,25 +2551,30 @@ class binary_reader
return std::ldexp(mant + 1024, exp - 25); return std::ldexp(mant + 1024, exp - 25);
} }
}(); }();
detail::sax_call_next_token_end_pos(sax, chars_read);
return sax->number_float((half & 0x8000u) != 0 return sax->number_float((half & 0x8000u) != 0
? static_cast<number_float_t>(-val) ? static_cast<number_float_t>(-val)
: static_cast<number_float_t>(val), ""); : static_cast<number_float_t>(val),
"");
} }
case 'd': case 'd':
{ {
float number{}; float number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
case 'D': case 'D':
{ {
double number{}; double number{};
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number));
return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), ""); return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
} }
case 'H': case 'H':
{ {
// call to detail::sax_call_next_token_start_end_pos inside of the method
return get_ubjson_high_precision_number(); return get_ubjson_high_precision_number();
} }
@ -2454,19 +2592,25 @@ class binary_reader
exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr)); exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr));
} }
string_t s(1, static_cast<typename string_t::value_type>(current)); string_t s(1, static_cast<typename string_t::value_type>(current));
detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read);
return sax->string(s); return sax->string(s);
} }
case 'S': // string case 'S': // string
{ {
string_t s; string_t s;
return get_ubjson_string(s) && sax->string(s); detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(s);
detail::sax_call_next_token_end_pos(sax, chars_read);
return result_get && sax->string(s);
} }
case '[': // array case '[': // array
// call to detail::sax_call_next_token_start_end_pos inside of the method
return get_ubjson_array(); return get_ubjson_array();
case '{': // object case '{': // object
// call to detail::sax_call_next_token_start_end_pos inside of the method
return get_ubjson_object(); return get_ubjson_object();
default: // anything else default: // anything else
@ -2481,6 +2625,7 @@ class binary_reader
*/ */
bool get_ubjson_array() bool get_ubjson_array()
{ {
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
std::pair<std::size_t, char_int_type> size_and_type; std::pair<std::size_t, char_int_type> size_and_type;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{ {
@ -2505,6 +2650,7 @@ class binary_reader
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
} }
detail::sax_call_next_token_end_pos(sax, chars_read);
string_t type = it->second; // sax->string() takes a reference string_t type = it->second; // sax->string() takes a reference
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type))) if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
{ {
@ -2516,6 +2662,7 @@ class binary_reader
size_and_type.second = 'U'; size_and_type.second = 'U';
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read);
key = "_ArrayData_"; key = "_ArrayData_";
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) ))
{ {
@ -2524,17 +2671,20 @@ class binary_reader
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
// call to detail::sax_call_next_token_start_end_pos inside of the method
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
{ {
return false; return false;
} }
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read);
return (sax->end_array() && sax->end_object()); return (sax->end_array() && sax->end_object());
} }
if (size_and_type.first != npos) if (size_and_type.first != npos)
{ {
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
{ {
return false; return false;
@ -2546,6 +2696,7 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
// call to detail::sax_call_next_token_start_end_pos inside of the method
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
{ {
return false; return false;
@ -2557,6 +2708,7 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
// call to detail::sax_call_next_token_start_end_pos inside of the method
if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
{ {
return false; return false;
@ -2566,6 +2718,7 @@ class binary_reader
} }
else else
{ {
detail::sax_call_next_token_end_pos(sax, chars_read - 1);
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
{ {
return false; return false;
@ -2581,6 +2734,7 @@ class binary_reader
} }
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_array(); return sax->end_array();
} }
@ -2589,6 +2743,7 @@ class binary_reader
*/ */
bool get_ubjson_object() bool get_ubjson_object()
{ {
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
std::pair<std::size_t, char_int_type> size_and_type; std::pair<std::size_t, char_int_type> size_and_type;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
{ {
@ -2606,6 +2761,7 @@ class binary_reader
string_t key; string_t key;
if (size_and_type.first != npos) if (size_and_type.first != npos)
{ {
detail::sax_call_next_token_end_pos(sax, chars_read - 1);
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
{ {
return false; return false;
@ -2615,7 +2771,10 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{ {
return false; return false;
} }
@ -2630,7 +2789,10 @@ class binary_reader
{ {
for (std::size_t i = 0; i < size_and_type.first; ++i) for (std::size_t i = 0; i < size_and_type.first; ++i)
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(key);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{ {
return false; return false;
} }
@ -2644,6 +2806,7 @@ class binary_reader
} }
else else
{ {
detail::sax_call_next_token_end_pos(sax, chars_read - 1);
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
{ {
return false; return false;
@ -2651,7 +2814,10 @@ class binary_reader
while (current != '}') while (current != '}')
{ {
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key))) detail::sax_call_next_token_start_pos(sax, chars_read - 1);
const bool result_get = get_ubjson_string(key, false);
detail::sax_call_next_token_end_pos(sax, chars_read);
if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key)))
{ {
return false; return false;
} }
@ -2664,6 +2830,7 @@ class binary_reader
} }
} }
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read);
return sax->end_object(); return sax->end_object();
} }
@ -2672,6 +2839,7 @@ class binary_reader
bool get_ubjson_high_precision_number() bool get_ubjson_high_precision_number()
{ {
detail::sax_call_next_token_start_pos(sax, chars_read - 1);
// get size of following number string // get size of following number string
std::size_t size{}; std::size_t size{};
bool no_ndarray = true; bool no_ndarray = true;
@ -2692,6 +2860,7 @@ class binary_reader
} }
number_vector.push_back(static_cast<char>(current)); number_vector.push_back(static_cast<char>(current));
} }
detail::sax_call_next_token_end_pos(sax, chars_read);
// parse number string // parse number string
using ia_type = decltype(detail::input_adapter(number_vector)); using ia_type = decltype(detail::input_adapter(number_vector));
@ -2889,6 +3058,7 @@ class binary_reader
{ {
if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof())) if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
{ {
detail::sax_call_next_token_end_pos(sax, chars_read);
return sax->parse_error(chars_read, "<end of file>", return sax->parse_error(chars_read, "<end of file>",
parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
} }

View File

@ -1506,13 +1506,13 @@ scan_number_done:
while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
} }
token_type scan() bool scan_start()
{ {
// initially, skip the BOM // initially, skip the BOM
if (position.chars_read_total == 0 && !skip_bom()) if (position.chars_read_total == 0 && !skip_bom())
{ {
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
return token_type::parse_error; return false;
} }
// read next character and ignore whitespace // read next character and ignore whitespace
@ -1523,13 +1523,17 @@ scan_number_done:
{ {
if (!scan_comment()) if (!scan_comment())
{ {
return token_type::parse_error; return false;
} }
// skip following whitespace // skip following whitespace
skip_whitespace(); skip_whitespace();
} }
return true;
}
token_type scan_end()
{
switch (current) switch (current)
{ {
// structural characters // structural characters
@ -1593,6 +1597,10 @@ scan_number_done:
return token_type::parse_error; return token_type::parse_error;
} }
} }
/// Scan the next token in one step.
/// Runs scan_start() first; if that fails the result is token_type::parse_error,
/// otherwise the token type is whatever scan_end() returns.
token_type scan()
{
    if (!scan_start())
    {
        return token_type::parse_error;
    }
    return scan_end();
}
private: private:
/// input adapter /// input adapter

View File

@ -76,8 +76,6 @@ class parser
, m_lexer(std::move(adapter), skip_comments) , m_lexer(std::move(adapter), skip_comments)
, allow_exceptions(allow_exceptions_) , allow_exceptions(allow_exceptions_)
{ {
// read first token
get_token();
} }
/*! /*!
@ -98,7 +96,7 @@ class parser
sax_parse_internal(&sdp); sax_parse_internal(&sdp);
// in strict mode, input must be completely read // in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input)) if (strict && (get_token(&sdp) != token_type::end_of_input))
{ {
sdp.parse_error(m_lexer.get_position(), sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(), m_lexer.get_token_string(),
@ -126,7 +124,7 @@ class parser
sax_parse_internal(&sdp); sax_parse_internal(&sdp);
// in strict mode, input must be completely read // in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input)) if (strict && (get_token(&sdp) != token_type::end_of_input))
{ {
sdp.parse_error(m_lexer.get_position(), sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(), m_lexer.get_token_string(),
@ -164,7 +162,7 @@ class parser
const bool result = sax_parse_internal(sax); const bool result = sax_parse_internal(sax);
// strict mode: next byte must be EOF // strict mode: next byte must be EOF
if (result && strict && (get_token() != token_type::end_of_input)) if (result && strict && (get_token(sax) != token_type::end_of_input))
{ {
return sax->parse_error(m_lexer.get_position(), return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(), m_lexer.get_token_string(),
@ -185,6 +183,8 @@ class parser
// value to avoid a goto (see comment where set to true) // value to avoid a goto (see comment where set to true)
bool skip_to_state_evaluation = false; bool skip_to_state_evaluation = false;
// read first token
get_token(sax);
while (true) while (true)
{ {
if (!skip_to_state_evaluation) if (!skip_to_state_evaluation)
@ -200,7 +200,7 @@ class parser
} }
// closing } -> we are done // closing } -> we are done
if (get_token() == token_type::end_object) if (get_token(sax) == token_type::end_object)
{ {
if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
{ {
@ -222,7 +222,7 @@ class parser
} }
// parse separator (:) // parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator))
{ {
return sax->parse_error(m_lexer.get_position(), return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(), m_lexer.get_token_string(),
@ -233,7 +233,7 @@ class parser
states.push_back(false); states.push_back(false);
// parse values // parse values
get_token(); get_token(sax);
continue; continue;
} }
@ -245,7 +245,7 @@ class parser
} }
// closing ] -> we are done // closing ] -> we are done
if (get_token() == token_type::end_array) if (get_token(sax) == token_type::end_array)
{ {
if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
{ {
@ -372,10 +372,10 @@ class parser
if (states.back()) // array if (states.back()) // array
{ {
// comma -> next value // comma -> next value
if (get_token() == token_type::value_separator) if (get_token(sax) == token_type::value_separator)
{ {
// parse a new value // parse a new value
get_token(); get_token(sax);
continue; continue;
} }
@ -405,10 +405,10 @@ class parser
// states.back() is false -> object // states.back() is false -> object
// comma -> next value // comma -> next value
if (get_token() == token_type::value_separator) if (get_token(sax) == token_type::value_separator)
{ {
// parse key // parse key
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::value_string))
{ {
return sax->parse_error(m_lexer.get_position(), return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(), m_lexer.get_token_string(),
@ -421,7 +421,7 @@ class parser
} }
// parse separator (:) // parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator))
{ {
return sax->parse_error(m_lexer.get_position(), return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(), m_lexer.get_token_string(),
@ -429,7 +429,7 @@ class parser
} }
// parse values // parse values
get_token(); get_token(sax);
continue; continue;
} }
@ -457,10 +457,19 @@ class parser
} }
} }
/// get next token from lexer /// get next token from lexer and pass position info to sax (if it is accepted)
token_type get_token() template<class SAX>
token_type get_token(SAX* sax)
{ {
return last_token = m_lexer.scan(); if (!m_lexer.scan_start())
{
last_token = token_type::parse_error;
return token_type::parse_error;
}
detail::sax_call_next_token_start_pos(sax, m_lexer);
last_token = m_lexer.scan_end();
detail::sax_call_next_token_end_pos(sax, m_lexer);
return last_token;
} }
std::string exception_message(const token_type expected, const std::string& context) std::string exception_message(const token_type expected, const std::string& context)

View File

@ -19,6 +19,151 @@
NLOHMANN_JSON_NAMESPACE_BEGIN NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail namespace detail
{ {
// Helper struct that forwards its arguments to sax->next_token_start(...).
// The functionality is wrapped in a type so it can be passed as a template
// argument.
//
// call() only participates in overload resolution when
// sax->next_token_start(args...) is well-formed; its return type is the
// return type of that expression.
struct sax_call_next_token_start_pos_direct
{
    template<typename SAX, typename...Ts>
    static auto call(SAX* sax, Ts&& ...ts)
    -> decltype(sax->next_token_start(std::forward<Ts>(ts)...))
    {
        // forward the result as well: flowing off the end of this function
        // would be undefined behaviour whenever the handler returns non-void
        return sax->next_token_start(std::forward<Ts>(ts)...);
    }
};
// Helper struct that forwards its arguments to sax->next_token_end(...).
// The functionality is wrapped in a type so it can be passed as a template
// argument.
//
// call() only participates in overload resolution when
// sax->next_token_end(args...) is well-formed; its return type is the
// return type of that expression.
struct sax_call_next_token_end_pos_direct
{
    template<typename SAX, typename...Ts>
    static auto call(SAX* sax, Ts&& ...ts)
    -> decltype(sax->next_token_end(std::forward<Ts>(ts)...))
    {
        // forward the result as well: flowing off the end of this function
        // would be undefined behaviour whenever the handler returns non-void
        return sax->next_token_end(std::forward<Ts>(ts)...);
    }
};
// Dispatches calls to sax->next_token_start / sax->next_token_end
// and drops the calls if the sax parser does not support these methods.
//
// DirectCaller can be set to one of sax_call_next_token_{start,end}_pos_direct to
// determine which method is called.
// SAX is the sax parser type; LexOrPos is the type of the position argument
// handed in by the caller: either std::size_t (a plain position) or a lexer.
//
// Exactly one of the call() overloads below is meant to be enabled for a given
// combination of template arguments; the selection is done via std::enable_if.
template <typename DirectCaller, typename SAX, typename LexOrPos>
struct sax_call_function
{
// true if the parameter is a position (std::size_t), false if it is a lexer
static constexpr bool no_lexer = std::is_same<LexOrPos, std::size_t>::value;
// type of the expression DirectCaller::call(SAX2*, Ts2...) (used for detection)
template<typename SAX2, typename...Ts2>
using call_t = decltype(DirectCaller::call(std::declval<SAX2*>(), std::declval<Ts2>()...));
// the sax parser supports calls with a position
// (presumably: the call is well-formed and its type is exactly void -
// confirm against the definition of is_detected_exact)
static constexpr bool detected_call_with_pos =
is_detected_exact<void, call_t, SAX, std::size_t>::value;
// the sax parser supports calls with a lexer (never true if LexOrPos is a position)
// NOTE(review): detection uses std::declval<const LexOrPos>() (an rvalue), while
// call() below passes an lvalue of type const LexOrPos& - confirm that this
// difference in value category is intended.
static constexpr bool detected_call_with_lex =
!no_lexer &&
is_detected_exact<void, call_t, SAX, const LexOrPos>::value;
// there either has to be a version accepting a lexer or a position
static constexpr bool valid = detected_call_with_pos || detected_call_with_lex;
// called with a position and the position method is supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_pos
>::type
call(SaxT* sax, std::size_t pos)
{
DirectCaller::call(sax, pos);
}
// the sax parser has no version of the method -> drop call
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::valid
>::type
call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {}
// called with a lexer and the lexer method is supported -> pass data on
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex);
}
// called with a lexer and only the position method is supported -> call with
// the position taken from the lexer (start variant, selected when DirectCaller
// is sax_call_next_token_start_pos_direct).
// the start pos in the lexer is the last read char -> chars_read_total-1
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
std::is_same<DirectCaller, sax_call_next_token_start_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total - 1);
}
// called with a lexer and only the position method is supported -> call with
// the position taken from the lexer (end variant, selected when DirectCaller
// is sax_call_next_token_end_pos_direct).
// the one-past-the-end pos in the lexer is the current index -> chars_read_total
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex &&
std::is_same<DirectCaller, sax_call_next_token_end_pos_direct>::value
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total);
}
};
//set the element start pos of a sax parser by calling any version of sax->next_token_start (if available)
template<class SAX, class LexOrPos>
void sax_call_next_token_start_pos(SAX* sax, const LexOrPos& lexOrPos)
{
using call_t = sax_call_function<sax_call_next_token_start_pos_direct, SAX, LexOrPos>;
call_t::call(sax, lexOrPos);
}
//set the element end pos of a sax parser by calling any version of sax->next_token_end (if available)
template<class SAX, class LexOrPos>
void sax_call_next_token_end_pos(SAX* sax, const LexOrPos& lexOrPos)
{
using call_t = sax_call_function<sax_call_next_token_end_pos_direct, SAX, LexOrPos>;
call_t::call(sax, lexOrPos);
}
//set the element start end pos of a sax parser by calling any version of
// sax->next_token_start and sax->next_token_end (if available)
template<class SAX, class LexOrPos1, class LexOrPos2>
void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos1& lexOrPos1, const LexOrPos2& lexOrPos2)
{
sax_call_next_token_start_pos(sax, lexOrPos1);
sax_call_next_token_end_pos(sax, lexOrPos2);
}
// Report the same value as both start and end position of the next token to
// a sax parser (sax->next_token_start first, then sax->next_token_end; each
// only if the sax parser provides the method).
template<class SAX, class LexOrPos>
void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos& lexOrPos)
{
    // same as the two-position overload with start == end
    sax_call_next_token_start_end_pos(sax, lexOrPos, lexOrPos);
}
template<typename T> template<typename T>
using null_function_t = decltype(std::declval<T&>().null()); using null_function_t = decltype(std::declval<T&>().null());

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,337 @@
/*
__ _____ _____ _____
__| | __| | | | JSON for Modern C++ (test suite)
| | |__ | | | | | | version 3.10.2
|_____|_____|_____|_|___| https://github.com/nlohmann/json
Licensed under the MIT License <http://opensource.org/licenses/MIT>.
SPDX-License-Identifier: MIT
Copyright (c) 2013-2019 Niels Lohmann <http://nlohmann.me>.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <iostream>
#include <string>
#include "doctest_compatibility.h"
#include <nlohmann/json.hpp>
//prototype to make -Wmissing-prototypes happy
std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p);
//test json parser with detailed line / col information as metadata
//
// Metadata carried by every value of json_with_token_start_stop in this test:
// the lexer positions of the token the value was created from.
struct token_start_stop
{
// lexer position recorded when the token starts (see next_token_start in the sax class)
nlohmann::detail::position_t start{};
// lexer position recorded when the token ends (see next_token_end in the sax class)
nlohmann::detail::position_t stop{};
};
// Stream a lexer position as "<total chars>(<line>:<column>)".
std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p)
{
    return out << p.chars_read_total
           << '(' << p.lines_read
           << ':' << p.chars_read_current_line
           << ')';
}
// JSON type used by this test: a basic_json instantiation whose final template
// argument is token_start_stop. The sax class in this file reads and writes
// .start / .stop directly on values of this type.
// NOTE(review): the final argument is presumably basic_json's custom base
// class parameter - confirm against the basic_json template parameter list.
using json_with_token_start_stop =
nlohmann::basic_json <
std::map,
std::vector,
std::string,
bool,
std::int64_t,
std::uint64_t,
double,
std::allocator,
nlohmann::adl_serializer,
std::vector<std::uint8_t>,
token_start_stop >;
//adapted from detail::json_sax_dom_parser
class sax_with_token_start_stop_metadata
{
public:
using json = json_with_token_start_stop;
using number_integer_t = typename json::number_integer_t;
using number_unsigned_t = typename json::number_unsigned_t;
using number_float_t = typename json::number_float_t;
using string_t = typename json::string_t;
using binary_t = typename json::binary_t;
/*!
@param[in,out] r reference to a JSON value that is manipulated while
parsing
@param[in] allow_exceptions_ whether parse errors yield exceptions
*/
explicit sax_with_token_start_stop_metadata(json& r, const bool allow_exceptions_ = true)
: root(r)
, ref_stack{}
, object_element{nullptr}
, errored{false}
, allow_exceptions(allow_exceptions_)
, start_stop{}
{}
template<class T1, class T2>
void next_token_start(const nlohmann::detail::lexer<T1, T2>& lex)
{
start_stop.start = lex.get_position();
}
template<class T1, class T2>
void next_token_end(const nlohmann::detail::lexer<T1, T2>& lex)
{
start_stop.stop = lex.get_position();
}
bool null()
{
handle_value(nullptr);
return true;
}
bool boolean(bool val)
{
handle_value(val);
return true;
}
bool number_integer(number_integer_t val)
{
handle_value(val);
return true;
}
bool number_unsigned(number_unsigned_t val)
{
handle_value(val);
return true;
}
bool number_float(number_float_t val, const string_t& /*unused*/)
{
handle_value(val);
return true;
}
bool string(string_t& val)
{
handle_value(val);
return true;
}
bool binary(binary_t& val)
{
handle_value(std::move(val));
return true;
}
bool start_object(std::size_t len)
{
ref_stack.push_back(handle_value(json::value_t::object));
ref_stack.back()->start = start_stop.start;
if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
{
throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive object size: ", std::to_string(len)), ref_stack.back());
}
return true;
}
bool key(string_t& val)
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_object());
// add null at given key and store the reference for later
object_element = &(*ref_stack.back())[val];
return true;
}
bool end_object()
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_object());
ref_stack.back()->stop = start_stop.stop;
ref_stack.pop_back();
return true;
}
bool start_array(std::size_t len)
{
ref_stack.push_back(handle_value(json::value_t::array));
ref_stack.back()->start = start_stop.start;
if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
{
throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive array size: ", std::to_string(len)), ref_stack.back());
}
return true;
}
bool end_array()
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_array());
ref_stack.back()->stop = start_stop.stop;
ref_stack.pop_back();
return true;
}
template<class Exception>
bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const Exception& ex)
{
errored = true;
static_cast<void>(ex);
if (allow_exceptions)
{
throw ex;
}
return false;
}
constexpr bool is_errored() const
{
return errored;
}
private:
/*!
@invariant If the ref stack is empty, then the passed value will be the new
root.
@invariant If the ref stack contains a value, then it is an array or an
object to which we can add elements
*/
template<typename Value>
json*
handle_value(Value&& v)
{
if (ref_stack.empty())
{
root = json(std::forward<Value>(v));
root.start = start_stop.start;
root.stop = start_stop.stop;
return &root;
}
assert(ref_stack.back()->is_array() || ref_stack.back()->is_object());
if (ref_stack.back()->is_array())
{
auto& array_element = ref_stack.back()->emplace_back(std::forward<Value>(v));
array_element.start = start_stop.start;
array_element.stop = start_stop.stop;
return &array_element;
}
assert(ref_stack.back()->is_object());
assert(object_element);
*object_element = json(std::forward<Value>(v));
object_element->start = start_stop.start;
object_element->stop = start_stop.stop;
return object_element;
}
/// the parsed JSON value; handle_value() assigns to it when the reference
/// stack is empty
json& root;
/// stack to model hierarchy of values: start_array() pushes the container
/// it opens, end_object() / end_array() pop it again
std::vector<json*> ref_stack{};
/// helper to hold the reference for the next object element; handle_value()
/// writes the next value through this pointer when an object is on top of
/// the stack
json* object_element = nullptr;
/// whether a syntax error occurred (set to true by parse_error())
bool errored = false;
/// whether to throw exceptions in case of errors; parse_error() throws when
/// this is true and returns false otherwise
const bool allow_exceptions = true;
/// start / stop information for the current token; copied into the start /
/// stop fields of stored values (how it gets updated is not visible in this
/// part of the class)
token_start_stop start_stop{};
};
// Parses a small multi-line JSON document through the position-recording SAX
// handler and checks the start/stop line and column stored on every value.
//
// The indentation inside the raw string literals is significant: the asserted
// chars_read_current_line values assume 2 spaces in front of `"array"` and
// `]`, and 4 spaces in front of every array element.
TEST_CASE("parse-json-with-position-info")
{
    const std::string str =
        /*line 0*/ R"({)"
        "\n"
        /*line 1*/ R"(  "array" : [)"
        "\n"
        /*line 2*/ R"(    14294967296,)"
        "\n"
        /*line 3*/ R"(    -1,)"
        "\n"
        /*line 4*/ R"(    true,)"
        "\n"
        /*line 5*/ R"(    4.2,)"
        "\n"
        /*line 6*/ R"(    null,)"
        "\n"
        /*line 7*/ R"(    "str")"
        "\n"
        /*line 8*/ R"(  ])"
        "\n"
        /*line 9*/ R"(})";
    json_with_token_start_stop j;
    sax_with_token_start_stop_metadata sax{j};
    CHECK(nlohmann::json::sax_parse(str, &sax, nlohmann::json::input_format_t::json));

    // root object: opens on line 0, closes on line 9
    CHECK(j.start.lines_read == 0);
    CHECK(j.start.chars_read_current_line == 1);

    // "array": the opening bracket is the 13th character of line 1
    CHECK(j["array"].start.lines_read == 1);
    CHECK(j["array"].start.chars_read_current_line == 13);

    // 14294967296
    CHECK(j["array"][0].start.lines_read == 2);
    CHECK(j["array"][0].start.chars_read_current_line == 5);
    CHECK(j["array"][0].stop.lines_read == 2);
    CHECK(j["array"][0].stop.chars_read_current_line == 15);

    // -1
    CHECK(j["array"][1].start.lines_read == 3);
    CHECK(j["array"][1].start.chars_read_current_line == 5);
    CHECK(j["array"][1].stop.lines_read == 3);
    CHECK(j["array"][1].stop.chars_read_current_line == 6);

    // true
    CHECK(j["array"][2].start.lines_read == 4);
    CHECK(j["array"][2].start.chars_read_current_line == 5);
    CHECK(j["array"][2].stop.lines_read == 4);
    CHECK(j["array"][2].stop.chars_read_current_line == 8);

    // 4.2
    CHECK(j["array"][3].start.lines_read == 5);
    CHECK(j["array"][3].start.chars_read_current_line == 5);
    CHECK(j["array"][3].stop.lines_read == 5);
    CHECK(j["array"][3].stop.chars_read_current_line == 7);

    // null: starts at the literal itself, like every other element
    CHECK(j["array"][4].start.lines_read == 6);
    CHECK(j["array"][4].start.chars_read_current_line == 5);
    CHECK(j["array"][4].stop.lines_read == 6);
    CHECK(j["array"][4].stop.chars_read_current_line == 8);

    // "str" (quotes included in the range)
    CHECK(j["array"][5].start.lines_read == 7);
    CHECK(j["array"][5].start.chars_read_current_line == 5);
    CHECK(j["array"][5].stop.lines_read == 7);
    CHECK(j["array"][5].stop.chars_read_current_line == 9);

    // closing bracket of "array" and closing brace of the root
    CHECK(j["array"].stop.lines_read == 8);
    CHECK(j["array"].stop.chars_read_current_line == 3);
    CHECK(j.stop.lines_read == 9);
    CHECK(j.stop.chars_read_current_line == 1);
}