Add examples for the extended SAX parser

This commit is contained in:
Raphael Grimm 2022-12-19 16:20:47 +01:00 committed by Raphael Grimm
parent bd9cdcd99c
commit 52a1d542ca
4 changed files with 544 additions and 0 deletions

View File

@ -0,0 +1,149 @@
#include <iostream>
#include <iomanip>
#include <sstream>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
// a simple event consumer that collects string representations of the passed
// values and their source locations;
// note inheriting from json::json_sax_t is not required, but can
// help not to forget a required function
class sax_event_consumer : public json::json_sax_t
{
public:
std::vector<std::string> events;
std::size_t next_token_start_pos = 0;
std::size_t next_token_end_pos = 0;
void next_token_start(std::size_t pos)
{
next_token_start_pos = pos;
}
void next_token_end(std::size_t pos)
{
next_token_end_pos = pos;
}
std::string location_as_str() const
{
return "at=[" + std::to_string(next_token_start_pos) + "," + std::to_string(next_token_end_pos) + ")";
}
bool null() override
{
events.push_back("null(" + location_as_str() + ")");
return true;
}
bool boolean(bool val) override
{
events.push_back("boolean(val=" + std::string(val ? "true" : "false") + ", " + location_as_str() + ")");
return true;
}
bool number_integer(number_integer_t val) override
{
events.push_back("number_integer(val=" + std::to_string(val) + ", " + location_as_str() + ")");
return true;
}
bool number_unsigned(number_unsigned_t val) override
{
events.push_back("number_unsigned(val=" + std::to_string(val) + ", " + location_as_str() + ")");
return true;
}
bool number_float(number_float_t val, const string_t& s) override
{
events.push_back("number_float(val=" + std::to_string(val) + ", s=" + s + ", " + location_as_str() + ")");
return true;
}
bool string(string_t& val) override
{
events.push_back("string(val=" + val + ", " + location_as_str() + ")");
return true;
}
bool start_object(std::size_t elements) override
{
events.push_back("start_object(elements=" + std::to_string(elements) + ", " + location_as_str() + ")");
return true;
}
bool end_object() override
{
events.push_back("end_object(" + location_as_str() + ")");
return true;
}
bool start_array(std::size_t elements) override
{
events.push_back("start_array(elements=" + std::to_string(elements) + ", " + location_as_str() + ")");
return true;
}
bool end_array() override
{
events.push_back("end_array(" + location_as_str() + ")");
return true;
}
bool key(string_t& val) override
{
events.push_back("key(val=" + val + ", " + location_as_str() + ")");
return true;
}
bool binary(json::binary_t& val) override
{
events.push_back("binary(val=[...], " + location_as_str() + ")");
return true;
}
bool parse_error(std::size_t position, const std::string& last_token, const json::exception& ex) override
{
events.push_back("parse_error(position=" + std::to_string(position) + ", last_token=" + last_token + ",\n ex=" + std::string(ex.what()) + ")");
return false;
}
};
int main()
{
// a JSON text
auto text = R"(
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": 100
},
"Animated" : false,
"IDs": [116, 943, 234, -38793],
"DeletionDate": null,
"Distance": 12.723374634
}
}]
)";
// create a SAX event consumer object
sax_event_consumer sec;
// parse JSON
bool result = json::sax_parse(text, &sec);
// output the recorded events
for (auto& event : sec.events)
{
std::cout << event << "\n";
}
// output the result of sax_parse
std::cout << "\nresult: " << std::boolalpha << result << std::endl;
}

View File

@ -0,0 +1,37 @@
start_object(elements=18446744073709551615, at=[5,6))
key(val=Image, at=[15,22))
start_object(elements=18446744073709551615, at=[24,25))
key(val=Width, at=[38,45))
number_unsigned(val=800, at=[48,51))
key(val=Height, at=[65,73))
number_unsigned(val=600, at=[75,78))
key(val=Title, at=[92,99))
string(val=View from 15th Floor, at=[102,124))
key(val=Thumbnail, at=[138,149))
start_object(elements=18446744073709551615, at=[151,152))
key(val=Url, at=[169,174))
string(val=http://www.example.com/image/481989943, at=[179,219))
key(val=Height, at=[237,245))
number_unsigned(val=125, at=[247,250))
key(val=Width, at=[268,275))
number_unsigned(val=100, at=[278,281))
end_object(at=[294,295))
key(val=Animated, at=[309,319))
boolean(val=false, at=[322,327))
key(val=IDs, at=[341,346))
start_array(elements=18446744073709551615, at=[348,349))
number_unsigned(val=116, at=[349,352))
number_unsigned(val=943, at=[354,357))
number_unsigned(val=234, at=[359,362))
number_integer(val=-38793, at=[364,370))
end_array(at=[370,371))
key(val=DeletionDate, at=[385,399))
null(at=[401,405))
key(val=Distance, at=[419,429))
number_float(val=12.723375, s=12.723374634, at=[431,443))
end_object(at=[452,453))
end_object(at=[458,459))
parse_error(position=460, last_token=12.723374634<U+000A> }<U+000A> }],
ex=[json.exception.parse_error.101] parse error at line 17, column 6: syntax error while parsing value - unexpected ']'; expected end of input)
result: false

View File

@ -0,0 +1,339 @@
#include <iomanip>
#include <iostream>
#include <nlohmann/json.hpp>
#include <sstream>
using json = nlohmann::json;
// Custom base class for the JSON node type.
// Gives every node a start and a stop lexer position plus helpers that
// render those positions as text.
struct token_start_stop
{
    nlohmann::detail::position_t start{};
    nlohmann::detail::position_t stop{};

    // renders the start position as "{l=<line>:c=<column>}"
    std::string start_pos_str() const
    {
        // the lexer is already one character ahead (e.g. past the opening
        // '{' of an object), so step the column back by one
        const auto column = start.chars_read_current_line - 1;
        std::string text = "{l=";
        text += std::to_string(start.lines_read);
        text += ":c=";
        text += std::to_string(column);
        text += "}";
        return text;
    }

    // renders the stop position as "{l=<line>:c=<column>}" (no adjustment)
    std::string stop_pos_str() const
    {
        std::string text = "{l=";
        text += std::to_string(stop.lines_read);
        text += ":c=";
        text += std::to_string(stop.chars_read_current_line);
        text += "}";
        return text;
    }

    // renders both positions as the half-open range "[<start>, <stop>)"
    std::string location_str() const
    {
        const std::string from = start_pos_str();
        const std::string to = stop_pos_str();
        return "[" + from + ", " + to + ")";
    }
};
// JSON type that uses token_start_stop as its base class, so every node
// exposes the start/stop members and the *_str() helpers of that struct.
// NOTE(review): the arguments before the last one appear to be basic_json's
// library defaults, spelled out only so the final (base class) argument can
// be supplied - confirm against the basic_json documentation.
using json_with_token_start_stop =
nlohmann::basic_json <
std::map,
std::vector,
std::string,
bool,
std::int64_t,
std::uint64_t,
double,
std::allocator,
nlohmann::adl_serializer,
std::vector<std::uint8_t>,
token_start_stop >; // custom base class carrying the position metadata
// a parser storing the lexer information for each node
class sax_with_token_start_stop_metadata
{
public:
using json = json_with_token_start_stop;
using number_integer_t = typename json::number_integer_t;
using number_unsigned_t = typename json::number_unsigned_t;
using number_float_t = typename json::number_float_t;
using string_t = typename json::string_t;
using binary_t = typename json::binary_t;
/*!
@param[in,out] r reference to a JSON value that is manipulated while
parsing
@param[in] allow_exceptions_ whether parse errors yield exceptions
*/
explicit sax_with_token_start_stop_metadata(json& r, const bool allow_exceptions_ = true)
: root(r)
, ref_stack{}
, object_element{nullptr}
, errored{false}
, allow_exceptions(allow_exceptions_)
, start_stop{}
{}
template<class T1, class T2>
void next_token_start(const nlohmann::detail::lexer<T1, T2>& lex)
{
start_stop.start = lex.get_position();
}
template<class T1, class T2>
void next_token_end(const nlohmann::detail::lexer<T1, T2>& lex)
{
start_stop.stop = lex.get_position();
}
bool null()
{
handle_value(nullptr);
return true;
}
bool boolean(bool val)
{
handle_value(val);
return true;
}
bool number_integer(number_integer_t val)
{
handle_value(val);
return true;
}
bool number_unsigned(number_unsigned_t val)
{
handle_value(val);
return true;
}
bool number_float(number_float_t val, const string_t& /*unused*/)
{
handle_value(val);
return true;
}
bool string(string_t& val)
{
handle_value(val);
return true;
}
bool binary(binary_t& val)
{
handle_value(std::move(val));
return true;
}
bool start_object(std::size_t len)
{
ref_stack.push_back(handle_value(json::value_t::object));
ref_stack.back()->start = start_stop.start;
if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
{
throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive object size: ", std::to_string(len)), ref_stack.back());
}
return true;
}
bool key(string_t& val)
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_object());
// add null at given key and store the reference for later
object_element = &(*ref_stack.back())[val];
return true;
}
bool end_object()
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_object());
ref_stack.back()->stop = start_stop.stop;
ref_stack.pop_back();
return true;
}
bool start_array(std::size_t len)
{
ref_stack.push_back(handle_value(json::value_t::array));
ref_stack.back()->start = start_stop.start;
if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
{
throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive array size: ", std::to_string(len)), ref_stack.back());
}
return true;
}
bool end_array()
{
assert(!ref_stack.empty());
assert(ref_stack.back()->is_array());
ref_stack.back()->stop = start_stop.stop;
ref_stack.pop_back();
return true;
}
template<class Exception>
bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const Exception& ex)
{
errored = true;
static_cast<void>(ex);
if (allow_exceptions)
{
throw ex;
}
return false;
}
constexpr bool is_errored() const
{
return errored;
}
private:
/*!
@invariant If the ref stack is empty, then the passed value will be the new
root.
@invariant If the ref stack contains a value, then it is an array or an
object to which we can add elements
*/
template<typename Value>
json*
handle_value(Value&& v)
{
if (ref_stack.empty())
{
root = json(std::forward<Value>(v));
root.start = start_stop.start;
root.stop = start_stop.stop;
return &root;
}
assert(ref_stack.back()->is_array() || ref_stack.back()->is_object());
if (ref_stack.back()->is_array())
{
auto& array_element = ref_stack.back()->emplace_back(std::forward<Value>(v));
array_element.start = start_stop.start;
array_element.stop = start_stop.stop;
return &array_element;
}
assert(ref_stack.back()->is_object());
assert(object_element);
*object_element = json(std::forward<Value>(v));
object_element->start = start_stop.start;
object_element->stop = start_stop.stop;
return object_element;
}
/// the parsed JSON value
json& root;
/// stack to model hierarchy of values
std::vector<json*> ref_stack{};
/// helper to hold the reference for the next object element
json* object_element = nullptr;
/// whether a syntax error occurred
bool errored = false;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
/// start / stop information for the current token
token_start_stop start_stop{};
};
void dump(const json_with_token_start_stop& j, std::size_t indentlvl = 0)
{
const std::string indent(indentlvl * 4, ' ');
switch (j.type())
{
case nlohmann::json::value_t::null:
{
std::cout << indent << "null(at=" << j.location_str() << ")\n";
}
break;
case nlohmann::json::value_t::object:
{
std::cout << indent << "object(size=" << j.size() << ", at=" << j.location_str() << ")\n";
for (const auto& elem : j.items())
{
dump(elem.value(), indentlvl + 1);
}
}
break;
case nlohmann::json::value_t::array:
{
std::cout << indent << "array(size=" << j.size() << ", at=" << j.location_str() << ")\n";
for (const auto& elem : j)
{
dump(elem, indentlvl + 1);
}
}
break;
case nlohmann::json::value_t::string:
{
std::cout << indent << "string(val=" << j.get<std::string>() << ", at=" << j.location_str() << ")\n";
}
break;
case nlohmann::json::value_t::boolean:
{
std::cout << indent << "boolean(val=" << j.get<bool>() << ", at=" << j.location_str() << ")\n";
}
break;
case nlohmann::json::value_t::number_integer:
{
std::cout << indent << "number_integer(val=" << j.get<std::int64_t>() << ", at=" << j.location_str() << ")\n";
}
break;
case nlohmann::json::value_t::number_unsigned:
{
std::cout << indent << "number_unsigned(val=" << j.get<std::uint64_t>() << ", at=" << j.location_str() << ")\n";
}
break;
case nlohmann::json::value_t::number_float:
{
std::cout << indent << "number_float(val=" << j.get<double>() << ", at=" << j.location_str() << ")\n";
}
break;
default:
throw std::runtime_error{"unexpected input"};
}
}
int main()
{
// a JSON text
auto text = R"({
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": 100
},
"Animated" : false,
"IDs": [116, 943, 234, -38793],
"DeletionDate": null,
"Distance": 12.723374634
}
})";
// create a SAX parser object
json_with_token_start_stop parsed;
sax_with_token_start_stop_metadata sax{parsed};
// parse JSON
bool result = json::sax_parse(text, &sax);
// output the json data
dump(parsed);
// output the result of sax_parse
std::cout << "\nresult: " << std::boolalpha << result << std::endl;
}

View File

@ -0,0 +1,19 @@
object(size=1, at=[{l=0:c=0}, {l=15:c=1}))
object(size=8, at=[{l=1:c=17}, {l=14:c=9}))
boolean(val=0, at=[{l=10:c=25}, {l=10:c=30}))
null(at=[{l=12:c=28}, {l=12:c=32}))
number_float(val=12.7234, at=[{l=13:c=24}, {l=13:c=0}))
number_unsigned(val=600, at=[{l=3:c=22}, {l=3:c=25}))
array(size=4, at=[{l=11:c=19}, {l=11:c=42}))
number_unsigned(val=116, at=[{l=11:c=20}, {l=11:c=23}))
number_unsigned(val=943, at=[{l=11:c=25}, {l=11:c=28}))
number_unsigned(val=234, at=[{l=11:c=30}, {l=11:c=33}))
number_integer(val=-38793, at=[{l=11:c=35}, {l=11:c=41}))
object(size=3, at=[{l=5:c=25}, {l=9:c=13}))
number_unsigned(val=125, at=[{l=7:c=26}, {l=7:c=29}))
string(val=http://www.example.com/image/481989943, at=[{l=6:c=26}, {l=6:c=66}))
number_unsigned(val=100, at=[{l=8:c=26}, {l=8:c=0}))
string(val=View from 15th Floor, at=[{l=4:c=22}, {l=4:c=44}))
number_unsigned(val=800, at=[{l=2:c=22}, {l=2:c=25}))
result: true