diff --git a/docs/examples/sax_parse_with_src_location.cpp b/docs/examples/sax_parse_with_src_location.cpp
new file mode 100644
index 000000000..85457772f
--- /dev/null
+++ b/docs/examples/sax_parse_with_src_location.cpp
@@ -0,0 +1,152 @@
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+// a simple event consumer that collects string representations of the passed
+// values and their source locations;
+// note inheriting from json::json_sax_t is not required, but can
+// help not to forget a required function
+class sax_event_consumer : public json::json_sax_t
+{
+  public:
+    std::vector<std::string> events;
+    std::size_t next_token_start_pos = 0;
+    std::size_t next_token_end_pos = 0;
+
+    // receives the offset of the first character of the upcoming token
+    void next_token_start(std::size_t pos)
+    {
+        next_token_start_pos = pos;
+    }
+
+    // receives the offset one past the last character of that token
+    void next_token_end(std::size_t pos)
+    {
+        next_token_end_pos = pos;
+    }
+
+    // renders the last reported token range as the half-open interval [start,end)
+    std::string location_as_str() const
+    {
+        return "at=[" + std::to_string(next_token_start_pos) + "," + std::to_string(next_token_end_pos) + ")";
+    }
+
+    bool null() override
+    {
+        events.push_back("null(" + location_as_str() + ")");
+        return true;
+    }
+
+    bool boolean(bool val) override
+    {
+        events.push_back("boolean(val=" + std::string(val ? "true" : "false") + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool number_integer(number_integer_t val) override
+    {
+        events.push_back("number_integer(val=" + std::to_string(val) + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool number_unsigned(number_unsigned_t val) override
+    {
+        events.push_back("number_unsigned(val=" + std::to_string(val) + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool number_float(number_float_t val, const string_t& s) override
+    {
+        events.push_back("number_float(val=" + std::to_string(val) + ", s=" + s + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool string(string_t& val) override
+    {
+        events.push_back("string(val=" + val + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool start_object(std::size_t elements) override
+    {
+        events.push_back("start_object(elements=" + std::to_string(elements) + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool end_object() override
+    {
+        events.push_back("end_object(" + location_as_str() + ")");
+        return true;
+    }
+
+    bool start_array(std::size_t elements) override
+    {
+        events.push_back("start_array(elements=" + std::to_string(elements) + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool end_array() override
+    {
+        events.push_back("end_array(" + location_as_str() + ")");
+        return true;
+    }
+
+    bool key(string_t& val) override
+    {
+        events.push_back("key(val=" + val + ", " + location_as_str() + ")");
+        return true;
+    }
+
+    bool binary(json::binary_t& /*unused*/) override
+    {
+        events.push_back("binary(val=[...], " + location_as_str() + ")");
+        return true;
+    }
+
+    bool parse_error(std::size_t position, const std::string& last_token, const json::exception& ex) override
+    {
+        events.push_back("parse_error(position=" + std::to_string(position) + ", last_token=" + last_token + ",\n            ex=" + std::string(ex.what()) + ")");
+        return false;
+    }
+};
+
+int main()
+{
+    // a JSON text; the stray ']' at the end is what triggers the parse_error event
+    auto text = R"(
+    {
+        "Image": {
+            "Width":  800,
+            "Height": 600,
+            "Title":  "View from 15th Floor",
+            "Thumbnail": {
+                "Url":    "http://www.example.com/image/481989943",
+                "Height": 125,
+                "Width":  100
+            },
+            "Animated" : false,
+            "IDs": [116, 943, 234, -38793],
+            "DeletionDate": null,
+            "Distance": 12.723374634
+        }
+    }]
+    )";
+
+    // create a SAX event consumer object
+    sax_event_consumer sec;
+
+    // parse JSON
+    bool result = json::sax_parse(text, &sec);
+
+    // output the recorded events
+    for (auto& event : sec.events)
+    {
+        std::cout << event << "\n";
+    }
+
+    // output the result of sax_parse
+    std::cout << "\nresult: " << std::boolalpha << result << std::endl;
+}
diff --git a/docs/examples/sax_parse_with_src_location.output b/docs/examples/sax_parse_with_src_location.output
new file mode 100644
index 000000000..dbc004110
--- /dev/null
+++ b/docs/examples/sax_parse_with_src_location.output
@@ -0,0 +1,37 @@
+start_object(elements=18446744073709551615, at=[5,6))
+key(val=Image, at=[15,22))
+start_object(elements=18446744073709551615, at=[24,25))
+key(val=Width, at=[38,45))
+number_unsigned(val=800, at=[48,51))
+key(val=Height, at=[65,73))
+number_unsigned(val=600, at=[75,78))
+key(val=Title, at=[92,99))
+string(val=View from 15th Floor, at=[102,124))
+key(val=Thumbnail, at=[138,149))
+start_object(elements=18446744073709551615, at=[151,152))
+key(val=Url, at=[169,174))
+string(val=http://www.example.com/image/481989943, at=[179,219))
+key(val=Height, at=[237,245))
+number_unsigned(val=125, at=[247,250))
+key(val=Width, at=[268,275))
+number_unsigned(val=100, at=[278,281))
+end_object(at=[294,295))
+key(val=Animated, at=[309,319))
+boolean(val=false, at=[322,327))
+key(val=IDs, at=[341,346))
+start_array(elements=18446744073709551615, at=[348,349))
+number_unsigned(val=116, at=[349,352))
+number_unsigned(val=943, at=[354,357))
+number_unsigned(val=234, at=[359,362))
+number_integer(val=-38793, at=[364,370))
+end_array(at=[370,371))
+key(val=DeletionDate, at=[385,399))
+null(at=[401,405))
+key(val=Distance, at=[419,429))
+number_float(val=12.723375, s=12.723374634, at=[431,443))
+end_object(at=[452,453))
+end_object(at=[458,459))
+parse_error(position=460, last_token=12.723374634<U+000A>        }<U+000A>    }],
+            ex=[json.exception.parse_error.101] parse error at line 17, column 6: syntax error while parsing value - unexpected ']'; expected end of input)
+
+result: false
diff --git a/docs/examples/sax_parse_with_src_location_in_json.cpp b/docs/examples/sax_parse_with_src_location_in_json.cpp
new file mode 100644
index 000000000..ab9b30cc5
--- /dev/null
+++ b/docs/examples/sax_parse_with_src_location_in_json.cpp
@@ -0,0 +1,346 @@
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+// custom base class for the json node.
+// allows us to store metadata and add custom methods to each node
+struct token_start_stop
+{
+    // lexer positions captured when the node's token starts and stops
+    nlohmann::detail::position_t start{};
+    nlohmann::detail::position_t stop{};
+
+    std::string start_pos_str() const
+    {
+        return "{l=" + std::to_string(start.lines_read) + ":c="
+               // the lexer is already one char ahead (e.g. the opening { of an object )
+               + std::to_string(start.chars_read_current_line - 1) + "}";
+    }
+    std::string stop_pos_str() const
+    {
+        // NOTE(review): the expected output shows c=0 for numbers that are
+        // directly followed by a line break; presumably a lexer artifact - confirm
+        return "{l=" + std::to_string(stop.lines_read) + ":c=" + std::to_string(stop.chars_read_current_line) + "}";
+    }
+    std::string location_str() const
+    {
+        return "[" + start_pos_str() + ", " + stop_pos_str() + ")";
+    }
+};
+
+// json type using token_start_stop as base class
+using json_with_token_start_stop =
+    nlohmann::basic_json <
+    std::map,
+    std::vector,
+    std::string,
+    bool,
+    std::int64_t,
+    std::uint64_t,
+    double,
+    std::allocator,
+    nlohmann::adl_serializer,
+    std::vector<std::uint8_t>,
+    token_start_stop >;
+
+// a parser storing the lexer information for each node
+class sax_with_token_start_stop_metadata
+{
+  public:
+    using json = json_with_token_start_stop;
+    using number_integer_t = typename json::number_integer_t;
+    using number_unsigned_t = typename json::number_unsigned_t;
+    using number_float_t = typename json::number_float_t;
+    using string_t = typename json::string_t;
+    using binary_t = typename json::binary_t;
+
+    /*!
+    @param[in,out] r  reference to a JSON value that is manipulated while
+                       parsing
+    @param[in] allow_exceptions_  whether parse errors yield exceptions
+    */
+    explicit sax_with_token_start_stop_metadata(json& r, const bool allow_exceptions_ = true)
+        : root(r)
+        , ref_stack{}
+        , object_element{nullptr}
+        , errored{false}
+        , allow_exceptions(allow_exceptions_)
+        , start_stop{}
+    {}
+
+    // records the lexer position reported for the start of the next token
+    template<class T1, class T2>
+    void next_token_start(const nlohmann::detail::lexer<T1, T2>& lex)
+    {
+        start_stop.start = lex.get_position();
+    }
+
+    // records the lexer position reported for the end of that token
+    template<class T1, class T2>
+    void next_token_end(const nlohmann::detail::lexer<T1, T2>& lex)
+    {
+        start_stop.stop = lex.get_position();
+    }
+
+    bool null()
+    {
+        handle_value(nullptr);
+        return true;
+    }
+
+    bool boolean(bool val)
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_integer(number_integer_t val)
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_unsigned(number_unsigned_t val)
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool number_float(number_float_t val, const string_t& /*unused*/)
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool string(string_t& val)
+    {
+        handle_value(val);
+        return true;
+    }
+
+    bool binary(binary_t& val)
+    {
+        handle_value(std::move(val));
+        return true;
+    }
+
+    bool start_object(std::size_t len)
+    {
+        ref_stack.push_back(handle_value(json::value_t::object));
+        ref_stack.back()->start = start_stop.start;
+
+        if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
+        {
+            throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive object size: ", std::to_string(len)), ref_stack.back());
+        }
+
+        return true;
+    }
+
+    bool key(string_t& val)
+    {
+        assert(!ref_stack.empty());
+        assert(ref_stack.back()->is_object());
+
+        // add null at given key and store the reference for later
+        object_element = &(*ref_stack.back())[val];
+        return true;
+    }
+
+    bool end_object()
+    {
+        assert(!ref_stack.empty());
+        assert(ref_stack.back()->is_object());
+
+        ref_stack.back()->stop = start_stop.stop;
+        ref_stack.pop_back();
+        return true;
+    }
+
+    bool start_array(std::size_t len)
+    {
+        ref_stack.push_back(handle_value(json::value_t::array));
+        ref_stack.back()->start = start_stop.start;
+
+        if (len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size())
+        {
+            throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive array size: ", std::to_string(len)), ref_stack.back());
+        }
+
+        return true;
+    }
+
+    bool end_array()
+    {
+        assert(!ref_stack.empty());
+        assert(ref_stack.back()->is_array());
+
+        ref_stack.back()->stop = start_stop.stop;
+        ref_stack.pop_back();
+        return true;
+    }
+
+    template<class Exception>
+    bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const Exception& ex)
+    {
+        errored = true;
+        static_cast<void>(ex);
+        if (allow_exceptions)
+        {
+            throw ex;
+        }
+        return false;
+    }
+
+    constexpr bool is_errored() const
+    {
+        return errored;
+    }
+
+  private:
+    /*!
+    @invariant If the ref stack is empty, then the passed value will be the new
+               root.
+    @invariant If the ref stack contains a value, then it is an array or an
+               object to which we can add elements
+    */
+    template<typename Value>
+    json*
+    handle_value(Value&& v)
+    {
+        if (ref_stack.empty())
+        {
+            root = json(std::forward<Value>(v));
+            root.start = start_stop.start;
+            root.stop = start_stop.stop;
+            return &root;
+        }
+
+        assert(ref_stack.back()->is_array() || ref_stack.back()->is_object());
+
+        if (ref_stack.back()->is_array())
+        {
+            auto& array_element = ref_stack.back()->emplace_back(std::forward<Value>(v));
+            array_element.start = start_stop.start;
+            array_element.stop = start_stop.stop;
+            return &array_element;
+        }
+
+        assert(ref_stack.back()->is_object());
+        assert(object_element);
+        *object_element = json(std::forward<Value>(v));
+        object_element->start = start_stop.start;
+        object_element->stop = start_stop.stop;
+        return object_element;
+    }
+
+    /// the parsed JSON value
+    json& root;
+    /// stack to model hierarchy of values
+    std::vector<json*> ref_stack{};
+    /// helper to hold the reference for the next object element
+    json* object_element = nullptr;
+    /// whether a syntax error occurred
+    bool errored = false;
+    /// whether to throw exceptions in case of errors
+    const bool allow_exceptions = true;
+    /// start / stop information for the current token
+    token_start_stop start_stop{};
+};
+
+// prints a node and, recursively, its children together with their source
+// locations; children are indented by four spaces per nesting level
+void dump(const json_with_token_start_stop& j, std::size_t indentlvl = 0)
+{
+    const std::string indent(indentlvl * 4, ' ');
+    switch (j.type())
+    {
+        case nlohmann::json::value_t::null:
+        {
+            std::cout << indent << "null(at=" << j.location_str() << ")\n";
+        }
+        break;
+        case nlohmann::json::value_t::object:
+        {
+            std::cout << indent << "object(size=" << j.size() << ", at=" << j.location_str() << ")\n";
+            for (const auto& elem : j.items())
+            {
+                dump(elem.value(), indentlvl + 1);
+            }
+        }
+        break;
+        case nlohmann::json::value_t::array:
+        {
+            std::cout << indent << "array(size=" << j.size() << ", at=" << j.location_str() << ")\n";
+            for (const auto& elem : j)
+            {
+                dump(elem, indentlvl + 1);
+            }
+        }
+        break;
+        case nlohmann::json::value_t::string:
+        {
+            std::cout << indent << "string(val=" << j.get<std::string>() << ", at=" << j.location_str() << ")\n";
+        }
+        break;
+        case nlohmann::json::value_t::boolean:
+        {
+            std::cout << indent << "boolean(val=" << j.get<bool>() << ", at=" << j.location_str() << ")\n";
+        }
+        break;
+        case nlohmann::json::value_t::number_integer:
+        {
+            std::cout << indent << "number_integer(val=" << j.get<std::int64_t>() << ", at=" << j.location_str() << ")\n";
+        }
+        break;
+        case nlohmann::json::value_t::number_unsigned:
+        {
+            std::cout << indent << "number_unsigned(val=" << j.get<std::uint64_t>() << ", at=" << j.location_str() << ")\n";
+        }
+        break;
+        case nlohmann::json::value_t::number_float:
+        {
+            std::cout << indent << "number_float(val=" << j.get<double>() << ", at=" << j.location_str() << ")\n";
+        }
+        break;
+        default:
+            throw std::runtime_error{"unexpected input"};
+    }
+}
+
+int main()
+{
+    // a JSON text
+    auto text = R"({
+        "Image": {
+            "Width":  800,
+            "Height": 600,
+            "Title":  "View from 15th Floor",
+            "Thumbnail": {
+                "Url":    "http://www.example.com/image/481989943",
+                "Height": 125,
+                "Width":  100
+            },
+            "Animated" : false,
+            "IDs": [116, 943, 234, -38793],
+            "DeletionDate": null,
+            "Distance": 12.723374634
+        }
+})";
+
+    // create a SAX parser object
+    json_with_token_start_stop parsed;
+    sax_with_token_start_stop_metadata sax{parsed};
+
+    // parse JSON
+    bool result = json::sax_parse(text, &sax);
+
+    // output the json data
+    dump(parsed);
+
+    // output the result of sax_parse
+    std::cout << "\nresult: " << std::boolalpha << result << std::endl;
+}
diff --git a/docs/examples/sax_parse_with_src_location_in_json.output b/docs/examples/sax_parse_with_src_location_in_json.output
new file mode 100644
index 000000000..676682e6d
--- /dev/null
+++ b/docs/examples/sax_parse_with_src_location_in_json.output
@@ -0,0 +1,19 @@
+object(size=1, at=[{l=0:c=0}, {l=15:c=1}))
+    object(size=8, at=[{l=1:c=17}, {l=14:c=9}))
+        boolean(val=0, at=[{l=10:c=25}, {l=10:c=30}))
+        null(at=[{l=12:c=28}, {l=12:c=32}))
+        number_float(val=12.7234, at=[{l=13:c=24}, {l=13:c=0}))
+        number_unsigned(val=600, at=[{l=3:c=22}, {l=3:c=25}))
+        array(size=4, at=[{l=11:c=19}, {l=11:c=42}))
+            number_unsigned(val=116, at=[{l=11:c=20}, {l=11:c=23}))
+            number_unsigned(val=943, at=[{l=11:c=25}, {l=11:c=28}))
+            number_unsigned(val=234, at=[{l=11:c=30}, {l=11:c=33}))
+            number_integer(val=-38793, at=[{l=11:c=35}, {l=11:c=41}))
+        object(size=3, at=[{l=5:c=25}, {l=9:c=13}))
+            number_unsigned(val=125, at=[{l=7:c=26}, {l=7:c=29}))
+            string(val=http://www.example.com/image/481989943, at=[{l=6:c=26}, {l=6:c=66}))
+            number_unsigned(val=100, at=[{l=8:c=26}, {l=8:c=0}))
+        string(val=View from 15th Floor, at=[{l=4:c=22}, {l=4:c=44}))
+        number_unsigned(val=800, at=[{l=2:c=22}, {l=2:c=25}))
+
+result: true