From 741d422fa6a31d138ac65f38259155ea34095d21 Mon Sep 17 00:00:00 2001 From: Sfinktah Bungholio Date: Sat, 28 Nov 2020 18:31:57 +1100 Subject: [PATCH] POC for hex literal constants parsing --- include/nlohmann/detail/input/lexer.hpp | 73 ++++++++++++++++++++----- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 0a9601352..77139f9ee 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -929,17 +929,18 @@ class lexer : public lexer_base accepts the number. State "error" is a trap state to model errors. In the table below, "anything" means any character but the ones listed before. - state | 0 | 1-9 | e E | + | - | . | anything - ---------|----------|----------|----------|---------|---------|----------|----------- - init | zero | any1 | [error] | [error] | minus | [error] | [error] - minus | zero | any1 | [error] | [error] | [error] | [error] | [error] - zero | done | done | exponent | done | done | decimal1 | done - any1 | any1 | any1 | exponent | done | done | decimal1 | done - decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] - decimal2 | decimal2 | decimal2 | exponent | done | done | done | done - exponent | any2 | any2 | [error] | sign | sign | [error] | [error] - sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] - any2 | any2 | any2 | done | done | done | done | done + state | 0 | 1-9 | e E | + | - | . | x | a-f A-F | anything + ---------|----------|----------|----------|---------|---------|----------|-----------|-----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | *hex1* | done | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done | done | done + decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done | done | done + hex1 | hex1 | hex1 | hex1 | [error] | [error] | [error] | [error] | hex1 | done The state machine is realized with one label per state (prefixed with "scan_number_") and `goto` statements between them. The state machine @@ -1090,6 +1091,52 @@ scan_number_any1: goto scan_number_done; } +scan_number_hex1: + // state: we are parsing a hex number having previously parsed '0x' or + // '0X' (however ugly that latter may be) + switch (get()) { + case '0': // in hex, we love padding with leading zeros + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + { + add(current); + goto scan_number_hex1; + } + + // omg, we are so not doing hex floating point + //case '.': { + // add(decimal_point_char); + // goto scan_number_decimal1; + //} + + // safer to error while testing, but it would error on natural ends of numbers? + //default: { + // error_message = "invalid hexidecimal number; expected hex digit"; + // return token_type::parse_error; + //} + default: + goto scan_number_done; + } + scan_number_decimal1: // state: we just parsed a decimal point number_type = token_type::value_float; @@ -1242,7 +1289,7 @@ scan_number_done: // try to parse integers first and fall back to floats if (number_type == token_type::value_unsigned) { - const auto x = std::strtoull(token_buffer.data(), &endptr, 10); + const auto x = std::strtoull(token_buffer.data(), &endptr, 0); // we checked the number format before JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); @@ -1258,7 +1305,7 @@ scan_number_done: } else if (number_type == token_type::value_integer) { - const auto x = std::strtoll(token_buffer.data(), &endptr, 10); + const auto x = std::strtoll(token_buffer.data(), &endptr, 0); // we checked the number format before JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());