diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c629ee6a..d79f8f0f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,12 +30,13 @@ if (POLICY CMP0077) cmake_policy(SET CMP0077 NEW) endif () -option(JSON_BuildTests "Build the unit tests when BUILD_TESTING is enabled." ${MAIN_PROJECT}) -option(JSON_Install "Install CMake targets during install step." ${MAIN_PROJECT}) -option(JSON_MultipleHeaders "Use non-amalgamated version of the library." OFF) +option(JSON_BuildTests "Build the unit tests when BUILD_TESTING is enabled." ${MAIN_PROJECT}) +option(JSON_CI "Enable CI build targets." OFF) +option(JSON_Diagnostics "Use extended diagnostic messages." OFF) option(JSON_ImplicitConversions "Enable implicit conversions." ON) -option(JSON_Diagnostics "Enable better diagnostic messages." OFF) -option(JSON_CI "Enable CI build targets." OFF) +option(JSON_Install "Install CMake targets during install step." ${MAIN_PROJECT}) +option(JSON_MultipleHeaders "Use non-amalgamated version of the library." OFF) +option(JSON_SystemInclude "Include as system headers (skip for clang-tidy)." 
OFF) if (JSON_CI) include(cmake/ci.cmake) @@ -73,6 +74,10 @@ if (JSON_Diagnostics) message(STATUS "Diagnostics enabled") endif() +if (JSON_SystemInclude) + set(NLOHMANN_JSON_SYSTEM_INCLUDE "SYSTEM") +endif() + ## ## TARGET ## create target and add include path @@ -94,7 +99,7 @@ target_compile_definitions( target_include_directories( ${NLOHMANN_JSON_TARGET_NAME} - INTERFACE + ${NLOHMANN_JSON_SYSTEM_INCLUDE} INTERFACE $ $ ) diff --git a/Makefile b/Makefile index 655bb6c4a..43db74d85 100644 --- a/Makefile +++ b/Makefile @@ -32,36 +32,16 @@ all: @echo "ChangeLog.md - generate ChangeLog file" @echo "check-amalgamation - check whether sources have been amalgamated" @echo "clean - remove built files" - @echo "coverage - create coverage information with lcov" - @echo "cppcheck - analyze code with cppcheck" - @echo "cpplint - analyze code with cpplint" - @echo "clang_tidy - analyze code with Clang-Tidy" - @echo "clang_analyze - analyze code with Clang-Analyzer" @echo "doctest - compile example files and check their output" @echo "fuzz_testing - prepare fuzz testing of the JSON parser" @echo "fuzz_testing_bson - prepare fuzz testing of the BSON parser" @echo "fuzz_testing_cbor - prepare fuzz testing of the CBOR parser" @echo "fuzz_testing_msgpack - prepare fuzz testing of the MessagePack parser" @echo "fuzz_testing_ubjson - prepare fuzz testing of the UBJSON parser" - @echo "pedantic_clang - run Clang with maximal warning flags" - @echo "pedantic_gcc - run GCC with maximal warning flags" @echo "pretty - beautify code with Artistic Style" @echo "run_benchmarks - build and run benchmarks" -########################################################################## -# coverage -########################################################################## - -coverage: - rm -fr cmake-build-coverage - mkdir cmake-build-coverage - cd cmake-build-coverage ; cmake .. 
-GNinja -DCMAKE_BUILD_TYPE=Debug -DJSON_Coverage=ON -DJSON_MultipleHeaders=ON - cd cmake-build-coverage ; ninja - cd cmake-build-coverage ; ctest -j10 - cd cmake-build-coverage ; ninja lcov_html - open cmake-build-coverage/test/html/index.html - ########################################################################## # documentation tests ########################################################################## @@ -71,304 +51,6 @@ doctest: $(MAKE) check_output -C doc -########################################################################## -# warning detector -########################################################################## - -# calling Clang with all warnings, except: -# -Wno-c++2a-compat: u8 literals will behave differently in C++20... -# -Wno-deprecated-declarations: the library deprecated some functions -# -Wno-documentation-unknown-command: code uses user-defined commands like @complexity -# -Wno-exit-time-destructors: warning in json code triggered by NLOHMANN_JSON_SERIALIZE_ENUM -# -Wno-float-equal: not all comparisons in the tests can be replaced by Approx -# -Wno-missing-prototypes: for NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE -# -Wno-padded: padding is nothing to warn about -# -Wno-range-loop-analysis: items tests "for(const auto i...)" -# -Wno-extra-semi-stmt: spurious warnings for semicolons after JSON_ASSERT() -# -Wno-switch-enum -Wno-covered-switch-default: pedantic/contradicting warnings about switches -# -Wno-weak-vtables: exception class is defined inline, but has virtual method -pedantic_clang: - rm -fr cmake-build-pedantic - CXXFLAGS=" \ - -std=c++11 -Wno-c++98-compat -Wno-c++98-compat-pedantic \ - -Werror \ - -Weverything \ - -Wno-c++2a-compat \ - -Wno-deprecated-declarations \ - -Wno-documentation-unknown-command \ - -Wno-exit-time-destructors \ - -Wno-float-equal \ - -Wno-missing-prototypes \ - -Wno-padded \ - -Wno-range-loop-analysis \ - -Wno-extra-semi-stmt \ - -Wno-switch-enum -Wno-covered-switch-default \ - -Wno-weak-vtables" 
cmake -S . -B cmake-build-pedantic -GNinja -DCMAKE_BUILD_TYPE=Debug -DJSON_MultipleHeaders=ON -DJSON_BuildTests=On - cmake --build cmake-build-pedantic - -# calling GCC with most warnings -pedantic_gcc: - rm -fr cmake-build-pedantic - CXXFLAGS=" \ - -std=c++11 \ - -pedantic \ - -Werror \ - --all-warnings \ - --extra-warnings \ - -W \ - -Wno-abi-tag \ - -Waddress \ - -Waddress-of-packed-member \ - -Wno-aggregate-return \ - -Waggressive-loop-optimizations \ - -Waligned-new=all \ - -Wall \ - -Walloc-zero \ - -Walloca \ - -Wanalyzer-double-fclose \ - -Wanalyzer-double-free \ - -Wanalyzer-exposure-through-output-file \ - -Wanalyzer-file-leak \ - -Wanalyzer-free-of-non-heap \ - -Wanalyzer-malloc-leak \ - -Wanalyzer-null-argument \ - -Wanalyzer-null-dereference \ - -Wanalyzer-possible-null-argument \ - -Wanalyzer-possible-null-dereference \ - -Wanalyzer-stale-setjmp-buffer \ - -Wanalyzer-tainted-array-index \ - -Wanalyzer-too-complex \ - -Wanalyzer-unsafe-call-within-signal-handler \ - -Wanalyzer-use-after-free \ - -Wanalyzer-use-of-pointer-in-stale-stack-frame \ - -Warith-conversion \ - -Warray-bounds \ - -Warray-bounds=2 \ - -Wattribute-alias=2 \ - -Wattribute-warning \ - -Wattributes \ - -Wbool-compare \ - -Wbool-operation \ - -Wbuiltin-declaration-mismatch \ - -Wbuiltin-macro-redefined \ - -Wc++0x-compat \ - -Wc++11-compat \ - -Wc++14-compat \ - -Wc++17-compat \ - -Wc++1z-compat \ - -Wc++20-compat \ - -Wc++2a-compat \ - -Wcannot-profile \ - -Wcast-align \ - -Wcast-align=strict \ - -Wcast-function-type \ - -Wcast-qual \ - -Wcatch-value=3 \ - -Wchar-subscripts \ - -Wclass-conversion \ - -Wclass-memaccess \ - -Wclobbered \ - -Wcomma-subscript \ - -Wcomment \ - -Wcomments \ - -Wconditionally-supported \ - -Wconversion \ - -Wconversion-null \ - -Wcoverage-mismatch \ - -Wcpp \ - -Wctor-dtor-privacy \ - -Wdangling-else \ - -Wdate-time \ - -Wdelete-incomplete \ - -Wdelete-non-virtual-dtor \ - -Wdeprecated \ - -Wdeprecated-copy \ - -Wdeprecated-copy-dtor \ - 
-Wdeprecated-declarations \ - -Wdisabled-optimization \ - -Wdiv-by-zero \ - -Wdouble-promotion \ - -Wduplicated-branches \ - -Wduplicated-cond \ - -Weffc++ \ - -Wempty-body \ - -Wendif-labels \ - -Wenum-compare \ - -Wexpansion-to-defined \ - -Wextra \ - -Wextra-semi \ - -Wfloat-conversion \ - -Wfloat-equal \ - -Wformat -Wformat-contains-nul \ - -Wformat -Wformat-extra-args \ - -Wformat -Wformat-nonliteral \ - -Wformat -Wformat-security \ - -Wformat -Wformat-y2k \ - -Wformat -Wformat-zero-length \ - -Wformat-diag \ - -Wformat-overflow=2 \ - -Wformat-signedness \ - -Wformat-truncation=2 \ - -Wformat=2 \ - -Wframe-address \ - -Wfree-nonheap-object \ - -Whsa \ - -Wif-not-aligned \ - -Wignored-attributes \ - -Wignored-qualifiers \ - -Wimplicit-fallthrough=5 \ - -Winaccessible-base \ - -Winherited-variadic-ctor \ - -Winit-list-lifetime \ - -Winit-self \ - -Winline \ - -Wint-in-bool-context \ - -Wint-to-pointer-cast \ - -Winvalid-memory-model \ - -Winvalid-offsetof \ - -Winvalid-pch \ - -Wliteral-suffix \ - -Wlogical-not-parentheses \ - -Wlogical-op \ - -Wno-long-long \ - -Wlto-type-mismatch \ - -Wmain \ - -Wmaybe-uninitialized \ - -Wmemset-elt-size \ - -Wmemset-transposed-args \ - -Wmisleading-indentation \ - -Wmismatched-tags \ - -Wmissing-attributes \ - -Wmissing-braces \ - -Wno-missing-declarations \ - -Wmissing-field-initializers \ - -Wmissing-include-dirs \ - -Wmissing-profile \ - -Wmultichar \ - -Wmultiple-inheritance \ - -Wmultistatement-macros \ - -Wno-namespaces \ - -Wnarrowing \ - -Wno-noexcept \ - -Wnoexcept-type \ - -Wnon-template-friend \ - -Wnon-virtual-dtor \ - -Wnonnull \ - -Wnonnull-compare \ - -Wnonportable-cfstrings \ - -Wnormalized=nfkc \ - -Wnull-dereference \ - -Wodr \ - -Wold-style-cast \ - -Wopenmp-simd \ - -Woverflow \ - -Woverlength-strings \ - -Woverloaded-virtual \ - -Wpacked \ - -Wpacked-bitfield-compat \ - -Wpacked-not-aligned \ - -Wno-padded \ - -Wparentheses \ - -Wpedantic \ - -Wpessimizing-move \ - -Wplacement-new=2 \ - -Wpmf-conversions 
\ - -Wpointer-arith \ - -Wpointer-compare \ - -Wpragmas \ - -Wprio-ctor-dtor \ - -Wpsabi \ - -Wredundant-decls \ - -Wredundant-move \ - -Wredundant-tags \ - -Wregister \ - -Wreorder \ - -Wrestrict \ - -Wreturn-local-addr \ - -Wreturn-type \ - -Wscalar-storage-order \ - -Wsequence-point \ - -Wshadow=compatible-local \ - -Wshadow=global \ - -Wshadow=local \ - -Wshift-count-negative \ - -Wshift-count-overflow \ - -Wshift-negative-value \ - -Wshift-overflow=2 \ - -Wsign-compare \ - -Wsign-conversion \ - -Wsign-promo \ - -Wsized-deallocation \ - -Wsizeof-array-argument \ - -Wsizeof-pointer-div \ - -Wsizeof-pointer-memaccess \ - -Wstack-protector \ - -Wstrict-aliasing \ - -Wstrict-aliasing=3 \ - -Wstrict-null-sentinel \ - -Wstrict-overflow \ - -Wstrict-overflow=5 \ - -Wstring-compare \ - -Wstringop-overflow \ - -Wstringop-overflow=4 \ - -Wstringop-truncation \ - -Wsubobject-linkage \ - -Wsuggest-attribute=cold \ - -Wsuggest-attribute=const \ - -Wsuggest-attribute=format \ - -Wsuggest-attribute=malloc \ - -Wsuggest-attribute=noreturn \ - -Wsuggest-attribute=pure \ - -Wsuggest-final-methods \ - -Wsuggest-final-types \ - -Wsuggest-override \ - -Wswitch \ - -Wswitch-bool \ - -Wswitch-default \ - -Wno-switch-enum \ - -Wswitch-outside-range \ - -Wswitch-unreachable \ - -Wsync-nand \ - -Wsynth \ - -Wno-system-headers \ - -Wtautological-compare \ - -Wno-templates \ - -Wterminate \ - -Wtrampolines \ - -Wtrigraphs \ - -Wtype-limits \ - -Wundef \ - -Wuninitialized \ - -Wunknown-pragmas \ - -Wunreachable-code \ - -Wunsafe-loop-optimizations \ - -Wunused \ - -Wunused-but-set-parameter \ - -Wunused-but-set-variable \ - -Wunused-const-variable=2 \ - -Wunused-function \ - -Wunused-label \ - -Wno-unused-local-typedefs \ - -Wunused-macros \ - -Wunused-parameter \ - -Wunused-result \ - -Wunused-value \ - -Wunused-variable \ - -Wuseless-cast \ - -Wvarargs \ - -Wvariadic-macros \ - -Wvector-operation-performance \ - -Wvirtual-inheritance \ - -Wvirtual-move-assign \ - -Wvla \ - -Wvolatile \ - 
-Wvolatile-register-var \ - -Wwrite-strings \ - -Wzero-as-null-pointer-constant \ - -Wzero-length-bounds \ - " cmake -S . -B cmake-build-pedantic -GNinja -DCMAKE_BUILD_TYPE=Debug -DJSON_MultipleHeaders=ON -DJSON_BuildTests=On - cmake --build cmake-build-pedantic - ########################################################################## # benchmarks ########################################################################## @@ -444,33 +126,6 @@ fuzzing-stop: # Static analysis ########################################################################## -# call cppcheck -# Note: this target is called by Travis -cppcheck: - cppcheck --enable=warning --inline-suppr --inconclusive --force --std=c++11 $(AMALGAMATED_FILE) --error-exitcode=1 - -# call Clang Static Analyzer -clang_analyze: - rm -fr cmake-build-clang-analyze - mkdir cmake-build-clang-analyze - cd cmake-build-clang-analyze ; CCC_CXX=$(COMPILER_DIR)/clang++ CXX=$(COMPILER_DIR)/clang++ $(COMPILER_DIR)/scan-build cmake .. -GNinja -DJSON_BuildTests=On - cd cmake-build-clang-analyze ; \ - $(COMPILER_DIR)/scan-build \ - -enable-checker 
alpha.core.BoolAssignment,alpha.core.CallAndMessageUnInitRefArg,alpha.core.CastSize,alpha.core.CastToStruct,alpha.core.Conversion,alpha.core.DynamicTypeChecker,alpha.core.FixedAddr,alpha.core.PointerArithm,alpha.core.PointerSub,alpha.core.SizeofPtr,alpha.core.StackAddressAsyncEscape,alpha.core.TestAfterDivZero,alpha.deadcode.UnreachableCode,core.builtin.BuiltinFunctions,core.builtin.NoReturnFunctions,core.CallAndMessage,core.DivideZero,core.DynamicTypePropagation,core.NonnilStringConstants,core.NonNullParamChecker,core.NullDereference,core.StackAddressEscape,core.UndefinedBinaryOperatorResult,core.uninitialized.ArraySubscript,core.uninitialized.Assign,core.uninitialized.Branch,core.uninitialized.CapturedBlockVariable,core.uninitialized.UndefReturn,core.VLASize,cplusplus.InnerPointer,cplusplus.Move,cplusplus.NewDelete,cplusplus.NewDeleteLeaks,cplusplus.SelfAssignment,deadcode.DeadStores,nullability.NullableDereferenced,nullability.NullablePassedToNonnull,nullability.NullableReturnedFromNonnull,nullability.NullPassedToNonnull,nullability.NullReturnedFromNonnull \ - --use-c++=$(COMPILER_DIR)/clang++ -analyze-headers -o report ninja - open cmake-build-clang-analyze/report/*/index.html - -# call cpplint -# Note: some errors expected due to false positives -cpplint: - third_party/cpplint/cpplint.py \ - --filter=-whitespace,-legal,-readability/alt_tokens,-runtime/references,-runtime/explicit \ - --quiet --recursive $(SRCS) - -# call Clang-Tidy -clang_tidy: - $(COMPILER_DIR)/clang-tidy $(SRCS) -- -Iinclude -std=c++11 - # call PVS-Studio Analyzer pvs_studio: rm -fr cmake-build-pvs-studio @@ -480,26 +135,6 @@ pvs_studio: cd cmake-build-pvs-studio ; plog-converter -a'GA:1,2;64:1;CS' -t fullhtml PVS-Studio.log -o pvs open cmake-build-pvs-studio/pvs/index.html -# call Infer static analyzer -infer: - rm -fr cmake-build-infer - mkdir cmake-build-infer - cd cmake-build-infer ; infer compile -- cmake .. 
-DJSON_MultipleHeaders=ON ; infer run -- make -j 4 - -# call OCLint static analyzer -oclint: - oclint $(SRCS) -report-type html -enable-global-analysis -o oclint_report.html -max-priority-1=10000 -max-priority-2=10000 -max-priority-3=10000 -- -std=c++11 -Iinclude - open oclint_report.html - -# execute the test suite with Clang sanitizers (address and undefined behavior) -clang_sanitize: - rm -fr cmake-build-clang-sanitize - mkdir cmake-build-clang-sanitize - cd cmake-build-clang-sanitize ; CXX=$(COMPILER_DIR)/clang++ cmake .. -DJSON_Sanitizer=On -DJSON_MultipleHeaders=ON -DJSON_BuildTests=On -GNinja - cd cmake-build-clang-sanitize ; ninja - cd cmake-build-clang-sanitize ; ctest -j10 - - ########################################################################## # Code format and source amalgamation ########################################################################## @@ -547,42 +182,6 @@ check-amalgamation: @diff $(AMALGAMATED_FILE) $(AMALGAMATED_FILE)~ || (echo "===================================================================\n Amalgamation required! Please read the contribution guidelines\n in file .github/CONTRIBUTING.md.\n===================================================================" ; mv $(AMALGAMATED_FILE)~ $(AMALGAMATED_FILE) ; false) @mv $(AMALGAMATED_FILE)~ $(AMALGAMATED_FILE) -# check if every header in nlohmann includes sufficient headers to be compiled individually -check-single-includes: - @for x in $(SRCS); do \ - echo "Checking self-sufficiency of $$x..." 
; \ - echo "#include <$$x>\nint main() {}\n" | $(SED) 's|include/||' > single_include_test.cpp; \ - $(CXX) $(CXXFLAGS) -Iinclude -std=c++11 single_include_test.cpp -o single_include_test; \ - rm -f single_include_test.cpp single_include_test; \ - done - - -########################################################################## -# CMake -########################################################################## - -# grep "^option" CMakeLists.txt test/CMakeLists.txt | $(SED) 's/(/ /' | awk '{print $2}' | xargs - -# check if all flags of our CMake files work -check_cmake_flags_do: - $(CMAKE_BINARY) --version - for flag in JSON_BuildTests JSON_Install JSON_MultipleHeaders JSON_Sanitizer JSON_Valgrind JSON_NoExceptions JSON_Coverage; do \ - rm -fr cmake_build; \ - mkdir cmake_build; \ - echo "\n\n$(CMAKE_BINARY) .. -D$$flag=On\n" ; \ - cd cmake_build ; \ - $(CMAKE_BINARY) -Werror=dev .. -D$$flag=On -DCMAKE_CXX_COMPILE_FEATURES="cxx_std_11;cxx_range_for" -DCMAKE_CXX_FLAGS="-std=gnu++11" ; \ - test -f Makefile || exit 1 ; \ - cd .. 
; \ - done; - -# call target `check_cmake_flags_do` twice: once for minimal required CMake version 3.1.0 and once for the installed version -check_cmake_flags: - wget https://github.com/Kitware/CMake/releases/download/v3.1.0/cmake-3.1.0-Darwin64.tar.gz - tar xfz cmake-3.1.0-Darwin64.tar.gz - CMAKE_BINARY=$(abspath cmake-3.1.0-Darwin64/CMake.app/Contents/bin/cmake) $(MAKE) check_cmake_flags_do - CMAKE_BINARY=$(shell which cmake) $(MAKE) check_cmake_flags_do - ########################################################################## # ChangeLog @@ -629,7 +228,7 @@ clean: rm -fr json_unit json_benchmarks fuzz fuzz-testing *.dSYM test/*.dSYM oclint_report.html rm -fr benchmarks/files/numbers/*.json rm -fr cmake-3.1.0-Darwin64.tar.gz cmake-3.1.0-Darwin64 - rm -fr cmake-build-coverage cmake-build-benchmarks cmake-build-pedantic fuzz-testing cmake-build-clang-analyze cmake-build-pvs-studio cmake-build-infer cmake-build-clang-sanitize cmake_build + rm -fr cmake-build-benchmarks cmake-build-pedantic fuzz-testing cmake-build-clang-analyze cmake-build-pvs-studio cmake-build-infer cmake_build $(MAKE) clean -Cdoc ########################################################################## diff --git a/README.md b/README.md index eabf1302a..9f972c211 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Windows](https://github.com/nlohmann/json/workflows/Windows/badge.svg)](https://github.com/nlohmann/json/actions?query=workflow%3AWindows) [![Coverage Status](https://coveralls.io/repos/github/nlohmann/json/badge.svg?branch=develop)](https://coveralls.io/github/nlohmann/json?branch=develop) [![Coverity Scan Build Status](https://scan.coverity.com/projects/5550/badge.svg)](https://scan.coverity.com/projects/nlohmann-json) -[![Codacy Badge](https://api.codacy.com/project/badge/Grade/f3732b3327e34358a0e9d1fe9f661f08)](https://www.codacy.com/app/nlohmann/json?utm_source=github.com&utm_medium=referral&utm_content=nlohmann/json&utm_campaign=Badge_Grade) +[![Codacy 
Badge](https://app.codacy.com/project/badge/Grade/e0d1a9d5d6fd46fcb655c4cb930bb3e8)](https://www.codacy.com/gh/nlohmann/json/dashboard?utm_source=github.com&utm_medium=referral&utm_content=nlohmann/json&utm_campaign=Badge_Grade) [![Language grade: C/C++](https://img.shields.io/lgtm/grade/cpp/g/nlohmann/json.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/nlohmann/json/context:cpp) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/json.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:json) [![Try online](https://img.shields.io/badge/try-online-blue.svg)](https://wandbox.org/permlink/3lCHrFUZANONKv7a) @@ -22,10 +22,7 @@ - [Design goals](#design-goals) - [Sponsors](#sponsors) -- [Integration](#integration) - - [CMake](#cmake) - - [Package Managers](#package-managers) - - [Pkg-config](#pkg-config) +- [Support](#support) ([documentation](https://json.nlohmann.me), [FAQ](https://json.nlohmann.me/home/faq/), [discussions](https://github.com/nlohmann/json/discussions), [API](https://json.nlohmann.me/api/basic_json/), [bug issues](https://github.com/nlohmann/json/issues)) - [Examples](#examples) - [JSON as first-class data type](#json-as-first-class-data-type) - [Serialization / Deserialization](#serialization--deserialization) @@ -38,6 +35,10 @@ - [Specializing enum conversion](#specializing-enum-conversion) - [Binary formats (BSON, CBOR, MessagePack, and UBJSON)](#binary-formats-bson-cbor-messagepack-and-ubjson) - [Supported compilers](#supported-compilers) +- [Integration](#integration) + - [CMake](#cmake) + - [Package Managers](#package-managers) + - [Pkg-config](#pkg-config) - [License](#license) - [Contact](#contact) - [Thanks](#thanks) @@ -79,180 +80,16 @@ You can sponsor this library at [GitHub Sponsors](https://github.com/sponsors/nl Thanks everyone! 
+## Support -## Integration +:question: If you have a **question**, please check if it is already answered in the [**FAQ**](https://json.nlohmann.me/home/faq/) or the [**Q&A**](https://github.com/nlohmann/json/discussions/categories/q-a) section. If not, please [**ask a new question**](https://github.com/nlohmann/json/discussions/new) there. -[`json.hpp`](https://github.com/nlohmann/json/blob/develop/single_include/nlohmann/json.hpp) is the single required file in `single_include/nlohmann` or [released here](https://github.com/nlohmann/json/releases). You need to add +:books: If you want to **learn more** about how to use the library, check out the rest of the [**README**](#examples), have a look at [**code examples**](https://github.com/nlohmann/json/tree/develop/doc/examples), or browse through the [**help pages**](https://json.nlohmann.me). -```cpp -#include <nlohmann/json.hpp> +:construction: If you want to understand the **API** better, check out the [**API Reference**](https://json.nlohmann.me/api/basic_json/) or the [**Doxygen documentation**](https://json.nlohmann.me/doxygen/index.html). -// for convenience -using json = nlohmann::json; -``` +:bug: If you found a **bug**, please check the [**FAQ**](https://json.nlohmann.me/home/faq/) if it is a known issue or the result of a design decision. Please also have a look at the [**issue list**](https://github.com/nlohmann/json/issues) before you [**create a new issue**](https://github.com/nlohmann/json/issues/new/choose). Please provide as much information as possible to help us understand and reproduce your issue. -to the files you want to process JSON and set the necessary switches to enable C++11 (e.g., `-std=c++11` for GCC and Clang). - -You can further use file [`include/nlohmann/json_fwd.hpp`](https://github.com/nlohmann/json/blob/develop/include/nlohmann/json_fwd.hpp) for forward-declarations. The installation of json_fwd.hpp (as part of cmake's install step), can be achieved by setting `-DJSON_MultipleHeaders=ON`. 
- -### CMake - -You can also use the `nlohmann_json::nlohmann_json` interface target in CMake. This target populates the appropriate usage requirements for `INTERFACE_INCLUDE_DIRECTORIES` to point to the appropriate include directories and `INTERFACE_COMPILE_FEATURES` for the necessary C++11 flags. - -#### External - -To use this library from a CMake project, you can locate it directly with `find_package()` and use the namespaced imported target from the generated package configuration: - -```cmake -# CMakeLists.txt -find_package(nlohmann_json 3.2.0 REQUIRED) -... -add_library(foo ...) -... -target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) -``` - -The package configuration file, `nlohmann_jsonConfig.cmake`, can be used either from an install tree or directly out of the build tree. - -#### Embedded - -To embed the library directly into an existing CMake project, place the entire source tree in a subdirectory and call `add_subdirectory()` in your `CMakeLists.txt` file: - -```cmake -# Typically you don't care so much for a third party library's tests to be -# run from your own project's code. -set(JSON_BuildTests OFF CACHE INTERNAL "") - -# If you only include this third party in PRIVATE source files, you do not -# need to install it when your main project gets installed. -# set(JSON_Install OFF CACHE INTERNAL "") - -# Don't use include(nlohmann_json/CMakeLists.txt) since that carries with it -# unintended consequences that will break the build. It's generally -# discouraged (although not necessarily well documented as such) to use -# include(...) for pulling in other CMake projects anyways. -add_subdirectory(nlohmann_json) -... -add_library(foo ...) -... -target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) -``` - -##### Embedded (FetchContent) - -Since CMake v3.11, -[FetchContent](https://cmake.org/cmake/help/v3.11/module/FetchContent.html) can -be used to automatically download the repository as a dependency at configure time. 
- -Example: -```cmake -include(FetchContent) - -FetchContent_Declare(json - GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG v3.7.3) - -FetchContent_GetProperties(json) -if(NOT json_POPULATED) - FetchContent_Populate(json) - add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL) -endif() - -target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) -``` - -**Note**: The repository https://github.com/nlohmann/json download size is huge. -It contains all the dataset used for the benchmarks. You might want to depend on -a smaller repository. For instance, you might want to replace the URL above by -https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent - -#### Supporting Both - -To allow your project to support either an externally supplied or an embedded JSON library, you can use a pattern akin to the following: - -``` cmake -# Top level CMakeLists.txt -project(FOO) -... -option(FOO_USE_EXTERNAL_JSON "Use an external JSON library" OFF) -... -add_subdirectory(thirdparty) -... -add_library(foo ...) -... -# Note that the namespaced target will always be available regardless of the -# import method -target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) -``` -```cmake -# thirdparty/CMakeLists.txt -... -if(FOO_USE_EXTERNAL_JSON) - find_package(nlohmann_json 3.2.0 REQUIRED) -else() - set(JSON_BuildTests OFF CACHE INTERNAL "") - add_subdirectory(nlohmann_json) -endif() -... -``` - -`thirdparty/nlohmann_json` is then a complete copy of this source tree. - -### Package Managers - -:beer: If you are using OS X and [Homebrew](https://brew.sh), just type `brew tap nlohmann/json` and `brew install nlohmann-json` and you're set. If you want the bleeding edge rather than the latest release, use `brew install nlohmann-json --HEAD`. - -If you are using the [Meson Build System](https://mesonbuild.com), add this source tree as a [meson subproject](https://mesonbuild.com/Subprojects.html#using-a-subproject). 
You may also use the `include.zip` published in this project's [Releases](https://github.com/nlohmann/json/releases) to reduce the size of the vendored source tree. Alternatively, you can get a wrap file by downloading it from [Meson WrapDB](https://wrapdb.mesonbuild.com/nlohmann_json), or simply use `meson wrap install nlohmann_json`. Please see the meson project for any issues regarding the packaging. - -The provided meson.build can also be used as an alternative to cmake for installing `nlohmann_json` system-wide in which case a pkg-config file is installed. To use it, simply have your build system require the `nlohmann_json` pkg-config dependency. In Meson, it is preferred to use the [`dependency()`](https://mesonbuild.com/Reference-manual.html#dependency) object with a subproject fallback, rather than using the subproject directly. - -If you are using [Conan](https://www.conan.io/) to manage your dependencies, merely add [`nlohmann_json/x.y.z`](https://conan.io/center/nlohmann_json) to your `conanfile`'s requires, where `x.y.z` is the release version you want to use. Please file issues [here](https://github.com/conan-io/conan-center-index/issues) if you experience problems with the packages. - -If you are using [Spack](https://www.spack.io/) to manage your dependencies, you can use the [`nlohmann-json` package](https://spack.readthedocs.io/en/latest/package_list.html#nlohmann-json). Please see the [spack project](https://github.com/spack/spack) for any issues regarding the packaging. - -If you are using [hunter](https://github.com/cpp-pm/hunter) on your project for external dependencies, then you can use the [nlohmann_json package](https://hunter.readthedocs.io/en/latest/packages/pkg/nlohmann_json.html). Please see the hunter project for any issues regarding the packaging. - -If you are using [Buckaroo](https://buckaroo.pm), you can install this library's module with `buckaroo add github.com/buckaroo-pm/nlohmann-json`. 
Please file issues [here](https://github.com/buckaroo-pm/nlohmann-json). There is a demo repo [here](https://github.com/njlr/buckaroo-nholmann-json-example). - -If you are using [vcpkg](https://github.com/Microsoft/vcpkg/) on your project for external dependencies, then you can use the [nlohmann-json package](https://github.com/Microsoft/vcpkg/tree/master/ports/nlohmann-json). Please see the vcpkg project for any issues regarding the packaging. - -If you are using [cget](https://cget.readthedocs.io/en/latest/), you can install the latest development version with `cget install nlohmann/json`. A specific version can be installed with `cget install nlohmann/json@v3.1.0`. Also, the multiple header version can be installed by adding the `-DJSON_MultipleHeaders=ON` flag (i.e., `cget install nlohmann/json -DJSON_MultipleHeaders=ON`). - -If you are using [CocoaPods](https://cocoapods.org), you can use the library by adding pod `"nlohmann_json", '~>3.1.2'` to your podfile (see [an example](https://bitbucket.org/benman/nlohmann_json-cocoapod/src/master/)). Please file issues [here](https://bitbucket.org/benman/nlohmann_json-cocoapod/issues?status=new&status=open). - -If you are using [NuGet](https://www.nuget.org), you can use the package [nlohmann.json](https://www.nuget.org/packages/nlohmann.json/). Please check [this extensive description](https://github.com/nlohmann/json/issues/1132#issuecomment-452250255) on how to use the package. Please files issues [here](https://github.com/hnkb/nlohmann-json-nuget/issues). - -If you are using [conda](https://conda.io/), you can use the package [nlohmann_json](https://github.com/conda-forge/nlohmann_json-feedstock) from [conda-forge](https://conda-forge.org) executing `conda install -c conda-forge nlohmann_json`. Please file issues [here](https://github.com/conda-forge/nlohmann_json-feedstock/issues). 
- -If you are using [MSYS2](https://www.msys2.org/), your can use the [mingw-w64-nlohmann-json](https://packages.msys2.org/base/mingw-w64-nlohmann-json) package, just type `pacman -S mingw-w64-i686-nlohmann-json` or `pacman -S mingw-w64-x86_64-nlohmann-json` for installation. Please file issues [here](https://github.com/msys2/MINGW-packages/issues/new?title=%5Bnlohmann-json%5D) if you experience problems with the packages. - -If you are using [`build2`](https://build2.org), you can use the [`nlohmann-json`](https://cppget.org/nlohmann-json) package from the public repository https://cppget.org or directly from the [package's sources repository](https://github.com/build2-packaging/nlohmann-json). In your project's `manifest` file, just add `depends: nlohmann-json` (probably with some [version constraints](https://build2.org/build2-toolchain/doc/build2-toolchain-intro.xhtml#guide-add-remove-deps)). If you are not familiar with using dependencies in `build2`, [please read this introduction](https://build2.org/build2-toolchain/doc/build2-toolchain-intro.xhtml). -Please file issues [here](https://github.com/build2-packaging/nlohmann-json) if you experience problems with the packages. - -If you are using [`wsjcpp`](https://wsjcpp.org), you can use the command `wsjcpp install "https://github.com/nlohmann/json:develop"` to get the latest version. Note you can change the branch ":develop" to an existing tag or another branch. - -If you are using [`CPM.cmake`](https://github.com/TheLartians/CPM.cmake), you can check this [`example`](https://github.com/TheLartians/CPM.cmake/tree/master/examples/json). 
After [adding CPM script](https://github.com/TheLartians/CPM.cmake#adding-cpm) to your project, implement the following snippet to your CMake: - -```cmake -CPMAddPackage( - NAME nlohmann_json - GITHUB_REPOSITORY nlohmann/json - VERSION 3.9.1) -``` - -### Pkg-config - -If you are using bare Makefiles, you can use `pkg-config` to generate the include flags that point to where the library is installed: - -```sh -pkg-config nlohmann_json --cflags -``` - -Users of the Meson build system will also be able to use a system wide library, which will be found by `pkg-config`: - -```meson -json = dependency('nlohmann_json', required: true) -``` ## Examples @@ -1280,6 +1117,182 @@ The following compilers are currently used in continuous integration at [Travis] | Visual Studio 16 2019 MSVC 19.28.29912.0 (Build Engine version 16.9.0+57a23d249 for .NET Framework) | Windows-10.0.17763 | GitHub Actions | | Visual Studio 16 2019 MSVC 19.28.29912.0 (Build Engine version 16.9.0+57a23d249 for .NET Framework) | Windows-10.0.17763 | AppVeyor | + +## Integration + +[`json.hpp`](https://github.com/nlohmann/json/blob/develop/single_include/nlohmann/json.hpp) is the single required file in `single_include/nlohmann` or [released here](https://github.com/nlohmann/json/releases). You need to add + +```cpp +#include <nlohmann/json.hpp> + +// for convenience +using json = nlohmann::json; +``` + +to the files you want to process JSON and set the necessary switches to enable C++11 (e.g., `-std=c++11` for GCC and Clang). + +You can further use file [`include/nlohmann/json_fwd.hpp`](https://github.com/nlohmann/json/blob/develop/include/nlohmann/json_fwd.hpp) for forward-declarations. The installation of json_fwd.hpp (as part of cmake's install step), can be achieved by setting `-DJSON_MultipleHeaders=ON`. + +### CMake + +You can also use the `nlohmann_json::nlohmann_json` interface target in CMake. 
This target populates the appropriate usage requirements for `INTERFACE_INCLUDE_DIRECTORIES` to point to the appropriate include directories and `INTERFACE_COMPILE_FEATURES` for the necessary C++11 flags. + +#### External + +To use this library from a CMake project, you can locate it directly with `find_package()` and use the namespaced imported target from the generated package configuration: + +```cmake +# CMakeLists.txt +find_package(nlohmann_json 3.2.0 REQUIRED) +... +add_library(foo ...) +... +target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) +``` + +The package configuration file, `nlohmann_jsonConfig.cmake`, can be used either from an install tree or directly out of the build tree. + +#### Embedded + +To embed the library directly into an existing CMake project, place the entire source tree in a subdirectory and call `add_subdirectory()` in your `CMakeLists.txt` file: + +```cmake +# Typically you don't care so much for a third party library's tests to be +# run from your own project's code. +set(JSON_BuildTests OFF CACHE INTERNAL "") + +# If you only include this third party in PRIVATE source files, you do not +# need to install it when your main project gets installed. +# set(JSON_Install OFF CACHE INTERNAL "") + +# Don't use include(nlohmann_json/CMakeLists.txt) since that carries with it +# unintended consequences that will break the build. It's generally +# discouraged (although not necessarily well documented as such) to use +# include(...) for pulling in other CMake projects anyways. +add_subdirectory(nlohmann_json) +... +add_library(foo ...) +... +target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) +``` + +##### Embedded (FetchContent) + +Since CMake v3.11, +[FetchContent](https://cmake.org/cmake/help/v3.11/module/FetchContent.html) can +be used to automatically download the repository as a dependency at configure time. 
+ +Example: +```cmake +include(FetchContent) + +FetchContent_Declare(json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG v3.7.3) + +FetchContent_GetProperties(json) +if(NOT json_POPULATED) + FetchContent_Populate(json) + add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL) +endif() + +target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) +``` + +**Note**: The repository https://github.com/nlohmann/json download size is huge. +It contains all the dataset used for the benchmarks. You might want to depend on +a smaller repository. For instance, you might want to replace the URL above by +https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent + +#### Supporting Both + +To allow your project to support either an externally supplied or an embedded JSON library, you can use a pattern akin to the following: + +``` cmake +# Top level CMakeLists.txt +project(FOO) +... +option(FOO_USE_EXTERNAL_JSON "Use an external JSON library" OFF) +... +add_subdirectory(thirdparty) +... +add_library(foo ...) +... +# Note that the namespaced target will always be available regardless of the +# import method +target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) +``` +```cmake +# thirdparty/CMakeLists.txt +... +if(FOO_USE_EXTERNAL_JSON) + find_package(nlohmann_json 3.2.0 REQUIRED) +else() + set(JSON_BuildTests OFF CACHE INTERNAL "") + add_subdirectory(nlohmann_json) +endif() +... +``` + +`thirdparty/nlohmann_json` is then a complete copy of this source tree. + +### Package Managers + +:beer: If you are using OS X and [Homebrew](https://brew.sh), just type `brew tap nlohmann/json` and `brew install nlohmann-json` and you're set. If you want the bleeding edge rather than the latest release, use `brew install nlohmann-json --HEAD`. + +If you are using the [Meson Build System](https://mesonbuild.com), add this source tree as a [meson subproject](https://mesonbuild.com/Subprojects.html#using-a-subproject). 
You may also use the `include.zip` published in this project's [Releases](https://github.com/nlohmann/json/releases) to reduce the size of the vendored source tree. Alternatively, you can get a wrap file by downloading it from [Meson WrapDB](https://wrapdb.mesonbuild.com/nlohmann_json), or simply use `meson wrap install nlohmann_json`. Please see the meson project for any issues regarding the packaging. + +The provided meson.build can also be used as an alternative to cmake for installing `nlohmann_json` system-wide in which case a pkg-config file is installed. To use it, simply have your build system require the `nlohmann_json` pkg-config dependency. In Meson, it is preferred to use the [`dependency()`](https://mesonbuild.com/Reference-manual.html#dependency) object with a subproject fallback, rather than using the subproject directly. + +If you are using [Conan](https://www.conan.io/) to manage your dependencies, merely add [`nlohmann_json/x.y.z`](https://conan.io/center/nlohmann_json) to your `conanfile`'s requires, where `x.y.z` is the release version you want to use. Please file issues [here](https://github.com/conan-io/conan-center-index/issues) if you experience problems with the packages. + +If you are using [Spack](https://www.spack.io/) to manage your dependencies, you can use the [`nlohmann-json` package](https://spack.readthedocs.io/en/latest/package_list.html#nlohmann-json). Please see the [spack project](https://github.com/spack/spack) for any issues regarding the packaging. + +If you are using [hunter](https://github.com/cpp-pm/hunter) on your project for external dependencies, then you can use the [nlohmann_json package](https://hunter.readthedocs.io/en/latest/packages/pkg/nlohmann_json.html). Please see the hunter project for any issues regarding the packaging. + +If you are using [Buckaroo](https://buckaroo.pm), you can install this library's module with `buckaroo add github.com/buckaroo-pm/nlohmann-json`. 
Please file issues [here](https://github.com/buckaroo-pm/nlohmann-json). There is a demo repo [here](https://github.com/njlr/buckaroo-nholmann-json-example). + +If you are using [vcpkg](https://github.com/Microsoft/vcpkg/) on your project for external dependencies, then you can use the [nlohmann-json package](https://github.com/Microsoft/vcpkg/tree/master/ports/nlohmann-json). Please see the vcpkg project for any issues regarding the packaging. + +If you are using [cget](https://cget.readthedocs.io/en/latest/), you can install the latest development version with `cget install nlohmann/json`. A specific version can be installed with `cget install nlohmann/json@v3.1.0`. Also, the multiple header version can be installed by adding the `-DJSON_MultipleHeaders=ON` flag (i.e., `cget install nlohmann/json -DJSON_MultipleHeaders=ON`). + +If you are using [CocoaPods](https://cocoapods.org), you can use the library by adding pod `"nlohmann_json", '~>3.1.2'` to your podfile (see [an example](https://bitbucket.org/benman/nlohmann_json-cocoapod/src/master/)). Please file issues [here](https://bitbucket.org/benman/nlohmann_json-cocoapod/issues?status=new&status=open). + +If you are using [NuGet](https://www.nuget.org), you can use the package [nlohmann.json](https://www.nuget.org/packages/nlohmann.json/). Please check [this extensive description](https://github.com/nlohmann/json/issues/1132#issuecomment-452250255) on how to use the package. Please file issues [here](https://github.com/hnkb/nlohmann-json-nuget/issues). + +If you are using [conda](https://conda.io/), you can use the package [nlohmann_json](https://github.com/conda-forge/nlohmann_json-feedstock) from [conda-forge](https://conda-forge.org) executing `conda install -c conda-forge nlohmann_json`. Please file issues [here](https://github.com/conda-forge/nlohmann_json-feedstock/issues).
+ +If you are using [MSYS2](https://www.msys2.org/), you can use the [mingw-w64-nlohmann-json](https://packages.msys2.org/base/mingw-w64-nlohmann-json) package, just type `pacman -S mingw-w64-i686-nlohmann-json` or `pacman -S mingw-w64-x86_64-nlohmann-json` for installation. Please file issues [here](https://github.com/msys2/MINGW-packages/issues/new?title=%5Bnlohmann-json%5D) if you experience problems with the packages. + +If you are using [`build2`](https://build2.org), you can use the [`nlohmann-json`](https://cppget.org/nlohmann-json) package from the public repository https://cppget.org or directly from the [package's sources repository](https://github.com/build2-packaging/nlohmann-json). In your project's `manifest` file, just add `depends: nlohmann-json` (probably with some [version constraints](https://build2.org/build2-toolchain/doc/build2-toolchain-intro.xhtml#guide-add-remove-deps)). If you are not familiar with using dependencies in `build2`, [please read this introduction](https://build2.org/build2-toolchain/doc/build2-toolchain-intro.xhtml). +Please file issues [here](https://github.com/build2-packaging/nlohmann-json) if you experience problems with the packages. + +If you are using [`wsjcpp`](https://wsjcpp.org), you can use the command `wsjcpp install "https://github.com/nlohmann/json:develop"` to get the latest version. Note you can change the branch ":develop" to an existing tag or another branch. + +If you are using [`CPM.cmake`](https://github.com/TheLartians/CPM.cmake), you can check this [`example`](https://github.com/TheLartians/CPM.cmake/tree/master/examples/json).
After [adding CPM script](https://github.com/TheLartians/CPM.cmake#adding-cpm) to your project, implement the following snippet to your CMake: + +```cmake +CPMAddPackage( + NAME nlohmann_json + GITHUB_REPOSITORY nlohmann/json + VERSION 3.9.1) +``` + +### Pkg-config + +If you are using bare Makefiles, you can use `pkg-config` to generate the include flags that point to where the library is installed: + +```sh +pkg-config nlohmann_json --cflags +``` + +Users of the Meson build system will also be able to use a system wide library, which will be found by `pkg-config`: + +```meson +json = dependency('nlohmann_json', required: true) +``` + + ## License @@ -1546,6 +1559,7 @@ I deeply appreciate the help of the following people. - [Guillaume Racicot](https://github.com/gracicot) implemented `string_view` support and allowed C++20 support. - [Alex Reinking](https://github.com/alexreinking) improved CMake support for `FetchContent`. - [Hannes Domani](https://github.com/ssbssa) provided a GDB pretty printer. +- Lars Wirzenius reviewed the README file. Thanks a lot for helping out! Please [let me know](mailto:mail@nlohmann.me) if I forgot someone. 
diff --git a/cmake/ci.cmake b/cmake/ci.cmake index eaa2c6793..674399d65 100644 --- a/cmake/ci.cmake +++ b/cmake/ci.cmake @@ -756,7 +756,7 @@ else() ) endif() -set(JSON_CMAKE_FLAGS "JSON_BuildTests;JSON_Install;JSON_MultipleHeaders;JSON_Sanitizer;JSON_Valgrind;JSON_NoExceptions;JSON_Coverage;JSON_Diagnostics") +set(JSON_CMAKE_FLAGS "JSON_BuildTests;JSON_Install;JSON_MultipleHeaders;JSON_ImplicitConversions;JSON_Valgrind;JSON_Diagnostics;JSON_SystemInclude") foreach(JSON_CMAKE_FLAG ${JSON_CMAKE_FLAGS}) string(TOLOWER "ci_cmake_flag_${JSON_CMAKE_FLAG}" JSON_CMAKE_FLAG_TARGET) diff --git a/doc/Doxyfile b/doc/Doxyfile index 45ca8822b..7af897b80 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.9.0 +# Doxyfile 1.9.1 #--------------------------------------------------------------------------- # Project related configuration options diff --git a/doc/avatars.png b/doc/avatars.png index e3c29989e..a10e06877 100644 Binary files a/doc/avatars.png and b/doc/avatars.png differ diff --git a/doc/docset/docSet.sql b/doc/docset/docSet.sql index 243612035..8bd4e0678 100644 --- a/doc/docset/docSet.sql +++ b/doc/docset/docSet.sql @@ -126,11 +126,26 @@ INSERT INTO searchIndex(name, type, path) VALUES ('Binary Values', 'Guide', 'fea INSERT INTO searchIndex(name, type, path) VALUES ('Comments', 'Guide', 'features/comments/index.html'); INSERT INTO searchIndex(name, type, path) VALUES ('Iterators', 'Guide', 'features/iterators/index.html'); INSERT INTO searchIndex(name, type, path) VALUES ('Types', 'Guide', 'features/types/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('Number Handling', 'Guide', 'features/types/number_handling/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('Element Access', 'Guide', 'features/element_access/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON Pointer', 'Guide', 'features/json_pointer/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON Patch and Diff', 
'Guide', 'features/json_patch/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON Merge Patch', 'Guide', 'features/merge_patch/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('Object Order', 'Guide', 'features/object_order/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('Parsing and Exceptions', 'Guide', 'features/parsing/parse_exceptions/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('Parser Callbacks', 'Guide', 'features/parsing/parser_callbacks/index.html'); +INSERT INTO searchIndex(name, type, path) VALUES ('SAX Interface', 'Guide', 'features/parsing/sax_interface/index.html'); -- Macros INSERT INTO searchIndex(name, type, path) VALUES ('JSON_ASSERT', 'Macro', 'features/macros/index.html#json_assertx'); INSERT INTO searchIndex(name, type, path) VALUES ('JSON_CATCH_USER', 'Macro', 'features/macros/index.html#json_catch_userexception'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON_DIAGNOSTICS', 'Macro', 'features/macros/index.html#json_diagnostics'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON_HAS_CPP_11', 'Macro', 'features/macros/index.html#json_has_cpp_11-json_has_cpp_14-json_has_cpp_17-json_has_cpp_20'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON_HAS_CPP_14', 'Macro', 'features/macros/index.html#json_has_cpp_11-json_has_cpp_14-json_has_cpp_17-json_has_cpp_20'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON_HAS_CPP_17', 'Macro', 'features/macros/index.html#json_has_cpp_11-json_has_cpp_14-json_has_cpp_17-json_has_cpp_20'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON_HAS_CPP_20', 'Macro', 'features/macros/index.html#json_has_cpp_11-json_has_cpp_14-json_has_cpp_17-json_has_cpp_20'); INSERT INTO searchIndex(name, type, path) VALUES ('JSON_NOEXCEPTION', 'Macro', 'features/macros/index.html#json_noexception'); +INSERT INTO searchIndex(name, type, path) VALUES ('JSON_NO_IO', 'Macro', 'features/macros/index.html#json_no_io'); INSERT 
INTO searchIndex(name, type, path) VALUES ('JSON_SKIP_UNSUPPORTED_COMPILER_CHECK', 'Macro', 'features/macros/index.html#json_skip_unsupported_compiler_check'); INSERT INTO searchIndex(name, type, path) VALUES ('JSON_THROW_USER', 'Macro', 'features/macros/index.html#json_throw_userexception'); INSERT INTO searchIndex(name, type, path) VALUES ('JSON_TRY_USER', 'Macro', 'features/macros/index.html#json_try_user'); diff --git a/doc/json.gif b/doc/json.gif index 7542048ed..76fc02edd 100644 Binary files a/doc/json.gif and b/doc/json.gif differ diff --git a/doc/mkdocs/docs/features/parsing/parse_exceptions.md b/doc/mkdocs/docs/features/parsing/parse_exceptions.md index f0569b8e2..879dab0d3 100644 --- a/doc/mkdocs/docs/features/parsing/parse_exceptions.md +++ b/doc/mkdocs/docs/features/parsing/parse_exceptions.md @@ -1,4 +1,4 @@ -# Parsing and exceptions +# Parsing and Exceptions When the input is not valid JSON, an exception of type [`parse_error`](../../home/exceptions.md#parse-errors) is thrown. This exception contains the position in the input where the error occurred, together with a diagnostic message and the last read input token. The exceptions page contains a [list of examples for parse error exceptions](../../home/exceptions.md#parse-errors). In case you process untrusted input, always enclose your code with a `#!cpp try`/`#!cpp catch` block, like diff --git a/doc/mkdocs/docs/home/faq.md b/doc/mkdocs/docs/home/faq.md index af63cfb6a..23aa35a22 100644 --- a/doc/mkdocs/docs/home/faq.md +++ b/doc/mkdocs/docs/home/faq.md @@ -1,5 +1,43 @@ # Frequently Asked Questions (FAQ) +## Known bugs + +### Brace initialization yields arrays + +!!! question + + Why does + + ```cpp + json j{true}; + ``` + + and + + ```cpp + json j(true); + ``` + + yield different results (`#!json [true]` vs. `#!json true`)? + +This is a known issue, and -- even worse -- the behavior differs between GCC and Clang. 
The "culprit" for this is the library's constructor overloads for initializer lists to allow syntax like + +```cpp +json array = {1, 2, 3, 4}; +``` + +for arrays and + +```cpp +json object = {{"one", 1}, {"two", 2}}; +``` + +for objects. + +!!! tip + + To avoid any confusion and ensure portable code, **do not** use brace initialization with the types `basic_json`, `json`, or `ordered_json` unless you want to create an object or array as shown in the examples above. + ## Limitations ### Relaxed parsing @@ -8,7 +46,7 @@ - Can you add an option to ignore trailing commas? -For the same reason this library does not support [comments](#comments), this library also does not support any feature which would jeopardize interoperability. +This library does not support any feature which would jeopardize interoperability. ### Parse errors reading non-ASCII characters @@ -32,13 +70,24 @@ The library supports **Unicode input** as follows: In most cases, the parser is right to complain, because the input is not UTF-8 encoded. This is especially true for Microsoft Windows where Latin-1 or ISO 8859-1 is often the standard encoding. +## Exceptions + +### Parsing without exceptions + +!!! question + + Is it possible to indicate a parse error without throwing an exception? + +Yes, see [Parsing and exceptions](../features/parsing/parse_exceptions.md). + + ### Key name in exceptions !!! question Can I get the key of the object item that caused an exception? -No, this is not possible. See for a longer discussion. +Yes, you can. Please define the symbol [`JSON_DIAGNOSTICS`](../features/macros.md#json_diagnostics) to get [extended diagnostics messages](exceptions.md#extended-diagnostic-messages). ## Serialization issues @@ -61,6 +110,7 @@ The library uses `std::numeric_limits::digits10` (15 for IEEE `d The website https://float.exposed gives a good insight into the internal storage of floating-point numbers. 
+See [this section](../features/types/number_handling.md#number-serialization) on the library's number handling for more information. ## Compilation issues diff --git a/doc/mkdocs/docs/home/sponsors.md b/doc/mkdocs/docs/home/sponsors.md index e2c5d91f8..9097049d4 100644 --- a/doc/mkdocs/docs/home/sponsors.md +++ b/doc/mkdocs/docs/home/sponsors.md @@ -7,5 +7,7 @@ You can sponsor this library at [GitHub Sponsors](https://github.com/sponsors/nl - [Michael Hartmann](https://github.com/reFX-Mike) - [Stefan Hagen](https://github.com/sthagen) - [Steve Sperandeo](https://github.com/homer6) +- [Robert Jefe Lindstädt](https://github.com/eljefedelrodeodeljefe) +- [Steve Wagner](https://github.com/ciroque) Thanks everyone! diff --git a/doc/mkdocs/docs/index.md b/doc/mkdocs/docs/index.md index 39c52d748..0e49c836c 100644 --- a/doc/mkdocs/docs/index.md +++ b/doc/mkdocs/docs/index.md @@ -1,7 +1,3 @@ # JSON for Modern C++ -!!! note - - This page is under construction. - ![](images/json.gif) diff --git a/doc/mkdocs/docs/integration/cmake.md b/doc/mkdocs/docs/integration/cmake.md index 76f05dbe1..9f1ecc95a 100644 --- a/doc/mkdocs/docs/integration/cmake.md +++ b/doc/mkdocs/docs/integration/cmake.md @@ -1,8 +1,10 @@ # CMake +## Integration + You can also use the `nlohmann_json::nlohmann_json` interface target in CMake. This target populates the appropriate usage requirements for `INTERFACE_INCLUDE_DIRECTORIES` to point to the appropriate include directories and `INTERFACE_COMPILE_FEATURES` for the necessary C++11 flags. -## External +### External To use this library from a CMake project, you can locate it directly with `find_package()` and use the namespaced imported target from the generated package configuration: @@ -17,15 +19,11 @@ target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) The package configuration file, `nlohmann_jsonConfig.cmake`, can be used either from an install tree or directly out of the build tree. 
-## Embedded +### Embedded To embed the library directly into an existing CMake project, place the entire source tree in a subdirectory and call `add_subdirectory()` in your `CMakeLists.txt` file: ```cmake -# Typically you don't care so much for a third party library's tests to be -# run from your own project's code. -set(JSON_BuildTests OFF CACHE INTERNAL "") - # If you only include this third party in PRIVATE source files, you do not # need to install it when your main project gets installed. # set(JSON_Install OFF CACHE INTERNAL "") @@ -41,7 +39,7 @@ add_library(foo ...) target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) ``` -## Embedded (FetchContent) +### Embedded (FetchContent) Since CMake v3.11, [FetchContent](https://cmake.org/cmake/help/v3.11/module/FetchContent.html) can @@ -65,12 +63,11 @@ target_link_libraries(foo PRIVATE nlohmann_json::nlohmann_json) ``` !!! Note - The repository download size is huge. - It contains all the dataset used for the benchmarks. You might want to depend on - a smaller repository. For instance, you might want to replace the URL above by + The repository download size is quite large. + You might want to depend on a smaller repository. For instance, you might want to replace the URL above by . -## Supporting Both +### Supporting Both To allow your project to support either an externally supplied or an embedded JSON library, you can use a pattern akin to the following: @@ -101,3 +98,41 @@ endif() ``` `thirdparty/nlohmann_json` is then a complete copy of this source tree. + +## CMake Options + +### `JSON_BuildTests` + +Build the unit tests when [`BUILD_TESTING`](https://cmake.org/cmake/help/latest/command/enable_testing.html) is enabled. This option is `ON` by default if the library's CMake project is the top project. That is, when integrating the library as described above, the test suite is not built unless explicitly switched on with this option. + +### `JSON_CI` + +Enable CI build targets. 
The exact targets are used during the several CI steps and are subject to change without notice. This option is `OFF` by default. + +### `JSON_Diagnostics` + +Enable [extended diagnostic messages](../home/exceptions.md#extended-diagnostic-messages) by defining macro [`JSON_DIAGNOSTICS`](../features/macros.md#json_diagnostics). This option is `OFF` by default. + +### `JSON_FastTests` + +Skip expensive/slow test suites. This option is `OFF` by default. Depends on `JSON_BuildTests`. + +### `JSON_ImplicitConversions` + +Enable implicit conversions by defining macro [`JSON_USE_IMPLICIT_CONVERSIONS`](../features/macros.md#json_use_implicit_conversions). This option is `ON` by default. + +### `JSON_Install` + +Install CMake targets during install step. This option is `ON` by default if the library's CMake project is the top project. + +### `JSON_MultipleHeaders` + +Use non-amalgamated version of the library. This option is `OFF` by default. + +### `JSON_SystemInclude` + +Treat the library headers like system headers (i.e., adding `SYSTEM` to the [`target_include_directories`](https://cmake.org/cmake/help/latest/command/target_include_directories.html) call) to skip checks for this library by tools like Clang-Tidy. This option is `OFF` by default. + +### `JSON_Valgrind` + +Execute test suite with [Valgrind](https://valgrind.org). This option is `OFF` by default. Depends on `JSON_BuildTests`. diff --git a/doc/mkdocs/docs/integration/index.md b/doc/mkdocs/docs/integration/index.md index 5dd8cceb7..5ee4ff721 100644 --- a/doc/mkdocs/docs/integration/index.md +++ b/doc/mkdocs/docs/integration/index.md @@ -1,4 +1,4 @@ -# Integration +# Header only [`json.hpp`](https://github.com/nlohmann/json/blob/develop/single_include/nlohmann/json.hpp) is the single required file in `single_include/nlohmann` or [released here](https://github.com/nlohmann/json/releases).
You need to add @@ -11,4 +11,4 @@ using json = nlohmann::json; to the files you want to process JSON and set the necessary switches to enable C++11 (e.g., `-std=c++11` for GCC and Clang). -You can further use file [`include/nlohmann/json_fwd.hpp`](https://github.com/nlohmann/json/blob/develop/include/nlohmann/json_fwd.hpp) for forward-declarations. The installation of json_fwd.hpp (as part of cmake's install step), can be achieved by setting `-DJSON_MultipleHeaders=ON`. +You can further use file [`include/nlohmann/json_fwd.hpp`](https://github.com/nlohmann/json/blob/develop/include/nlohmann/json_fwd.hpp) for forward-declarations. The installation of `json_fwd.hpp` (as part of CMake's install step), can be achieved by setting `-DJSON_MultipleHeaders=ON`. diff --git a/doc/mkdocs/docs/integration/package_managers.md b/doc/mkdocs/docs/integration/package_managers.md index 58b3eab55..b860296ab 100644 --- a/doc/mkdocs/docs/integration/package_managers.md +++ b/doc/mkdocs/docs/integration/package_managers.md @@ -24,7 +24,7 @@ brew install nlohmann-json --HEAD instead. -!!! example +??? example 1. Create the following file: @@ -63,7 +63,7 @@ The provided meson.build can also be used as an alternative to cmake for install If you are using [Conan](https://www.conan.io/) to manage your dependencies, merely add `nlohmann_json/x.y.z` to your `conanfile`'s requires, where `x.y.z` is the release version you want to use. Please file issues [here](https://github.com/conan-io/conan-center-index/issues) if you experience problems with the packages. -!!! example +??? example 1. Create the following files: @@ -116,7 +116,6 @@ If you are using [vcpkg](https://github.com/Microsoft/vcpkg/) on your project fo If you are using [cget](http://cget.readthedocs.io/en/latest/), you can install the latest development version with `cget install nlohmann/json`. A specific version can be installed with `cget install nlohmann/json@v3.1.0`. 
Also, the multiple header version can be installed by adding the `-DJSON_MultipleHeaders=ON` flag (i.e., `cget install nlohmann/json -DJSON_MultipleHeaders=ON`). - ## CocoaPods If you are using [CocoaPods](https://cocoapods.org), you can use the library by adding pod `"nlohmann_json", '~>3.1.2'` to your podfile (see [an example](https://bitbucket.org/benman/nlohmann_json-cocoapod/src/master/)). Please file issues [here](https://bitbucket.org/benman/nlohmann_json-cocoapod/issues?status=new&status=open). @@ -141,3 +140,14 @@ Please file issues [here](https://github.com/build2-packaging/nlohmann-json) if ## wsjcpp If you are using [`wsjcpp`](http://wsjcpp.org), you can use the command `wsjcpp install "https://github.com/nlohmann/json:develop"` to get the latest version. Note you can change the branch ":develop" to an existing tag or another branch. + +## CPM.cmake + +If you are using [`CPM.cmake`](https://github.com/TheLartians/CPM.cmake), you can check this [`example`](https://github.com/TheLartians/CPM.cmake/tree/master/examples/json). 
After [adding CPM script](https://github.com/TheLartians/CPM.cmake#adding-cpm) to your project, implement the following snippet to your CMake: + +```cmake +CPMAddPackage( + NAME nlohmann_json + GITHUB_REPOSITORY nlohmann/json + VERSION 3.9.1) +``` diff --git a/doc/mkdocs/docs/integration/pkg-config.md b/doc/mkdocs/docs/integration/pkg-config.md new file mode 100644 index 000000000..56f86444e --- /dev/null +++ b/doc/mkdocs/docs/integration/pkg-config.md @@ -0,0 +1,13 @@ +# Pkg-config + +If you are using bare Makefiles, you can use `pkg-config` to generate the include flags that point to where the library is installed: + +```sh +pkg-config nlohmann_json --cflags +``` + +Users of the [Meson build system](package_managers.md#meson) will also be able to use a system wide library, which will be found by `pkg-config`: + +```meson +json = dependency('nlohmann_json', required: true) +``` diff --git a/doc/mkdocs/mkdocs.yml b/doc/mkdocs/mkdocs.yml index b7ad380fe..7aa6e2c5d 100644 --- a/doc/mkdocs/mkdocs.yml +++ b/doc/mkdocs/mkdocs.yml @@ -69,8 +69,8 @@ nav: - integration/index.md - integration/cmake.md - integration/package_managers.md - - Doxygen: - - doxygen/index.html + - integration/pkg-config.md + - Doxygen: doxygen/index.html - API: - basic_json: - api/basic_json/index.md diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 9b9fedfdf..f02a15e8f 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -1082,38 +1082,41 @@ class binary_reader return false; } - string_t key; - if (len != std::size_t(-1)) + if (len != 0) { - for (std::size_t i = 0; i < len; ++i) + string_t key; + if (len != std::size_t(-1)) { - get(); - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + for (std::size_t i = 0; i < len; ++i) { - return false; - } + get(); + if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + { + return false; + } - 
if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { - return false; + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + key.clear(); } - key.clear(); } - } - else - { - while (get() != 0xFF) + else { - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + while (get() != 0xFF) { - return false; - } + if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + { + return false; + } - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { - return false; + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + key.clear(); } - key.clear(); } } diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 98fb29a99..6df58a1cd 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -11,7 +11,7 @@ #include // pair, declval #ifndef JSON_NO_IO - #include //FILE * + #include // FILE * #include // istream #endif // JSON_NO_IO diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index aa612f9a1..6958ece69 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -5290,7 +5290,7 @@ std::size_t hash(const BasicJsonType& j) #include // pair, declval #ifndef JSON_NO_IO - #include //FILE * + #include // FILE * #include // istream #endif // JSON_NO_IO @@ -9325,38 +9325,41 @@ class binary_reader return false; } - string_t key; - if (len != std::size_t(-1)) + if (len != 0) { - for (std::size_t i = 0; i < len; ++i) + string_t key; + if (len != std::size_t(-1)) { - get(); - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + for (std::size_t i = 0; i < len; ++i) { - return false; - } + get(); + if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + { + return false; + } - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { 
- return false; + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + key.clear(); } - key.clear(); } - } - else - { - while (get() != 0xFF) + else { - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + while (get() != 0xFF) { - return false; - } + if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + { + return false; + } - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { - return false; + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + key.clear(); } - key.clear(); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d8c586425..cbbdcace3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,8 +1,5 @@ -option(JSON_Sanitizer "Build test suite with Clang sanitizer" OFF) -option(JSON_Valgrind "Execute test suite with Valgrind" OFF) -option(JSON_NoExceptions "Build test suite without exceptions" OFF) -option(JSON_Coverage "Build test suite with coverage information" OFF) -option(JSON_FastTests "Whether to skip expensive tests" OFF) +option(JSON_Valgrind "Execute test suite with Valgrind." OFF) +option(JSON_FastTests "Skip expensive/slow tests." 
OFF) # download test data include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/download_test_data.cmake) @@ -11,13 +8,6 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/download_test_data.cmake) add_test(NAME "download_test_data" COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target download_test_data) set_tests_properties(download_test_data PROPERTIES FIXTURES_SETUP TEST_DATA) -if(JSON_Sanitizer) - message(STATUS "Building test suite with Clang sanitizer") - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "-g -O0 -fsanitize=address -fsanitize=undefined -fsanitize=integer -fsanitize=nullability -fno-omit-frame-pointer -fno-sanitize-recover=all -fsanitize-recover=unsigned-integer-overflow") - endif() -endif() - if(JSON_Valgrind) find_program(CMAKE_MEMORYCHECK_COMMAND valgrind) message(STATUS "Executing test suite with Valgrind (${CMAKE_MEMORYCHECK_COMMAND})") @@ -25,36 +15,6 @@ if(JSON_Valgrind) separate_arguments(memcheck_command) endif() -if(JSON_NoExceptions) - message(STATUS "Building test suite without exceptions") - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DJSON_NOEXCEPTION") - endif() - set(DOCTEST_TEST_FILTER --no-throw) -endif() - -if(JSON_Coverage) - message(STATUS "Building test suite with coverage information") - - # from https://github.com/RWTH-HPC/CMake-codecov/blob/master/cmake/FindGcov.cmake - get_filename_component(COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH) - string(REGEX MATCH "^[0-9]+" GCC_VERSION "${CMAKE_CXX_COMPILER_VERSION}") - find_program(GCOV_BIN NAMES gcov-${GCC_VERSION} gcov HINTS ${COMPILER_PATH}) - - # collect all source files from the chosen include dir - file(GLOB_RECURSE SOURCE_FILES ${NLOHMANN_JSON_INCLUDE_BUILD_DIR}*.hpp) - - # add target to collect coverage information and generate HTML file - # (filter script from https://stackoverflow.com/a/43726240/266378) - add_custom_target(lcov_html - COMMAND lcov --directory . 
--capture --output-file json.info --rc lcov_branch_coverage=1 - COMMAND lcov -e json.info ${SOURCE_FILES} --output-file json.info.filtered --gcov-tool ${GCOV_BIN} --rc lcov_branch_coverage=1 - COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/imapdl/filterbr.py json.info.filtered > json.info.filtered.noexcept - COMMAND genhtml --title "JSON for Modern C++" --legend --demangle-cpp --output-directory html --show-details --branch-coverage json.info.filtered.noexcept - COMMENT "Generating HTML report test/html/index.html" - ) -endif() - ############################################################################# # doctest library with the main function to speed up build ############################################################################# @@ -108,11 +68,6 @@ foreach(file ${files}) target_include_directories(${testcase} PRIVATE ${CMAKE_BINARY_DIR}/include thirdparty/doctest thirdparty/fifo_map) target_link_libraries(${testcase} PRIVATE ${NLOHMANN_JSON_TARGET_NAME}) - if (JSON_Coverage) - target_compile_options(${testcase} PRIVATE --coverage) - target_link_libraries(${testcase} PRIVATE --coverage) - endif() - if (JSON_FastTests) add_test(NAME "${testcase}" COMMAND ${testcase} ${DOCTEST_TEST_FILTER} diff --git a/test/src/unit-json_pointer.cpp b/test/src/unit-json_pointer.cpp index d5881beac..1e86c0a41 100644 --- a/test/src/unit-json_pointer.cpp +++ b/test/src/unit-json_pointer.cpp @@ -358,6 +358,10 @@ TEST_CASE("JSON pointers") CHECK_THROWS_WITH(j_const[jp] == 1, throw_msg.c_str()); } +#if defined(_MSC_VER) +#pragma warning (push) +#pragma warning (disable : 4127) // on some machines, the check below is not constant +#endif if (sizeof(typename json::size_type) < sizeof(unsigned long long)) { auto size_type_max_uul = static_cast((std::numeric_limits::max)()); @@ -371,6 +375,10 @@ TEST_CASE("JSON pointers") CHECK_THROWS_WITH(j_const[jp] == 1, throw_msg.c_str()); } +#if defined(_MSC_VER) +#pragma warning (pop) +#endif + CHECK_THROWS_AS(j.at("/one"_json_pointer) = 1, 
json::parse_error&); CHECK_THROWS_WITH(j.at("/one"_json_pointer) = 1, "[json.exception.parse_error.109] parse error: array index 'one' is not a number"); diff --git a/test/src/unit-unicode.cpp b/test/src/unit-unicode.cpp deleted file mode 100644 index b45e990a7..000000000 --- a/test/src/unit-unicode.cpp +++ /dev/null @@ -1,1611 +0,0 @@ -/* - __ _____ _____ _____ - __| | __| | | | JSON for Modern C++ (test suite) -| | |__ | | | | | | version 3.9.1 -|_____|_____|_____|_|___| https://github.com/nlohmann/json - -Licensed under the MIT License . -SPDX-License-Identifier: MIT -Copyright (c) 2013-2019 Niels Lohmann . - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -#include "doctest_compatibility.h" - -// for some reason including this after the json header leads to linker errors with VS 2017... 
-#include - -#define JSON_TESTS_PRIVATE -#include -using nlohmann::json; - -#include -#include -#include -#include -#include - -namespace -{ -extern size_t calls; -size_t calls = 0; - -void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4); - -void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) -{ - std::string json_string; - - CAPTURE(byte1) - CAPTURE(byte2) - CAPTURE(byte3) - CAPTURE(byte4) - - json_string += std::string(1, static_cast(byte1)); - - if (byte2 != -1) - { - json_string += std::string(1, static_cast(byte2)); - } - - if (byte3 != -1) - { - json_string += std::string(1, static_cast(byte3)); - } - - if (byte4 != -1) - { - json_string += std::string(1, static_cast(byte4)); - } - - CAPTURE(json_string) - - // store the string in a JSON value - json j = json_string; - json j2 = "abc" + json_string + "xyz"; - - // dumping with ignore/replace must not throw in any case - auto s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); - auto s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore); - auto s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore); - auto s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore); - auto s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace); - auto s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); - auto s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); - auto s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); - - if (success_expected) - { - // strict mode must not throw if success is expected - auto s_strict = j.dump(); - // all dumps should agree on the string - CHECK(s_strict == s_ignored); - CHECK(s_strict == s_replaced); - } - else - { - // strict mode must throw if success is not expected - CHECK_THROWS_AS(j.dump(), json::type_error&); - // ignore and replace must create different dumps - 
CHECK(s_ignored != s_replaced); - - // check that replace string contains a replacement character - CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); - } - - // check that prefix and suffix are preserved - CHECK(s_ignored2.substr(1, 3) == "abc"); - CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); - CHECK(s_ignored2_ascii.substr(1, 3) == "abc"); - CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz"); - CHECK(s_replaced2.substr(1, 3) == "abc"); - CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz"); - CHECK(s_replaced2_ascii.substr(1, 3) == "abc"); - CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz"); -} - -void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4); - -// create and check a JSON string with up to four UTF-8 bytes -void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) -{ - if (++calls % 100000 == 0) - { - std::cout << calls << " of 8860608 UTF-8 strings checked" << std::endl; - } - - std::string json_string = "\""; - - CAPTURE(byte1) - json_string += std::string(1, static_cast(byte1)); - - if (byte2 != -1) - { - CAPTURE(byte2) - json_string += std::string(1, static_cast(byte2)); - } - - if (byte3 != -1) - { - CAPTURE(byte3) - json_string += std::string(1, static_cast(byte3)); - } - - if (byte4 != -1) - { - CAPTURE(byte4) - json_string += std::string(1, static_cast(byte4)); - } - - json_string += "\""; - - CAPTURE(json_string) - - json _; - if (success_expected) - { - CHECK_NOTHROW(_ = json::parse(json_string)); - } - else - { - CHECK_THROWS_AS(_ = json::parse(json_string), json::parse_error&); - } -} -} // namespace - -TEST_CASE("Unicode" * doctest::skip()) -{ - SECTION("RFC 3629") - { - /* - RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as - follows: - - A UTF-8 string is a sequence of octets representing a sequence of UCS - characters. 
An octet sequence is valid UTF-8 only if it matches the - following syntax, which is derived from the rules for encoding UTF-8 - and is expressed in the ABNF of [RFC2234]. - - UTF8-octets = *( UTF8-char ) - UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 - UTF8-1 = %x00-7F - UTF8-2 = %xC2-DF UTF8-tail - UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / - %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) - UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / - %xF4 %x80-8F 2( UTF8-tail ) - UTF8-tail = %x80-BF - */ - - SECTION("ill-formed first byte") - { - for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - - for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("UTF8-1 (x00-x7F)") - { - SECTION("well-formed") - { - for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1) - { - // unescaped control characters are parse errors in JSON - if (0x00 <= byte1 && byte1 <= 0x1F) - { - check_utf8string(false, byte1); - continue; - } - - // a single quote is a parse error in JSON - if (byte1 == 0x22) - { - check_utf8string(false, byte1); - continue; - } - - // a single backslash is a parse error in JSON - if (byte1 == 0x5C) - { - check_utf8string(false, byte1); - continue; - } - - // all other characters are OK - check_utf8string(true, byte1); - check_utf8dump(true, byte1); - } - } - } - - SECTION("UTF8-2 (xC2-xDF UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - check_utf8string(true, byte1, byte2); - check_utf8dump(true, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xC2; byte1 <= 0xDF; 
++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 && byte2 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - } - - SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(true, byte1, byte2, byte3); - check_utf8dump(true, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0xA0 <= byte2 && byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) - { - for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - } - - SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - for (int byte2 = 
0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(true, byte1, byte2, byte3); - check_utf8dump(true, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 && byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - } - - SECTION("UTF8-3 (xED x80-9F UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(true, byte1, byte2, byte3); - check_utf8dump(true, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing 
third byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 && byte2 <= 0x9F) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - } - - SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(true, byte1, byte2, byte3); - check_utf8dump(true, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - 
if (0x80 <= byte2 && byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - } - - SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(true, byte1, byte2, byte3, byte4); - check_utf8dump(true, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing fourth byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x90 
<= byte2 && byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: wrong fourth byte") - { - for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) - { - for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) - { - // skip fourth second byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - } - - SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(true, byte1, byte2, byte3, byte4); - check_utf8dump(true, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; 
++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing fourth byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 && byte2 <= 0xBF) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: wrong fourth byte") - { - for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) - { - // skip correct fourth byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - } - - SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)") - { - SECTION("well-formed") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; 
++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(true, byte1, byte2, byte3, byte4); - check_utf8dump(true, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: missing second byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - check_utf8string(false, byte1); - check_utf8dump(false, byte1); - } - } - - SECTION("ill-formed: missing third byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) - { - check_utf8string(false, byte1, byte2); - check_utf8dump(false, byte1, byte2); - } - } - } - - SECTION("ill-formed: missing fourth byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - check_utf8string(false, byte1, byte2, byte3); - check_utf8dump(false, byte1, byte2, byte3); - } - } - } - } - - SECTION("ill-formed: wrong second byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) - { - // skip correct second byte - if (0x80 <= byte2 && byte2 <= 0x8F) - { - continue; - } - - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - - SECTION("ill-formed: wrong third byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) - { - for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) - { - // skip correct third byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) - { - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } 
- - SECTION("ill-formed: wrong fourth byte") - { - for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1) - { - for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2) - { - for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) - { - for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) - { - // skip correct fourth byte - if (0x80 <= byte3 && byte3 <= 0xBF) - { - continue; - } - - check_utf8string(false, byte1, byte2, byte3, byte4); - check_utf8dump(false, byte1, byte2, byte3, byte4); - } - } - } - } - } - } - } - - SECTION("\\uxxxx sequences") - { - // create an escaped string from a code point - const auto codepoint_to_unicode = [](std::size_t cp) - { - // code points are represented as a six-character sequence: a - // reverse solidus, followed by the lowercase letter u, followed - // by four hexadecimal digits that encode the character's code - // point - std::stringstream ss; - ss << "\\u" << std::setw(4) << std::setfill('0') << std::hex << cp; - return ss.str(); - }; - - SECTION("correct sequences") - { - // generate all UTF-8 code points; in total, 1112064 code points are - // generated: 0x1FFFFF code points - 2048 invalid values between - // 0xD800 and 0xDFFF. - for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp) - { - // string to store the code point as in \uxxxx format - std::string json_text = "\""; - - // decide whether to use one or two \uxxxx sequences - if (cp < 0x10000u) - { - // The Unicode standard permanently reserves these code point - // values for UTF-16 encoding of the high and low surrogates, and - // they will never be assigned a character, so there should be no - // reason to encode them. The official Unicode standard says that - // no UTF forms, including UTF-16, can encode these code points. 
- if (cp >= 0xD800u && cp <= 0xDFFFu) - { - // if we would not skip these code points, we would get a - // "missing low surrogate" exception - continue; - } - - // code points in the Basic Multilingual Plane can be - // represented with one \uxxxx sequence - json_text += codepoint_to_unicode(cp); - } - else - { - // To escape an extended character that is not in the Basic - // Multilingual Plane, the character is represented as a - // 12-character sequence, encoding the UTF-16 surrogate pair - const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu); - const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu); - json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2); - } - - json_text += "\""; - CAPTURE(json_text) - json _; - CHECK_NOTHROW(_ = json::parse(json_text)); - } - } - - SECTION("incorrect sequences") - { - SECTION("incorrect surrogate values") - { - json _; - - CHECK_THROWS_AS(_ = json::parse("\"\\uDC00\\uDC00\""), json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uDC00\\uDC00\""), - "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uDC00'"); - - CHECK_THROWS_AS(_ = json::parse("\"\\uD7FF\\uDC00\""), json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uD7FF\\uDC00\""), - "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uD7FF\\uDC00'"); - - CHECK_THROWS_AS(_ = json::parse("\"\\uD800]\""), json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uD800]\""), - "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800]'"); - - CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\v\""), 
json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\v\""), - "[json.exception.parse_error.101] parse error at line 1, column 9: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\v'"); - - CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\u123\""), json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\u123\""), - "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\uD800\\u123\"'"); - - CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\uDBFF\""), json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\uDBFF\""), - "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uDBFF'"); - - CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\uE000\""), json::parse_error&); - CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\uE000\""), - "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uE000'"); - } - } - -#if 0 - SECTION("incorrect sequences") - { - SECTION("high surrogate without low surrogate") - { - // D800..DBFF are high surrogates and must be followed by low - // surrogates DC00..DFFF; here, nothing follows - for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp) - { - std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; - CAPTURE(json_text) - CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); - } - } - - SECTION("high surrogate with wrong low surrogate") - { - // D800..DBFF are high surrogates and must be followed by low - // surrogates DC00..DFFF; here a different sequence follows - for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; 
++cp1) - { - for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2) - { - if (0xDC00u <= cp2 && cp2 <= 0xDFFFu) - { - continue; - } - - std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\""; - CAPTURE(json_text) - CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); - } - } - } - - SECTION("low surrogate without high surrogate") - { - // low surrogates DC00..DFFF must follow high surrogates; here, - // they occur alone - for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp) - { - std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; - CAPTURE(json_text) - CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); - } - } - - } -#endif - } - - SECTION("read all unicode characters") - { - // read a file with all unicode characters stored as single-character - // strings in a JSON array - std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/all_unicode.json"); - json j; - CHECK_NOTHROW(f >> j); - - // the array has 1112064 + 1 elements (a terminating "null" value) - // Note: 1112064 = 0x1FFFFF code points - 2048 invalid values between - // 0xD800 and 0xDFFF. 
- CHECK(j.size() == 1112065); - - SECTION("check JSON Pointers") - { - for (const auto& s : j) - { - // skip non-string JSON values - if (!s.is_string()) - { - continue; - } - - auto ptr = s.get(); - - // tilde must be followed by 0 or 1 - if (ptr == "~") - { - ptr += "0"; - } - - // JSON Pointers must begin with "/" - ptr.insert(0, "/"); - - CHECK_NOTHROW(json::json_pointer("/" + ptr)); - - // check escape/unescape roundtrip - auto escaped = nlohmann::detail::escape(ptr); - nlohmann::detail::unescape(escaped); - CHECK(escaped == ptr); - } - } - } - - SECTION("ignore byte-order-mark") - { - SECTION("in a stream") - { - // read a file with a UTF-8 BOM - std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/bom.json"); - json j; - CHECK_NOTHROW(f >> j); - } - - SECTION("with an iterator") - { - std::string i = "\xef\xbb\xbf{\n \"foo\": true\n}"; - json _; - CHECK_NOTHROW(_ = json::parse(i.begin(), i.end())); - } - } - - SECTION("error for incomplete/wrong BOM") - { - json _; - CHECK_THROWS_AS(_ = json::parse("\xef\xbb"), json::parse_error&); - CHECK_THROWS_AS(_ = json::parse("\xef\xbb\xbb"), json::parse_error&); - } -} - -namespace -{ -void roundtrip(bool success_expected, const std::string& s); - -void roundtrip(bool success_expected, const std::string& s) -{ - CAPTURE(s) - json _; - - // create JSON string value - json j = s; - // create JSON text - std::string ps = std::string("\"") + s + "\""; - - if (success_expected) - { - // serialization succeeds - CHECK_NOTHROW(j.dump()); - - // exclude parse test for U+0000 - if (s[0] != '\0') - { - // parsing JSON text succeeds - CHECK_NOTHROW(_ = json::parse(ps)); - } - - // roundtrip succeeds - CHECK_NOTHROW(_ = json::parse(j.dump())); - - // after roundtrip, the same string is stored - json jr = json::parse(j.dump()); - CHECK(jr.get() == s); - } - else - { - // serialization fails - CHECK_THROWS_AS(j.dump(), json::type_error&); - - // parsing JSON text fails - CHECK_THROWS_AS(_ = json::parse(ps), 
json::parse_error&); - } -} -} // namespace - -TEST_CASE("Markus Kuhn's UTF-8 decoder capability and stress test") -{ - // Markus Kuhn - 2015-08-28 - CC BY 4.0 - // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt - - SECTION("1 Some correct UTF-8 text") - { - roundtrip(true, "κόσμε"); - } - - SECTION("2 Boundary condition test cases") - { - SECTION("2.1 First possible sequence of a certain length") - { - // 2.1.1 1 byte (U-00000000) - roundtrip(true, std::string("\0", 1)); - // 2.1.2 2 bytes (U-00000080) - roundtrip(true, "\xc2\x80"); - // 2.1.3 3 bytes (U-00000800) - roundtrip(true, "\xe0\xa0\x80"); - // 2.1.4 4 bytes (U-00010000) - roundtrip(true, "\xf0\x90\x80\x80"); - - // 2.1.5 5 bytes (U-00200000) - roundtrip(false, "\xF8\x88\x80\x80\x80"); - // 2.1.6 6 bytes (U-04000000) - roundtrip(false, "\xFC\x84\x80\x80\x80\x80"); - } - - SECTION("2.2 Last possible sequence of a certain length") - { - // 2.2.1 1 byte (U-0000007F) - roundtrip(true, "\x7f"); - // 2.2.2 2 bytes (U-000007FF) - roundtrip(true, "\xdf\xbf"); - // 2.2.3 3 bytes (U-0000FFFF) - roundtrip(true, "\xef\xbf\xbf"); - - // 2.2.4 4 bytes (U-001FFFFF) - roundtrip(false, "\xF7\xBF\xBF\xBF"); - // 2.2.5 5 bytes (U-03FFFFFF) - roundtrip(false, "\xFB\xBF\xBF\xBF\xBF"); - // 2.2.6 6 bytes (U-7FFFFFFF) - roundtrip(false, "\xFD\xBF\xBF\xBF\xBF\xBF"); - } - - SECTION("2.3 Other boundary conditions") - { - // 2.3.1 U-0000D7FF = ed 9f bf - roundtrip(true, "\xed\x9f\xbf"); - // 2.3.2 U-0000E000 = ee 80 80 - roundtrip(true, "\xee\x80\x80"); - // 2.3.3 U-0000FFFD = ef bf bd - roundtrip(true, "\xef\xbf\xbd"); - // 2.3.4 U-0010FFFF = f4 8f bf bf - roundtrip(true, "\xf4\x8f\xbf\xbf"); - - // 2.3.5 U-00110000 = f4 90 80 80 - roundtrip(false, "\xf4\x90\x80\x80"); - } - } - - SECTION("3 Malformed sequences") - { - SECTION("3.1 Unexpected continuation bytes") - { - // Each unexpected continuation byte should be separately signalled as a - // malformed sequence of its own. 
- - // 3.1.1 First continuation byte 0x80 - roundtrip(false, "\x80"); - // 3.1.2 Last continuation byte 0xbf - roundtrip(false, "\xbf"); - - // 3.1.3 2 continuation bytes - roundtrip(false, "\x80\xbf"); - // 3.1.4 3 continuation bytes - roundtrip(false, "\x80\xbf\x80"); - // 3.1.5 4 continuation bytes - roundtrip(false, "\x80\xbf\x80\xbf"); - // 3.1.6 5 continuation bytes - roundtrip(false, "\x80\xbf\x80\xbf\x80"); - // 3.1.7 6 continuation bytes - roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf"); - // 3.1.8 7 continuation bytes - roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf\x80"); - - // 3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf) - roundtrip(false, "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"); - } - - SECTION("3.2 Lonely start characters") - { - // 3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf) - roundtrip(false, "\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf"); - // 3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef) - roundtrip(false, "\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef"); - // 3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7) - roundtrip(false, "\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7"); - // 3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb) - roundtrip(false, "\xf8 \xf9 \xfa \xfb"); - // 3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd) - roundtrip(false, "\xfc \xfd"); - } - - SECTION("3.3 Sequences with last continuation byte missing") - { - // All bytes of an incomplete sequence should be signalled as a single - // malformed sequence, i.e., you should see only a single replacement - // character in each of the next 10 tests. 
(Characters as in section 2) - - // 3.3.1 2-byte sequence with last byte missing (U+0000) - roundtrip(false, "\xc0"); - // 3.3.2 3-byte sequence with last byte missing (U+0000) - roundtrip(false, "\xe0\x80"); - // 3.3.3 4-byte sequence with last byte missing (U+0000) - roundtrip(false, "\xf0\x80\x80"); - // 3.3.4 5-byte sequence with last byte missing (U+0000) - roundtrip(false, "\xf8\x80\x80\x80"); - // 3.3.5 6-byte sequence with last byte missing (U+0000) - roundtrip(false, "\xfc\x80\x80\x80\x80"); - // 3.3.6 2-byte sequence with last byte missing (U-000007FF) - roundtrip(false, "\xdf"); - // 3.3.7 3-byte sequence with last byte missing (U-0000FFFF) - roundtrip(false, "\xef\xbf"); - // 3.3.8 4-byte sequence with last byte missing (U-001FFFFF) - roundtrip(false, "\xf7\xbf\xbf"); - // 3.3.9 5-byte sequence with last byte missing (U-03FFFFFF) - roundtrip(false, "\xfb\xbf\xbf\xbf"); - // 3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF) - roundtrip(false, "\xfd\xbf\xbf\xbf\xbf"); - } - - SECTION("3.4 Concatenation of incomplete sequences") - { - // All the 10 sequences of 3.3 concatenated, you should see 10 malformed - // sequences being signalled: - roundtrip(false, "\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf"); - } - - SECTION("3.5 Impossible bytes") - { - // The following two bytes cannot appear in a correct UTF-8 string - - // 3.5.1 fe - roundtrip(false, "\xfe"); - // 3.5.2 ff - roundtrip(false, "\xff"); - // 3.5.3 fe fe ff ff - roundtrip(false, "\xfe\xfe\xff\xff"); - } - } - - SECTION("4 Overlong sequences") - { - // The following sequences are not malformed according to the letter of - // the Unicode 2.0 standard. However, they are longer then necessary and - // a correct UTF-8 encoder is not allowed to produce them. 
A "safe UTF-8 - // decoder" should reject them just like malformed sequences for two - // reasons: (1) It helps to debug applications if overlong sequences are - // not treated as valid representations of characters, because this helps - // to spot problems more quickly. (2) Overlong sequences provide - // alternative representations of characters, that could maliciously be - // used to bypass filters that check only for ASCII characters. For - // instance, a 2-byte encoded line feed (LF) would not be caught by a - // line counter that counts only 0x0a bytes, but it would still be - // processed as a line feed by an unsafe UTF-8 decoder later in the - // pipeline. From a security point of view, ASCII compatibility of UTF-8 - // sequences means also, that ASCII characters are *only* allowed to be - // represented by ASCII bytes in the range 0x00-0x7f. To ensure this - // aspect of ASCII compatibility, use only "safe UTF-8 decoders" that - // reject overlong UTF-8 sequences for which a shorter encoding exists. - - SECTION("4.1 Examples of an overlong ASCII character") - { - // With a safe UTF-8 decoder, all of the following five overlong - // representations of the ASCII character slash ("/") should be rejected - // like a malformed UTF-8 sequence, for instance by substituting it with - // a replacement character. If you see a slash below, you do not have a - // safe UTF-8 decoder! - - // 4.1.1 U+002F = c0 af - roundtrip(false, "\xc0\xaf"); - // 4.1.2 U+002F = e0 80 af - roundtrip(false, "\xe0\x80\xaf"); - // 4.1.3 U+002F = f0 80 80 af - roundtrip(false, "\xf0\x80\x80\xaf"); - // 4.1.4 U+002F = f8 80 80 80 af - roundtrip(false, "\xf8\x80\x80\x80\xaf"); - // 4.1.5 U+002F = fc 80 80 80 80 af - roundtrip(false, "\xfc\x80\x80\x80\x80\xaf"); - } - - SECTION("4.2 Maximum overlong sequences") - { - // Below you see the highest Unicode value that is still resulting in an - // overlong sequence if represented with the given number of bytes. 
This - // is a boundary test for safe UTF-8 decoders. All five characters should - // be rejected like malformed UTF-8 sequences. - - // 4.2.1 U-0000007F = c1 bf - roundtrip(false, "\xc1\xbf"); - // 4.2.2 U-000007FF = e0 9f bf - roundtrip(false, "\xe0\x9f\xbf"); - // 4.2.3 U-0000FFFF = f0 8f bf bf - roundtrip(false, "\xf0\x8f\xbf\xbf"); - // 4.2.4 U-001FFFFF = f8 87 bf bf bf - roundtrip(false, "\xf8\x87\xbf\xbf\xbf"); - // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf - roundtrip(false, "\xfc\x83\xbf\xbf\xbf\xbf"); - } - - SECTION("4.3 Overlong representation of the NUL character") - { - // The following five sequences should also be rejected like malformed - // UTF-8 sequences and should not be treated like the ASCII NUL - // character. - - // 4.3.1 U+0000 = c0 80 - roundtrip(false, "\xc0\x80"); - // 4.3.2 U+0000 = e0 80 80 - roundtrip(false, "\xe0\x80\x80"); - // 4.3.3 U+0000 = f0 80 80 80 - roundtrip(false, "\xf0\x80\x80\x80"); - // 4.3.4 U+0000 = f8 80 80 80 80 - roundtrip(false, "\xf8\x80\x80\x80\x80"); - // 4.3.5 U+0000 = fc 80 80 80 80 80 - roundtrip(false, "\xfc\x80\x80\x80\x80\x80"); - } - } - - SECTION("5 Illegal code positions") - { - // The following UTF-8 sequences should be rejected like malformed - // sequences, because they never represent valid ISO 10646 characters and - // a UTF-8 decoder that accepts them might introduce security problems - // comparable to overlong UTF-8 sequences. 
- - SECTION("5.1 Single UTF-16 surrogates") - { - // 5.1.1 U+D800 = ed a0 80 - roundtrip(false, "\xed\xa0\x80"); - // 5.1.2 U+DB7F = ed ad bf - roundtrip(false, "\xed\xad\xbf"); - // 5.1.3 U+DB80 = ed ae 80 - roundtrip(false, "\xed\xae\x80"); - // 5.1.4 U+DBFF = ed af bf - roundtrip(false, "\xed\xaf\xbf"); - // 5.1.5 U+DC00 = ed b0 80 - roundtrip(false, "\xed\xb0\x80"); - // 5.1.6 U+DF80 = ed be 80 - roundtrip(false, "\xed\xbe\x80"); - // 5.1.7 U+DFFF = ed bf bf - roundtrip(false, "\xed\xbf\xbf"); - } - - SECTION("5.2 Paired UTF-16 surrogates") - { - // 5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 - roundtrip(false, "\xed\xa0\x80\xed\xb0\x80"); - // 5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf - roundtrip(false, "\xed\xa0\x80\xed\xbf\xbf"); - // 5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 - roundtrip(false, "\xed\xad\xbf\xed\xb0\x80"); - // 5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf - roundtrip(false, "\xed\xad\xbf\xed\xbf\xbf"); - // 5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 - roundtrip(false, "\xed\xae\x80\xed\xb0\x80"); - // 5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf - roundtrip(false, "\xed\xae\x80\xed\xbf\xbf"); - // 5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 - roundtrip(false, "\xed\xaf\xbf\xed\xb0\x80"); - // 5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf - roundtrip(false, "\xed\xaf\xbf\xed\xbf\xbf"); - } - - SECTION("5.3 Noncharacter code positions") - { - // The following "noncharacters" are "reserved for internal use" by - // applications, and according to older versions of the Unicode Standard - // "should never be interchanged". Unicode Corrigendum #9 dropped the - // latter restriction. Nevertheless, their presence in incoming UTF-8 data - // can remain a potential security risk, depending on what use is made of - // these codes subsequently. Examples of such internal use: - // - // - Some file APIs with 16-bit characters may use the integer value -1 - // = U+FFFF to signal an end-of-file (EOF) or error condition. 
- // - // - In some UTF-16 receivers, code point U+FFFE might trigger a - // byte-swap operation (to convert between UTF-16LE and UTF-16BE). - // - // With such internal use of noncharacters, it may be desirable and safer - // to block those code points in UTF-8 decoders, as they should never - // occur legitimately in incoming UTF-8 data, and could trigger unsafe - // behaviour in subsequent processing. - - // Particularly problematic noncharacters in 16-bit applications: - - // 5.3.1 U+FFFE = ef bf be - roundtrip(true, "\xef\xbf\xbe"); - // 5.3.2 U+FFFF = ef bf bf - roundtrip(true, "\xef\xbf\xbf"); - - // 5.3.3 U+FDD0 .. U+FDEF - roundtrip(true, "\xEF\xB7\x90"); - roundtrip(true, "\xEF\xB7\x91"); - roundtrip(true, "\xEF\xB7\x92"); - roundtrip(true, "\xEF\xB7\x93"); - roundtrip(true, "\xEF\xB7\x94"); - roundtrip(true, "\xEF\xB7\x95"); - roundtrip(true, "\xEF\xB7\x96"); - roundtrip(true, "\xEF\xB7\x97"); - roundtrip(true, "\xEF\xB7\x98"); - roundtrip(true, "\xEF\xB7\x99"); - roundtrip(true, "\xEF\xB7\x9A"); - roundtrip(true, "\xEF\xB7\x9B"); - roundtrip(true, "\xEF\xB7\x9C"); - roundtrip(true, "\xEF\xB7\x9D"); - roundtrip(true, "\xEF\xB7\x9E"); - roundtrip(true, "\xEF\xB7\x9F"); - roundtrip(true, "\xEF\xB7\xA0"); - roundtrip(true, "\xEF\xB7\xA1"); - roundtrip(true, "\xEF\xB7\xA2"); - roundtrip(true, "\xEF\xB7\xA3"); - roundtrip(true, "\xEF\xB7\xA4"); - roundtrip(true, "\xEF\xB7\xA5"); - roundtrip(true, "\xEF\xB7\xA6"); - roundtrip(true, "\xEF\xB7\xA7"); - roundtrip(true, "\xEF\xB7\xA8"); - roundtrip(true, "\xEF\xB7\xA9"); - roundtrip(true, "\xEF\xB7\xAA"); - roundtrip(true, "\xEF\xB7\xAB"); - roundtrip(true, "\xEF\xB7\xAC"); - roundtrip(true, "\xEF\xB7\xAD"); - roundtrip(true, "\xEF\xB7\xAE"); - roundtrip(true, "\xEF\xB7\xAF"); - - // 5.3.4 U+nFFFE U+nFFFF (for n = 1..10) - roundtrip(true, "\xF0\x9F\xBF\xBF"); - roundtrip(true, "\xF0\xAF\xBF\xBF"); - roundtrip(true, "\xF0\xBF\xBF\xBF"); - roundtrip(true, "\xF1\x8F\xBF\xBF"); - roundtrip(true, "\xF1\x9F\xBF\xBF"); - 
roundtrip(true, "\xF1\xAF\xBF\xBF"); - roundtrip(true, "\xF1\xBF\xBF\xBF"); - roundtrip(true, "\xF2\x8F\xBF\xBF"); - roundtrip(true, "\xF2\x9F\xBF\xBF"); - roundtrip(true, "\xF2\xAF\xBF\xBF"); - } - } -} diff --git a/test/src/unit-unicode1.cpp b/test/src/unit-unicode1.cpp new file mode 100644 index 000000000..349bec452 --- /dev/null +++ b/test/src/unit-unicode1.cpp @@ -0,0 +1,655 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.9.1 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "doctest_compatibility.h" + +// for some reason including this after the json header leads to linker errors with VS 2017... 
+#include +#include +using nlohmann::json; + +#include +#include +#include +#include + +TEST_CASE("Unicode (1/5)" * doctest::skip()) +{ + SECTION("\\uxxxx sequences") + { + // create an escaped string from a code point + const auto codepoint_to_unicode = [](std::size_t cp) + { + // code points are represented as a six-character sequence: a + // reverse solidus, followed by the lowercase letter u, followed + // by four hexadecimal digits that encode the character's code + // point + std::stringstream ss; + ss << "\\u" << std::setw(4) << std::setfill('0') << std::hex << cp; + return ss.str(); + }; + + SECTION("correct sequences") + { + // generate all UTF-8 code points; in total, 1112064 code points are + // generated: 0x1FFFFF code points - 2048 invalid values between + // 0xD800 and 0xDFFF. + for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp) + { + // string to store the code point as in \uxxxx format + std::string json_text = "\""; + + // decide whether to use one or two \uxxxx sequences + if (cp < 0x10000u) + { + // The Unicode standard permanently reserves these code point + // values for UTF-16 encoding of the high and low surrogates, and + // they will never be assigned a character, so there should be no + // reason to encode them. The official Unicode standard says that + // no UTF forms, including UTF-16, can encode these code points. 
+ if (cp >= 0xD800u && cp <= 0xDFFFu) + { + // if we would not skip these code points, we would get a + // "missing low surrogate" exception + continue; + } + + // code points in the Basic Multilingual Plane can be + // represented with one \uxxxx sequence + json_text += codepoint_to_unicode(cp); + } + else + { + // To escape an extended character that is not in the Basic + // Multilingual Plane, the character is represented as a + // 12-character sequence, encoding the UTF-16 surrogate pair + const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu); + const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu); + json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2); + } + + json_text += "\""; + CAPTURE(json_text) + json _; + CHECK_NOTHROW(_ = json::parse(json_text)); + } + } + + SECTION("incorrect sequences") + { + SECTION("incorrect surrogate values") + { + json _; + + CHECK_THROWS_AS(_ = json::parse("\"\\uDC00\\uDC00\""), json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uDC00\\uDC00\""), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uDC00'"); + + CHECK_THROWS_AS(_ = json::parse("\"\\uD7FF\\uDC00\""), json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uD7FF\\uDC00\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uD7FF\\uDC00'"); + + CHECK_THROWS_AS(_ = json::parse("\"\\uD800]\""), json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uD800]\""), + "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800]'"); + + CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\v\""), 
json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\v\""), + "[json.exception.parse_error.101] parse error at line 1, column 9: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\v'"); + + CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\u123\""), json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\u123\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\uD800\\u123\"'"); + + CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\uDBFF\""), json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\uDBFF\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uDBFF'"); + + CHECK_THROWS_AS(_ = json::parse("\"\\uD800\\uE000\""), json::parse_error&); + CHECK_THROWS_WITH(_ = json::parse("\"\\uD800\\uE000\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uE000'"); + } + } + +#if 0 + SECTION("incorrect sequences") + { + SECTION("high surrogate without low surrogate") + { + // D800..DBFF are high surrogates and must be followed by low + // surrogates DC00..DFFF; here, nothing follows + for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp) + { + std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; + CAPTURE(json_text) + CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); + } + } + + SECTION("high surrogate with wrong low surrogate") + { + // D800..DBFF are high surrogates and must be followed by low + // surrogates DC00..DFFF; here a different sequence follows + for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; 
++cp1) + { + for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2) + { + if (0xDC00u <= cp2 && cp2 <= 0xDFFFu) + { + continue; + } + + std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\""; + CAPTURE(json_text) + CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); + } + } + } + + SECTION("low surrogate without high surrogate") + { + // low surrogates DC00..DFFF must follow high surrogates; here, + // they occur alone + for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp) + { + std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; + CAPTURE(json_text) + CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); + } + } + + } +#endif + } + + SECTION("read all unicode characters") + { + // read a file with all unicode characters stored as single-character + // strings in a JSON array + std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/all_unicode.json"); + json j; + CHECK_NOTHROW(f >> j); + + // the array has 1112064 + 1 elements (a terminating "null" value) + // Note: 1112064 = 0x1FFFFF code points - 2048 invalid values between + // 0xD800 and 0xDFFF. 
+ CHECK(j.size() == 1112065); + + SECTION("check JSON Pointers") + { + for (const auto& s : j) + { + // skip non-string JSON values + if (!s.is_string()) + { + continue; + } + + auto ptr = s.get(); + + // tilde must be followed by 0 or 1 + if (ptr == "~") + { + ptr += "0"; + } + + // JSON Pointers must begin with "/" + ptr.insert(0, "/"); + + CHECK_NOTHROW(json::json_pointer("/" + ptr)); + + // check escape/unescape roundtrip + auto escaped = nlohmann::detail::escape(ptr); + nlohmann::detail::unescape(escaped); + CHECK(escaped == ptr); + } + } + } + + SECTION("ignore byte-order-mark") + { + SECTION("in a stream") + { + // read a file with a UTF-8 BOM + std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/bom.json"); + json j; + CHECK_NOTHROW(f >> j); + } + + SECTION("with an iterator") + { + std::string i = "\xef\xbb\xbf{\n \"foo\": true\n}"; + json _; + CHECK_NOTHROW(_ = json::parse(i.begin(), i.end())); + } + } + + SECTION("error for incomplete/wrong BOM") + { + json _; + CHECK_THROWS_AS(_ = json::parse("\xef\xbb"), json::parse_error&); + CHECK_THROWS_AS(_ = json::parse("\xef\xbb\xbb"), json::parse_error&); + } +} + +namespace +{ +void roundtrip(bool success_expected, const std::string& s); + +void roundtrip(bool success_expected, const std::string& s) +{ + CAPTURE(s) + json _; + + // create JSON string value + json j = s; + // create JSON text + std::string ps = std::string("\"") + s + "\""; + + if (success_expected) + { + // serialization succeeds + CHECK_NOTHROW(j.dump()); + + // exclude parse test for U+0000 + if (s[0] != '\0') + { + // parsing JSON text succeeds + CHECK_NOTHROW(_ = json::parse(ps)); + } + + // roundtrip succeeds + CHECK_NOTHROW(_ = json::parse(j.dump())); + + // after roundtrip, the same string is stored + json jr = json::parse(j.dump()); + CHECK(jr.get() == s); + } + else + { + // serialization fails + CHECK_THROWS_AS(j.dump(), json::type_error&); + + // parsing JSON text fails + CHECK_THROWS_AS(_ = json::parse(ps), 
json::parse_error&); + } +} +} // namespace + +TEST_CASE("Markus Kuhn's UTF-8 decoder capability and stress test") +{ + // Markus Kuhn - 2015-08-28 - CC BY 4.0 + // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + + SECTION("1 Some correct UTF-8 text") + { + roundtrip(true, "κόσμε"); + } + + SECTION("2 Boundary condition test cases") + { + SECTION("2.1 First possible sequence of a certain length") + { + // 2.1.1 1 byte (U-00000000) + roundtrip(true, std::string("\0", 1)); + // 2.1.2 2 bytes (U-00000080) + roundtrip(true, "\xc2\x80"); + // 2.1.3 3 bytes (U-00000800) + roundtrip(true, "\xe0\xa0\x80"); + // 2.1.4 4 bytes (U-00010000) + roundtrip(true, "\xf0\x90\x80\x80"); + + // 2.1.5 5 bytes (U-00200000) + roundtrip(false, "\xF8\x88\x80\x80\x80"); + // 2.1.6 6 bytes (U-04000000) + roundtrip(false, "\xFC\x84\x80\x80\x80\x80"); + } + + SECTION("2.2 Last possible sequence of a certain length") + { + // 2.2.1 1 byte (U-0000007F) + roundtrip(true, "\x7f"); + // 2.2.2 2 bytes (U-000007FF) + roundtrip(true, "\xdf\xbf"); + // 2.2.3 3 bytes (U-0000FFFF) + roundtrip(true, "\xef\xbf\xbf"); + + // 2.2.4 4 bytes (U-001FFFFF) + roundtrip(false, "\xF7\xBF\xBF\xBF"); + // 2.2.5 5 bytes (U-03FFFFFF) + roundtrip(false, "\xFB\xBF\xBF\xBF\xBF"); + // 2.2.6 6 bytes (U-7FFFFFFF) + roundtrip(false, "\xFD\xBF\xBF\xBF\xBF\xBF"); + } + + SECTION("2.3 Other boundary conditions") + { + // 2.3.1 U-0000D7FF = ed 9f bf + roundtrip(true, "\xed\x9f\xbf"); + // 2.3.2 U-0000E000 = ee 80 80 + roundtrip(true, "\xee\x80\x80"); + // 2.3.3 U-0000FFFD = ef bf bd + roundtrip(true, "\xef\xbf\xbd"); + // 2.3.4 U-0010FFFF = f4 8f bf bf + roundtrip(true, "\xf4\x8f\xbf\xbf"); + + // 2.3.5 U-00110000 = f4 90 80 80 + roundtrip(false, "\xf4\x90\x80\x80"); + } + } + + SECTION("3 Malformed sequences") + { + SECTION("3.1 Unexpected continuation bytes") + { + // Each unexpected continuation byte should be separately signalled as a + // malformed sequence of its own. 
+ + // 3.1.1 First continuation byte 0x80 + roundtrip(false, "\x80"); + // 3.1.2 Last continuation byte 0xbf + roundtrip(false, "\xbf"); + + // 3.1.3 2 continuation bytes + roundtrip(false, "\x80\xbf"); + // 3.1.4 3 continuation bytes + roundtrip(false, "\x80\xbf\x80"); + // 3.1.5 4 continuation bytes + roundtrip(false, "\x80\xbf\x80\xbf"); + // 3.1.6 5 continuation bytes + roundtrip(false, "\x80\xbf\x80\xbf\x80"); + // 3.1.7 6 continuation bytes + roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf"); + // 3.1.8 7 continuation bytes + roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf\x80"); + + // 3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf) + roundtrip(false, "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"); + } + + SECTION("3.2 Lonely start characters") + { + // 3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf) + roundtrip(false, "\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf"); + // 3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef) + roundtrip(false, "\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef"); + // 3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7) + roundtrip(false, "\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7"); + // 3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb) + roundtrip(false, "\xf8 \xf9 \xfa \xfb"); + // 3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd) + roundtrip(false, "\xfc \xfd"); + } + + SECTION("3.3 Sequences with last continuation byte missing") + { + // All bytes of an incomplete sequence should be signalled as a single + // malformed sequence, i.e., you should see only a single replacement + // character in each of the next 10 tests. 
(Characters as in section 2) + + // 3.3.1 2-byte sequence with last byte missing (U+0000) + roundtrip(false, "\xc0"); + // 3.3.2 3-byte sequence with last byte missing (U+0000) + roundtrip(false, "\xe0\x80"); + // 3.3.3 4-byte sequence with last byte missing (U+0000) + roundtrip(false, "\xf0\x80\x80"); + // 3.3.4 5-byte sequence with last byte missing (U+0000) + roundtrip(false, "\xf8\x80\x80\x80"); + // 3.3.5 6-byte sequence with last byte missing (U+0000) + roundtrip(false, "\xfc\x80\x80\x80\x80"); + // 3.3.6 2-byte sequence with last byte missing (U-000007FF) + roundtrip(false, "\xdf"); + // 3.3.7 3-byte sequence with last byte missing (U-0000FFFF) + roundtrip(false, "\xef\xbf"); + // 3.3.8 4-byte sequence with last byte missing (U-001FFFFF) + roundtrip(false, "\xf7\xbf\xbf"); + // 3.3.9 5-byte sequence with last byte missing (U-03FFFFFF) + roundtrip(false, "\xfb\xbf\xbf\xbf"); + // 3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF) + roundtrip(false, "\xfd\xbf\xbf\xbf\xbf"); + } + + SECTION("3.4 Concatenation of incomplete sequences") + { + // All the 10 sequences of 3.3 concatenated, you should see 10 malformed + // sequences being signalled: + roundtrip(false, "\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf"); + } + + SECTION("3.5 Impossible bytes") + { + // The following two bytes cannot appear in a correct UTF-8 string + + // 3.5.1 fe + roundtrip(false, "\xfe"); + // 3.5.2 ff + roundtrip(false, "\xff"); + // 3.5.3 fe fe ff ff + roundtrip(false, "\xfe\xfe\xff\xff"); + } + } + + SECTION("4 Overlong sequences") + { + // The following sequences are not malformed according to the letter of + // the Unicode 2.0 standard. However, they are longer then necessary and + // a correct UTF-8 encoder is not allowed to produce them. 
A "safe UTF-8 + // decoder" should reject them just like malformed sequences for two + // reasons: (1) It helps to debug applications if overlong sequences are + // not treated as valid representations of characters, because this helps + // to spot problems more quickly. (2) Overlong sequences provide + // alternative representations of characters, that could maliciously be + // used to bypass filters that check only for ASCII characters. For + // instance, a 2-byte encoded line feed (LF) would not be caught by a + // line counter that counts only 0x0a bytes, but it would still be + // processed as a line feed by an unsafe UTF-8 decoder later in the + // pipeline. From a security point of view, ASCII compatibility of UTF-8 + // sequences means also, that ASCII characters are *only* allowed to be + // represented by ASCII bytes in the range 0x00-0x7f. To ensure this + // aspect of ASCII compatibility, use only "safe UTF-8 decoders" that + // reject overlong UTF-8 sequences for which a shorter encoding exists. + + SECTION("4.1 Examples of an overlong ASCII character") + { + // With a safe UTF-8 decoder, all of the following five overlong + // representations of the ASCII character slash ("/") should be rejected + // like a malformed UTF-8 sequence, for instance by substituting it with + // a replacement character. If you see a slash below, you do not have a + // safe UTF-8 decoder! + + // 4.1.1 U+002F = c0 af + roundtrip(false, "\xc0\xaf"); + // 4.1.2 U+002F = e0 80 af + roundtrip(false, "\xe0\x80\xaf"); + // 4.1.3 U+002F = f0 80 80 af + roundtrip(false, "\xf0\x80\x80\xaf"); + // 4.1.4 U+002F = f8 80 80 80 af + roundtrip(false, "\xf8\x80\x80\x80\xaf"); + // 4.1.5 U+002F = fc 80 80 80 80 af + roundtrip(false, "\xfc\x80\x80\x80\x80\xaf"); + } + + SECTION("4.2 Maximum overlong sequences") + { + // Below you see the highest Unicode value that is still resulting in an + // overlong sequence if represented with the given number of bytes. 
This + // is a boundary test for safe UTF-8 decoders. All five characters should + // be rejected like malformed UTF-8 sequences. + + // 4.2.1 U-0000007F = c1 bf + roundtrip(false, "\xc1\xbf"); + // 4.2.2 U-000007FF = e0 9f bf + roundtrip(false, "\xe0\x9f\xbf"); + // 4.2.3 U-0000FFFF = f0 8f bf bf + roundtrip(false, "\xf0\x8f\xbf\xbf"); + // 4.2.4 U-001FFFFF = f8 87 bf bf bf + roundtrip(false, "\xf8\x87\xbf\xbf\xbf"); + // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf + roundtrip(false, "\xfc\x83\xbf\xbf\xbf\xbf"); + } + + SECTION("4.3 Overlong representation of the NUL character") + { + // The following five sequences should also be rejected like malformed + // UTF-8 sequences and should not be treated like the ASCII NUL + // character. + + // 4.3.1 U+0000 = c0 80 + roundtrip(false, "\xc0\x80"); + // 4.3.2 U+0000 = e0 80 80 + roundtrip(false, "\xe0\x80\x80"); + // 4.3.3 U+0000 = f0 80 80 80 + roundtrip(false, "\xf0\x80\x80\x80"); + // 4.3.4 U+0000 = f8 80 80 80 80 + roundtrip(false, "\xf8\x80\x80\x80\x80"); + // 4.3.5 U+0000 = fc 80 80 80 80 80 + roundtrip(false, "\xfc\x80\x80\x80\x80\x80"); + } + } + + SECTION("5 Illegal code positions") + { + // The following UTF-8 sequences should be rejected like malformed + // sequences, because they never represent valid ISO 10646 characters and + // a UTF-8 decoder that accepts them might introduce security problems + // comparable to overlong UTF-8 sequences. 
+ + SECTION("5.1 Single UTF-16 surrogates") + { + // 5.1.1 U+D800 = ed a0 80 + roundtrip(false, "\xed\xa0\x80"); + // 5.1.2 U+DB7F = ed ad bf + roundtrip(false, "\xed\xad\xbf"); + // 5.1.3 U+DB80 = ed ae 80 + roundtrip(false, "\xed\xae\x80"); + // 5.1.4 U+DBFF = ed af bf + roundtrip(false, "\xed\xaf\xbf"); + // 5.1.5 U+DC00 = ed b0 80 + roundtrip(false, "\xed\xb0\x80"); + // 5.1.6 U+DF80 = ed be 80 + roundtrip(false, "\xed\xbe\x80"); + // 5.1.7 U+DFFF = ed bf bf + roundtrip(false, "\xed\xbf\xbf"); + } + + SECTION("5.2 Paired UTF-16 surrogates") + { + // 5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 + roundtrip(false, "\xed\xa0\x80\xed\xb0\x80"); + // 5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf + roundtrip(false, "\xed\xa0\x80\xed\xbf\xbf"); + // 5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 + roundtrip(false, "\xed\xad\xbf\xed\xb0\x80"); + // 5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf + roundtrip(false, "\xed\xad\xbf\xed\xbf\xbf"); + // 5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 + roundtrip(false, "\xed\xae\x80\xed\xb0\x80"); + // 5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf + roundtrip(false, "\xed\xae\x80\xed\xbf\xbf"); + // 5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 + roundtrip(false, "\xed\xaf\xbf\xed\xb0\x80"); + // 5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf + roundtrip(false, "\xed\xaf\xbf\xed\xbf\xbf"); + } + + SECTION("5.3 Noncharacter code positions") + { + // The following "noncharacters" are "reserved for internal use" by + // applications, and according to older versions of the Unicode Standard + // "should never be interchanged". Unicode Corrigendum #9 dropped the + // latter restriction. Nevertheless, their presence in incoming UTF-8 data + // can remain a potential security risk, depending on what use is made of + // these codes subsequently. Examples of such internal use: + // + // - Some file APIs with 16-bit characters may use the integer value -1 + // = U+FFFF to signal an end-of-file (EOF) or error condition. 
+ // + // - In some UTF-16 receivers, code point U+FFFE might trigger a + // byte-swap operation (to convert between UTF-16LE and UTF-16BE). + // + // With such internal use of noncharacters, it may be desirable and safer + // to block those code points in UTF-8 decoders, as they should never + // occur legitimately in incoming UTF-8 data, and could trigger unsafe + // behaviour in subsequent processing. + + // Particularly problematic noncharacters in 16-bit applications: + + // 5.3.1 U+FFFE = ef bf be + roundtrip(true, "\xef\xbf\xbe"); + // 5.3.2 U+FFFF = ef bf bf + roundtrip(true, "\xef\xbf\xbf"); + + // 5.3.3 U+FDD0 .. U+FDEF + roundtrip(true, "\xEF\xB7\x90"); + roundtrip(true, "\xEF\xB7\x91"); + roundtrip(true, "\xEF\xB7\x92"); + roundtrip(true, "\xEF\xB7\x93"); + roundtrip(true, "\xEF\xB7\x94"); + roundtrip(true, "\xEF\xB7\x95"); + roundtrip(true, "\xEF\xB7\x96"); + roundtrip(true, "\xEF\xB7\x97"); + roundtrip(true, "\xEF\xB7\x98"); + roundtrip(true, "\xEF\xB7\x99"); + roundtrip(true, "\xEF\xB7\x9A"); + roundtrip(true, "\xEF\xB7\x9B"); + roundtrip(true, "\xEF\xB7\x9C"); + roundtrip(true, "\xEF\xB7\x9D"); + roundtrip(true, "\xEF\xB7\x9E"); + roundtrip(true, "\xEF\xB7\x9F"); + roundtrip(true, "\xEF\xB7\xA0"); + roundtrip(true, "\xEF\xB7\xA1"); + roundtrip(true, "\xEF\xB7\xA2"); + roundtrip(true, "\xEF\xB7\xA3"); + roundtrip(true, "\xEF\xB7\xA4"); + roundtrip(true, "\xEF\xB7\xA5"); + roundtrip(true, "\xEF\xB7\xA6"); + roundtrip(true, "\xEF\xB7\xA7"); + roundtrip(true, "\xEF\xB7\xA8"); + roundtrip(true, "\xEF\xB7\xA9"); + roundtrip(true, "\xEF\xB7\xAA"); + roundtrip(true, "\xEF\xB7\xAB"); + roundtrip(true, "\xEF\xB7\xAC"); + roundtrip(true, "\xEF\xB7\xAD"); + roundtrip(true, "\xEF\xB7\xAE"); + roundtrip(true, "\xEF\xB7\xAF"); + + // 5.3.4 U+nFFFE U+nFFFF (for n = 1..10) + roundtrip(true, "\xF0\x9F\xBF\xBF"); + roundtrip(true, "\xF0\xAF\xBF\xBF"); + roundtrip(true, "\xF0\xBF\xBF\xBF"); + roundtrip(true, "\xF1\x8F\xBF\xBF"); + roundtrip(true, "\xF1\x9F\xBF\xBF"); + 
roundtrip(true, "\xF1\xAF\xBF\xBF"); + roundtrip(true, "\xF1\xBF\xBF\xBF"); + roundtrip(true, "\xF2\x8F\xBF\xBF"); + roundtrip(true, "\xF2\x9F\xBF\xBF"); + roundtrip(true, "\xF2\xAF\xBF\xBF"); + } + } +} diff --git a/test/src/unit-unicode2.cpp b/test/src/unit-unicode2.cpp new file mode 100644 index 000000000..074704bba --- /dev/null +++ b/test/src/unit-unicode2.cpp @@ -0,0 +1,625 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.9.1 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "doctest_compatibility.h" + +// for some reason including this after the json header leads to linker errors with VS 2017... 
+#include + +#include +using nlohmann::json; + +#include +#include +#include +#include +#include + +namespace +{ +extern size_t calls; +size_t calls = 0; + +void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4); + +void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) +{ + static std::string json_string; + json_string.clear(); + + CAPTURE(byte1) + CAPTURE(byte2) + CAPTURE(byte3) + CAPTURE(byte4) + + json_string += std::string(1, static_cast(byte1)); + + if (byte2 != -1) + { + json_string += std::string(1, static_cast(byte2)); + } + + if (byte3 != -1) + { + json_string += std::string(1, static_cast(byte3)); + } + + if (byte4 != -1) + { + json_string += std::string(1, static_cast(byte4)); + } + + CAPTURE(json_string) + + // store the string in a JSON value + static json j; + static json j2; + j = json_string; + j2 = "abc" + json_string + "xyz"; + + static std::string s_ignored; + static std::string s_ignored2; + static std::string s_ignored_ascii; + static std::string s_ignored2_ascii; + static std::string s_replaced; + static std::string s_replaced2; + static std::string s_replaced_ascii; + static std::string s_replaced2_ascii; + + // dumping with ignore/replace must not throw in any case + s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); + s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore); + s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore); + s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore); + s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace); + s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); + s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); + s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); + + if (success_expected) + { + static std::string s_strict; + // strict mode must not throw if success is expected + 
s_strict = j.dump(); + // all dumps should agree on the string + CHECK(s_strict == s_ignored); + CHECK(s_strict == s_replaced); + } + else + { + // strict mode must throw if success is not expected + CHECK_THROWS_AS(j.dump(), json::type_error&); + // ignore and replace must create different dumps + CHECK(s_ignored != s_replaced); + + // check that replace string contains a replacement character + CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); + } + + // check that prefix and suffix are preserved + CHECK(s_ignored2.substr(1, 3) == "abc"); + CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); + CHECK(s_ignored2_ascii.substr(1, 3) == "abc"); + CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz"); + CHECK(s_replaced2.substr(1, 3) == "abc"); + CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz"); + CHECK(s_replaced2_ascii.substr(1, 3) == "abc"); + CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz"); +} + +void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4); + +// create and check a JSON string with up to four UTF-8 bytes +void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) +{ + if (++calls % 100000 == 0) + { + std::cout << calls << " of 455355 UTF-8 strings checked" << std::endl; + } + + static std::string json_string; + json_string = "\""; + + CAPTURE(byte1) + json_string += std::string(1, static_cast(byte1)); + + if (byte2 != -1) + { + CAPTURE(byte2) + json_string += std::string(1, static_cast(byte2)); + } + + if (byte3 != -1) + { + CAPTURE(byte3) + json_string += std::string(1, static_cast(byte3)); + } + + if (byte4 != -1) + { + CAPTURE(byte4) + json_string += std::string(1, static_cast(byte4)); + } + + json_string += "\""; + + CAPTURE(json_string) + + json _; + if (success_expected) + { + CHECK_NOTHROW(_ = json::parse(json_string)); + } + else + { + CHECK_THROWS_AS(_ = json::parse(json_string), 
json::parse_error&); + } +} +} // namespace + +TEST_CASE("Unicode (2/5)" * doctest::skip()) +{ + SECTION("RFC 3629") + { + /* + RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as + follows: + + A UTF-8 string is a sequence of octets representing a sequence of UCS + characters. An octet sequence is valid UTF-8 only if it matches the + following syntax, which is derived from the rules for encoding UTF-8 + and is expressed in the ABNF of [RFC2234]. + + UTF8-octets = *( UTF8-char ) + UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 + UTF8-1 = %x00-7F + UTF8-2 = %xC2-DF UTF8-tail + UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + %xF4 %x80-8F 2( UTF8-tail ) + UTF8-tail = %x80-BF + */ + + SECTION("ill-formed first byte") + { + for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + + for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("UTF8-1 (x00-x7F)") + { + SECTION("well-formed") + { + for (int byte1 = 0x00; byte1 <= 0x7F; ++byte1) + { + // unescaped control characters are parse errors in JSON + if (0x00 <= byte1 && byte1 <= 0x1F) + { + check_utf8string(false, byte1); + continue; + } + + // a single quote is a parse error in JSON + if (byte1 == 0x22) + { + check_utf8string(false, byte1); + continue; + } + + // a single backslash is a parse error in JSON + if (byte1 == 0x5C) + { + check_utf8string(false, byte1); + continue; + } + + // all other characters are OK + check_utf8string(true, byte1); + check_utf8dump(true, byte1); + } + } + } + + SECTION("UTF8-2 (xC2-xDF UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + check_utf8string(true, byte1, byte2); + check_utf8dump(true, 
byte1, byte2); + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 && byte2 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2); + check_utf8dump(false, byte1, byte2); + } + } + } + } + + SECTION("UTF8-3 (xE0 xA0-BF UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + check_utf8dump(true, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + check_utf8string(false, byte1, byte2); + check_utf8dump(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0xA0 <= byte2 && byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1) + { + for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 && byte3 <= 0xBF) 
+ { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + } + + SECTION("UTF8-3 (xE1-xEC UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + check_utf8dump(true, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + check_utf8string(false, byte1, byte2); + check_utf8dump(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 && byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 && byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + } + + SECTION("UTF8-3 (xED x80-9F UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, 
byte1, byte2, byte3); + check_utf8dump(true, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + { + check_utf8string(false, byte1, byte2); + check_utf8dump(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 && byte2 <= 0x9F) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xED; byte1 <= 0xED; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 && byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + } + + SECTION("UTF8-3 (xEE-xEF UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(true, byte1, byte2, byte3); + check_utf8dump(true, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; 
++byte2) + { + check_utf8string(false, byte1, byte2); + check_utf8dump(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x80 <= byte2 && byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1) + { + for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 && byte3 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + } + } +} diff --git a/test/src/unit-unicode3.cpp b/test/src/unit-unicode3.cpp new file mode 100644 index 000000000..81bf89ba0 --- /dev/null +++ b/test/src/unit-unicode3.cpp @@ -0,0 +1,339 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.9.1 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "doctest_compatibility.h" + +// for some reason including this after the json header leads to linker errors with VS 2017... +#include + +#include +using nlohmann::json; + +#include +#include +#include +#include +#include + +namespace +{ +extern size_t calls; +size_t calls = 0; + +void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4); + +void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) +{ + static std::string json_string; + json_string.clear(); + + CAPTURE(byte1) + CAPTURE(byte2) + CAPTURE(byte3) + CAPTURE(byte4) + + json_string += std::string(1, static_cast(byte1)); + + if (byte2 != -1) + { + json_string += std::string(1, static_cast(byte2)); + } + + if (byte3 != -1) + { + json_string += std::string(1, static_cast(byte3)); + } + + if (byte4 != -1) + { + json_string += std::string(1, static_cast(byte4)); + } + + CAPTURE(json_string) + + // store the string in a JSON value + static json j; + static json j2; + j = json_string; + j2 = "abc" + json_string + "xyz"; + + static std::string s_ignored; + static std::string s_ignored2; + static std::string s_ignored_ascii; + static std::string s_ignored2_ascii; + static std::string s_replaced; + static std::string s_replaced2; + static std::string s_replaced_ascii; + static std::string s_replaced2_ascii; + + // dumping with ignore/replace must not throw in any case + s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); + s_ignored2 = j2.dump(-1, ' ', 
false, json::error_handler_t::ignore); + s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore); + s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore); + s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace); + s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); + s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); + s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); + + if (success_expected) + { + static std::string s_strict; + // strict mode must not throw if success is expected + s_strict = j.dump(); + // all dumps should agree on the string + CHECK(s_strict == s_ignored); + CHECK(s_strict == s_replaced); + } + else + { + // strict mode must throw if success is not expected + CHECK_THROWS_AS(j.dump(), json::type_error&); + // ignore and replace must create different dumps + CHECK(s_ignored != s_replaced); + + // check that replace string contains a replacement character + CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); + } + + // check that prefix and suffix are preserved + CHECK(s_ignored2.substr(1, 3) == "abc"); + CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); + CHECK(s_ignored2_ascii.substr(1, 3) == "abc"); + CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz"); + CHECK(s_replaced2.substr(1, 3) == "abc"); + CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz"); + CHECK(s_replaced2_ascii.substr(1, 3) == "abc"); + CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz"); +} + +void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4); + +// create and check a JSON string with up to four UTF-8 bytes +void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1) +{ + if (++calls % 100000 == 0) + { + std::cout << calls << " of 1641521 UTF-8 strings checked" << std::endl; + } + + static std::string 
json_string; + json_string = "\""; + + CAPTURE(byte1) + json_string += std::string(1, static_cast(byte1)); + + if (byte2 != -1) + { + CAPTURE(byte2) + json_string += std::string(1, static_cast(byte2)); + } + + if (byte3 != -1) + { + CAPTURE(byte3) + json_string += std::string(1, static_cast(byte3)); + } + + if (byte4 != -1) + { + CAPTURE(byte4) + json_string += std::string(1, static_cast(byte4)); + } + + json_string += "\""; + + CAPTURE(json_string) + + json _; + if (success_expected) + { + CHECK_NOTHROW(_ = json::parse(json_string)); + } + else + { + CHECK_THROWS_AS(_ = json::parse(json_string), json::parse_error&); + } +} +} // namespace + +TEST_CASE("Unicode (3/5)" * doctest::skip()) +{ + SECTION("RFC 3629") + { + /* + RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as + follows: + + A UTF-8 string is a sequence of octets representing a sequence of UCS + characters. An octet sequence is valid UTF-8 only if it matches the + following syntax, which is derived from the rules for encoding UTF-8 + and is expressed in the ABNF of [RFC2234]. 
+ + UTF8-octets = *( UTF8-char ) + UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 + UTF8-1 = %x00-7F + UTF8-2 = %xC2-DF UTF8-tail + UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + %xF4 %x80-8F 2( UTF8-tail ) + UTF8-tail = %x80-BF + */ + + SECTION("UTF8-4 (xF0 x90-BF UTF8-tail UTF8-tail)") + { + SECTION("well-formed") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(true, byte1, byte2, byte3, byte4); + check_utf8dump(true, byte1, byte2, byte3, byte4); + } + } + } + } + } + + SECTION("ill-formed: missing second byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + check_utf8string(false, byte1); + check_utf8dump(false, byte1); + } + } + + SECTION("ill-formed: missing third byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + check_utf8string(false, byte1, byte2); + check_utf8dump(false, byte1, byte2); + } + } + } + + SECTION("ill-formed: missing fourth byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + check_utf8string(false, byte1, byte2, byte3); + check_utf8dump(false, byte1, byte2, byte3); + } + } + } + } + + SECTION("ill-formed: wrong second byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2) + { + // skip correct second byte + if (0x90 <= byte2 && byte2 <= 0xBF) + { + continue; + } + + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + check_utf8dump(false, byte1, byte2, byte3, byte4); + } + 
} + } + } + } + + SECTION("ill-formed: wrong third byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3) + { + // skip correct third byte + if (0x80 <= byte3 && byte3 <= 0xBF) + { + continue; + } + + for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4) + { + check_utf8string(false, byte1, byte2, byte3, byte4); + check_utf8dump(false, byte1, byte2, byte3, byte4); + } + } + } + } + } + + SECTION("ill-formed: wrong fourth byte") + { + for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1) + { + for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2) + { + for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3) + { + for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4) + { + // skip correct fourth byte (a valid UTF8-tail, x80-xBF): only ill-formed fourth bytes are checked here + if (0x80 <= byte4 && byte4 <= 0xBF) + { + continue; + } + + check_utf8string(false, byte1, byte2, byte3, byte4); + check_utf8dump(false, byte1, byte2, byte3, byte4); + } + } + } + } + } + } + } +} diff --git a/test/src/unit-unicode4.cpp b/test/src/unit-unicode4.cpp new file mode 100644 index 000000000..2a268c1fb --- /dev/null +++ b/test/src/unit-unicode4.cpp @@ -0,0 +1,339 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.9.1 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "doctest_compatibility.h" + +// for some reason including this after the json header leads to linker errors with VS 2017... 
+#include <locale>
+
+#include <nlohmann/json.hpp>
+using nlohmann::json;
+
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include <test_data.hpp>
+
+namespace
+{
+extern size_t calls;
+size_t calls = 0; // number of check_utf8string() invocations so far (drives the progress output)
+
+void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4);
+// dump a string built from up to four raw bytes (-1 = byte absent) with every error handler and check the results
+void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
+{
+    static std::string json_string;
+    json_string.clear();
+
+    CAPTURE(byte1)
+    CAPTURE(byte2)
+    CAPTURE(byte3)
+    CAPTURE(byte4)
+
+    json_string += std::string(1, static_cast<char>(byte1));
+
+    if (byte2 != -1)
+    {
+        json_string += std::string(1, static_cast<char>(byte2));
+    }
+
+    if (byte3 != -1)
+    {
+        json_string += std::string(1, static_cast<char>(byte3));
+    }
+
+    if (byte4 != -1)
+    {
+        json_string += std::string(1, static_cast<char>(byte4));
+    }
+
+    CAPTURE(json_string)
+
+    // store the string in a JSON value (j: bytes only, j2: bytes wrapped in "abc"/"xyz")
+    static json j;
+    static json j2;
+    j = json_string;
+    j2 = "abc" + json_string + "xyz";
+
+    static std::string s_ignored;
+    static std::string s_ignored2;
+    static std::string s_ignored_ascii;
+    static std::string s_ignored2_ascii;
+    static std::string s_replaced;
+    static std::string s_replaced2;
+    static std::string s_replaced_ascii;
+    static std::string s_replaced2_ascii;
+
+    // dumping with ignore/replace must not throw in any case
+    s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore);
+    s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore);
+    s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore);
+    s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore);
+    s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace);
+    s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace);
+    s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace);
+    s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace);
+
+    if (success_expected)
+    {
+        static std::string s_strict;
+        // strict mode must not throw if success is expected
+        s_strict = j.dump();
+        // all dumps should agree on the string
+        CHECK(s_strict == s_ignored);
+        CHECK(s_strict == s_replaced);
+    }
+    else
+    {
+        // strict mode must throw if success is not expected
+        CHECK_THROWS_AS(j.dump(), json::type_error&);
+        // ignore and replace must create different dumps
+        CHECK(s_ignored != s_replaced);
+
+        // check that replace string contains a replacement character (U+FFFD encoded as UTF-8)
+        CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos);
+    }
+
+    // check that prefix and suffix are preserved (index 0 and size()-1 are the surrounding quotes)
+    CHECK(s_ignored2.substr(1, 3) == "abc");
+    CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz");
+    CHECK(s_ignored2_ascii.substr(1, 3) == "abc");
+    CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz");
+    CHECK(s_replaced2.substr(1, 3) == "abc");
+    CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz");
+    CHECK(s_replaced2_ascii.substr(1, 3) == "abc");
+    CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz");
+}
+
+void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);
+
+// parse a quoted JSON string built from up to four raw bytes (-1 = byte absent) and check that parsing succeeds/fails as expected
+void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
+{
+    if (++calls % 100000 == 0) // progress report; the total is the number of calls made by all sections of this file
+    {
+        std::cout << calls << " of 7876803 UTF-8 strings checked" << std::endl;
+    }
+
+    static std::string json_string;
+    json_string = "\"";
+
+    CAPTURE(byte1)
+    json_string += std::string(1, static_cast<char>(byte1));
+
+    if (byte2 != -1)
+    {
+        CAPTURE(byte2)
+        json_string += std::string(1, static_cast<char>(byte2));
+    }
+
+    if (byte3 != -1)
+    {
+        CAPTURE(byte3)
+        json_string += std::string(1, static_cast<char>(byte3));
+    }
+
+    if (byte4 != -1)
+    {
+        CAPTURE(byte4)
+        json_string += std::string(1, static_cast<char>(byte4));
+    }
+
+    json_string += "\"";
+
+    CAPTURE(json_string)
+
+    json _;
+    if (success_expected)
+    {
+        CHECK_NOTHROW(_ = json::parse(json_string));
+    }
+    else
+    {
+        CHECK_THROWS_AS(_ = json::parse(json_string), json::parse_error&);
+    }
+}
+} // namespace
+
+TEST_CASE("Unicode (4/5)" * doctest::skip())
+{
+    SECTION("RFC 3629")
+    {
+        /*
+        RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
+        follows:
+
+            A UTF-8 string is a sequence of octets representing a sequence of UCS
+            characters.  An octet sequence is valid UTF-8 only if it matches the
+            following syntax, which is derived from the rules for encoding UTF-8
+            and is expressed in the ABNF of [RFC2234].
+
+            UTF8-octets = *( UTF8-char )
+            UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+            UTF8-1      = %x00-7F
+            UTF8-2      = %xC2-DF UTF8-tail
+            UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+                          %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+            UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+                          %xF4 %x80-8F 2( UTF8-tail )
+            UTF8-tail   = %x80-BF
+        */
+
+        SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)")
+        {
+            SECTION("well-formed")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(true, byte1, byte2, byte3, byte4);
+                                check_utf8dump(true, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing second byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    check_utf8string(false, byte1);
+                    check_utf8dump(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        check_utf8string(false, byte1, byte2);
+                        check_utf8dump(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing fourth byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                            check_utf8dump(false, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 && byte2 <= 0xBF)
+                        {
+                            continue;
+                        }
+
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                                check_utf8dump(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong third byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                        {
+                            // skip correct third byte
+                            if (0x80 <= byte3 && byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                                check_utf8dump(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong fourth byte")
+            {
+                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
+                            {
+                                // skip correct fourth byte (the guard must test byte4; byte3 is always a valid tail here)
+                                if (0x80 <= byte4 && byte4 <= 0xBF)
+                                {
+                                    continue;
+                                }
+
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                                check_utf8dump(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/test/src/unit-unicode5.cpp b/test/src/unit-unicode5.cpp
new file mode 100644
index 000000000..abe851469
--- /dev/null
+++ b/test/src/unit-unicode5.cpp
@@ -0,0 +1,339 @@
+/*
+    __ _____ _____ _____
+ __|  |   __|     |   | |  JSON for Modern C++ (test suite)
+|  |  |__   |  |  | | | |  version 3.9.1
+|_____|_____|_____|_|___|  https://github.com/nlohmann/json
+
+Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+SPDX-License-Identifier: MIT
+Copyright (c) 2013-2019 Niels Lohmann <http://nlohmann.me>.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "doctest_compatibility.h" + +// for some reason including this after the json header leads to linker errors with VS 2017... 
+#include <locale>
+
+#include <nlohmann/json.hpp>
+using nlohmann::json;
+
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include <test_data.hpp>
+
+namespace
+{
+extern size_t calls;
+size_t calls = 0; // number of check_utf8string() invocations so far (drives the progress output)
+
+void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4);
+// dump a string built from up to four raw bytes (-1 = byte absent) with every error handler and check the results
+void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
+{
+    static std::string json_string;
+    json_string.clear();
+
+    CAPTURE(byte1)
+    CAPTURE(byte2)
+    CAPTURE(byte3)
+    CAPTURE(byte4)
+
+    json_string += std::string(1, static_cast<char>(byte1));
+
+    if (byte2 != -1)
+    {
+        json_string += std::string(1, static_cast<char>(byte2));
+    }
+
+    if (byte3 != -1)
+    {
+        json_string += std::string(1, static_cast<char>(byte3));
+    }
+
+    if (byte4 != -1)
+    {
+        json_string += std::string(1, static_cast<char>(byte4));
+    }
+
+    CAPTURE(json_string)
+
+    // store the string in a JSON value (j: bytes only, j2: bytes wrapped in "abc"/"xyz")
+    static json j;
+    static json j2;
+    j = json_string;
+    j2 = "abc" + json_string + "xyz";
+
+    static std::string s_ignored;
+    static std::string s_ignored2;
+    static std::string s_ignored_ascii;
+    static std::string s_ignored2_ascii;
+    static std::string s_replaced;
+    static std::string s_replaced2;
+    static std::string s_replaced_ascii;
+    static std::string s_replaced2_ascii;
+
+    // dumping with ignore/replace must not throw in any case
+    s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore);
+    s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore);
+    s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore);
+    s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore);
+    s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace);
+    s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace);
+    s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace);
+    s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace);
+
+    if (success_expected)
+    {
+        static std::string s_strict;
+        // strict mode must not throw if success is expected
+        s_strict = j.dump();
+        // all dumps should agree on the string
+        CHECK(s_strict == s_ignored);
+        CHECK(s_strict == s_replaced);
+    }
+    else
+    {
+        // strict mode must throw if success is not expected
+        CHECK_THROWS_AS(j.dump(), json::type_error&);
+        // ignore and replace must create different dumps
+        CHECK(s_ignored != s_replaced);
+
+        // check that replace string contains a replacement character (U+FFFD encoded as UTF-8)
+        CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos);
+    }
+
+    // check that prefix and suffix are preserved (index 0 and size()-1 are the surrounding quotes)
+    CHECK(s_ignored2.substr(1, 3) == "abc");
+    CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz");
+    CHECK(s_ignored2_ascii.substr(1, 3) == "abc");
+    CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz");
+    CHECK(s_replaced2.substr(1, 3) == "abc");
+    CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz");
+    CHECK(s_replaced2_ascii.substr(1, 3) == "abc");
+    CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz");
+}
+
+void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);
+
+// parse a quoted JSON string built from up to four raw bytes (-1 = byte absent) and check that parsing succeeds/fails as expected
+void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
+{
+    if (++calls % 100000 == 0) // progress report; the total is the number of calls made by all sections of this file
+    {
+        std::cout << calls << " of 1442833 UTF-8 strings checked" << std::endl;
+    }
+
+    static std::string json_string;
+    json_string = "\"";
+
+    CAPTURE(byte1)
+    json_string += std::string(1, static_cast<char>(byte1));
+
+    if (byte2 != -1)
+    {
+        CAPTURE(byte2)
+        json_string += std::string(1, static_cast<char>(byte2));
+    }
+
+    if (byte3 != -1)
+    {
+        CAPTURE(byte3)
+        json_string += std::string(1, static_cast<char>(byte3));
+    }
+
+    if (byte4 != -1)
+    {
+        CAPTURE(byte4)
+        json_string += std::string(1, static_cast<char>(byte4));
+    }
+
+    json_string += "\"";
+
+    CAPTURE(json_string)
+
+    json _;
+    if (success_expected)
+    {
+        CHECK_NOTHROW(_ = json::parse(json_string));
+    }
+    else
+    {
+        CHECK_THROWS_AS(_ = json::parse(json_string), json::parse_error&);
+    }
+}
+} // namespace
+
+TEST_CASE("Unicode (5/5)" * doctest::skip())
+{
+    SECTION("RFC 3629")
+    {
+        /*
+        RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
+        follows:
+
+            A UTF-8 string is a sequence of octets representing a sequence of UCS
+            characters.  An octet sequence is valid UTF-8 only if it matches the
+            following syntax, which is derived from the rules for encoding UTF-8
+            and is expressed in the ABNF of [RFC2234].
+
+            UTF8-octets = *( UTF8-char )
+            UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+            UTF8-1      = %x00-7F
+            UTF8-2      = %xC2-DF UTF8-tail
+            UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+                          %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+            UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+                          %xF4 %x80-8F 2( UTF8-tail )
+            UTF8-tail   = %x80-BF
+        */
+
+        SECTION("UTF8-4 (xF4 x80-8F UTF8-tail UTF8-tail)")
+        {
+            SECTION("well-formed")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(true, byte1, byte2, byte3, byte4);
+                                check_utf8dump(true, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing second byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    check_utf8string(false, byte1);
+                    check_utf8dump(false, byte1);
+                }
+            }
+
+            SECTION("ill-formed: missing third byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                    {
+                        check_utf8string(false, byte1, byte2);
+                        check_utf8dump(false, byte1, byte2);
+                    }
+                }
+            }
+
+            SECTION("ill-formed: missing fourth byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            check_utf8string(false, byte1, byte2, byte3);
+                            check_utf8dump(false, byte1, byte2, byte3);
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong second byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
+                    {
+                        // skip correct second byte
+                        if (0x80 <= byte2 && byte2 <= 0x8F)
+                        {
+                            continue;
+                        }
+
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                                check_utf8dump(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong third byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                    {
+                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
+                        {
+                            // skip correct third byte
+                            if (0x80 <= byte3 && byte3 <= 0xBF)
+                            {
+                                continue;
+                            }
+
+                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
+                            {
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                                check_utf8dump(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+
+            SECTION("ill-formed: wrong fourth byte")
+            {
+                for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
+                {
+                    for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
+                    {
+                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
+                        {
+                            for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
+                            {
+                                // skip correct fourth byte (the guard must test byte4; byte3 is always a valid tail here)
+                                if (0x80 <= byte4 && byte4 <= 0xBF)
+                                {
+                                    continue;
+                                }
+
+                                check_utf8string(false, byte1, byte2, byte3, byte4);
+                                check_utf8dump(false, byte1, byte2, byte3, byte4);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}