From 3162a30858d66b819f6833b2e8f34a5d96644915 Mon Sep 17 00:00:00 2001 From: Florian Albrechtskirchinger Date: Mon, 1 Aug 2022 14:47:13 +0200 Subject: [PATCH 1/3] Move fuzzer sources to tests/fuzz/src --- tests/{ => fuzz}/src/fuzzer-driver_afl.cpp | 0 tests/{ => fuzz}/src/fuzzer-parse_bjdata.cpp | 16 +++++++++++++--- tests/{ => fuzz}/src/fuzzer-parse_bson.cpp | 16 +++++++++++++--- tests/{ => fuzz}/src/fuzzer-parse_cbor.cpp | 16 +++++++++++++--- tests/{ => fuzz}/src/fuzzer-parse_json.cpp | 16 +++++++++++++--- tests/{ => fuzz}/src/fuzzer-parse_msgpack.cpp | 16 +++++++++++++--- tests/{ => fuzz}/src/fuzzer-parse_ubjson.cpp | 16 +++++++++++++--- 7 files changed, 78 insertions(+), 18 deletions(-) rename tests/{ => fuzz}/src/fuzzer-driver_afl.cpp (100%) rename tests/{ => fuzz}/src/fuzzer-parse_bjdata.cpp (86%) rename tests/{ => fuzz}/src/fuzzer-parse_bson.cpp (81%) rename tests/{ => fuzz}/src/fuzzer-parse_cbor.cpp (80%) rename tests/{ => fuzz}/src/fuzzer-parse_json.cpp (80%) rename tests/{ => fuzz}/src/fuzzer-parse_msgpack.cpp (80%) rename tests/{ => fuzz}/src/fuzzer-parse_ubjson.cpp (86%) diff --git a/tests/src/fuzzer-driver_afl.cpp b/tests/fuzz/src/fuzzer-driver_afl.cpp similarity index 100% rename from tests/src/fuzzer-driver_afl.cpp rename to tests/fuzz/src/fuzzer-driver_afl.cpp diff --git a/tests/src/fuzzer-parse_bjdata.cpp b/tests/fuzz/src/fuzzer-parse_bjdata.cpp similarity index 86% rename from tests/src/fuzzer-parse_bjdata.cpp rename to tests/fuzz/src/fuzzer-parse_bjdata.cpp index 0ead3755f..df6facb2c 100644 --- a/tests/src/fuzzer-parse_bjdata.cpp +++ b/tests/fuzz/src/fuzzer-parse_bjdata.cpp @@ -25,12 +25,16 @@ The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer drivers. 
*/ -#include -#include #include using json = nlohmann::json; +#ifdef __AFL_LEAK_CHECK + extern "C" void _exit(int); +#else + #define __AFL_LEAK_CHECK() do {} while(false) // NOLINT(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) +#endif + // see http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -40,6 +44,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) std::vector vec1(data, data + size); json j1 = json::from_bjdata(vec1); + // parse errors must raise an exception and not silently result in discarded values + assert(!j1.is_discarded()); + try { // step 2.1: round trip without adding size annotations to container types @@ -64,7 +71,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) catch (const json::parse_error&) { // parsing a BJData serialization must not fail - assert(false); + __builtin_trap(); } } catch (const json::parse_error&) @@ -80,6 +87,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // out of range errors may happen if provided sizes are excessive } + // do a leak check if fuzzing with AFL++ and LSAN + __AFL_LEAK_CHECK(); + // return 0 - non-zero return values are reserved for future use return 0; } diff --git a/tests/src/fuzzer-parse_bson.cpp b/tests/fuzz/src/fuzzer-parse_bson.cpp similarity index 81% rename from tests/src/fuzzer-parse_bson.cpp rename to tests/fuzz/src/fuzzer-parse_bson.cpp index b74c39513..adeef41a8 100644 --- a/tests/src/fuzzer-parse_bson.cpp +++ b/tests/fuzz/src/fuzzer-parse_bson.cpp @@ -19,12 +19,16 @@ The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer drivers. 
*/ -#include -#include #include using json = nlohmann::json; +#ifdef __AFL_LEAK_CHECK + extern "C" void _exit(int); +#else + #define __AFL_LEAK_CHECK() do {} while(false) // NOLINT(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) +#endif + // see http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -34,6 +38,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) std::vector vec1(data, data + size); json j1 = json::from_bson(vec1); + // parse errors must raise an exception and not silently result in discarded values + assert(!j1.is_discarded()); + if (j1.is_discarded()) { return 0; @@ -53,7 +60,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) catch (const json::parse_error&) { // parsing a BSON serialization must not fail - assert(false); + __builtin_trap(); } } catch (const json::parse_error&) @@ -69,6 +76,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // out of range errors can occur during parsing, too } + // do a leak check if fuzzing with AFL++ and LSAN + __AFL_LEAK_CHECK(); + // return 0 - non-zero return values are reserved for future use return 0; } diff --git a/tests/src/fuzzer-parse_cbor.cpp b/tests/fuzz/src/fuzzer-parse_cbor.cpp similarity index 80% rename from tests/src/fuzzer-parse_cbor.cpp rename to tests/fuzz/src/fuzzer-parse_cbor.cpp index 187cdefe3..15d5f293e 100644 --- a/tests/src/fuzzer-parse_cbor.cpp +++ b/tests/fuzz/src/fuzzer-parse_cbor.cpp @@ -19,12 +19,16 @@ The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer drivers. 
*/ -#include -#include #include using json = nlohmann::json; +#ifdef __AFL_LEAK_CHECK + extern "C" void _exit(int); +#else + #define __AFL_LEAK_CHECK() do {} while(false) // NOLINT(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) +#endif + // see http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -34,6 +38,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) std::vector vec1(data, data + size); json j1 = json::from_cbor(vec1); + // parse errors must raise an exception and not silently result in discarded values + assert(!j1.is_discarded()); + try { // step 2: round trip @@ -48,7 +55,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) catch (const json::parse_error&) { // parsing a CBOR serialization must not fail - assert(false); + __builtin_trap(); } } catch (const json::parse_error&) @@ -64,6 +71,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // out of range errors can occur during parsing, too } + // do a leak check if fuzzing with AFL++ and LSAN + __AFL_LEAK_CHECK(); + // return 0 - non-zero return values are reserved for future use return 0; } diff --git a/tests/src/fuzzer-parse_json.cpp b/tests/fuzz/src/fuzzer-parse_json.cpp similarity index 80% rename from tests/src/fuzzer-parse_json.cpp rename to tests/fuzz/src/fuzzer-parse_json.cpp index 9955ee154..f5e642786 100644 --- a/tests/src/fuzzer-parse_json.cpp +++ b/tests/fuzz/src/fuzzer-parse_json.cpp @@ -20,12 +20,16 @@ The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer drivers. 
*/ -#include -#include #include using json = nlohmann::json; +#ifdef __AFL_LEAK_CHECK + extern "C" void _exit(int); +#else + #define __AFL_LEAK_CHECK() do {} while(false) // NOLINT(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) +#endif + // see http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -34,6 +38,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // step 1: parse input json j1 = json::parse(data, data + size); + // parse errors must raise an exception and not silently result in discarded values + assert(!j1.is_discarded()); + try { // step 2: round trip @@ -53,7 +60,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) catch (const json::parse_error&) { // parsing a JSON serialization must not fail - assert(false); + __builtin_trap(); } } catch (const json::parse_error&) @@ -65,6 +72,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // out of range errors may happen if provided sizes are excessive } + // do a leak check if fuzzing with AFL++ and LSAN + __AFL_LEAK_CHECK(); + // return 0 - non-zero return values are reserved for future use return 0; } diff --git a/tests/src/fuzzer-parse_msgpack.cpp b/tests/fuzz/src/fuzzer-parse_msgpack.cpp similarity index 80% rename from tests/src/fuzzer-parse_msgpack.cpp rename to tests/fuzz/src/fuzzer-parse_msgpack.cpp index 9d7c0e3b1..dd57adf01 100644 --- a/tests/src/fuzzer-parse_msgpack.cpp +++ b/tests/fuzz/src/fuzzer-parse_msgpack.cpp @@ -19,12 +19,16 @@ The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer drivers. 
*/ -#include -#include #include using json = nlohmann::json; +#ifdef __AFL_LEAK_CHECK + extern "C" void _exit(int); +#else + #define __AFL_LEAK_CHECK() do {} while(false) // NOLINT(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) +#endif + // see http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -34,6 +38,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) std::vector vec1(data, data + size); json j1 = json::from_msgpack(vec1); + // parse errors must raise an exception and not silently result in discarded values + assert(!j1.is_discarded()); + try { // step 2: round trip @@ -48,7 +55,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) catch (const json::parse_error&) { // parsing a MessagePack serialization must not fail - assert(false); + __builtin_trap(); } } catch (const json::parse_error&) @@ -64,6 +71,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // out of range errors may happen if provided sizes are excessive } + // do a leak check if fuzzing with AFL++ and LSAN + __AFL_LEAK_CHECK(); + // return 0 - non-zero return values are reserved for future use return 0; } diff --git a/tests/src/fuzzer-parse_ubjson.cpp b/tests/fuzz/src/fuzzer-parse_ubjson.cpp similarity index 86% rename from tests/src/fuzzer-parse_ubjson.cpp rename to tests/fuzz/src/fuzzer-parse_ubjson.cpp index b40300154..37543528c 100644 --- a/tests/src/fuzzer-parse_ubjson.cpp +++ b/tests/fuzz/src/fuzzer-parse_ubjson.cpp @@ -25,12 +25,16 @@ The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer drivers. 
*/ -#include -#include #include using json = nlohmann::json; +#ifdef __AFL_LEAK_CHECK + extern "C" void _exit(int); +#else + #define __AFL_LEAK_CHECK() do {} while(false) // NOLINT(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) +#endif + // see http://llvm.org/docs/LibFuzzer.html extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -40,6 +44,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) std::vector vec1(data, data + size); json j1 = json::from_ubjson(vec1); + // parse errors must raise an exception and not silently result in discarded values + assert(!j1.is_discarded()); + try { // step 2.1: round trip without adding size annotations to container types @@ -64,7 +71,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) catch (const json::parse_error&) { // parsing a UBJSON serialization must not fail - assert(false); + __builtin_trap(); } } catch (const json::parse_error&) @@ -80,6 +87,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // out of range errors may happen if provided sizes are excessive } + // do a leak check if fuzzing with AFL++ and LSAN + __AFL_LEAK_CHECK(); + // return 0 - non-zero return values are reserved for future use return 0; } From 5fb19f98565ad16bb4f32099fcf33c19e03abbdd Mon Sep 17 00:00:00 2001 From: Florian Albrechtskirchinger Date: Mon, 1 Aug 2022 14:48:10 +0200 Subject: [PATCH 2/3] Add the CMake integration for the fuzzing tool --- CMakeLists.txt | 4 +- tests/CMakeLists.txt | 8 +++ tests/fuzz/CMakeLists.txt | 104 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 tests/fuzz/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 1945b2cd3..e23376cf5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,8 @@ option(JSON_Install "Install CMake targets during install option(JSON_MultipleHeaders "Use non-amalgamated version of the library." 
ON) option(JSON_SystemInclude "Include as system headers (skip for clang-tidy)." OFF) +option(JSON_BuildFuzzers "Build fuzz testing binaries. Requires JSON_BuildTests=ON." OFF) + if (JSON_CI) include(ci) endif () @@ -145,7 +147,7 @@ CONFIGURE_FILE( ## ## TESTS -## create and configure the unit test target +## create and configure the unit test target; build fuzzers, if enabled ## if (JSON_BuildTests) include(CTest) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 65b610f0e..d98d3c9f7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -172,3 +172,11 @@ add_subdirectory(cmake_add_subdirectory) add_subdirectory(cmake_fetch_content) add_subdirectory(cmake_fetch_content2) add_subdirectory(cmake_target_include_directories) + +############################################################################# +# fuzz testing +############################################################################# + +if(JSON_BuildFuzzers) + add_subdirectory(fuzz) +endif() diff --git a/tests/fuzz/CMakeLists.txt b/tests/fuzz/CMakeLists.txt new file mode 100644 index 000000000..7adf4b8f2 --- /dev/null +++ b/tests/fuzz/CMakeLists.txt @@ -0,0 +1,104 @@ +set(JSON_FUZZ_ENGINE "afl++" CACHE STRING "The engine to use for fuzz testing.") +set(JSON_FUZZ_TARGETS "json;bjdata;bson;cbor;msgpack;ubjson" CACHE STRING "List of targets/formats to fuzz test.") +set(JSON_FUZZ_SANITIZERS "asan+cfisan+lsan+ubsan;msan" CACHE STRING "List of sanitizers/combinations of sanitizers to build fuzzers for.") +set(JSON_FUZZ_CORPUS_MAX_SIZE "5k" CACHE STRING "Maximum file size for corpus data.") +option(JSON_FUZZ_MINIMIZE_CORPUS "Minimize the corpa generated from test data." ON) +set(JSON_FUZZ_TEMP_DIR "" CACHE PATH "Path to temporary directory. 
Should be on a tmpfs or equivalent (AFL++).") +set(JSON_FUZZ_NUM_JOBS 8 CACHE STRING "Number of parallel fuzzing jobs.") +set(JSON_FUZZ_AFL_DIR "" CACHE PATH "Path to AFL++.") +set(JSON_FUZZ_AFL_INSTRUMENTATIONS "laf-intel;complog" CACHE STRING "List of AFL++ instrumentations to build fuzzers for.") + +option(JSON_FUZZ_AFL_EXIT_WHEN_DONE "Exit fuzzer when no new paths have been discovered for a while." ON) + +set(JSON_FUZZ_MAX_TIME "" ON) + +include(fuzz) + +# find_program() requires permission to execute but not to read +cmake_policy(SET CMP0109 NEW) + +############################################################################# +# validate settings +############################################################################# + +# check fuzz engine +string(TOLOWER "${JSON_FUZZ_ENGINE}" fuzz_engine) +if(NOT "${fuzz_engine}" STREQUAL "afl++") + message(FATAL_ERROR "Unsupoorted fuzz engine: ${fuzz_engine}") +endif() +set(JSON_FUZZ_ENGINE "${fuzz_engine}" CACHE STRING "" FORCE) + +if(${JSON_FUZZ_ENGINE} STREQUAL afl++) + # find compiler + find_program (JSON_FUZZ_CXX_COMPILER + NAMES afl-clang-lto++ afl-clang-fast++ + DOC "AFL++ C++ compiler" REQUIRED) + find_program (JSON_FUZZ_AFL_FUZZ + NAMES afl-fuzz + DOC "AFL++ fuzzer runner" REQUIRED) + if(JSON_FUZZ_MINIMIZE_CORPUS) + find_program (JSON_FUZZ_AFL_CMIN + NAMES afl-cmin + DOC "AFL++ corpus minimizer" REQUIRED) + endif() +elseif(${JSON_FUZZ_ENGINE} STREQUAL libfuzzer) + if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) + set(JSON_FUZZ_CXX_COMPILER "${CMAKE_CXX_COMPILER}") + endif() + find_program (JSON_FUZZ_CXX_COMPILER + NAMES clang++ + DOC "Clang C++ compiler" REQUIRED) +endif() + +# TODO does libFuzzer hammer the disk as much as AFL++? +if(${JSON_FUZZ_ENGINE} STREQUAL afl++) + if(NOT JSON_FUZZ_TEMP_DIR) + # try to default AFL++ temp. 
directory to XDG_RUNTIME_DIR on Linux + if(UNIX AND NOT APPLE AND DEFINED ENV{XDG_RUNTIME_DIR}) + find_program(mount_program mount) + if(mount_program) + execute_process(COMMAND ${mount_program} + OUTPUT_VARIABLE mount_output + ERROR_QUIET) + string(REGEX MATCH "[^ ]+ on $ENV{XDG_RUNTIME_DIR}(/[^ ]*)? type tmpfs" mount_match "${mount_output}") + if(mount_match) + string(RANDOM suffix) + set(temp_dir "$ENV{XDG_RUNTIME_DIR}/json_fuzz_tmp.${suffix}") + set(JSON_FUZZ_TEMP_DIR "${temp_dir}" CACHE PATH "" FORCE) + endif() + endif() + endif() + endif() + + if(NOT JSON_FUZZ_TEMP_DIR) + message(WARNING "JSON_FUZZ_TEMP_DIR should point to a directory on an in-memory file system.") + string(RANDOM suffix) + set(temp_dir "$ENV{CMAKE_CURRENT_BINARY_DIR}/json_fuzz_tmp.${suffix}") + set(JSON_FUZZ_TEMP_DIR "${temp_dir}" CACHE PATH "" FORCE) + endif() + + message(STATUS "Temporary directory for fuzzing: ${JSON_FUZZ_TEMP_DIR}") +endif() + +############################################################################# +# set up fuzzing +############################################################################# + +foreach(target ${JSON_FUZZ_TARGETS}) + json_fuzz_add_fuzzers(${target} + SOURCES src/fuzzer-parse_${target}.cpp + ENGINE ${JSON_FUZZ_ENGINE} + SANITIZERS ${JSON_FUZZ_SANITIZERS} + INSTRUMENTATIONS ${JSON_FUZZ_AFL_INSTRUMENTATIONS}) + + json_fuzz_add_corpus(${target} GLOB "**/*.${target}" + MAX_SIZE ${JSON_FUZZ_CORPUS_MAX_SIZE}) + + if(JSON_FUZZ_MINIMIZE_CORPUS) + json_fuzz_minimize_corpus(${target} + ENGINE ${JSON_FUZZ_ENGINE}) + endif() + + json_fuzz_add_fuzz_run_target(${target} + TEMP_DIR "${JSON_FUZZ_TEMP_DIR}") +endforeach() From adf7cc7743bdb28be314eef3388137303ba24ad8 Mon Sep 17 00:00:00 2001 From: Florian Albrechtskirchinger Date: Mon, 1 Aug 2022 14:54:07 +0200 Subject: [PATCH 3/3] Add Python fuzzing tool --- tools/fuzz/fuzz.py | 388 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100755 tools/fuzz/fuzz.py diff --git 
#!/usr/bin/env python3
"""Fuzz testing helper (tools/fuzz/fuzz.py).

Sub-commands:

* ``generate-corpus`` - copy files matching a glob pattern and a size limit
  from an input directory into a (new or empty) output directory.
* ``minimize-corpus`` - run the AFL++ corpus minimizer on a corpus directory.
* ``run``             - allocate, start and supervise parallel AFL++ jobs.
"""

import datetime
import logging
import os
import pathlib
import shlex
import shutil
import signal
import subprocess
import sys

DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Captured once at import time; used to build per-job log file names.
START_TIME = datetime.datetime.now()


class ExitHandler(logging.StreamHandler):
    """Logging handler that terminates the process on severe records."""

    def __init__(self, level):
        """Remember the minimum record level that triggers ``sys.exit(1)``."""
        super().__init__()
        self.level = level

    def emit(self, record):
        """Exit with status 1 if the record is at or above the configured level."""
        if record.levelno >= self.level:
            sys.exit(1)


class Pool:
    """A budget of jobs of one kind that is handed out in fixed-size steps.

    Parameters:
        name: label used in verbose output.
        size: total number of jobs this pool may allocate; negative means
            unlimited.
        allocation_size: number of jobs handed out per ``allocate()`` call;
            negative means "the whole pool at once".
        allocate_fn: callable invoked with the number of jobs to create.
        prio_pools: optional pools that must be exhausted before this pool
            hands out anything.
    """

    def __init__(self, name, size, allocation_size, allocate_fn, prio_pools=None):
        self.name = name
        self.total_size = size
        self.size = 0  # number of jobs allocated so far
        self.allocation_size = size if allocation_size < 0 else allocation_size
        self.allocate_fn = allocate_fn
        self.prio_pools = prio_pools

    def next_allocation_size(self):
        """Return how many jobs the next ``allocate()`` call would create."""
        # Yield to higher-priority pools while any of them can still allocate.
        if self.prio_pools:
            for pool in self.prio_pools:
                if pool.next_allocation_size() > 0:
                    return 0

        size = self.allocation_size
        if self.total_size >= 0:
            # Bounded pool: never hand out more than what is left.
            size = min(size, max(0, self.total_size - self.size))
        return size

    def allocate(self):
        """Allocate the next step; return True if any job was created."""
        size = self.next_allocation_size()
        if size <= 0:
            return False
        self.allocate_fn(size)
        self.size += size
        return True


class Job:
    """A single fuzzer process.

    Parameters:
        name: job name; also the stem of the log file for non-main jobs.
        args: argument vector passed to ``subprocess.Popen``.
        env: extra environment variables for the process.
        is_main: the main job keeps the terminal; all other jobs run with
            ``AFL_NO_UI=1`` and have their output redirected to a log file.
    """

    def __init__(self, name, args, env=None, is_main=False):
        self.name = name
        self.args = args
        self.env = env or {}
        self.is_main = is_main
        self.proc = None

    def is_running(self):
        """Return True while the process has been started and has not exited."""
        return self.proc is not None and self.proc.poll() is None

    def _command_line(self):
        """Render the job as a shell-like ``KEY=VALUE ... cmd args`` string."""
        prefix = ''.join(f'{key}={val} ' for key, val in self.env.items())
        return prefix + ' '.join(shlex.quote(str(arg)) for arg in self.args)

    def _process_env(self):
        """Build the child's environment.

        The job-specific variables extend the inherited environment; passing
        only ``self.env`` to ``Popen`` would drop PATH, sanitizer options and
        any AFL_* settings supplied by the caller.
        """
        env = dict(os.environ)
        env.update({key: str(val) for key, val in self.env.items()})
        return env

    def start(self):
        """Start the process.

        The main job prints its command line; every other job writes it to
        the head of its log file and sends stdout/stderr there.
        """
        if self.is_main:
            print(self._command_line())
            print('\n\n')
            self.proc = subprocess.Popen(self.args, env=self._process_env())
            return

        self.env['AFL_NO_UI'] = '1'
        log_name = f"{self.name}-{START_TIME.date()}T{START_TIME.time()}.log"
        # The child keeps its own duplicate of the descriptor, so the parent's
        # handle can be closed as soon as the process has been spawned.
        with open(log_name, "w") as log:
            log.write(self._command_line())
            log.write('\n\n')
            # Flush before spawning so the header precedes the child's output.
            log.flush()
            self.proc = subprocess.Popen(self.args,
                                         env=self._process_env(),
                                         stdin=subprocess.DEVNULL,
                                         stdout=log,
                                         stderr=subprocess.STDOUT)

    def wait(self, timeout=None):
        """Wait for the process; raises ``subprocess.TimeoutExpired`` on timeout."""
        return self.proc.wait(timeout)

    def terminate(self):
        """Stop the process and return its exit code.

        Sends SIGINT first and falls back to ``Popen.terminate()`` if the
        process has not exited within half a second. Returns ``None`` for a
        job that was never started.
        """
        if self.proc is None:
            return None

        if self.proc.poll() is None:
            self.proc.send_signal(signal.SIGINT)
            try:
                self.proc.wait(0.5)
            except subprocess.TimeoutExpired:
                self.proc.terminate()
                self.proc.wait()

        return self.proc.poll()


class AFLJobAllocator:
    """Distributes the requested number of AFL++ jobs over job flavours."""

    # Placeholders in job argument lists, substituted by ``resolve()``.
    DEFAULT_FUZZER_BIN = -1
    FUZZER_BIN = -2

    def __init__(self, args):
        self.args = args
        self.jobs = []

    def allocate_jobs(self):
        """Populate ``self.jobs`` by cycling through the pools.

        Pools are visited round-robin until ``args.num_jobs`` jobs exist. The
        sanitizer pool allocates all of its jobs in one step, so the final
        count can exceed ``args.num_jobs``.
        """
        # Work on copies: the allocation callbacks consume these lists.
        self.sanitizers = list(self.args.sanitizers)
        self.power_schedules = ['explore', 'coe', 'lin', 'quad', 'exploit']
        self.jobs.clear()

        instrumentations = self.args.instrumentations
        pools = [
            Pool('main', 1, 1, self.allocate_main_job),
            Pool('sanitizer', len(self.sanitizers), -1, self.allocate_sanitizer_job),
            Pool('complog', 2 if 'complog' in instrumentations else 0, 1, self.allocate_complog_job),
            Pool('laf-intel', 3 if 'laf-intel' in instrumentations else 0, 1, self.allocate_laf_intel_job),
            Pool('MOpt', int(self.args.num_jobs / 3 + 0.5), 1, self.allocate_mopt_job),
            Pool('power schedule', len(self.power_schedules), 1, self.allocate_power_sched_job),
        ]
        # The unbounded filler pool only kicks in once all others are exhausted.
        pools.append(Pool('filler', -1, 1, self.allocate_filler_job, pools.copy()))

        while len(self.jobs) < self.args.num_jobs:
            for pool in pools:
                if len(self.jobs) >= self.args.num_jobs:
                    break
                pool.allocate()

        if self.args.verbose:
            label = '# allocated job(s):'
            print(f'{label:<24} {len(self.jobs)}')
            for pool in pools:
                label = f'{pool.name} job(s):'
                print(f'{label:<24} {pool.size}')

    def resolve(self, args, fuzzer_bin):
        """Replace the ``*_FUZZER_BIN`` placeholders in ``args`` with paths."""
        replacements = {
            self.DEFAULT_FUZZER_BIN: self.args.default_fuzzer_bin,
            self.FUZZER_BIN: fuzzer_bin,
        }
        return [replacements.get(arg, arg) for arg in (args or [])]

    def allocate_job(self, suffix=None, args=None, use_default_fuzzer_bin=False):
        """Create one job and append it to ``self.jobs``.

        Parameters:
            suffix: inserted into the fuzzer binary name as
                ``<name>.<suffix><bin_suffix>`` to select a build variant.
            args: extra driver arguments; may contain placeholders.
            use_default_fuzzer_bin: run the default binary as the target
                (the variant binary is then only reachable via ``args``).
        """
        fuzzer_bin = self.args.fuzzer_bin
        if suffix:
            bin_suffix = self.args.fuzzer_bin_suffix
            stem = fuzzer_bin.name
            stem = stem[:len(stem) - len(bin_suffix)]
            fuzzer_bin = fuzzer_bin.with_name(f'{stem}.{suffix}{bin_suffix}')

        name = self.job_name()
        # The first job is the main instance (-M); the rest are secondaries (-S).
        is_main = len(self.jobs) == 0
        dist_args = ['-M' if is_main else '-S', name]

        extra_args = self.resolve(args, fuzzer_bin)
        if use_default_fuzzer_bin:
            fuzzer_bin = self.args.default_fuzzer_bin

        fuzzer_args = [
            self.args.driver,
            '-i', self.args.input_dir,
            '-o', self.args.output_dir,
            '-t', '+1000',
            *dist_args,
            *extra_args,
            '--', fuzzer_bin,
        ]
        fuzzer_env = {
            'AFL_IMPORT_FIRST': '0',
            'AFL_CMPLOG_ONLY_NEW': '1',
        }
        if self.args.resume:
            fuzzer_env['AFL_AUTORESUME'] = '1'
        if self.args.tmp_dir:
            tmp_dir = self.args.tmp_dir / name
            tmp_dir.mkdir(parents=True, exist_ok=True)
            fuzzer_env['AFL_TMPDIR'] = str(tmp_dir)

        self.jobs.append(Job(name, fuzzer_args, fuzzer_env, is_main))

    def job_name(self):
        """Return the name for the next job: binary name plus running index."""
        return f'{self.args.fuzzer_bin.name}{len(self.jobs)}'

    def allocate_main_job(self, n):
        assert n == 1 and len(self.jobs) == 0
        self.allocate_job(args=['-Z'])

    def allocate_sanitizer_job(self, n):
        for _ in range(n):
            self.allocate_job(suffix=self.sanitizers.pop(0))

    def allocate_complog_job(self, n):
        for _ in range(n):
            self.allocate_job(suffix='complog', args=['-c', self.FUZZER_BIN], use_default_fuzzer_bin=True)

    def allocate_laf_intel_job(self, n):
        for _ in range(n):
            self.allocate_job(suffix='laf-intel')

    def allocate_power_sched_job(self, n):
        for _ in range(n):
            self.allocate_job(args=['-p', self.power_schedules.pop(0)])

    def allocate_mopt_job(self, n):
        for _ in range(n):
            self.allocate_job(args=['-L', '0'])

    def allocate_filler_job(self, n):
        for _ in range(n):
            self.allocate_job()


class Fuzz:
    """Engine-independent commands; constructing an instance runs the command.

    Parameters:
        args: parsed command line namespace (must provide ``command``).
        error_fn: callable used to report usage errors.
    """

    def __init__(self, args, error_fn):
        self.args = args
        self.error_fn = error_fn
        self.dispatch()

    def _error(self, message):
        """Report a usage error via the sub-command's reporter if available."""
        error_fn = getattr(self.args, 'error_fn', None) or self.error_fn
        error_fn(message)

    def dispatch(self):
        """Call the ``cmd_<command>`` method matching ``args.command``."""
        cmd = f'cmd_{self.args.command}'.replace('-', '_')
        if not hasattr(self, cmd):
            engine = getattr(self.args, 'engine', None)
            self.error_fn(f'unknown command: {self.args.command} ({engine})')
        getattr(self, cmd)()

    def check_io_dirs(self):
        """Validate ``input_dir`` and make sure ``output_dir`` exists and is empty."""
        if not self.args.input_dir.exists() or not self.args.input_dir.is_dir():
            self._error('input_dir must be an existing directory')

        if self.args.output_dir.exists():
            if not self.args.output_dir.is_dir() or any(self.args.output_dir.iterdir()):
                self._error('output_dir must either not exists or be an empty directory')
        else:
            self.args.output_dir.mkdir(parents=True)

    def cmd_generate_corpus(self):
        """Copy every matching file up to ``max_size`` bytes into ``output_dir``.

        Files are copied flat, so equally named files from different
        sub-directories overwrite each other.
        """
        self.check_io_dirs()

        for path in self.args.input_dir.glob(self.args.glob):
            # The glob may match directories, which shutil.copy cannot handle.
            if path.is_file() and path.stat().st_size <= self.args.max_size:
                shutil.copy(path, self.args.output_dir)


class AFLFuzz(Fuzz):
    """Commands implemented on top of the AFL++ tools."""

    def cmd_minimize_corpus(self):
        """Run the corpus minimizer; optionally replace the input directory.

        Raises:
            RuntimeError: if the minimizer exits with a non-zero status.
        """
        self.check_io_dirs()

        if not self.args.fuzzer_bin:
            self._error('required argument missing: -b/--fuzzer-bin')

        # Fall back to looking up afl-cmin in PATH when no minimizer was given.
        minimizer_bin = getattr(self.args, 'minimizer_bin', None) or pathlib.Path('afl-cmin')
        proc = subprocess.run([minimizer_bin,
                               '-i', self.args.input_dir, '-o', self.args.output_dir,
                               '--', self.args.fuzzer_bin])

        if proc.returncode != 0:
            raise RuntimeError('subprocess failed')

        if self.args.replace_input_dir:
            shutil.rmtree(self.args.input_dir)
            shutil.move(self.args.output_dir, self.args.input_dir)

    def cmd_run(self):
        """Start all allocated jobs and supervise them until they have exited.

        A keyboard interrupt stops supervision; in every case all jobs are
        terminated before returning. Each failing job is reported once.
        """
        self.args.default_fuzzer_bin = self.args.fuzzer_bin = self.args.fuzzer_bin.resolve()
        if not self.args.driver.is_absolute():
            driver = shutil.which(self.args.driver)
            if not driver:
                self._error('driver does not reference an executable in PATH')
            self.args.driver = pathlib.Path(driver)

        allocator = AFLJobAllocator(self.args)
        allocator.allocate_jobs()

        reported = set()

        def report(job, ret):
            # ``None`` means the job was never started or has not exited yet.
            if ret is not None and ret != 0 and job.name not in reported:
                reported.add(job.name)
                print(f'{job.name} failed ({ret})')

        try:
            for job in allocator.jobs:
                cur_input = (self.args.tmp_dir or self.args.output_dir) / job.name / '.cur_input'
                cur_input.unlink(missing_ok=True)
                job.start()
            while any(job.is_running() for job in allocator.jobs):
                for job in allocator.jobs:
                    try:
                        report(job, job.wait(0.5))
                    except subprocess.TimeoutExpired:
                        pass
        except KeyboardInterrupt:
            pass
        finally:
            for job in allocator.jobs:
                report(job, job.terminate())


class LLVMFuzz(Fuzz):
    """Placeholder for libFuzzer support; only the generic commands exist."""
    pass


def cmake_list(arg):
    """Split a CMake-style semicolon-separated list into a Python list."""
    return arg.split(';')


def iec_number(arg):
    """Parse an integer with an optional binary unit suffix (k, m, g, t).

    The suffix is case-insensitive and scales by powers of 1024, e.g.
    ``'5k'`` -> 5120.

    Raises:
        ValueError: for an unknown suffix or a non-integer number part.
    """
    units = 'kmgt'
    number, unit = arg[:-1], arg[-1:]
    if not unit.isalpha():
        return int(arg)

    exponent = units.find(unit.lower()) + 1
    if exponent < 1:
        raise ValueError()
    return int(number) * 1024**exponent


if __name__ == '__main__':
    import argparse

    ec = 1

    # setup logging
    logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                        datefmt=DATETIME_FORMAT, level=logging.INFO)
    log = logging.getLogger()
    log.addHandler(ExitHandler(logging.ERROR))

    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output.')
    subparsers = parser.add_subparsers(title='commands', required=True)

    gen_corpus_parser = subparsers.add_parser('generate-corpus', aliases=['gen-corpus'])
    gen_corpus_parser.set_defaults(command='generate-corpus', error_fn=gen_corpus_parser.error)
    gen_corpus_parser.add_argument('-i', '--input-dir', dest='input_dir', type=pathlib.Path, metavar='DIR', help='Input corpus directory.', required=True)
    gen_corpus_parser.add_argument('-o', '--output-dir', dest='output_dir', type=pathlib.Path, metavar='DIR', help='Output corpus directory.', required=True)
    gen_corpus_parser.add_argument('-g', '--glob', dest='glob', metavar='PATTERN', default='**/*.*', help='Glob pattern of files to include.')
    gen_corpus_parser.add_argument('-m', '--max-size', dest='max_size', type=iec_number, default=5*1024, help='Maximum file size.')

    min_corpus_parser = subparsers.add_parser('minimize-corpus', aliases=['min-corpus'])
    min_corpus_parser.set_defaults(command='minimize-corpus', error_fn=min_corpus_parser.error)
    min_corpus_parser.add_argument('-e', '--fuzz-engine', dest='engine', type=str.lower, metavar='ENGINE', choices=['afl++', 'libfuzzer'], help='The fuzzing engine to use. AFL++ or libFuzzer.', required=True)
    min_corpus_parser.add_argument('-i', '--input-dir', dest='input_dir', type=pathlib.Path, metavar='DIR', help='Input corpus directory.', required=True)
    min_corpus_parser.add_argument('-o', '--output-dir', dest='output_dir', type=pathlib.Path, metavar='DIR', help='Output corpus directory.', required=True)
    min_corpus_parser.add_argument('-r', '--replace-input-dir', dest='replace_input_dir', action='store_true', help='Replace input directory with output directory.')
    min_corpus_parser.add_argument('-b', '--fuzzer-bin', dest='fuzzer_bin', type=pathlib.Path, help='Fuzzer binary.', required=True)
    min_corpus_parser.add_argument('-m', '--minimizer-bin', dest='minimizer_bin', type=pathlib.Path, default=pathlib.Path('afl-cmin'), help='Path to or name of afl-cmin binary (AFL++).')

    run_parser = subparsers.add_parser('run')
    run_parser.set_defaults(command='run', error_fn=run_parser.error)
    run_parser.add_argument('-e', '--fuzz-engine', dest='engine', type=str.lower, metavar='ENGINE', choices=['afl++', 'libfuzzer'], help='The fuzzing engine to use. AFL++ or libFuzzer.', required=True)
    run_parser.add_argument('-i', '--input-dir', dest='input_dir', type=pathlib.Path, metavar='DIR', help='Corpus directory.', required=True)
    run_parser.add_argument('-o', '--output-dir', dest='output_dir', type=pathlib.Path, metavar='DIR', help='Findings directory.', required=True)
    run_parser.add_argument('-S', '--sanitizers', dest='sanitizers', type=cmake_list, metavar='LIST', default=[], help='Semicolon-separated list of sanitizers or plus-separated sanitizer combos.')
    run_parser.add_argument('-I', '--instrumentations', dest='instrumentations', type=cmake_list, metavar='LIST', default=[], help='Semicolon-separated list of instrumentations. (AFL++ only)')
    run_parser.add_argument('-j', '--parallel', dest='num_jobs', type=int, metavar='N', default=8, help='Number of parallel fuzzing jobs.')
    run_parser.add_argument('-r', '--resume', dest='resume', action='store_true', help='Resume fuzzing or restart.')
    # Optional: falls back to looking up afl-fuzz in PATH.
    run_parser.add_argument('-d', '--driver', dest='driver', type=pathlib.Path, default=pathlib.Path('afl-fuzz'), help='Path to or name of driver (afl-fuzz) binary.')
    run_parser.add_argument('-b', '--fuzzer-bin', dest='fuzzer_bin', type=pathlib.Path, help='Fuzzer binary.', required=True)
    run_parser.add_argument('-B', '--fuzzer-bin-suffix', dest='fuzzer_bin_suffix', default='', help='Fuzzer binary suffix.')
    run_parser.add_argument('-t', '--tmp-dir', dest='tmp_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='Path to temporary directory.')

    args = parser.parse_args()

    try:
        if 'engine' in args:
            if args.engine == 'afl++':
                fuzz_class = AFLFuzz
            elif args.engine == 'libfuzzer':
                fuzz_class = LLVMFuzz
        else:
            fuzz_class = Fuzz
        fuzz_class(args, parser.error)
        ec = 0
    except Exception:
        log.exception('an error occurred:')
    finally:
        if args.verbose:
            log.info(f'exiting with code {ec}')
        sys.exit(ec)