Merge branch 'feature/fastcov' into develop

This commit is contained in:
Niels Lohmann 2019-04-06 09:15:43 +02:00
commit ee4028b8e4
No known key found for this signature in database
GPG Key ID: 7F3CEA63AE251B69
6 changed files with 464 additions and 7 deletions

View File

@ -35,6 +35,7 @@ all:
@echo "check-fast - compile and execute test suite (skip long-running tests)"
@echo "clean - remove built files"
@echo "coverage - create coverage information with lcov"
@echo "coverage-fast - create coverage information with fastcov"
@echo "cppcheck - analyze code with cppcheck"
@echo "cpplint - analyze code with cpplint"
@echo "clang_tidy - analyze code with Clang-Tidy"
@ -82,6 +83,14 @@ coverage:
cd build_coverage ; ninja lcov_html
open build_coverage/test/html/index.html
coverage-fast:
rm -fr build_coverage
mkdir build_coverage
cd build_coverage ; CXX=$(COMPILER_DIR)/g++ cmake .. -GNinja -DJSON_Coverage=ON -DJSON_MultipleHeaders=ON
cd build_coverage ; ninja
cd build_coverage ; ctest -E '.*_default' -j10
cd build_coverage ; ninja fastcov_html
open build_coverage/test/html/index.html
##########################################################################
# documentation tests

View File

@ -1281,9 +1281,11 @@ The library itself consists of a single header file licensed under the MIT licen
- [**doctest**](https://github.com/onqtam/doctest) for the unit tests
- [**Doozer**](https://doozer.io) for [continuous integration](https://doozer.io/nlohmann/json) on Linux (CentOS, Raspbian, Fedora)
- [**Doxygen**](http://www.stack.nl/~dimitri/doxygen/) to generate [documentation](https://nlohmann.github.io/json/)
- [**fastcov**](https://github.com/RPGillespie6/fastcov) to process coverage information
- [**git-update-ghpages**](https://github.com/rstacruz/git-update-ghpages) to upload the documentation to gh-pages
- [**GitHub Changelog Generator**](https://github.com/skywinder/github-changelog-generator) to generate the [ChangeLog](https://github.com/nlohmann/json/blob/develop/ChangeLog.md)
- [**Google Benchmark**](https://github.com/google/benchmark) to implement the benchmarks
- [**lcov**](http://ltp.sourceforge.net/coverage/lcov.php) to process coverage information and create a HTML view
- [**libFuzzer**](http://llvm.org/docs/LibFuzzer.html) to implement fuzz testing for OSS-Fuzz
- [**OSS-Fuzz**](https://github.com/google/oss-fuzz) for continuous fuzz testing of the library ([project repository](https://github.com/google/oss-fuzz/tree/master/projects/json))
- [**Probot**](https://probot.github.io) for automating maintainer tasks such as closing stale issues, requesting missing information, or detecting toxic comments.

View File

@ -27,17 +27,17 @@ endif()
if(JSON_Coverage)
message(STATUS "Building test suite with coverage information")
#if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# message(FATAL_ERROR "JSON_Coverage requires GCC.")
#endif()
if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
message(FATAL_ERROR "JSON_Coverage requires GCC.")
endif()
# enable profiling
set(CMAKE_CXX_FLAGS "--coverage -g -O0 -fprofile-arcs -ftest-coverage")
# from https://github.com/RWTH-HPC/CMake-codecov/blob/master/cmake/FindGcov.cmake
#get_filename_component(COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH)
#string(REGEX MATCH "^[0-9]+" GCC_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
#find_program(GCOV_BIN NAMES gcov-${GCC_VERSION} gcov HINTS ${COMPILER_PATH})
get_filename_component(COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH)
string(REGEX MATCH "^[0-9]+" GCC_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
find_program(GCOV_BIN NAMES gcov-${GCC_VERSION} gcov HINTS ${COMPILER_PATH})
# collect all source files from the chosen include dir
file(GLOB_RECURSE SOURCE_FILES ${NLOHMANN_JSON_INCLUDE_BUILD_DIR}*.hpp)
@ -46,11 +46,18 @@ if(JSON_Coverage)
# (filter script from https://stackoverflow.com/a/43726240/266378)
add_custom_target(lcov_html
COMMAND lcov --directory . --capture --output-file json.info --rc lcov_branch_coverage=1
COMMAND lcov -e json.info ${SOURCE_FILES} --output-file json.info.filtered --rc lcov_branch_coverage=1
COMMAND lcov -e json.info ${SOURCE_FILES} --output-file json.info.filtered --gcov-tool ${GCOV_BIN} --rc lcov_branch_coverage=1
COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/imapdl/filterbr.py json.info.filtered > json.info.filtered.noexcept
COMMAND genhtml --title "JSON for Modern C++" --legend --demangle-cpp --output-directory html --show-details --branch-coverage json.info.filtered.noexcept
COMMENT "Generating HTML report test/html/index.html"
)
add_custom_target(fastcov_html
COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/fastcov/fastcov.py --branch-coverage --lcov -o json.info --gcov ${GCOV_BIN} --compiler-directory ${CMAKE_BINARY_DIR} --source-files ${SOURCE_FILES}
COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/imapdl/filterbr.py json.info > json.info.noexcept
COMMAND genhtml --title "JSON for Modern C++" --legend --demangle-cpp --output-directory html --show-details --branch-coverage json.info.noexcept
COMMENT "Generating HTML report test/html/index.html"
)
endif()
#############################################################################

21
test/thirdparty/fastcov/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
The MIT License
Copyright (c) 2018-2019 Bryan Gillespie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

46
test/thirdparty/fastcov/README.md vendored Normal file
View File

@ -0,0 +1,46 @@
# fastcov
A massively parallel gcov wrapper for generating intermediate coverage formats *fast*
The goal of fastcov is to generate code coverage intermediate formats *as fast as possible* (ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front end such as coveralls. fastcov was originally designed to be a drop-in replacement for lcov (application coverage only, not kernel coverage).
Currently the only intermediate formats supported are gcov json format and lcov info format. Adding support for other formats should require just a few lines of python to transform gcov json format to the desired shape.
In order to achieve the massive speed gains, a few constraints apply:
1. GCC version >= 9.0.0
These versions of GCOV have support for JSON intermediate format as well as streaming report data straight to stdout
2. Object files must be either be built:
- Using absolute paths for all `-I` flags passed to the compiler
- Invoking the compiler from the same root directory
If you use CMake, you are almost certainly satisfying the second constraint (unless you care about `ExternalProject` coverage).
## Sample Usage:
```bash
$ cd build_dir
$ fastcov.py --zerocounters
$ <run unit tests>
$ fastcov.py --exclude /usr/include --lcov -o report.info
$ genhtml -o code_coverage report.info
```
## Legacy fastcov
It is possible to reap most of the benefits of fastcov for GCC version < 9.0.0 and >= 7.1.0. However, there will be a *potential* header file loss of correctness.
`fastcov_legacy.py` can be used with pre GCC-9 down to GCC 7.1.0 but with a few penalties due to gcov limitations. This is because running gcov in parallel generates .gcov header reports in parallel which overwrite each other. This isn't a problem unless your header files have actual logic (i.e. header only library) that you want to measure coverage for. Use the `-F` flag to specify which gcda files should not be run in parallel in order to capture accurate header file data just for those. I don't plan on supporting `fastcov_legacy.py` aside from basic bug fixes.
## Benchmarks
Anecdotal testing on my own projects indicate that fastcov is over 100x faster than lcov and over 30x faster than gcovr:
Project Size: ~250 .gcda, ~500 .gcov generated by gcov
Time to process all gcda and parse all gcov:
- fastcov: ~700ms
- lcov: ~90s
- gcovr: ~30s

372
test/thirdparty/fastcov/fastcov.py vendored Executable file
View File

@ -0,0 +1,372 @@
#!/usr/bin/env python3
"""
Author: Bryan Gillespie
A massively parallel gcov wrapper for generating intermediate coverage formats fast
The goal of fastcov is to generate code coverage intermediate formats as fast as possible
(ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate
formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front
end such as coveralls.
Sample Usage:
$ cd build_dir
$ ./fastcov.py --zerocounters
$ <run unit tests>
$ ./fastcov.py --exclude /usr/include test/ --lcov -o report.info
$ genhtml -o code_coverage report.info
"""
import re
import os
import sys
import glob
import json
import time
import argparse
import threading
import subprocess
import multiprocessing
MINIMUM_GCOV = (9,0,0)
MINIMUM_CHUNK_SIZE = 5
# Interesting metrics
START_TIME = time.time()
GCOVS_TOTAL = []
GCOVS_SKIPPED = []
def chunks(l, n):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(l), n):
yield l[i:i + n]
def stopwatch():
"""Return number of seconds since last time this was called"""
global START_TIME
end_time = time.time()
delta = end_time - START_TIME
START_TIME = end_time
return delta
def parseVersionFromLine(version_str):
"""Given a string containing a dotted integer version, parse out integers and return as tuple"""
version = re.search(r'(\d+\.\d+\.\d+)[^\.]', version_str)
if not version:
return (0,0,0)
return tuple(map(int, version.group(1).split(".")))
def getGcovVersion(gcov):
p = subprocess.Popen([gcov, "-v"], stdout=subprocess.PIPE)
output = p.communicate()[0].decode('UTF-8')
p.wait()
return parseVersionFromLine(output.split("\n")[0])
def removeFiles(files):
for file in files:
os.remove(file)
def getFilteredGcdaFiles(gcda_files, exclude):
def excludeGcda(gcda):
for ex in exclude:
if ex in gcda:
return False
return True
return list(filter(excludeGcda, gcda_files))
def getGcdaFiles(cwd, gcda_files):
if not gcda_files:
gcda_files = glob.glob(os.path.join(os.path.abspath(cwd), "**/*.gcda"), recursive=True)
return gcda_files
def gcovWorker(cwd, gcov, files, chunk, gcov_filter_options, branch_coverage):
gcov_args = "-it"
if branch_coverage:
gcov_args += "b"
p = subprocess.Popen([gcov, gcov_args] + chunk, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
for line in iter(p.stdout.readline, b''):
intermediate_json = json.loads(line.decode(sys.stdout.encoding))
intermediate_json_files = processGcovs(cwd, intermediate_json["files"], gcov_filter_options)
for f in intermediate_json_files:
files.append(f) #thread safe, there might be a better way to do this though
GCOVS_TOTAL.append(len(intermediate_json["files"]))
GCOVS_SKIPPED.append(len(intermediate_json["files"])-len(intermediate_json_files))
p.wait()
def processGcdas(cwd, gcov, jobs, gcda_files, gcov_filter_options, branch_coverage):
chunk_size = max(MINIMUM_CHUNK_SIZE, int(len(gcda_files) / jobs) + 1)
threads = []
intermediate_json_files = []
for chunk in chunks(gcda_files, chunk_size):
t = threading.Thread(target=gcovWorker, args=(cwd, gcov, intermediate_json_files, chunk, gcov_filter_options, branch_coverage))
threads.append(t)
t.start()
log("Spawned %d gcov threads, each processing at most %d gcda files" % (len(threads), chunk_size))
for t in threads:
t.join()
return intermediate_json_files
def processGcov(cwd, gcov, files, gcov_filter_options):
# Add absolute path
gcov["file_abs"] = os.path.abspath(os.path.join(cwd, gcov["file"]))
# If explicit sources were passed, check for match
if gcov_filter_options["sources"]:
if gcov["file_abs"] in gcov_filter_options["sources"]:
files.append(gcov)
return
# Check include filter
if gcov_filter_options["include"]:
for ex in gcov_filter_options["include"]:
if ex in gcov["file"]:
files.append(gcov)
break
return
# Check exclude filter
for ex in gcov_filter_options["exclude"]:
if ex in gcov["file"]:
return
files.append(gcov)
def processGcovs(cwd, gcov_files, gcov_filter_options):
files = []
for gcov in gcov_files:
processGcov(cwd, gcov, files, gcov_filter_options)
return files
def dumpBranchCoverageToLcovInfo(f, branches):
branch_miss = 0
for line_num, branch_counts in branches.items():
for i, count in enumerate(branch_counts):
#Branch (<line number>, <block number>, <branch number>, <taken>)
f.write("BRDA:%s,%d,%d,%d\n" % (line_num, int(i/2), i, count))
branch_miss += int(count == 0)
f.write("BRF:%d\n" % len(branches)) #Branches Found
f.write("BRH:%d\n" % (len(branches) - branch_miss)) #Branches Hit
def dumpToLcovInfo(fastcov_json, output):
with open(output, "w") as f:
for sf, data in fastcov_json["sources"].items():
f.write("SF:%s\n" % sf) #Source File
fn_miss = 0
for function, fdata in data["functions"].items():
f.write("FN:%d,%s\n" % (fdata["start_line"], function)) #Function Start Line
f.write("FNDA:%d,%s\n" % (fdata["execution_count"], function)) #Function Hits
fn_miss += int(fdata["execution_count"] == 0)
f.write("FNF:%d\n" % len(data["functions"])) #Functions Found
f.write("FNH:%d\n" % (len(data["functions"]) - fn_miss)) #Functions Hit
if data["branches"]:
dumpBranchCoverageToLcovInfo(f, data["branches"])
line_miss = 0
for line_num, count in data["lines"].items():
f.write("DA:%s,%d\n" % (line_num, count)) #Line
line_miss += int(count == 0)
f.write("LF:%d\n" % len(data["lines"])) #Lines Found
f.write("LH:%d\n" % (len(data["lines"]) - line_miss)) #Lines Hit
f.write("end_of_record\n")
def exclMarkerWorker(fastcov_sources, chunk):
for source in chunk:
# If there are no covered lines, skip
if not fastcov_sources[source]["lines"]:
continue
start_line = 0
end_line = 0
with open(source) as f:
for i, line in enumerate(f, 1): #Start enumeration at line 1
if not "LCOV_EXCL" in line:
continue
if "LCOV_EXCL_LINE" in line:
if str(i) in fastcov_sources[source]["lines"]:
del fastcov_sources[source]["lines"][str(i)]
if str(i) in fastcov_sources[source]["branches"]:
del fastcov_sources[source]["branches"][str(i)]
elif "LCOV_EXCL_START" in line:
start_line = i
elif "LCOV_EXCL_STOP" in line:
end_line = i
if not start_line:
end_line = 0
continue
for key in ["lines", "branches"]:
for line_num in list(fastcov_sources[source][key].keys()):
if int(line_num) <= end_line and int(line_num) >= start_line:
del fastcov_sources[source][key][line_num]
start_line = end_line = 0
def scanExclusionMarkers(fastcov_json, jobs):
chunk_size = max(MINIMUM_CHUNK_SIZE, int(len(fastcov_json["sources"]) / jobs) + 1)
threads = []
for chunk in chunks(list(fastcov_json["sources"].keys()), chunk_size):
t = threading.Thread(target=exclMarkerWorker, args=(fastcov_json["sources"], chunk))
threads.append(t)
t.start()
log("Spawned %d threads each scanning at most %d source files" % (len(threads), chunk_size))
for t in threads:
t.join()
def distillFunction(function_raw, functions):
function_name = function_raw["name"]
if function_name not in functions:
functions[function_name] = {
"start_line": function_raw["start_line"],
"execution_count": function_raw["execution_count"]
}
else:
functions[function_name]["execution_count"] += function_raw["execution_count"]
def distillLine(line_raw, lines, branches):
line_number = str(line_raw["line_number"])
if line_number not in lines:
lines[line_number] = line_raw["count"]
else:
lines[line_number] += line_raw["count"]
for i, branch in enumerate(line_raw["branches"]):
if line_number not in branches:
branches[line_number] = []
blen = len(branches[line_number])
glen = len(line_raw["branches"])
if blen < glen:
branches[line_number] += [0] * (glen - blen)
branches[line_number][i] += branch["count"]
def distillSource(source_raw, sources):
source_name = source_raw["file_abs"]
if source_name not in sources:
sources[source_name] = {
"functions": {},
"branches": {},
"lines": {},
}
for function in source_raw["functions"]:
distillFunction(function, sources[source_name]["functions"])
for line in source_raw["lines"]:
distillLine(line, sources[source_name]["lines"], sources[source_name]["branches"])
def distillReport(report_raw):
report_json = {
"sources": {}
}
for source in report_raw:
distillSource(source, report_json["sources"])
return report_json
def dumpToJson(intermediate, output):
with open(output, "w") as f:
json.dump(intermediate, f)
def log(line):
if not args.quiet:
print("[{:.3f}s] {}".format(stopwatch(), line))
def getGcovFilterOptions(args):
return {
"sources": set([os.path.abspath(s) for s in args.sources]), #Make paths absolute, use set for fast lookups
"include": args.includepost,
"exclude": args.excludepost,
}
def main(args):
# Need at least gcov 9.0.0 because that's when gcov JSON and stdout streaming was introduced
current_gcov_version = getGcovVersion(args.gcov)
if current_gcov_version < MINIMUM_GCOV:
sys.stderr.write("Minimum gcov version {} required, found {}\n".format(".".join(map(str, MINIMUM_GCOV)), ".".join(map(str, current_gcov_version))))
exit(1)
# Get list of gcda files to process
gcda_files = getGcdaFiles(args.directory, args.gcda_files)
log("Found {} .gcda files ".format(len(gcda_files)))
# If gcda filtering is enabled, filter them out now
if args.excludepre:
gcda_files = getFilteredGcdaFiles(gcda_files, args.excludepre)
log("{} .gcda files after filtering".format(len(gcda_files)))
# We "zero" the "counters" by simply deleting all gcda files
if args.zerocounters:
removeFiles(gcda_files)
log("{} .gcda files removed".format(len(gcda_files)))
return
# Fire up one gcov per cpu and start processing gcdas
gcov_filter_options = getGcovFilterOptions(args)
intermediate_json_files = processGcdas(args.cdirectory, args.gcov, args.jobs, gcda_files, gcov_filter_options, args.branchcoverage)
# Summarize processing results
gcov_total = sum(GCOVS_TOTAL)
gcov_skipped = sum(GCOVS_SKIPPED)
log("Processed {} .gcov files ({} total, {} skipped)".format(gcov_total - gcov_skipped, gcov_total, gcov_skipped))
# Distill all the extraneous info gcov gives us down to the core report
fastcov_json = distillReport(intermediate_json_files)
log("Aggregated raw gcov JSON into fastcov JSON report")
# Dump to desired file format
if args.lcov:
scanExclusionMarkers(fastcov_json, args.jobs)
log("Scanned {} source files for exclusion markers".format(len(fastcov_json["sources"])))
dumpToLcovInfo(fastcov_json, args.output)
log("Created lcov info file '{}'".format(args.output))
elif args.gcov_raw:
dumpToJson(intermediate_json_files, args.output)
log("Created gcov raw json file '{}'".format(args.output))
else:
dumpToJson(fastcov_json, args.output)
log("Created fastcov json file '{}'".format(args.output))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='A parallel gcov wrapper for fast coverage report generation')
parser.add_argument('-z', '--zerocounters', dest='zerocounters', action="store_true", help='Recursively delete all gcda files')
# Enable Branch Coverage
parser.add_argument('-b', '--branch-coverage', dest='branchcoverage', action="store_true", help='Include branch counts in the coverage report')
# Filtering Options
parser.add_argument('-s', '--source-files', dest='sources', nargs="+", metavar='', default=[], help='Filter: Specify exactly which source files should be included in the final report. Paths must be either absolute or relative to current directory.')
parser.add_argument('-e', '--exclude', dest='excludepost', nargs="+", metavar='', default=[], help='Filter: Exclude source files from final report if they contain one of the provided substrings (i.e. /usr/include test/, etc.)')
parser.add_argument('-i', '--include', dest='includepost', nargs="+", metavar='', default=[], help='Filter: Only include source files in final report that contain one of the provided substrings (i.e. src/ etc.)')
parser.add_argument('-f', '--gcda-files', dest='gcda_files', nargs="+", metavar='', default=[], help='Filter: Specify exactly which gcda files should be processed instead of recursively searching the search directory.')
parser.add_argument('-E', '--exclude-gcda', dest='excludepre', nargs="+", metavar='', default=[], help='Filter: Exclude gcda files from being processed via simple find matching (not regex)')
parser.add_argument('-g', '--gcov', dest='gcov', default='gcov', help='Which gcov binary to use')
parser.add_argument('-d', '--search-directory', dest='directory', default=".", help='Base directory to recursively search for gcda files (default: .)')
parser.add_argument('-c', '--compiler-directory', dest='cdirectory', default=".", help='Base directory compiler was invoked from (default: .) \
This needs to be set if invoking fastcov from somewhere other than the base compiler directory.')
parser.add_argument('-j', '--jobs', dest='jobs', type=int, default=multiprocessing.cpu_count(), help='Number of parallel gcov to spawn (default: %d).' % multiprocessing.cpu_count())
parser.add_argument('-m', '--minimum-chunk-size', dest='minimum_chunk', type=int, default=5, help='Minimum number of files a thread should process (default: 5). \
If you have only 4 gcda files but they are monstrously huge, you could change this value to a 1 so that each thread will only process 1 gcda. Otherise fastcov will spawn only 1 thread to process all of them.')
parser.add_argument('-l', '--lcov', dest='lcov', action="store_true", help='Output in lcov info format instead of fastcov json')
parser.add_argument('-r', '--gcov-raw', dest='gcov_raw', action="store_true", help='Output in gcov raw json instead of fastcov json')
parser.add_argument('-o', '--output', dest='output', default="coverage.json", help='Name of output file (default: coverage.json)')
parser.add_argument('-q', '--quiet', dest='quiet', action="store_true", help='Suppress output to stdout')
args = parser.parse_args()
main(args)