diff options
author | Wladimir J. van der Laan <laanwj@protonmail.com> | 2020-07-30 15:20:19 +0200 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@protonmail.com> | 2020-07-30 15:34:17 +0200 |
commit | 4ebe2f6e752c453ff572eda4a108e747d6586c97 (patch) | |
tree | 59fdc087589ab55e8ade08e91fadeb150bd9733f /src/bench/bench.h | |
parent | 2a784723f0c0e642353dc74ec6aef4d5f8345044 (diff) | |
parent | 78c312c983255e15fc274de2368a2ec13ce81cbf (diff) | |
download | bitcoin-4ebe2f6e752c453ff572eda4a108e747d6586c97.tar.xz |
Merge #18011: Replace current benchmarking framework with nanobench
78c312c983255e15fc274de2368a2ec13ce81cbf Replace current benchmarking framework with nanobench (Martin Ankerl)
Pull request description:
Replace current benchmarking framework with nanobench
This replaces the current benchmarking framework with nanobench [1], an
MIT licensed single-header benchmarking library, of which I am the
autor. This has in my opinion several advantages, especially on Linux:
* fast: Running all benchmarks takes ~6 seconds instead of 4m13s on
an Intel i7-8700 CPU @ 3.20GHz.
* accurate: I ran e.g. the benchmark for SipHash_32b 10 times and
calculate standard deviation / mean = coefficient of variation:
* 0.57% CV for old benchmarking framework
* 0.20% CV for nanobench
So the benchmark results with nanobench seem to vary less than with
the old framework.
* It automatically determines runtime based on clock precision, no need
to specify number of evaluations.
* measure instructions, cycles, branches, instructions per cycle,
branch misses (only Linux, when performance counters are available)
* output in markdown table format.
* Warn about unstable environment (frequency scaling, turbo, ...)
* For better profiling, it is possible to set the environment variable
NANOBENCH_ENDLESS to force endless running of a particular benchmark
without the need to recompile. This makes it to e.g. run "perf top"
and look at hotspots.
Here is an example copy & pasted from the terminal output:
| ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 2.52 | 396,529,415.94 | 0.6% | 25.42 | 8.02 | 3.169 | 0.06 | 0.0% | 0.03 | `bench/crypto_hash.cpp RIPEMD160`
| 1.87 | 535,161,444.83 | 0.3% | 21.36 | 5.95 | 3.589 | 0.06 | 0.0% | 0.02 | `bench/crypto_hash.cpp SHA1`
| 3.22 | 310,344,174.79 | 1.1% | 36.80 | 10.22 | 3.601 | 0.09 | 0.0% | 0.04 | `bench/crypto_hash.cpp SHA256`
| 2.01 | 496,375,796.23 | 0.0% | 18.72 | 6.43 | 2.911 | 0.01 | 1.0% | 0.00 | `bench/crypto_hash.cpp SHA256D64_1024`
| 7.23 | 138,263,519.35 | 0.1% | 82.66 | 23.11 | 3.577 | 1.63 | 0.1% | 0.00 | `bench/crypto_hash.cpp SHA256_32b`
| 3.04 | 328,780,166.40 | 0.3% | 35.82 | 9.69 | 3.696 | 0.03 | 0.0% | 0.03 | `bench/crypto_hash.cpp SHA512`
[1] https://github.com/martinus/nanobench
ACKs for top commit:
laanwj:
ACK 78c312c983255e15fc274de2368a2ec13ce81cbf
Tree-SHA512: 9e18770b18b6f95a7d0105a4a5497d31cf4eb5efe6574f4482f6f1b4c88d7e0946b9a4a1e9e8e6ecbf41a3f2d7571240677dcb45af29a6f0584e89b25f32e49e
Diffstat (limited to 'src/bench/bench.h')
-rw-r--r-- | src/bench/bench.h | 116 |
1 files changed, 19 insertions, 97 deletions
diff --git a/src/bench/bench.h b/src/bench/bench.h index 629bca9a73..bafc7f8716 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -11,131 +11,53 @@ #include <string> #include <vector> +#include <bench/nanobench.h> #include <boost/preprocessor/cat.hpp> #include <boost/preprocessor/stringize.hpp> -// Simple micro-benchmarking framework; API mostly matches a subset of the Google Benchmark -// framework (see https://github.com/google/benchmark) -// Why not use the Google Benchmark framework? Because adding Yet Another Dependency -// (that uses cmake as its build system and has lots of features we don't need) isn't -// worth it. - /* * Usage: -static void CODE_TO_TIME(benchmark::State& state) +static void CODE_TO_TIME(benchmark::Bench& bench) { ... do any setup needed... - while (state.KeepRunning()) { + nanobench::Config().run([&] { ... do stuff you want to time... - } + }); ... do any cleanup needed... } -// default to running benchmark for 5000 iterations -BENCHMARK(CODE_TO_TIME, 5000); +BENCHMARK(CODE_TO_TIME); */ namespace benchmark { -// In case high_resolution_clock is steady, prefer that, otherwise use steady_clock. -struct best_clock { - using hi_res_clock = std::chrono::high_resolution_clock; - using steady_clock = std::chrono::steady_clock; - using type = std::conditional<hi_res_clock::is_steady, hi_res_clock, steady_clock>::type; -}; -using clock = best_clock::type; -using time_point = clock::time_point; -using duration = clock::duration; - -class Printer; - -class State -{ -public: - std::string m_name; - uint64_t m_num_iters_left; - const uint64_t m_num_iters; - const uint64_t m_num_evals; - std::vector<double> m_elapsed_results; - time_point m_start_time; - bool UpdateTimer(time_point finish_time); +using ankerl::nanobench::Bench; - State(std::string name, uint64_t num_evals, double num_iters, Printer& printer) : m_name(name), m_num_iters_left(0), m_num_iters(num_iters), m_num_evals(num_evals) - { - } +typedef std::function<void(Bench&)> BenchFunction; - inline bool KeepRunning() - { - if (m_num_iters_left--) { - return true; - } - - bool result = UpdateTimer(clock::now()); - // measure again so runtime of UpdateTimer is not included - m_start_time = clock::now(); - return result; - } +struct Args { + std::string regex_filter; + bool is_list_only; + std::vector<double> asymptote; + std::string output_csv; + std::string output_json; }; -typedef std::function<void(State&)> BenchFunction; - class BenchRunner { - struct Bench { - BenchFunction func; - uint64_t num_iters_for_one_second; - }; - typedef std::map<std::string, Bench> BenchmarkMap; + typedef std::map<std::string, BenchFunction> BenchmarkMap; static BenchmarkMap& benchmarks(); public: - BenchRunner(std::string name, BenchFunction func, uint64_t num_iters_for_one_second); - - static void RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only); -}; + BenchRunner(std::string name, BenchFunction func); -// interface to output benchmark results. -class Printer -{ -public: - virtual ~Printer() {} - virtual void header() = 0; - virtual void result(const State& state) = 0; - virtual void footer() = 0; -}; - -// default printer to console, shows min, max, median. -class ConsolePrinter : public Printer -{ -public: - void header() override; - void result(const State& state) override; - void footer() override; -}; - -// creates box plot with plotly.js -class PlotlyPrinter : public Printer -{ -public: - PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height); - void header() override; - void result(const State& state) override; - void footer() override; - -private: - std::string m_plotly_url; - int64_t m_width; - int64_t m_height; + static void RunAll(const Args& args); }; } - - -// BENCHMARK(foo, num_iters_for_one_second) expands to: benchmark::BenchRunner bench_11foo("foo", num_iterations); -// Choose a num_iters_for_one_second that takes roughly 1 second. The goal is that all benchmarks should take approximately -// the same time, and scaling factor can be used that the total time is appropriate for your system. -#define BENCHMARK(n, num_iters_for_one_second) \ - benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n, (num_iters_for_one_second)); +// BENCHMARK(foo) expands to: benchmark::BenchRunner bench_11foo("foo"); +#define BENCHMARK(n) \ + benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n); #endif // BITCOIN_BENCH_BENCH_H |