Merge #18011: Replace current benchmarking framework with nanobench

78c312c983255e15fc274de2368a2ec13ce81cbf Replace current benchmarking framework with nanobench (Martin Ankerl) Pull request description: Replace current benchmarking framework with nanobench This replaces the current benchmarking framework with nanobench [1], an MIT licensed single-header benchmarking library, of which I am the author. This has in my opinion several advantages, especially on Linux: * fast: Running all benchmarks takes ~6 seconds instead of 4m13s on an Intel i7-8700 CPU @ 3.20GHz. * accurate: I ran e.g. the benchmark for SipHash_32b 10 times and calculated standard deviation / mean = coefficient of variation: * 0.57% CV for old benchmarking framework * 0.20% CV for nanobench So the benchmark results with nanobench seem to vary less than with the old framework. * It automatically determines runtime based on clock precision, no need to specify number of evaluations. * measure instructions, cycles, branches, instructions per cycle, branch misses (only Linux, when performance counters are available) * output in markdown table format. * Warn about unstable environment (frequency scaling, turbo, ...) * For better profiling, it is possible to set the environment variable NANOBENCH_ENDLESS to force endless running of a particular benchmark without the need to recompile. This makes it possible to e.g. run "perf top" and look at hotspots. 
Here is an example copy & pasted from the terminal output: | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 2.52 | 396,529,415.94 | 0.6% | 25.42 | 8.02 | 3.169 | 0.06 | 0.0% | 0.03 | `bench/crypto_hash.cpp RIPEMD160` | 1.87 | 535,161,444.83 | 0.3% | 21.36 | 5.95 | 3.589 | 0.06 | 0.0% | 0.02 | `bench/crypto_hash.cpp SHA1` | 3.22 | 310,344,174.79 | 1.1% | 36.80 | 10.22 | 3.601 | 0.09 | 0.0% | 0.04 | `bench/crypto_hash.cpp SHA256` | 2.01 | 496,375,796.23 | 0.0% | 18.72 | 6.43 | 2.911 | 0.01 | 1.0% | 0.00 | `bench/crypto_hash.cpp SHA256D64_1024` | 7.23 | 138,263,519.35 | 0.1% | 82.66 | 23.11 | 3.577 | 1.63 | 0.1% | 0.00 | `bench/crypto_hash.cpp SHA256_32b` | 3.04 | 328,780,166.40 | 0.3% | 35.82 | 9.69 | 3.696 | 0.03 | 0.0% | 0.03 | `bench/crypto_hash.cpp SHA512` [1] https://github.com/martinus/nanobench ACKs for top commit: laanwj: ACK 78c312c983255e15fc274de2368a2ec13ce81cbf Tree-SHA512: 9e18770b18b6f95a7d0105a4a5497d31cf4eb5efe6574f4482f6f1b4c88d7e0946b9a4a1e9e8e6ecbf41a3f2d7571240677dcb45af29a6f0584e89b25f32e49e
author: Wladimir J. van der Laan <laanwj@protonmail.com> 2020-07-30 15:20:19 +0200
committer: Wladimir J. van der Laan <laanwj@protonmail.com> 2020-07-30 15:34:17 +0200
commit: 4ebe2f6e752c453ff572eda4a108e747d6586c97 (patch)
tree: 59fdc087589ab55e8ade08e91fadeb150bd9733f /src/bench/bench.h
parent: 2a784723f0c0e642353dc74ec6aef4d5f8345044 (diff)
parent: 78c312c983255e15fc274de2368a2ec13ce81cbf (diff)
download: bitcoin-4ebe2f6e752c453ff572eda4a108e747d6586c97.tar.xz
1 files changed, 19 insertions, 97 deletions
diff --git a/src/bench/bench.h b/src/bench/bench.h
index 629bca9a73..bafc7f8716 100644
--- a/src/bench/bench.h
+++ b/src/bench/bench.h
@@ -11,131 +11,53 @@
 #include <string>
 #include <vector>
 
+#include <bench/nanobench.h>
 #include <boost/preprocessor/cat.hpp>
 #include <boost/preprocessor/stringize.hpp>
 
-// Simple micro-benchmarking framework; API mostly matches a subset of the Google Benchmark
-// framework (see https://github.com/google/benchmark)
-// Why not use the Google Benchmark framework? Because adding Yet Another Dependency
-// (that uses cmake as its build system and has lots of features we don't need) isn't
-// worth it.
-
 /*
  * Usage:
 
-static void CODE_TO_TIME(benchmark::State& state)
+static void CODE_TO_TIME(benchmark::Bench& bench)
 {
     ... do any setup needed...
-    while (state.KeepRunning()) {
+    bench.run([&] {
        ... do stuff you want to time...
-    }
+    });
     ... do any cleanup needed...
 }
 
-// default to running benchmark for 5000 iterations
-BENCHMARK(CODE_TO_TIME, 5000);
+BENCHMARK(CODE_TO_TIME);
 
  */
 
 namespace benchmark {
-// In case high_resolution_clock is steady, prefer that, otherwise use steady_clock.
-struct best_clock {
-    using hi_res_clock = std::chrono::high_resolution_clock;
-    using steady_clock = std::chrono::steady_clock;
-    using type = std::conditional<hi_res_clock::is_steady, hi_res_clock, steady_clock>::type;
-};
-using clock = best_clock::type;
-using time_point = clock::time_point;
-using duration = clock::duration;
-
-class Printer;
-
-class State
-{
-public:
-    std::string m_name;
-    uint64_t m_num_iters_left;
-    const uint64_t m_num_iters;
-    const uint64_t m_num_evals;
-    std::vector<double> m_elapsed_results;
-    time_point m_start_time;
 
-    bool UpdateTimer(time_point finish_time);
+using ankerl::nanobench::Bench;
 
-    State(std::string name, uint64_t num_evals, double num_iters, Printer& printer) : m_name(name), m_num_iters_left(0), m_num_iters(num_iters), m_num_evals(num_evals)
-    {
-    }
+typedef std::function<void(Bench&)> BenchFunction;
 
-    inline bool KeepRunning()
-    {
-        if (m_num_iters_left--) {
-            return true;
-        }
-
-        bool result = UpdateTimer(clock::now());
-        // measure again so runtime of UpdateTimer is not included
-        m_start_time = clock::now();
-        return result;
-    }
+struct Args {
+    std::string regex_filter;
+    bool is_list_only;
+    std::vector<double> asymptote;
+    std::string output_csv;
+    std::string output_json;
 };
 
-typedef std::function<void(State&)> BenchFunction;
-
 class BenchRunner
 {
-    struct Bench {
-        BenchFunction func;
-        uint64_t num_iters_for_one_second;
-    };
-    typedef std::map<std::string, Bench> BenchmarkMap;
+    typedef std::map<std::string, BenchFunction> BenchmarkMap;
     static BenchmarkMap& benchmarks();
 
 public:
-    BenchRunner(std::string name, BenchFunction func, uint64_t num_iters_for_one_second);
-
-    static void RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only);
-};
+    BenchRunner(std::string name, BenchFunction func);
 
-// interface to output benchmark results.
-class Printer
-{
-public:
-    virtual ~Printer() {}
-    virtual void header() = 0;
-    virtual void result(const State& state) = 0;
-    virtual void footer() = 0;
-};
-
-// default printer to console, shows min, max, median.
-class ConsolePrinter : public Printer
-{
-public:
-    void header() override;
-    void result(const State& state) override;
-    void footer() override;
-};
-
-// creates box plot with plotly.js
-class PlotlyPrinter : public Printer
-{
-public:
-    PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height);
-    void header() override;
-    void result(const State& state) override;
-    void footer() override;
-
-private:
-    std::string m_plotly_url;
-    int64_t m_width;
-    int64_t m_height;
+    static void RunAll(const Args& args);
 };
 }
-
-
-// BENCHMARK(foo, num_iters_for_one_second) expands to:  benchmark::BenchRunner bench_11foo("foo", num_iterations);
-// Choose a num_iters_for_one_second that takes roughly 1 second. The goal is that all benchmarks should take approximately
-// the same time, and scaling factor can be used that the total time is appropriate for your system.
-#define BENCHMARK(n, num_iters_for_one_second) \
-    benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n, (num_iters_for_one_second));
+// BENCHMARK(foo) expands to:  benchmark::BenchRunner bench_11foo("foo", foo);
+#define BENCHMARK(n) \
+    benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n);
 
 #endif // BITCOIN_BENCH_BENCH_H
author	Wladimir J. van der Laan <laanwj@protonmail.com>	2020-07-30 15:20:19 +0200
committer	Wladimir J. van der Laan <laanwj@protonmail.com>	2020-07-30 15:34:17 +0200
commit	4ebe2f6e752c453ff572eda4a108e747d6586c97 (patch)
tree	59fdc087589ab55e8ade08e91fadeb150bd9733f /src/bench/bench.h
parent	2a784723f0c0e642353dc74ec6aef4d5f8345044 (diff)
parent	78c312c983255e15fc274de2368a2ec13ce81cbf (diff)
download	bitcoin-4ebe2f6e752c453ff572eda4a108e747d6586c97.tar.xz