aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorW. J. van der Laan <laanwj@protonmail.com>2021-09-24 17:10:06 +0200
committerW. J. van der Laan <laanwj@protonmail.com>2021-09-24 18:39:04 +0200
commit03cb2b480bd5e35cd6bafbca3ff6adcba52dab8b (patch)
treea296732e8b1d9b064312e9c3da0e2910672e27d5 /src
parent01b5cfb9511a22ce4e1525d5e9f2185c6e4401bd (diff)
parente148a5233292d156cda76cb20afb6641fc20f25e (diff)
downloadbitcoin-03cb2b480bd5e35cd6bafbca3ff6adcba52dab8b.tar.xz
Merge bitcoin/bitcoin#23025: bench: update nanobench add `-min_time`
e148a5233292d156cda76cb20afb6641fc20f25e bench: fixed ubsan implicit conversion (Martin Ankerl) da4e2f1da0388d424659fa8c853fcaf37b4b5959 bench: various args improvements (Jon Atack) d312fd94a1083cdbf071f2888aab43c62d358151 bench: clean up includes (Jon Atack) 1f10f1663e53474038b9111c4264a250cffe7501 bench: add usage description and documentation (Martin Ankerl) d3c6f8bfa12f78635752878b28e66cec0c85d4a9 bench: introduce -min_time argument (Martin Ankerl) 9fef8329322277d9c14c8df1867cb3c61477c431 bench: make EvictionProtection.* work with any number of iterations (Martin Ankerl) 153e6860e84df0a3d52e5a3b2fe9c37b5e0b029a bench: change AddrManGood to AddrManAddThenGood (Martin Ankerl) 468b232f71562280aae16876bc257ec24f5fcccb bench: remove unnecessary & incorrect multiplication in MuHashDiv (Martin Ankerl) eed99cf272426e5957bee35dc8e7d0798aec8ec0 bench: update nanobench from 4.3.4 to 4.3.6 (Martin Ankerl) Pull request description: This PR updates the nanobench with the latest release from upstream, v4.3.6. It fixes the missing performance counters. Due to discussions on #22999 I have done some work that should make the benchmark results more reliable. It introduces a new flag `-min_time` that allows to run a benchmark for much longer then the default. When results are unreliable, choosing a large timeframe here should usually get repeatable results even when frequency scaling cannot be disabled. The default is now 10ms. For this to work I have changed the `AddrManGood` and `EvictionProtection` benchmarks so they work with any number of iterations. Also, this adds more usage documentation to `bench_bitcoin -h` and I've cherry-picked two changes from #22999 authored by Jon Atack ACKs for top commit: jonatack: re-ACK e148a5233292d156cda76cb20afb6641fc20f25e laanwj: Code review ACK e148a5233292d156cda76cb20afb6641fc20f25e Tree-SHA512: 2da6de19a5c85ac234b190025e195c727546166dbb75e3f9267e667a73677ba1e29b7765877418a42b1407b65df901e0130763936525e6f1450f18f08837c40c
Diffstat (limited to 'src')
-rw-r--r--src/bench/addrman.cpp38
-rw-r--r--src/bench/bech32.cpp1
-rw-r--r--src/bench/bench.cpp17
-rw-r--r--src/bench/bench.h3
-rw-r--r--src/bench/bench_bitcoin.cpp64
-rw-r--r--src/bench/crypto_hash.cpp8
-rw-r--r--src/bench/nanobench.h43
-rw-r--r--src/bench/peer_eviction.cpp10
-rw-r--r--src/bench/rollingbloom.cpp16
9 files changed, 128 insertions, 72 deletions
diff --git a/src/bench/addrman.cpp b/src/bench/addrman.cpp
index e5dd571a4c..bebf86a09d 100644
--- a/src/bench/addrman.cpp
+++ b/src/bench/addrman.cpp
@@ -103,41 +103,33 @@ static void AddrManGetAddr(benchmark::Bench& bench)
});
}
-static void AddrManGood(benchmark::Bench& bench)
+static void AddrManAddThenGood(benchmark::Bench& bench)
{
- /* Create many CAddrMan objects - one to be modified at each loop iteration.
- * This is necessary because the CAddrMan::Good() method modifies the
- * object, affecting the timing of subsequent calls to the same method and
- * we want to do the same amount of work in every loop iteration. */
-
- bench.epochs(5).epochIterations(1);
- const uint64_t addrman_count{bench.epochs() * bench.epochIterations()};
- Assert(addrman_count == 5U);
-
- std::vector<std::unique_ptr<CAddrMan>> addrmans(addrman_count);
- for (size_t i{0}; i < addrman_count; ++i) {
- addrmans[i] = std::make_unique<CAddrMan>(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
- FillAddrMan(*addrmans[i]);
- }
-
auto markSomeAsGood = [](CAddrMan& addrman) {
for (size_t source_i = 0; source_i < NUM_SOURCES; ++source_i) {
for (size_t addr_i = 0; addr_i < NUM_ADDRESSES_PER_SOURCE; ++addr_i) {
- if (addr_i % 32 == 0) {
- addrman.Good(g_addresses[source_i][addr_i]);
- }
+ addrman.Good(g_addresses[source_i][addr_i]);
}
}
};
- uint64_t i = 0;
+ CreateAddresses();
+
bench.run([&] {
- markSomeAsGood(*addrmans.at(i));
- ++i;
+ // To make the benchmark independent of the number of evaluations, we always prepare a new addrman.
+ // This is necessary because CAddrMan::Good() method modifies the object, affecting the timing of subsequent calls
+ // to the same method and we want to do the same amount of work in every loop iteration.
+ //
+ // This has some overhead (exactly the result of AddrManAdd benchmark), but that overhead is constant so improvements in
+ // CAddrMan::Good() will still be noticeable.
+ CAddrMan addrman(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
+ AddAddressesToAddrMan(addrman);
+
+ markSomeAsGood(addrman);
});
}
BENCHMARK(AddrManAdd);
BENCHMARK(AddrManSelect);
BENCHMARK(AddrManGetAddr);
-BENCHMARK(AddrManGood);
+BENCHMARK(AddrManAddThenGood);
diff --git a/src/bench/bech32.cpp b/src/bench/bech32.cpp
index 8e10862a37..bc3685818e 100644
--- a/src/bench/bech32.cpp
+++ b/src/bench/bech32.cpp
@@ -3,7 +3,6 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <bench/bench.h>
-#include <bench/nanobench.h>
#include <bech32.h>
#include <util/strencodings.h>
diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp
index 012057e792..030bc43396 100644
--- a/src/bench/bench.cpp
+++ b/src/bench/bench.cpp
@@ -4,11 +4,18 @@
#include <bench/bench.h>
-#include <chainparams.h>
#include <test/util/setup_common.h>
-#include <validation.h>
+#include <chrono>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <map>
#include <regex>
+#include <string>
+#include <vector>
+
+using namespace std::chrono_literals;
const std::function<void(const std::string&)> G_TEST_LOG_FUN{};
@@ -61,6 +68,12 @@ void benchmark::BenchRunner::RunAll(const Args& args)
Bench bench;
bench.name(p.first);
+ if (args.min_time > 0ms) {
+ // convert to nanos before dividing to reduce rounding errors
+ std::chrono::nanoseconds min_time_ns = args.min_time;
+ bench.minEpochTime(min_time_ns / bench.epochs());
+ }
+
if (args.asymptote.empty()) {
p.second(bench);
} else {
diff --git a/src/bench/bench.h b/src/bench/bench.h
index c4fcd80e33..b0e4006ee3 100644
--- a/src/bench/bench.h
+++ b/src/bench/bench.h
@@ -41,11 +41,12 @@ using ankerl::nanobench::Bench;
typedef std::function<void(Bench&)> BenchFunction;
struct Args {
- std::string regex_filter;
bool is_list_only;
+ std::chrono::milliseconds min_time;
std::vector<double> asymptote;
std::string output_csv;
std::string output_json;
+ std::string regex_filter;
};
class BenchRunner
diff --git a/src/bench/bench_bitcoin.cpp b/src/bench/bench_bitcoin.cpp
index aab777cac1..33a9ea80ed 100644
--- a/src/bench/bench_bitcoin.cpp
+++ b/src/bench/bench_bitcoin.cpp
@@ -4,21 +4,28 @@
#include <bench/bench.h>
+#include <clientversion.h>
#include <crypto/sha256.h>
#include <util/strencodings.h>
#include <util/system.h>
-#include <memory>
+#include <chrono>
+#include <cstdint>
+#include <iostream>
+#include <sstream>
+#include <vector>
static const char* DEFAULT_BENCH_FILTER = ".*";
+static constexpr int64_t DEFAULT_MIN_TIME_MS{10};
static void SetupBenchArgs(ArgsManager& argsman)
{
SetupHelpOptions(argsman);
- argsman.AddArg("-asymptote=n1,n2,n3,...", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
+ argsman.AddArg("-asymptote=<n1,n2,n3,...>", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
argsman.AddArg("-filter=<regex>", strprintf("Regular expression filter to select benchmark by name (default: %s)", DEFAULT_BENCH_FILTER), ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
- argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
+ argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_BOOL, OptionsCategory::OPTIONS);
+ argsman.AddArg("-min_time=<milliseconds>", strprintf("Minimum runtime per benchmark, in milliseconds (default: %d)", DEFAULT_MIN_TIME_MS), ArgsManager::ALLOW_INT, OptionsCategory::OPTIONS);
argsman.AddArg("-output_csv=<output.csv>", "Generate CSV file with the most important benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
argsman.AddArg("-output_json=<output.json>", "Generate JSON file with all benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
}
@@ -48,17 +55,62 @@ int main(int argc, char** argv)
}
if (HelpRequested(argsman)) {
- std::cout << argsman.GetHelpMessage();
+ std::cout << "Usage: bench_bitcoin [options]\n"
+ "\n"
+ << argsman.GetHelpMessage()
+ << "Description:\n"
+ "\n"
+ " bench_bitcoin executes microbenchmarks. The quality of the benchmark results\n"
+ " highly depend on the stability of the machine. It can sometimes be difficult\n"
+ " to get stable, repeatable results, so here are a few tips:\n"
+ "\n"
+ " * Use pyperf [1] to disable frequency scaling, turbo boost etc. For best\n"
+ " results, use CPU pinning and CPU isolation (see [2]).\n"
+ "\n"
+ " * Each call of run() should do exactly the same work. E.g. inserting into\n"
+ " a std::vector doesn't do that as it will reallocate on certain calls. Make\n"
+ " sure each run has exactly the same preconditions.\n"
+ "\n"
+ " * If results are still not reliable, increase runtime with e.g.\n"
+ " -min_time=5000 to let a benchmark run for at least 5 seconds.\n"
+ "\n"
+ " * bench_bitcoin uses nanobench [3] for which there is extensive\n"
+ " documentation available online.\n"
+ "\n"
+ "Environment Variables:\n"
+ "\n"
+ " To attach a profiler you can run a benchmark in endless mode. This can be\n"
+ " done with the environment variable NANOBENCH_ENDLESS. E.g. like so:\n"
+ "\n"
+ " NANOBENCH_ENDLESS=MuHash ./bench_bitcoin -filter=MuHash\n"
+ "\n"
+ " In rare cases it can be useful to suppress stability warnings. This can be\n"
+ " done with the environment variable NANOBENCH_SUPPRESS_WARNINGS, e.g:\n"
+ "\n"
+ " NANOBENCH_SUPPRESS_WARNINGS=1 ./bench_bitcoin\n"
+ "\n"
+ "Notes:\n"
+ "\n"
+ " 1. pyperf\n"
+ " https://github.com/psf/pyperf\n"
+ "\n"
+ " 2. CPU pinning & isolation\n"
+ " https://pyperf.readthedocs.io/en/latest/system.html\n"
+ "\n"
+ " 3. nanobench\n"
+ " https://github.com/martinus/nanobench\n"
+ "\n";
return EXIT_SUCCESS;
}
benchmark::Args args;
- args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER);
- args.is_list_only = argsman.GetBoolArg("-list", false);
args.asymptote = parseAsymptote(argsman.GetArg("-asymptote", ""));
+ args.is_list_only = argsman.GetBoolArg("-list", false);
+ args.min_time = std::chrono::milliseconds(argsman.GetArg("-min_time", DEFAULT_MIN_TIME_MS));
args.output_csv = argsman.GetArg("-output_csv", "");
args.output_json = argsman.GetArg("-output_json", "");
+ args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER);
benchmark::BenchRunner::RunAll(args);
diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp
index 30fe11be6b..d36e504bfc 100644
--- a/src/bench/crypto_hash.cpp
+++ b/src/bench/crypto_hash.cpp
@@ -110,9 +110,9 @@ static void MuHash(benchmark::Bench& bench)
{
MuHash3072 acc;
unsigned char key[32] = {0};
- int i = 0;
+ uint32_t i = 0;
bench.run([&] {
- key[0] = ++i;
+ key[0] = ++i & 0xFF;
acc *= MuHash3072(key);
});
}
@@ -134,10 +134,6 @@ static void MuHashDiv(benchmark::Bench& bench)
FastRandomContext rng(true);
MuHash3072 muhash{rng.randbytes(32)};
- for (size_t i = 0; i < bench.epochIterations(); ++i) {
- acc *= muhash;
- }
-
bench.run([&] {
acc /= muhash;
});
diff --git a/src/bench/nanobench.h b/src/bench/nanobench.h
index 030d6ebf6a..27df08fb69 100644
--- a/src/bench/nanobench.h
+++ b/src/bench/nanobench.h
@@ -33,7 +33,7 @@
// see https://semver.org/
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
-#define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes
+#define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes
///////////////////////////////////////////////////////////////////////////////////////////////////
// public facing api - as minimal as possible
@@ -88,13 +88,15 @@
} while (0)
#endif
-#if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \
- !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
-// only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in
-// kernel 2.6.32 (all others are).
-# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
-#else
-# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
+#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
+#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
+# include <linux/version.h>
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
+// PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
+// PERF_FLAG_FD_CLOEXEC since kernel 3.14
+# undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
+# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
+# endif
#endif
#if defined(__clang__)
@@ -2210,20 +2212,20 @@ struct IterationLogic::Impl {
columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);
double rInsMedian = -1.0;
- if (mResult.has(Result::Measure::instructions)) {
+ if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
rInsMedian = mResult.median(Result::Measure::instructions);
columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch());
}
double rCycMedian = -1.0;
- if (mResult.has(Result::Measure::cpucycles)) {
+ if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
rCycMedian = mResult.median(Result::Measure::cpucycles);
columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch());
}
if (rInsMedian > 0.0 && rCycMedian > 0.0) {
columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
}
- if (mResult.has(Result::Measure::branchinstructions)) {
+ if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
double rBraMedian = mResult.median(Result::Measure::branchinstructions);
columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch());
if (mResult.has(Result::Measure::branchmisses)) {
@@ -2402,6 +2404,14 @@ public:
return (a + divisor / 2) / divisor;
}
+ ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
+ static inline uint32_t mix(uint32_t x) noexcept {
+ x ^= x << 13;
+ x ^= x >> 17;
+ x ^= x << 5;
+ return x;
+ }
+
template <typename Op>
ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
void calibrate(Op&& op) {
@@ -2441,15 +2451,10 @@ public:
uint64_t const numIters = 100000U + (std::random_device{}() & 3);
uint64_t n = numIters;
uint32_t x = 1234567;
- auto fn = [&]() {
- x ^= x << 13;
- x ^= x >> 17;
- x ^= x << 5;
- };
beginMeasure();
while (n-- > 0) {
- fn();
+ x = mix(x);
}
endMeasure();
detail::doNotOptimizeAway(x);
@@ -2459,8 +2464,8 @@ public:
beginMeasure();
while (n-- > 0) {
// we now run *twice* so we can easily calculate the overhead
- fn();
- fn();
+ x = mix(x);
+ x = mix(x);
}
endMeasure();
detail::doNotOptimizeAway(x);
diff --git a/src/bench/peer_eviction.cpp b/src/bench/peer_eviction.cpp
index 46fd9d999e..8429f18613 100644
--- a/src/bench/peer_eviction.cpp
+++ b/src/bench/peer_eviction.cpp
@@ -20,19 +20,17 @@ static void EvictionProtectionCommon(
{
using Candidates = std::vector<NodeEvictionCandidate>;
FastRandomContext random_context{true};
- bench.warmup(100).epochIterations(1100);
Candidates candidates{GetRandomNodeEvictionCandidates(num_candidates, random_context)};
for (auto& c : candidates) {
candidate_setup_fn(c);
}
- std::vector<Candidates> copies{
- static_cast<size_t>(bench.epochs() * bench.epochIterations()), candidates};
- size_t i{0};
+
bench.run([&] {
- ProtectEvictionCandidatesByRatio(copies.at(i));
- ++i;
+ // creating a copy has an overhead of about 3%, so it does not influence the benchmark results much.
+ auto copy = candidates;
+ ProtectEvictionCandidatesByRatio(copy);
});
}
diff --git a/src/bench/rollingbloom.cpp b/src/bench/rollingbloom.cpp
index 997ab56549..28167767db 100644
--- a/src/bench/rollingbloom.cpp
+++ b/src/bench/rollingbloom.cpp
@@ -13,16 +13,16 @@ static void RollingBloom(benchmark::Bench& bench)
uint32_t count = 0;
bench.run([&] {
count++;
- data[0] = count;
- data[1] = count >> 8;
- data[2] = count >> 16;
- data[3] = count >> 24;
+ data[0] = count & 0xFF;
+ data[1] = (count >> 8) & 0xFF;
+ data[2] = (count >> 16) & 0xFF;
+ data[3] = (count >> 24) & 0xFF;
filter.insert(data);
- data[0] = count >> 24;
- data[1] = count >> 16;
- data[2] = count >> 8;
- data[3] = count;
+ data[0] = (count >> 24) & 0xFF;
+ data[1] = (count >> 16) & 0xFF;
+ data[2] = (count >> 8) & 0xFF;
+ data[3] = count & 0xFF;
filter.contains(data);
});
}