diff options
author | Wladimir J. van der Laan <laanwj@gmail.com> | 2016-11-22 10:22:08 +0100 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@gmail.com> | 2016-11-22 12:20:57 +0100 |
commit | 35328187463a7078b4206e394c21d5515929c7de (patch) | |
tree | e310e5beca1e7ad006cb48423c64f72d6a4d00fd /src/bench | |
parent | 55b2eddcc8fd407aa62fd280f4767e3034a7eb04 (diff) |
bench: Add support for measuring CPU cycles
This adds cycle min/max/avg to the statistics.
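Supported on x86 and x86_64 (natively, through the rdtsc instruction), as well
as on other architectures under Linux (through the perf_event_open syscall).
On any other platform the cycle counter always reads 0.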
Diffstat (limited to 'src/bench')
-rw-r--r-- | src/bench/bench.cpp | 22 | ||||
-rw-r--r-- | src/bench/bench.h | 10 | ||||
-rw-r--r-- | src/bench/perf.cpp | 53 | ||||
-rw-r--r-- | src/bench/perf.h | 37 |
4 files changed, 118 insertions, 4 deletions
diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 8942da8c74..af3d152c9a 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -3,6 +3,7 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" +#include "perf.h" #include <iostream> #include <iomanip> @@ -26,7 +27,9 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func) void BenchRunner::RunAll(double elapsedTimeForOne) { - std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n"; + perf_init(); + std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "," + << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin(); it != benchmarks.end(); ++it) { @@ -35,6 +38,7 @@ BenchRunner::RunAll(double elapsedTimeForOne) BenchFunction& func = it->second; func(state); } + perf_fini(); } bool State::KeepRunning() @@ -44,8 +48,10 @@ bool State::KeepRunning() return true; } double now; + uint64_t nowCycles; if (count == 0) { lastTime = beginTime = now = gettimedouble(); + lastCycles = beginCycles = nowCycles = perf_cpucycles(); } else { now = gettimedouble(); @@ -53,6 +59,13 @@ bool State::KeepRunning() double elapsedOne = elapsed * countMaskInv; if (elapsedOne < minTime) minTime = elapsedOne; if (elapsedOne > maxTime) maxTime = elapsedOne; + + // We only use relative values, so don't have to handle 64-bit wrap-around specially + nowCycles = perf_cpucycles(); + uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv; + if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; + if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; + if (elapsed*128 < maxElapsed) { // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. // The restart avoids including the overhead of this code in the measurement. 
@@ -61,6 +74,8 @@ bool State::KeepRunning() count = 0; minTime = std::numeric_limits<double>::max(); maxTime = std::numeric_limits<double>::min(); + minCycles = std::numeric_limits<uint64_t>::max(); + maxCycles = std::numeric_limits<uint64_t>::min(); return true; } if (elapsed*16 < maxElapsed) { @@ -72,6 +87,7 @@ bool State::KeepRunning() } } lastTime = now; + lastCycles = nowCycles; ++count; if (now - beginTime < maxElapsed) return true; // Keep going @@ -80,7 +96,9 @@ bool State::KeepRunning() // Output results double average = (now-beginTime)/count; - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n"; + int64_t averageCycles = (nowCycles-beginCycles)/count; + std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "," + << minCycles << "," << maxCycles << "," << averageCycles << "\n"; return false; } diff --git a/src/bench/bench.h b/src/bench/bench.h index f13b145aaf..caf73e949b 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -41,12 +41,18 @@ namespace benchmark { double maxElapsed; double beginTime; double lastTime, minTime, maxTime, countMaskInv; - int64_t count; - int64_t countMask; + uint64_t count; + uint64_t countMask; + uint64_t beginCycles; + uint64_t lastCycles; + uint64_t minCycles; + uint64_t maxCycles; public: State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) { minTime = std::numeric_limits<double>::max(); maxTime = std::numeric_limits<double>::min(); + minCycles = std::numeric_limits<uint64_t>::max(); + maxCycles = std::numeric_limits<uint64_t>::min(); countMask = 1; countMaskInv = 1./(countMask + 1); } diff --git a/src/bench/perf.cpp b/src/bench/perf.cpp new file mode 100644 index 0000000000..1f43e5d3ac --- /dev/null +++ b/src/bench/perf.cpp @@ -0,0 +1,53 @@ +// Copyright (c) 2016 The Bitcoin Core developers +// Distributed under the MIT 
software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include "perf.h" + +#if defined(__i386__) || defined(__x86_64__) + +/* These architectures support quering the cycle counter + * from user space, no need for any syscall overhead. + */ +void perf_init(void) { } +void perf_fini(void) { } + +#elif defined(__linux__) + +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/perf_event.h> + +static int fd = -1; +static struct perf_event_attr attr; + +void perf_init(void) +{ + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); +} + +void perf_fini(void) +{ + if (fd != -1) { + close(fd); + } +} + +uint64_t perf_cpucycles(void) +{ + uint64_t result = 0; + if (fd == -1 || read(fd, &result, sizeof(result)) < (ssize_t)sizeof(result)) { + return 0; + } + return result; +} + +#else /* Unhandled platform */ + +void perf_init(void) { } +void perf_fini(void) { } +uint64_t perf_cpucycles(void) { return 0; } + +#endif diff --git a/src/bench/perf.h b/src/bench/perf.h new file mode 100644 index 0000000000..681bd0c8a2 --- /dev/null +++ b/src/bench/perf.h @@ -0,0 +1,37 @@ +// Copyright (c) 2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +/** Functions for measurement of CPU cycles */ +#ifndef H_PERF +#define H_PERF + +#include <stdint.h> + +#if defined(__i386__) + +static inline uint64_t perf_cpucycles(void) +{ + uint64_t x; + __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); + return x; +} + +#elif defined(__x86_64__) + +static inline uint64_t perf_cpucycles(void) +{ + uint32_t hi, lo; + __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); + return ((uint64_t)lo)|(((uint64_t)hi)<<32); +} +#else + +uint64_t perf_cpucycles(void); + +#endif + +void perf_init(void); +void perf_fini(void); + +#endif // H_PERF |