diff options
author | Wladimir J. van der Laan <laanwj@gmail.com> | 2017-11-08 08:32:54 +0100 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@gmail.com> | 2017-11-08 08:33:07 +0100 |
commit | 5776582b7f3efc9af1c35b68a51bd2c83b848a32 (patch) | |
tree | 91db60d78f0bab96dee2bec752f39c7531039810 /src | |
parent | dd561667cb7ccbbfed3134b05a565971ef6f5873 (diff) | |
parent | 24a0bddf4ae13d8f1fa0436a547de67dcf6d4f2a (diff) |
Merge #11562: bench: use std::chrono rather than gettimeofday
24a0bdd bench: prefer a steady clock if the resolution is no worse (Cory Fields)
c515d26 bench: switch to std::chrono for time measurements (Cory Fields)
Pull request description:
gettimeofday has portability issues; see, for example, #11558.
Regardless of large-scale clock refactors in the future, I think it's fine for bench to just use std::chrono itself.
Note that this may slightly improve bench accuracy, and that it changes the displayed timings from tiny floating-point values (fractions of a second) to integer nanosecond counts.
Tree-SHA512: 122355456d01ec6cfcf6867991715cf3a95eabbf5a4f2adc26a059b50382ffb318b7639cdd575197fc4ee5be8b967c0404f1f920d6f5bd4ddd0bd63b5e5c5632
Diffstat (limited to 'src')
-rw-r--r-- | src/bench/bench.cpp | 34 | ||||
-rw-r--r-- | src/bench/bench.h | 26 | ||||
-rw-r--r-- | src/bench/rollingbloom.cpp | 7 |
3 files changed, 38 insertions, 29 deletions
diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 7b307d6f42..dd4ba5ab0e 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -8,29 +8,22 @@ #include <assert.h> #include <iostream> #include <iomanip> -#include <sys/time.h> benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { static std::map<std::string, benchmark::BenchFunction> benchmarks_map; return benchmarks_map; } -static double gettimedouble(void) { - struct timeval tv; - gettimeofday(&tv, nullptr); - return tv.tv_usec * 0.000001 + tv.tv_sec; -} - benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func) { benchmarks().insert(std::make_pair(name, func)); } void -benchmark::BenchRunner::RunAll(double elapsedTimeForOne) +benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) { perf_init(); - std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "," + std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << "," << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; for (const auto &p: benchmarks()) { @@ -46,16 +39,17 @@ bool benchmark::State::KeepRunning() ++count; return true; } - double now; + time_point now; + uint64_t nowCycles; if (count == 0) { - lastTime = beginTime = now = gettimedouble(); + lastTime = beginTime = now = clock::now(); lastCycles = beginCycles = nowCycles = perf_cpucycles(); } else { - now = gettimedouble(); - double elapsed = now - lastTime; - double elapsedOne = elapsed / (countMask + 1); + now = clock::now(); + auto elapsed = now - lastTime; + auto elapsedOne = elapsed / (countMask + 1); if (elapsedOne < minTime) minTime = elapsedOne; if (elapsedOne > maxTime) maxTime = elapsedOne; @@ -70,8 +64,8 @@ bool benchmark::State::KeepRunning() // The restart avoids including the overhead of this code in the measurement. 
countMask = ((countMask<<3)|7) & ((1LL<<60)-1); count = 0; - minTime = std::numeric_limits<double>::max(); - maxTime = std::numeric_limits<double>::min(); + minTime = duration::max(); + maxTime = duration::zero(); minCycles = std::numeric_limits<uint64_t>::max(); maxCycles = std::numeric_limits<uint64_t>::min(); return true; @@ -94,9 +88,13 @@ bool benchmark::State::KeepRunning() assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above"); // Output results - double average = (now-beginTime)/count; + // Duration casts are only necessary here because hardware with sub-nanosecond clocks + // will lose precision. + int64_t min_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(minTime).count(); + int64_t max_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime).count(); + int64_t avg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>((now-beginTime)/count).count(); int64_t averageCycles = (nowCycles-beginCycles)/count; - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "," + std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << "," << minCycles << "," << maxCycles << "," << averageCycles << "\n"; std::cout.copyfmt(std::ios(nullptr)); diff --git a/src/bench/bench.h b/src/bench/bench.h index 79109eaa56..d276f4ee91 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -9,6 +9,7 @@ #include <limits> #include <map> #include <string> +#include <chrono> #include <boost/preprocessor/cat.hpp> #include <boost/preprocessor/stringize.hpp> @@ -36,12 +37,23 @@ BENCHMARK(CODE_TO_TIME); */ namespace benchmark { + // On many systems, the high_resolution_clock offers no better resolution than the steady_clock. + // If that's the case, prefer the steady_clock. 
+ struct best_clock { + using hi_res_clock = std::chrono::high_resolution_clock; + using steady_clock = std::chrono::steady_clock; + static constexpr bool steady_is_high_res = std::ratio_less_equal<steady_clock::period, hi_res_clock::period>::value; + using type = std::conditional<steady_is_high_res, steady_clock, hi_res_clock>::type; + }; + using clock = best_clock::type; + using time_point = clock::time_point; + using duration = clock::duration; class State { std::string name; - double maxElapsed; - double beginTime; - double lastTime, minTime, maxTime; + duration maxElapsed; + time_point beginTime, lastTime; + duration minTime, maxTime; uint64_t count; uint64_t countMask; uint64_t beginCycles; @@ -49,9 +61,9 @@ namespace benchmark { uint64_t minCycles; uint64_t maxCycles; public: - State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) { - minTime = std::numeric_limits<double>::max(); - maxTime = std::numeric_limits<double>::min(); + State(std::string _name, duration _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) { + minTime = duration::max(); + maxTime = duration::zero(); minCycles = std::numeric_limits<uint64_t>::max(); maxCycles = std::numeric_limits<uint64_t>::min(); countMask = 1; @@ -69,7 +81,7 @@ namespace benchmark { public: BenchRunner(std::string name, BenchFunction func); - static void RunAll(double elapsedTimeForOne=1.0); + static void RunAll(duration elapsedTimeForOne = std::chrono::seconds(1)); }; } diff --git a/src/bench/rollingbloom.cpp b/src/bench/rollingbloom.cpp index 73c02cf718..a93d0fb0a5 100644 --- a/src/bench/rollingbloom.cpp +++ b/src/bench/rollingbloom.cpp @@ -6,7 +6,6 @@ #include "bench.h" #include "bloom.h" -#include "utiltime.h" static void RollingBloom(benchmark::State& state) { @@ -23,10 +22,10 @@ static void RollingBloom(benchmark::State& state) data[2] = count >> 16; data[3] = count >> 24; if (countnow == nEntriesPerGeneration) { - int64_t b = GetTimeMicros(); + auto b = 
benchmark::clock::now(); filter.insert(data); - int64_t e = GetTimeMicros(); - std::cout << "RollingBloom-refresh,1," << (e-b)*0.000001 << "," << (e-b)*0.000001 << "," << (e-b)*0.000001 << "\n"; + auto total = std::chrono::duration_cast<std::chrono::nanoseconds>(benchmark::clock::now() - b).count(); + std::cout << "RollingBloom-refresh,1," << total << "," << total << "," << total << "\n"; countnow = 0; } else { filter.insert(data); |