diff options
author | Gregory Maxwell <greg@xiph.org> | 2016-05-29 01:36:52 +0000 |
---|---|---|
committer | Gregory Maxwell <greg@xiph.org> | 2016-05-30 22:07:56 +0000 |
commit | 63ff57db4beb2e92b3d8ed396da016f29f790195 (patch) | |
tree | 70f938749a9c7ca4e5401f382b485942cf3b181b /src/bench | |
parent | a80de15113166354cdf208e3d8b6e25f4511a591 (diff) |
Avoid integer division in the benchmark inner-most loop.
Previously the benchmark code used an integer division (%) with
a non-constant in the inner-loop. This is quite slow on many
processors, especially ones like ARM that lack a hardware divide.
Even on fairly recent x86_64 like haswell an integer division can
take something like 100 cycles-- making it comparable to the
runtime of siphash.
This change avoids the division by using bitmasking instead. This
was especially easy since the count was only increased by doubling.
This change also restarts the timing when the execution time was
very low this avoids mintimes of zero in cases where one execution
ends up below the timer resolution. It also reduces the impact of
the overhead on the final result.
The formatting of the prints is changed to not use scientific
notation make it more machine readable (in particular, gnuplot
croaks on the non-fixedpoint, and it doesn't sort correctly).
This also hoists out all the floating point divisions out of the
semi-hot path because it was easy to do so.
It might be prudent to break out the critical test into a macro
just to guarantee that it gets inlined. It might also make sense
to just save out the intermediate counts and times and get the
floating point completely out of the timing loop (because e.g.
on hardware without a fast hardware FPU like some ARM it will
still be slow enough to distort the results). I haven't done
either of these in this commit.
Diffstat (limited to 'src/bench')
-rw-r--r-- | src/bench/bench.cpp | 35 | ||||
-rw-r--r-- | src/bench/bench.h | 7 |
2 files changed, 28 insertions, 14 deletions
diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 6ee3cdc27a..227546a7a7 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -5,6 +5,7 @@ #include "bench.h" #include <iostream> +#include <iomanip> #include <sys/time.h> using namespace benchmark; @@ -25,7 +26,7 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func) void BenchRunner::RunAll(double elapsedTimeForOne) { - std::cout << "Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n"; + std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n"; for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin(); it != benchmarks.end(); ++it) { @@ -38,22 +39,34 @@ BenchRunner::RunAll(double elapsedTimeForOne) bool State::KeepRunning() { + if (count & countMask) { + ++count; + return true; + } double now; if (count == 0) { - beginTime = now = gettimedouble(); + lastTime = beginTime = now = gettimedouble(); } else { - // timeCheckCount is used to avoid calling gettime most of the time, - // so benchmarks that run very quickly get consistent results. - if ((count+1)%timeCheckCount != 0) { - ++count; - return true; // keep going - } now = gettimedouble(); - double elapsedOne = (now - lastTime)/timeCheckCount; + double elapsed = now - lastTime; + double elapsedOne = elapsed * countMaskInv; if (elapsedOne < minTime) minTime = elapsedOne; if (elapsedOne > maxTime) maxTime = elapsedOne; - if (elapsedOne*timeCheckCount < maxElapsed/16) timeCheckCount *= 2; + if (elapsed*128 < maxElapsed) { + // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. + // The restart avoids including the overhead of this code in the measurement. + countMask = ((countMask<<3)|7) & ((1LL<<60)-1); + countMaskInv = 1./(countMask+1); + count = 0; + minTime = std::numeric_limits<double>::max(); + maxTime = std::numeric_limits<double>::min(); + return true; + } + if (elapsed*16 < maxElapsed) { + countMask = ((countMask<<1)|1) & ((1LL<<60)-1); + countMaskInv = 1./(countMask+1); + } } lastTime = now; ++count; @@ -64,7 +77,7 @@ bool State::KeepRunning() // Output results double average = (now-beginTime)/count; - std::cout << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n"; + std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n"; return false; } diff --git a/src/bench/bench.h b/src/bench/bench.h index 5ce13c642b..f13b145aaf 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -40,14 +40,15 @@ namespace benchmark { std::string name; double maxElapsed; double beginTime; - double lastTime, minTime, maxTime; + double lastTime, minTime, maxTime, countMaskInv; int64_t count; - int64_t timeCheckCount; + int64_t countMask; public: State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) { minTime = std::numeric_limits<double>::max(); maxTime = std::numeric_limits<double>::min(); - timeCheckCount = 1; + countMask = 1; + countMaskInv = 1./(countMask + 1); } bool KeepRunning(); }; |