diff options
Diffstat (limited to 'src/bench/bench.cpp')
-rw-r--r-- | src/bench/bench.cpp | 182 |
1 files changed, 110 insertions, 72 deletions
diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 1482452814..edbad09ebd 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -8,98 +8,136 @@ #include <assert.h> #include <iostream> #include <iomanip> +#include <algorithm> +#include <regex> +#include <numeric> -benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { - static std::map<std::string, benchmark::BenchFunction> benchmarks_map; - return benchmarks_map; +void benchmark::ConsolePrinter::header() +{ + std::cout << "# Benchmark, evals, iterations, total, min, max, median" << std::endl; } -benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func) +void benchmark::ConsolePrinter::result(const State& state) { - benchmarks().insert(std::make_pair(name, func)); + auto results = state.m_elapsed_results; + std::sort(results.begin(), results.end()); + + double total = state.m_num_iters * std::accumulate(results.begin(), results.end(), 0.0); + + double front = 0; + double back = 0; + double median = 0; + + if (!results.empty()) { + front = results.front(); + back = results.back(); + + size_t mid = results.size() / 2; + median = results[mid]; + if (0 == results.size() % 2) { + median = (results[mid] + results[mid + 1]) / 2; + } + } + + std::cout << std::setprecision(6); + std::cout << state.m_name << ", " << state.m_num_evals << ", " << state.m_num_iters << ", " << total << ", " << front << ", " << back << ", " << median << std::endl; } -void -benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) +void benchmark::ConsolePrinter::footer() {} +benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height) + : m_plotly_url(plotly_url), m_width(width), m_height(height) { - perf_init(); - if (std::ratio_less_equal<benchmark::clock::period, std::micro>::value) { - std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; - } - std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << "," - << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; +} - for (const auto &p: benchmarks()) { - State state(p.first, elapsedTimeForOne); - p.second(state); - } - perf_fini(); +void benchmark::PlotlyPrinter::header() +{ + std::cout << "<html><head>" + << "<script src=\"" << m_plotly_url << "\"></script>" + << "</head><body><div id=\"myDiv\" style=\"width:" << m_width << "px; height:" << m_height << "px\"></div>" + << "<script> var data = [" + << std::endl; } -bool benchmark::State::KeepRunning() +void benchmark::PlotlyPrinter::result(const State& state) { - if (count & countMask) { - ++count; - return true; + std::cout << "{ " << std::endl + << " name: '" << state.m_name << "', " << std::endl + << " y: ["; + + const char* prefix = ""; + for (const auto& e : state.m_elapsed_results) { + std::cout << prefix << std::setprecision(6) << e; + prefix = ", "; } - time_point now; + std::cout << "]," << std::endl + << " boxpoints: 'all', jitter: 0.3, pointpos: 0, type: 'box'," + << std::endl + << "}," << std::endl; +} + +void benchmark::PlotlyPrinter::footer() +{ + std::cout << "]; var layout = { showlegend: false, yaxis: { rangemode: 'tozero', autorange: true } };" + << "Plotly.newPlot('myDiv', data, layout);" + << "</script></body></html>"; +} - uint64_t nowCycles; - if (count == 0) { - lastTime = beginTime = now = clock::now(); - lastCycles = beginCycles = nowCycles = perf_cpucycles(); + +benchmark::BenchRunner::BenchmarkMap& benchmark::BenchRunner::benchmarks() +{ + static std::map<std::string, Bench> benchmarks_map; + return benchmarks_map; +} + +benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func, uint64_t num_iters_for_one_second) +{ + benchmarks().insert(std::make_pair(name, Bench{func, num_iters_for_one_second})); +} + +void benchmark::BenchRunner::RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only) +{ + perf_init(); + if (!std::ratio_less_equal<benchmark::clock::period, std::micro>::value) { + std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; } - else { - now = clock::now(); - auto elapsed = now - lastTime; - auto elapsedOne = elapsed / (countMask + 1); - if (elapsedOne < minTime) minTime = elapsedOne; - if (elapsedOne > maxTime) maxTime = elapsedOne; - - // We only use relative values, so don't have to handle 64-bit wrap-around specially - nowCycles = perf_cpucycles(); - uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1); - if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; - if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; - - if (elapsed*128 < maxElapsed) { - // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. - // The restart avoids including the overhead of this code in the measurement. - countMask = ((countMask<<3)|7) & ((1LL<<60)-1); - count = 0; - minTime = duration::max(); - maxTime = duration::zero(); - minCycles = std::numeric_limits<uint64_t>::max(); - maxCycles = std::numeric_limits<uint64_t>::min(); - return true; + + std::regex reFilter(filter); + std::smatch baseMatch; + + printer.header(); + + for (const auto& p : benchmarks()) { + if (!std::regex_match(p.first, baseMatch, reFilter)) { + continue; + } + + uint64_t num_iters = static_cast<uint64_t>(p.second.num_iters_for_one_second * scaling); + if (0 == num_iters) { + num_iters = 1; } - if (elapsed*16 < maxElapsed) { - uint64_t newCountMask = ((countMask<<1)|1) & ((1LL<<60)-1); - if ((count & newCountMask)==0) { - countMask = newCountMask; - } + State state(p.first, num_evals, num_iters, printer); + if (!is_list_only) { + p.second.func(state); } + printer.result(state); } - lastTime = now; - lastCycles = nowCycles; - ++count; - if (now - beginTime < maxElapsed) return true; // Keep going + printer.footer(); - --count; + perf_fini(); +} - assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above"); +bool benchmark::State::UpdateTimer(const benchmark::time_point current_time) +{ + if (m_start_time != time_point()) { + std::chrono::duration<double> diff = current_time - m_start_time; + m_elapsed_results.push_back(diff.count() / m_num_iters); - // Output results - // Duration casts are only necessary here because hardware with sub-nanosecond clocks - // will lose precision. - int64_t min_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(minTime).count(); - int64_t max_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime).count(); - int64_t avg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>((now-beginTime)/count).count(); - int64_t averageCycles = (nowCycles-beginCycles)/count; - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << "," - << minCycles << "," << maxCycles << "," << averageCycles << "\n"; - std::cout.copyfmt(std::ios(nullptr)); + if (m_elapsed_results.size() == m_num_evals) { + return false; + } + } - return false; + m_num_iters_left = m_num_iters - 1; + return true; } |