8 files changed, 159 insertions, 18 deletions
diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp
index 1bd9d06b80..b0df3d2b04 100644
--- a/src/bench/bench.cpp
+++ b/src/bench/bench.cpp
@@ -9,7 +9,10 @@
 #include <iomanip>
 #include <sys/time.h>
 
-std::map<std::string, benchmark::BenchFunction> benchmark::BenchRunner::benchmarks;
+benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { // accessor for the name -> benchmark function registry
+    static std::map<std::string, benchmark::BenchFunction> benchmarks_map; // function-local static: constructed on the first call, so it exists before the first insert
+    return benchmarks_map;
+}
 
 static double gettimedouble(void) {
     struct timeval tv;
@@ -19,7 +22,7 @@ static double gettimedouble(void) {
 
 benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func)
 {
-    benchmarks.insert(std::make_pair(name, func));
+    benchmarks().insert(std::make_pair(name, func));
 }
 
 void
@@ -29,12 +32,9 @@ benchmark::BenchRunner::RunAll(double elapsedTimeForOne)
     std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << ","
               << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";
 
-    for (std::map<std::string,benchmark::BenchFunction>::iterator it = benchmarks.begin();
-         it != benchmarks.end(); ++it) {
-
-        State state(it->first, elapsedTimeForOne);
-        benchmark::BenchFunction& func = it->second;
-        func(state);
+    for (const auto &p: benchmarks()) {
+        State state(p.first, elapsedTimeForOne);
+        p.second(state);
     }
     perf_fini();
 }
@@ -92,6 +92,8 @@ bool benchmark::State::KeepRunning()
 
     --count;
 
+    assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above");
+
     // Output results
     double average = (now-beginTime)/count;
     int64_t averageCycles = (nowCycles-beginCycles)/count;
diff --git a/src/bench/bench.h b/src/bench/bench.h
index 80dad6a8ef..f12a41126c 100644
--- a/src/bench/bench.h
+++ b/src/bench/bench.h
@@ -14,7 +14,7 @@
 
 // Simple micro-benchmarking framework; API mostly matches a subset of the Google Benchmark
 // framework (see https://github.com/google/benchmark)
-// Wny not use the Google Benchmark framework? Because adding Yet Another Dependency
+// Why not use the Google Benchmark framework? Because adding Yet Another Dependency
 // (that uses cmake as its build system and has lots of features we don't need) isn't
 // worth it.
 
@@ -63,7 +63,8 @@ namespace benchmark {
 
     class BenchRunner
     {
-        static std::map<std::string, BenchFunction> benchmarks;
+        typedef std::map<std::string, BenchFunction> BenchmarkMap;
+        static BenchmarkMap &benchmarks();
 
     public:
         BenchRunner(std::string name, BenchFunction func);
diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp
new file mode 100644
index 0000000000..6fa9fe4fe8
--- /dev/null
+++ b/src/bench/checkqueue.cpp
@@ -0,0 +1,103 @@
+// Copyright (c) 2015 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include "bench.h"
+#include "util.h"
+#include "validation.h"
+#include "checkqueue.h"
+#include "prevector.h"
+#include <vector>
+#include <boost/thread/thread.hpp>
+#include "random.h"
+
+
+// This Benchmark tests the CheckQueue with the lightest
+// weight Checks, so it should make any lock contention
+// particularly visible
+static const int MIN_CORES = 2;
+static const size_t BATCHES = 101;
+static const size_t BATCH_SIZE = 30;
+static const int PREVECTOR_SIZE = 28;
+static const int QUEUE_BATCH_SIZE = 128;
+static void CCheckQueueSpeed(benchmark::State& state)
+{
+    struct FakeJobNoWork { // check type whose evaluation does no work and always reports success
+        bool operator()()
+        {
+            return true;
+        }
+        void swap(FakeJobNoWork& x){}; // no members, so there is nothing to exchange
+    };
+    CCheckQueue<FakeJobNoWork> queue {QUEUE_BATCH_SIZE};
+    boost::thread_group tg;
+    for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { // at least MIN_CORES workers, even if GetNumCores() reports fewer
+       tg.create_thread([&]{queue.Thread();}); // captures `queue` by reference; the threads are joined below before it goes out of scope
+    }
+    while (state.KeepRunning()) {
+        CCheckQueueControl<FakeJobNoWork> control(&queue);
+
+        // We call Add a number of times to simulate the behavior of adding
+        // a block of transactions at once.
+
+        std::vector<std::vector<FakeJobNoWork>> vBatches(BATCHES);
+        for (auto& vChecks : vBatches) {
+            vChecks.resize(BATCH_SIZE); // every batch is filled before any of them is handed to the queue
+        }
+        for (auto& vChecks : vBatches) {
+            // We can't make vChecks in the inner loop because we want to measure
+            // the cost of getting the memory to each thread and we might get the same
+            // memory
+            control.Add(vChecks);
+        }
+        // control waits for completion by RAII, but
+        // it is done explicitly here for clarity
+        control.Wait();
+    }
+    tg.interrupt_all(); // request interruption of the worker threads...
+    tg.join_all(); // ...and wait for all of them to finish before returning
+}
+
+// This Benchmark tests the CheckQueue with a slightly realistic workload,
+// where checks all contain a prevector that is indirect 50% of the time
+// and there is a little bit of work done between calls to Add.
+static void CCheckQueueSpeedPrevectorJob(benchmark::State& state)
+{
+    struct PrevectorJob { // check type that carries a prevector payload; evaluating it does no work
+        prevector<PREVECTOR_SIZE, uint8_t> p;
+        PrevectorJob(){
+        }
+        PrevectorJob(FastRandomContext& insecure_rand){
+            p.resize(insecure_rand.rand32() % (PREVECTOR_SIZE*2)); // length in [0, 2*PREVECTOR_SIZE), so about half exceed PREVECTOR_SIZE
+        }
+        bool operator()()
+        {
+            return true;
+        }
+        void swap(PrevectorJob& x){p.swap(x.p);}; // exchanges only the payload, the sole member
+    };
+    CCheckQueue<PrevectorJob> queue {QUEUE_BATCH_SIZE};
+    boost::thread_group tg;
+    for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { // at least MIN_CORES workers, even if GetNumCores() reports fewer
+       tg.create_thread([&]{queue.Thread();}); // captures `queue` by reference; the threads are joined below before it goes out of scope
+    }
+    while (state.KeepRunning()) {
+        // Make insecure_rand here so that each iteration is identical.
+        FastRandomContext insecure_rand(true); // NOTE(review): `true` presumably selects a deterministic seed - confirm in random.h
+        CCheckQueueControl<PrevectorJob> control(&queue);
+        std::vector<std::vector<PrevectorJob>> vBatches(BATCHES);
+        for (auto& vChecks : vBatches) {
+            vChecks.reserve(BATCH_SIZE); // reserve up front so the emplace_back calls below do not reallocate
+            for (size_t x = 0; x < BATCH_SIZE; ++x)
+                vChecks.emplace_back(insecure_rand); // constructs the job in place with a random-length payload
+            control.Add(vChecks); // unlike CCheckQueueSpeed, each batch is built immediately before it is added
+        }
+        // control waits for completion by RAII, but
+        // it is done explicitly here for clarity
+        control.Wait();
+    }
+    tg.interrupt_all(); // request interruption of the worker threads...
+    tg.join_all(); // ...and wait for all of them to finish before returning
+}
+BENCHMARK(CCheckQueueSpeed);
+BENCHMARK(CCheckQueueSpeedPrevectorJob);
diff --git a/src/bench/coin_selection.cpp b/src/bench/coin_selection.cpp
index 29fbd34631..06882f1514 100644
--- a/src/bench/coin_selection.cpp
+++ b/src/bench/coin_selection.cpp
@@ -20,7 +20,7 @@ static void addCoin(const CAmount& nValue, const CWallet& wallet, std::vector<CO
     CWalletTx* wtx = new CWalletTx(&wallet, MakeTransactionRef(std::move(tx)));
 
     int nAge = 6 * 24;
-    COutput output(wtx, nInput, nAge, true, true);
+    COutput output(wtx, nInput, nAge, true /* spendable */, true /* solvable */, true /* safe */);
     vCoins.push_back(output);
 }
 
diff --git a/src/bench/lockedpool.cpp b/src/bench/lockedpool.cpp
index 5df5b1ac6e..43a1422795 100644
--- a/src/bench/lockedpool.cpp
+++ b/src/bench/lockedpool.cpp
@@ -13,7 +13,7 @@
 #define BITER 5000
 #define MSIZE 2048
 
-static void LockedPool(benchmark::State& state)
+static void BenchLockedPool(benchmark::State& state)
 {
     void *synth_base = reinterpret_cast<void*>(0x08000000);
     const size_t synth_size = 1024*1024;
@@ -43,5 +43,5 @@ static void LockedPool(benchmark::State& state)
     addr.clear();
 }
 
-BENCHMARK(LockedPool);
+BENCHMARK(BenchLockedPool);
 
diff --git a/src/bench/mempool_eviction.cpp b/src/bench/mempool_eviction.cpp
index 5790d51a82..073bbde016 100644
--- a/src/bench/mempool_eviction.cpp
+++ b/src/bench/mempool_eviction.cpp
@@ -12,14 +12,13 @@
 static void AddTx(const CTransaction& tx, const CAmount& nFee, CTxMemPool& pool)
 {
     int64_t nTime = 0;
-    double dPriority = 10.0;
     unsigned int nHeight = 1;
     bool spendsCoinbase = false;
     unsigned int sigOpCost = 4;
     LockPoints lp;
     pool.addUnchecked(tx.GetHash(), CTxMemPoolEntry(
-                                        MakeTransactionRef(tx), nFee, nTime, dPriority, nHeight,
-                                        tx.GetValueOut(), spendsCoinbase, sigOpCost, lp));
+                                        MakeTransactionRef(tx), nFee, nTime, nHeight,
+                                        spendsCoinbase, sigOpCost, lp));
 }
 
 // Right now this is only testing eviction performance in an extremely small
@@ -97,7 +96,7 @@ static void MempoolEviction(benchmark::State& state)
     tx7.vout[1].scriptPubKey = CScript() << OP_7 << OP_EQUAL;
     tx7.vout[1].nValue = 10 * COIN;
 
-    CTxMemPool pool(CFeeRate(1000));
+    CTxMemPool pool;
 
     while (state.KeepRunning()) {
         AddTx(tx1, 10000LL, pool);
diff --git a/src/bench/perf.cpp b/src/bench/perf.cpp
index 1f43e5d3ac..a549ec29ea 100644
--- a/src/bench/perf.cpp
+++ b/src/bench/perf.cpp
@@ -6,7 +6,7 @@
 
 #if defined(__i386__) || defined(__x86_64__)
 
-/* These architectures support quering the cycle counter
+/* These architectures support querying the cycle counter
  * from user space, no need for any syscall overhead.
  */
 void perf_init(void) { }
diff --git a/src/bench/prevector_destructor.cpp b/src/bench/prevector_destructor.cpp
new file mode 100644
index 0000000000..55af3de4fe
--- /dev/null
+++ b/src/bench/prevector_destructor.cpp
@@ -0,0 +1,36 @@
+// Copyright (c) 2015-2017 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include "bench.h"
+#include "prevector.h"
+
+static void PrevectorDestructor(benchmark::State& state)
+{
+    while (state.KeepRunning()) {
+        for (auto x = 0; x < 1000; ++x) { // 1000 construct/resize/destroy rounds per KeepRunning() iteration
+            prevector<28, unsigned char> t0;
+            prevector<28, unsigned char> t1;
+            t0.resize(28); // 28 == N: presumably still held in direct (inline) storage - confirm in prevector.h
+            t1.resize(29); // 29 > N: presumably forces indirect (heap) storage - confirm in prevector.h
+        } // t0 and t1 are destroyed here, at the end of each loop body
+    }
+}
+
+static void PrevectorClear(benchmark::State& state)
+{
+    // Benchmarks clear() on a prevector resized to exactly N (t0) and to N + 1 (t1).
+    while (state.KeepRunning()) {
+        for (auto x = 0; x < 1000; ++x) { // 1000 resize/clear rounds per KeepRunning() iteration
+            prevector<28, unsigned char> t0;
+            prevector<28, unsigned char> t1;
+            t0.resize(28);
+            t0.clear();
+            t1.resize(29);
+            t1.clear(); // clear t1 (29 > N), not t0 a second time, so the larger case is actually exercised
+        }
+    }
+}
+
+BENCHMARK(PrevectorDestructor);
+BENCHMARK(PrevectorClear);