aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2018-03-01 12:12:55 +0100
committerWladimir J. van der Laan <laanwj@gmail.com>2018-03-01 12:13:08 +0100
commit32987d5aebc4e0fb757ad77dc727e5d7ee9d394b (patch)
treec40fa26e4d0350f6609b3557030c800ea9921631 /src
parent9e2ed253f50500e1db4927d511d3ac0d47aed8df (diff)
parent5aad635b78c8359adae9b2af015b67b7325c0e0b (diff)
Merge #12549: Make prevector::resize() and other prevector operations much faster
5aad635 Use memset() to optimize prevector::resize() (Evan Klitzke) e46be25 Reduce redundant code of prevector and speed it up (Akio Nakamura) f0e7aa7 Add new prevector benchmarks. (Evan Klitzke) Pull request description: This branch optimizes various `prevector` operations, especially resizing vectors. While profiling the `loadblk` thread I noticed that a lot of time was being spent in `prevector::resize()` which led to this work. I have some data here indicating that it takes up **37%** of the time in `ReadBlockFromDisk()`: https://monad.io/readblockfromdisk.svg This branch improves things significantly. For trivial types, the new results for the prevector benchmark are: * `PrevectorClearTrivial` which tests `prevector::clear()` becomes 24.6x faster * `PrevectorDestructorTrivial` which tests `prevector::~prevector()` becomes 20.5x faster * `PrevectorResizeTrivial` which tests `prevector::resize()` becomes 20.3x faster Note that in practice it looks like the prevector is only used to contain `unsigned char` types, which is a trivial type. The benchmarks are testing a bit of an extreme case, but the changes here are motivated by the profiling data for `ReadBlockFromDisk()` I linked to above. The pull request here consists of a series of three commits: * The first adds new benchmarks but does not change the prevector code. * The second is from @AkioNak , and merges some prevector optimizations he submitted in #11988 * The third optimizes `prevector::resize()` to use `memset()` when the prevector contains trivially constructible types Tree-SHA512: 28f7cbb91a19f9f43b6a5942781d7eb2e3197389186b666f086b69df12bee37773140f765426d715bfb8ebff79cb27a5f1206d0325b54b4aa65598b50fb18368
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.bench.include2
-rw-r--r--src/bench/prevector.cpp77
-rw-r--r--src/bench/prevector_destructor.cpp36
-rw-r--r--src/compat.h10
-rw-r--r--src/prevector.h114
5 files changed, 151 insertions, 88 deletions
diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include
index 13c27299f8..748c5b7887 100644
--- a/src/Makefile.bench.include
+++ b/src/Makefile.bench.include
@@ -27,7 +27,7 @@ bench_bench_bitcoin_SOURCES = \
bench/lockedpool.cpp \
bench/perf.cpp \
bench/perf.h \
- bench/prevector_destructor.cpp
+ bench/prevector.cpp
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES)
diff --git a/src/bench/prevector.cpp b/src/bench/prevector.cpp
new file mode 100644
index 0000000000..d0f28d1a3e
--- /dev/null
+++ b/src/bench/prevector.cpp
@@ -0,0 +1,77 @@
+// Copyright (c) 2015-2017 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <compat.h>
+#include <prevector.h>
+
+#include <bench/bench.h>
+
+struct nontrivial_t {
+ int x;
+ nontrivial_t() :x(-1) {}
+};
+static_assert(!IS_TRIVIALLY_CONSTRUCTIBLE<nontrivial_t>::value,
+ "expected nontrivial_t to not be trivially constructible");
+
+typedef unsigned char trivial_t;
+static_assert(IS_TRIVIALLY_CONSTRUCTIBLE<trivial_t>::value,
+ "expected trivial_t to be trivially constructible");
+
+template <typename T>
+static void PrevectorDestructor(benchmark::State& state)
+{
+ while (state.KeepRunning()) {
+ for (auto x = 0; x < 1000; ++x) {
+ prevector<28, T> t0;
+ prevector<28, T> t1;
+ t0.resize(28);
+ t1.resize(29);
+ }
+ }
+}
+
+template <typename T>
+static void PrevectorClear(benchmark::State& state)
+{
+
+ while (state.KeepRunning()) {
+ for (auto x = 0; x < 1000; ++x) {
+ prevector<28, T> t0;
+ prevector<28, T> t1;
+ t0.resize(28);
+ t0.clear();
+ t1.resize(29);
+ t0.clear();
+ }
+ }
+}
+
+template <typename T>
+void PrevectorResize(benchmark::State& state)
+{
+ while (state.KeepRunning()) {
+ prevector<28, T> t0;
+ prevector<28, T> t1;
+ for (auto x = 0; x < 1000; ++x) {
+ t0.resize(28);
+ t0.resize(0);
+ t1.resize(29);
+ t1.resize(0);
+ }
+ }
+}
+
+#define PREVECTOR_TEST(name, nontrivops, trivops) \
+ static void Prevector ## name ## Nontrivial(benchmark::State& state) { \
+ PrevectorResize<nontrivial_t>(state); \
+ } \
+ BENCHMARK(Prevector ## name ## Nontrivial, nontrivops); \
+ static void Prevector ## name ## Trivial(benchmark::State& state) { \
+ PrevectorResize<trivial_t>(state); \
+ } \
+ BENCHMARK(Prevector ## name ## Trivial, trivops);
+
+PREVECTOR_TEST(Clear, 28300, 88600)
+PREVECTOR_TEST(Destructor, 28800, 88900)
+PREVECTOR_TEST(Resize, 28900, 90300)
diff --git a/src/bench/prevector_destructor.cpp b/src/bench/prevector_destructor.cpp
deleted file mode 100644
index 39d0ee5eb1..0000000000
--- a/src/bench/prevector_destructor.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright (c) 2015-2017 The Bitcoin Core developers
-// Distributed under the MIT software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include <bench/bench.h>
-#include <prevector.h>
-
-static void PrevectorDestructor(benchmark::State& state)
-{
- while (state.KeepRunning()) {
- for (auto x = 0; x < 1000; ++x) {
- prevector<28, unsigned char> t0;
- prevector<28, unsigned char> t1;
- t0.resize(28);
- t1.resize(29);
- }
- }
-}
-
-static void PrevectorClear(benchmark::State& state)
-{
-
- while (state.KeepRunning()) {
- for (auto x = 0; x < 1000; ++x) {
- prevector<28, unsigned char> t0;
- prevector<28, unsigned char> t1;
- t0.resize(28);
- t0.clear();
- t1.resize(29);
- t0.clear();
- }
- }
-}
-
-BENCHMARK(PrevectorDestructor, 5700);
-BENCHMARK(PrevectorClear, 5600);
diff --git a/src/compat.h b/src/compat.h
index aae84b1181..8a0f901304 100644
--- a/src/compat.h
+++ b/src/compat.h
@@ -10,6 +10,16 @@
#include <config/bitcoin-config.h>
#endif
+#include <type_traits>
+
+// GCC 4.8 is missing some C++11 type_traits,
+// https://www.gnu.org/software/gcc/gcc-5/changes.html
+#if defined(__GNUC__) && __GNUC__ < 5
+#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivial
+#else
+#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivially_constructible
+#endif
+
#ifdef WIN32
#ifdef _WIN32_WINNT
#undef _WIN32_WINNT
diff --git a/src/prevector.h b/src/prevector.h
index f8d6a09145..103ead82cc 100644
--- a/src/prevector.h
+++ b/src/prevector.h
@@ -10,9 +10,12 @@
#include <stdint.h>
#include <string.h>
+#include <cstddef>
#include <iterator>
#include <type_traits>
+#include <compat.h>
+
#pragma pack(push, 1)
/** Implements a drop-in replacement for std::vector<T> which stores up to N
* elements directly (without heap allocation). The types Size and Diff are
@@ -194,16 +197,42 @@ private:
T* item_ptr(difference_type pos) { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
const T* item_ptr(difference_type pos) const { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
+ void fill(T* dst, ptrdiff_t count) {
+ if (IS_TRIVIALLY_CONSTRUCTIBLE<T>::value) {
+ // The most common use of prevector is where T=unsigned char. For
+ // trivially constructible types, we can use memset() to avoid
+ // looping.
+ ::memset(dst, 0, count * sizeof(T));
+ } else {
+ for (auto i = 0; i < count; ++i) {
+ new(static_cast<void*>(dst + i)) T();
+ }
+ }
+ }
+
+ void fill(T* dst, ptrdiff_t count, const T& value) {
+ for (auto i = 0; i < count; ++i) {
+ new(static_cast<void*>(dst + i)) T(value);
+ }
+ }
+
+ template<typename InputIterator>
+ void fill(T* dst, InputIterator first, InputIterator last) {
+ while (first != last) {
+ new(static_cast<void*>(dst)) T(*first);
+ ++dst;
+ ++first;
+ }
+ }
+
public:
void assign(size_type n, const T& val) {
clear();
if (capacity() < n) {
change_capacity(n);
}
- while (size() < n) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T(val);
- }
+ _size += n;
+ fill(item_ptr(0), n, val);
}
template<typename InputIterator>
@@ -213,11 +242,8 @@ public:
if (capacity() < n) {
change_capacity(n);
}
- while (first != last) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
- ++first;
- }
+ _size += n;
+ fill(item_ptr(0), first, last);
}
prevector() : _size(0), _union{{}} {}
@@ -228,31 +254,23 @@ public:
explicit prevector(size_type n, const T& val = T()) : _size(0) {
change_capacity(n);
- while (size() < n) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T(val);
- }
+ _size += n;
+ fill(item_ptr(0), n, val);
}
template<typename InputIterator>
prevector(InputIterator first, InputIterator last) : _size(0) {
size_type n = last - first;
change_capacity(n);
- while (first != last) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
- ++first;
- }
+ _size += n;
+ fill(item_ptr(0), first, last);
}
prevector(const prevector<N, T, Size, Diff>& other) : _size(0) {
- change_capacity(other.size());
- const_iterator it = other.begin();
- while (it != other.end()) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
- ++it;
- }
+ size_type n = other.size();
+ change_capacity(n);
+ _size += n;
+ fill(item_ptr(0), other.begin(), other.end());
}
prevector(prevector<N, T, Size, Diff>&& other) : _size(0) {
@@ -263,14 +281,7 @@ public:
if (&other == this) {
return *this;
}
- resize(0);
- change_capacity(other.size());
- const_iterator it = other.begin();
- while (it != other.end()) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
- ++it;
- }
+ assign(other.begin(), other.end());
return *this;
}
@@ -314,16 +325,20 @@ public:
}
void resize(size_type new_size) {
- if (size() > new_size) {
+ size_type cur_size = size();
+ if (cur_size == new_size) {
+ return;
+ }
+ if (cur_size > new_size) {
erase(item_ptr(new_size), end());
+ return;
}
if (new_size > capacity()) {
change_capacity(new_size);
}
- while (size() < new_size) {
- _size++;
- new(static_cast<void*>(item_ptr(size() - 1))) T();
- }
+ ptrdiff_t increase = new_size - cur_size;
+ fill(item_ptr(cur_size), increase);
+ _size += increase;
}
void reserve(size_type new_capacity) {
@@ -346,10 +361,11 @@ public:
if (capacity() < new_size) {
change_capacity(new_size + (new_size >> 1));
}
- memmove(item_ptr(p + 1), item_ptr(p), (size() - p) * sizeof(T));
+ T* ptr = item_ptr(p);
+ memmove(ptr + 1, ptr, (size() - p) * sizeof(T));
_size++;
- new(static_cast<void*>(item_ptr(p))) T(value);
- return iterator(item_ptr(p));
+ new(static_cast<void*>(ptr)) T(value);
+ return iterator(ptr);
}
void insert(iterator pos, size_type count, const T& value) {
@@ -358,11 +374,10 @@ public:
if (capacity() < new_size) {
change_capacity(new_size + (new_size >> 1));
}
- memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
+ T* ptr = item_ptr(p);
+ memmove(ptr + count, ptr, (size() - p) * sizeof(T));
_size += count;
- for (size_type i = 0; i < count; i++) {
- new(static_cast<void*>(item_ptr(p + i))) T(value);
- }
+ fill(item_ptr(p), count, value);
}
template<typename InputIterator>
@@ -373,13 +388,10 @@ public:
if (capacity() < new_size) {
change_capacity(new_size + (new_size >> 1));
}
- memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
+ T* ptr = item_ptr(p);
+ memmove(ptr + count, ptr, (size() - p) * sizeof(T));
_size += count;
- while (first != last) {
- new(static_cast<void*>(item_ptr(p))) T(*first);
- ++p;
- ++first;
- }
+ fill(ptr, first, last);
}
iterator erase(iterator pos) {