diff options
author | Wladimir J. van der Laan <laanwj@gmail.com> | 2018-03-01 12:12:55 +0100 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@gmail.com> | 2018-03-01 12:13:08 +0100 |
commit | 32987d5aebc4e0fb757ad77dc727e5d7ee9d394b (patch) | |
tree | c40fa26e4d0350f6609b3557030c800ea9921631 /src | |
parent | 9e2ed253f50500e1db4927d511d3ac0d47aed8df (diff) | |
parent | 5aad635b78c8359adae9b2af015b67b7325c0e0b (diff) |
Merge #12549: Make prevector::resize() and other prevector operations much faster
5aad635 Use memset() to optimize prevector::resize() (Evan Klitzke)
e46be25 Reduce redundant code of prevector and speed it up (Akio Nakamura)
f0e7aa7 Add new prevector benchmarks. (Evan Klitzke)
Pull request description:
This branch optimizes various `prevector` operations, especially resizing vectors. While profiling the `loadblk` thread I noticed that a lot of time was being spent in `prevector::resize()` which led to this work. I have some data here indicating that it takes up **37%** of the time in `ReadBlockFromDisk()`: https://monad.io/readblockfromdisk.svg
This branch improves things significantly. For trivial types, the new results for the prevector benchmark are:
* `PrevectorClearTrivial` which tests `prevector::clear()` becomes 24.6x faster
* `PrevectorDestructorTrivial` which tests `prevector::~prevector()` becomes 20.5x faster
* `PrevectorResizeTrivial` which tests `prevector::resize()` becomes 20.3x faster
Note that in practice it looks like the prevector is only used to contain `unsigned char` types, which is a trivial type. The benchmarks are testing a bit of an extreme case, but the changes here are motivated by the profiling data for `ReadBlockFromDisk()` I linked to above.
The pull request here consists of a series of three commits:
* The first adds new benchmarks but does not change the prevector code.
* The second is from @AkioNak , and merges some prevector optimizations he submitted in #11988
* The third optimizes `prevector::resize()` to use `memset()` when the prevector contains trivially constructible types
Tree-SHA512: 28f7cbb91a19f9f43b6a5942781d7eb2e3197389186b666f086b69df12bee37773140f765426d715bfb8ebff79cb27a5f1206d0325b54b4aa65598b50fb18368
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.bench.include | 2 | ||||
-rw-r--r-- | src/bench/prevector.cpp | 77 | ||||
-rw-r--r-- | src/bench/prevector_destructor.cpp | 36 | ||||
-rw-r--r-- | src/compat.h | 10 | ||||
-rw-r--r-- | src/prevector.h | 114 |
5 files changed, 151 insertions, 88 deletions
diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include index 13c27299f8..748c5b7887 100644 --- a/src/Makefile.bench.include +++ b/src/Makefile.bench.include @@ -27,7 +27,7 @@ bench_bench_bitcoin_SOURCES = \ bench/lockedpool.cpp \ bench/perf.cpp \ bench/perf.h \ - bench/prevector_destructor.cpp + bench/prevector.cpp nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES) diff --git a/src/bench/prevector.cpp b/src/bench/prevector.cpp new file mode 100644 index 0000000000..d0f28d1a3e --- /dev/null +++ b/src/bench/prevector.cpp @@ -0,0 +1,77 @@ +// Copyright (c) 2015-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <compat.h> +#include <prevector.h> + +#include <bench/bench.h> + +struct nontrivial_t { + int x; + nontrivial_t() :x(-1) {} +}; +static_assert(!IS_TRIVIALLY_CONSTRUCTIBLE<nontrivial_t>::value, + "expected nontrivial_t to not be trivially constructible"); + +typedef unsigned char trivial_t; +static_assert(IS_TRIVIALLY_CONSTRUCTIBLE<trivial_t>::value, + "expected trivial_t to be trivially constructible"); + +template <typename T> +static void PrevectorDestructor(benchmark::State& state) +{ + while (state.KeepRunning()) { + for (auto x = 0; x < 1000; ++x) { + prevector<28, T> t0; + prevector<28, T> t1; + t0.resize(28); + t1.resize(29); + } + } +} + +template <typename T> +static void PrevectorClear(benchmark::State& state) +{ + + while (state.KeepRunning()) { + for (auto x = 0; x < 1000; ++x) { + prevector<28, T> t0; + prevector<28, T> t1; + t0.resize(28); + t0.clear(); + t1.resize(29); + t0.clear(); + } + } +} + +template <typename T> +void PrevectorResize(benchmark::State& state) +{ + while (state.KeepRunning()) { + prevector<28, T> t0; + prevector<28, T> t1; + for (auto x = 0; x < 1000; ++x) { + t0.resize(28); + t0.resize(0); + t1.resize(29); + t1.resize(0); + } + } +} + +#define PREVECTOR_TEST(name, nontrivops, trivops) \ + static void Prevector ## name ## Nontrivial(benchmark::State& state) { \ + PrevectorResize<nontrivial_t>(state); \ + } \ + BENCHMARK(Prevector ## name ## Nontrivial, nontrivops); \ + static void Prevector ## name ## Trivial(benchmark::State& state) { \ + PrevectorResize<trivial_t>(state); \ + } \ + BENCHMARK(Prevector ## name ## Trivial, trivops); + +PREVECTOR_TEST(Clear, 28300, 88600) +PREVECTOR_TEST(Destructor, 28800, 88900) +PREVECTOR_TEST(Resize, 28900, 90300) diff --git a/src/bench/prevector_destructor.cpp b/src/bench/prevector_destructor.cpp deleted file mode 100644 index 39d0ee5eb1..0000000000 --- a/src/bench/prevector_destructor.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2015-2017 The Bitcoin Core developers -// Distributed under the MIT software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include <bench/bench.h> -#include <prevector.h> - -static void PrevectorDestructor(benchmark::State& state) -{ - while (state.KeepRunning()) { - for (auto x = 0; x < 1000; ++x) { - prevector<28, unsigned char> t0; - prevector<28, unsigned char> t1; - t0.resize(28); - t1.resize(29); - } - } -} - -static void PrevectorClear(benchmark::State& state) -{ - - while (state.KeepRunning()) { - for (auto x = 0; x < 1000; ++x) { - prevector<28, unsigned char> t0; - prevector<28, unsigned char> t1; - t0.resize(28); - t0.clear(); - t1.resize(29); - t0.clear(); - } - } -} - -BENCHMARK(PrevectorDestructor, 5700); -BENCHMARK(PrevectorClear, 5600); diff --git a/src/compat.h b/src/compat.h index aae84b1181..8a0f901304 100644 --- a/src/compat.h +++ b/src/compat.h @@ -10,6 +10,16 @@ #include <config/bitcoin-config.h> #endif +#include <type_traits> + +// GCC 4.8 is missing some C++11 type_traits, +// https://www.gnu.org/software/gcc/gcc-5/changes.html +#if defined(__GNUC__) && __GNUC__ < 5 +#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivial +#else +#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivially_constructible +#endif + #ifdef WIN32 #ifdef _WIN32_WINNT #undef _WIN32_WINNT diff --git a/src/prevector.h b/src/prevector.h index f8d6a09145..103ead82cc 100644 --- a/src/prevector.h +++ b/src/prevector.h @@ -10,9 +10,12 @@ #include <stdint.h> #include <string.h> +#include <cstddef> #include <iterator> #include <type_traits> +#include <compat.h> + #pragma pack(push, 1) /** Implements a drop-in replacement for std::vector<T> which stores up to N * elements directly (without heap allocation). The types Size and Diff are @@ -194,16 +197,42 @@ private: T* item_ptr(difference_type pos) { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); } const T* item_ptr(difference_type pos) const { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); } + void fill(T* dst, ptrdiff_t count) { + if (IS_TRIVIALLY_CONSTRUCTIBLE<T>::value) { + // The most common use of prevector is where T=unsigned char. For + // trivially constructible types, we can use memset() to avoid + // looping. + ::memset(dst, 0, count * sizeof(T)); + } else { + for (auto i = 0; i < count; ++i) { + new(static_cast<void*>(dst + i)) T(); + } + } + } + + void fill(T* dst, ptrdiff_t count, const T& value) { + for (auto i = 0; i < count; ++i) { + new(static_cast<void*>(dst + i)) T(value); + } + } + + template<typename InputIterator> + void fill(T* dst, InputIterator first, InputIterator last) { + while (first != last) { + new(static_cast<void*>(dst)) T(*first); + ++dst; + ++first; + } + } + public: void assign(size_type n, const T& val) { clear(); if (capacity() < n) { change_capacity(n); } - while (size() < n) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(val); - } + _size += n; + fill(item_ptr(0), n, val); } template<typename InputIterator> @@ -213,11 +242,8 @@ public: if (capacity() < n) { change_capacity(n); } - while (first != last) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(*first); - ++first; - } + _size += n; + fill(item_ptr(0), first, last); } prevector() : _size(0), _union{{}} {} @@ -228,31 +254,23 @@ public: explicit prevector(size_type n, const T& val = T()) : _size(0) { change_capacity(n); - while (size() < n) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(val); - } + _size += n; + fill(item_ptr(0), n, val); } template<typename InputIterator> prevector(InputIterator first, InputIterator last) : _size(0) { size_type n = last - first; change_capacity(n); - while (first != last) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(*first); - ++first; - } + _size += n; + fill(item_ptr(0), first, last); } prevector(const prevector<N, T, Size, Diff>& other) : _size(0) { - change_capacity(other.size()); - const_iterator it = other.begin(); - while (it != other.end()) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(*it); - ++it; - } + size_type n = other.size(); + change_capacity(n); + _size += n; + fill(item_ptr(0), other.begin(), other.end()); } prevector(prevector<N, T, Size, Diff>&& other) : _size(0) { @@ -263,14 +281,7 @@ public: if (&other == this) { return *this; } - resize(0); - change_capacity(other.size()); - const_iterator it = other.begin(); - while (it != other.end()) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(*it); - ++it; - } + assign(other.begin(), other.end()); return *this; } @@ -314,16 +325,20 @@ public: } void resize(size_type new_size) { - if (size() > new_size) { + size_type cur_size = size(); + if (cur_size == new_size) { + return; + } + if (cur_size > new_size) { erase(item_ptr(new_size), end()); + return; } if (new_size > capacity()) { change_capacity(new_size); } - while (size() < new_size) { - _size++; - new(static_cast<void*>(item_ptr(size() - 1))) T(); - } + ptrdiff_t increase = new_size - cur_size; + fill(item_ptr(cur_size), increase); + _size += increase; } void reserve(size_type new_capacity) { @@ -346,10 +361,11 @@ public: if (capacity() < new_size) { change_capacity(new_size + (new_size >> 1)); } - memmove(item_ptr(p + 1), item_ptr(p), (size() - p) * sizeof(T)); + T* ptr = item_ptr(p); + memmove(ptr + 1, ptr, (size() - p) * sizeof(T)); _size++; - new(static_cast<void*>(item_ptr(p))) T(value); - return iterator(item_ptr(p)); + new(static_cast<void*>(ptr)) T(value); + return iterator(ptr); } void insert(iterator pos, size_type count, const T& value) { @@ -358,11 +374,10 @@ public: if (capacity() < new_size) { change_capacity(new_size + (new_size >> 1)); } - memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T)); + T* ptr = item_ptr(p); + memmove(ptr + count, ptr, (size() - p) * sizeof(T)); _size += count; - for (size_type i = 0; i < count; i++) { - new(static_cast<void*>(item_ptr(p + i))) T(value); - } + fill(item_ptr(p), count, value); } template<typename InputIterator> @@ -373,13 +388,10 @@ public: if (capacity() < new_size) { change_capacity(new_size + (new_size >> 1)); } - memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T)); + T* ptr = item_ptr(p); + memmove(ptr + count, ptr, (size() - p) * sizeof(T)); _size += count; - while (first != last) { - new(static_cast<void*>(item_ptr(p))) T(*first); - ++p; - ++first; - } + fill(ptr, first, last); } iterator erase(iterator pos) { |