author     MarcoFalke <falke.marco@gmail.com>    2022-01-10 10:53:39 +0100
committer  MarcoFalke <falke.marco@gmail.com>    2022-01-10 10:53:45 +0100
commit     3d0850cec103950731bfc0f935ad997a4149d4b3 (patch)
tree       65eb7ff0124e24ad7c09e63789c32abbeca95cb1
parent     542e405a8517e50bf6687a02af69117931234fde (diff)
parent     efab28b06bfaa50c41337e84136cb58437e7ba00 (diff)
Merge bitcoin/bitcoin#23994: Consolidate all uses of the fast range mapping technique in util/fastrange.h
efab28b06bfaa50c41337e84136cb58437e7ba00 Add FastRange32 function and use it throughout the codebase (Pieter Wuille)
96ecd6fa3e0f53c3a25eb7c328220b819f8dde03 scripted-diff: rename MapIntoRange to FastRange64 (Pieter Wuille)
c6d15c45d971fb25551b7a66a2615e3f0bee999b [moveonly] Move MapIntoRange() to separate util/fastrange.h (Pieter Wuille)
Pull request description:
Several places in the codebase use the fast range mapping technique described in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/, some for 32-bit ranges, some for 64-bit ones.
Move all of these to `util/fastrange.h`, and give them a consistent name.
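
To illustrate the technique (an editor's sketch, not part of the PR, though the function body matches the `FastRange32` helper the PR introduces): a 32-bit hash `x` is treated as a Q32 fixed-point fraction in [0, 1) and scaled by `n`, so the upper 32 bits of the 64-bit product land in [0, n) without any division:

```cpp
#include <cstdint>
#include <cstdio>

// Upper 32 bits of the 64-bit product x * n: for a uniformly random x this
// is distributed like x % n, but costs one multiply instead of a division.
static inline uint32_t FastRange32(uint32_t x, uint32_t n)
{
    return (uint64_t{x} * n) >> 32;
}

int main()
{
    const uint32_t n = 100; // target range [0, 100)
    std::printf("%u\n", FastRange32(0x00000000u, n)); // 0  (smallest hash)
    std::printf("%u\n", FastRange32(0x80000000u, n)); // 50 (midpoint)
    std::printf("%u\n", FastRange32(0xFFFFFFFFu, n)); // 99 (largest hash)
}
```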
ACKs for top commit:
Sjors:
ACK efab28b06bfaa50c41337e84136cb58437e7ba00
shaavan:
reACK efab28b06bfaa50c41337e84136cb58437e7ba00
MarcoFalke:
review ACK efab28b06bfaa50c41337e84136cb58437e7ba00 🍸
Tree-SHA512: 3190a25ef21d17f0ab2afcd9b8d5a1813fdfac0d93996878017e84ff62eee412c823d6149ae8e92cfc3214458641e83ace4b022b4a0fe0679f78dbaee21c6227
-rw-r--r--   src/Makefile.am                  1
-rw-r--r--   src/blockfilter.cpp              2
-rw-r--r--   src/common/bloom.cpp            13
-rw-r--r--   src/cuckoocache.h               27
-rw-r--r--   src/test/fuzz/golomb_rice.cpp    2
-rw-r--r--   src/util/fastrange.h            51
-rw-r--r--   src/util/golombrice.h           33
7 files changed, 71 insertions, 58 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index b946a325c9..0b177480c8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -240,6 +240,7 @@ BITCOIN_CORE_H = \
   util/check.h \
   util/epochguard.h \
   util/error.h \
+  util/fastrange.h \
   util/fees.h \
   util/getuniquepath.h \
   util/golombrice.h \
diff --git a/src/blockfilter.cpp b/src/blockfilter.cpp
index 566254d839..63a9ba498f 100644
--- a/src/blockfilter.cpp
+++ b/src/blockfilter.cpp
@@ -29,7 +29,7 @@ uint64_t GCSFilter::HashToRange(const Element& element) const
     uint64_t hash = CSipHasher(m_params.m_siphash_k0, m_params.m_siphash_k1)
                         .Write(element.data(), element.size())
                         .Finalize();
-    return MapIntoRange(hash, m_F);
+    return FastRange64(hash, m_F);
 }
 
 std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) const
diff --git a/src/common/bloom.cpp b/src/common/bloom.cpp
index c3603b5d2a..0bb72dbcbb 100644
--- a/src/common/bloom.cpp
+++ b/src/common/bloom.cpp
@@ -11,6 +11,7 @@
 #include <script/standard.h>
 #include <span.h>
 #include <streams.h>
+#include <util/fastrange.h>
 
 #include <algorithm>
 #include <cmath>
@@ -191,14 +192,6 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
     return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
 }
 
-
-// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
-// which should be the case for a good hash.
-// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
-static inline uint32_t FastMod(uint32_t x, size_t n) {
-    return ((uint64_t)x * (uint64_t)n) >> 32;
-}
-
 void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
 {
     if (nEntriesThisGeneration == nEntriesPerGeneration) {
@@ -223,7 +216,7 @@ void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
         uint32_t h = RollingBloomHash(n, nTweak, vKey);
         int bit = h & 0x3F;
         /* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
-        uint32_t pos = FastMod(h, data.size());
+        uint32_t pos = FastRange32(h, data.size());
         /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
         data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
         data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@@ -235,7 +228,7 @@ bool CRollingBloomFilter::contains(Span<const unsigned char> vKey) const
     for (int n = 0; n < nHashFuncs; n++) {
         uint32_t h = RollingBloomHash(n, nTweak, vKey);
         int bit = h & 0x3F;
-        uint32_t pos = FastMod(h, data.size());
+        uint32_t pos = FastRange32(h, data.size());
         /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
         if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
             return false;
diff --git a/src/cuckoocache.h b/src/cuckoocache.h
index 15cb55c3ce..d0dc61c7e6 100644
--- a/src/cuckoocache.h
+++ b/src/cuckoocache.h
@@ -5,6 +5,8 @@
 #ifndef BITCOIN_CUCKOOCACHE_H
 #define BITCOIN_CUCKOOCACHE_H
 
+#include <util/fastrange.h>
+
 #include <algorithm> // std::find
 #include <array>
 #include <atomic>
@@ -219,13 +221,8 @@ private:
      * One option would be to implement the same trick the compiler uses and compute the
      * constants for exact division based on the size, as described in "{N}-bit Unsigned
      * Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is
-     * somewhat complicated and the result is still slower than other options:
-     *
-     * Instead we treat the 32-bit random number as a Q32 fixed-point number in the range
-     * [0, 1) and simply multiply it by the size. Then we just shift the result down by
-     * 32-bits to get our bucket number. The result has non-uniformity the same as a
-     * mod, but it is much faster to compute. More about this technique can be found at
-     * https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ .
+     * somewhat complicated and the result is still slower than an even simpler option:
+     * see the FastRange32 function in util/fastrange.h.
      *
      * The resulting non-uniformity is also more equally distributed which would be
      * advantageous for something like linear probing, though it shouldn't matter
@@ -241,14 +238,14 @@ private:
      */
     inline std::array<uint32_t, 8> compute_hashes(const Element& e) const
    {
-        return {{(uint32_t)(((uint64_t)hash_function.template operator()<0>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<1>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<2>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<3>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<4>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<5>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<6>(e) * (uint64_t)size) >> 32),
-                 (uint32_t)(((uint64_t)hash_function.template operator()<7>(e) * (uint64_t)size) >> 32)}};
+        return {{FastRange32(hash_function.template operator()<0>(e), size),
+                 FastRange32(hash_function.template operator()<1>(e), size),
+                 FastRange32(hash_function.template operator()<2>(e), size),
+                 FastRange32(hash_function.template operator()<3>(e), size),
+                 FastRange32(hash_function.template operator()<4>(e), size),
+                 FastRange32(hash_function.template operator()<5>(e), size),
+                 FastRange32(hash_function.template operator()<6>(e), size),
+                 FastRange32(hash_function.template operator()<7>(e), size)}};
     }
 
     /** invalid returns a special index that can never be inserted to
diff --git a/src/test/fuzz/golomb_rice.cpp b/src/test/fuzz/golomb_rice.cpp
index 2d0b29953c..b4bb4c6dc6 100644
--- a/src/test/fuzz/golomb_rice.cpp
+++ b/src/test/fuzz/golomb_rice.cpp
@@ -25,7 +25,7 @@ uint64_t HashToRange(const std::vector<uint8_t>& element, const uint64_t f)
     const uint64_t hash = CSipHasher(0x0706050403020100ULL, 0x0F0E0D0C0B0A0908ULL)
                               .Write(element.data(), element.size())
                               .Finalize();
-    return MapIntoRange(hash, f);
+    return FastRange64(hash, f);
 }
 
 std::vector<uint64_t> BuildHashedSet(const std::unordered_set<std::vector<uint8_t>, ByteVectorHash>& elements, const uint64_t f)
diff --git a/src/util/fastrange.h b/src/util/fastrange.h
new file mode 100644
index 0000000000..77cb883ce0
--- /dev/null
+++ b/src/util/fastrange.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2018-2020 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_UTIL_FASTRANGE_H
+#define BITCOIN_UTIL_FASTRANGE_H
+
+#include <cstdint>
+
+/* This file offers implementations of the fast range reduction technique described
+ * in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+ *
+ * In short, they take an integer x and a range n, and return the upper bits of
+ * (x * n). If x is uniformly distributed over its domain, the result is as close to
+ * uniformly distributed over [0, n) as (x mod n) would be, but significantly faster.
+ */
+
+/** Fast range reduction with 32-bit input and 32-bit range. */
+static inline uint32_t FastRange32(uint32_t x, uint32_t n)
+{
+    return (uint64_t{x} * n) >> 32;
+}
+
+/** Fast range reduction with 64-bit input and 64-bit range. */
+static inline uint64_t FastRange64(uint64_t x, uint64_t n)
+{
+#ifdef __SIZEOF_INT128__
+    return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
+#else
+    // To perform the calculation on 64-bit numbers without losing the
+    // result to overflow, split the numbers into the most significant and
+    // least significant 32 bits and perform multiplication piece-wise.
+    //
+    // See: https://stackoverflow.com/a/26855440
+    const uint64_t x_hi = x >> 32;
+    const uint64_t x_lo = x & 0xFFFFFFFF;
+    const uint64_t n_hi = n >> 32;
+    const uint64_t n_lo = n & 0xFFFFFFFF;
+
+    const uint64_t ac = x_hi * n_hi;
+    const uint64_t ad = x_hi * n_lo;
+    const uint64_t bc = x_lo * n_hi;
+    const uint64_t bd = x_lo * n_lo;
+
+    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
+    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
+    return upper64;
+#endif
+}
+
+#endif // BITCOIN_UTIL_FASTRANGE_H
diff --git a/src/util/golombrice.h b/src/util/golombrice.h
index f14cb594db..4ff4f6d7e5 100644
--- a/src/util/golombrice.h
+++ b/src/util/golombrice.h
@@ -5,6 +5,8 @@
 #ifndef BITCOIN_UTIL_GOLOMBRICE_H
 #define BITCOIN_UTIL_GOLOMBRICE_H
 
+#include <util/fastrange.h>
+
 #include <streams.h>
 
 #include <cstdint>
@@ -40,35 +42,4 @@ uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P)
     return (q << P) + r;
 }
 
-// Map a value x that is uniformly distributed in the range [0, 2^64) to a
-// value uniformly distributed in [0, n) by returning the upper 64 bits of
-// x * n.
-//
-// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
-static inline uint64_t MapIntoRange(uint64_t x, uint64_t n)
-{
-#ifdef __SIZEOF_INT128__
-    return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
-#else
-    // To perform the calculation on 64-bit numbers without losing the
-    // result to overflow, split the numbers into the most significant and
-    // least significant 32 bits and perform multiplication piece-wise.
-    //
-    // See: https://stackoverflow.com/a/26855440
-    const uint64_t x_hi = x >> 32;
-    const uint64_t x_lo = x & 0xFFFFFFFF;
-    const uint64_t n_hi = n >> 32;
-    const uint64_t n_lo = n & 0xFFFFFFFF;
-
-    const uint64_t ac = x_hi * n_hi;
-    const uint64_t ad = x_hi * n_lo;
-    const uint64_t bc = x_lo * n_hi;
-    const uint64_t bd = x_lo * n_lo;
-
-    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
-    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
-    return upper64;
-#endif
-}
-
 #endif // BITCOIN_UTIL_GOLOMBRICE_H
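
As a quick sanity check on the portable `#else` branch of `FastRange64` (which is only taken when the compiler lacks `unsigned __int128`, where the whole function typically compiles to a single widening multiply), here is a minimal standalone harness, an editor's sketch rather than part of the commit. It mirrors the piece-wise 32-bit multiplication and cross-checks it against the 128-bit path, so it needs a compiler that defines `__SIZEOF_INT128__`, such as GCC or Clang on a 64-bit target:

```cpp
#include <cassert>
#include <cstdint>

// Portable "high 64 bits of a 64x64-bit product", mirroring the #else
// branch of FastRange64: split each operand into 32-bit halves, multiply
// piece-wise, and carry the middle partial sums into the top word.
static uint64_t MulHi64Portable(uint64_t x, uint64_t n)
{
    const uint64_t x_hi = x >> 32, x_lo = x & 0xFFFFFFFF;
    const uint64_t n_hi = n >> 32, n_lo = n & 0xFFFFFFFF;

    const uint64_t ac = x_hi * n_hi;
    const uint64_t ad = x_hi * n_lo;
    const uint64_t bc = x_lo * n_hi;
    const uint64_t bd = x_lo * n_lo;

    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
    return ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
}

int main()
{
    const uint64_t cases[] = {0, 1, 0xFFFFFFFFull, 0x100000000ull,
                              0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull};
    for (uint64_t x : cases) {
        for (uint64_t n : cases) {
            // Reference result via the 128-bit multiply path.
            const uint64_t want = (static_cast<unsigned __int128>(x) * n) >> 64;
            assert(MulHi64Portable(x, n) == want);
        }
    }
}
```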