aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/common/bloom.cpp13
-rw-r--r--src/cuckoocache.h27
-rw-r--r--src/util/fastrange.h20
3 files changed, 30 insertions, 30 deletions
diff --git a/src/common/bloom.cpp b/src/common/bloom.cpp
index c3603b5d2a..0bb72dbcbb 100644
--- a/src/common/bloom.cpp
+++ b/src/common/bloom.cpp
@@ -11,6 +11,7 @@
#include <script/standard.h>
#include <span.h>
#include <streams.h>
+#include <util/fastrange.h>
#include <algorithm>
#include <cmath>
@@ -191,14 +192,6 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
}
-
-// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
-// which should be the case for a good hash.
-// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
-static inline uint32_t FastMod(uint32_t x, size_t n) {
- return ((uint64_t)x * (uint64_t)n) >> 32;
-}
-
void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
{
if (nEntriesThisGeneration == nEntriesPerGeneration) {
@@ -223,7 +216,7 @@ void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
/* FastRange32 works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
- uint32_t pos = FastMod(h, data.size());
+ uint32_t pos = FastRange32(h, data.size());
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@@ -235,7 +228,7 @@ bool CRollingBloomFilter::contains(Span<const unsigned char> vKey) const
for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
- uint32_t pos = FastMod(h, data.size());
+ uint32_t pos = FastRange32(h, data.size());
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
return false;
diff --git a/src/cuckoocache.h b/src/cuckoocache.h
index 15cb55c3ce..d0dc61c7e6 100644
--- a/src/cuckoocache.h
+++ b/src/cuckoocache.h
@@ -5,6 +5,8 @@
#ifndef BITCOIN_CUCKOOCACHE_H
#define BITCOIN_CUCKOOCACHE_H
+#include <util/fastrange.h>
+
#include <algorithm> // std::find
#include <array>
#include <atomic>
@@ -219,13 +221,8 @@ private:
* One option would be to implement the same trick the compiler uses and compute the
* constants for exact division based on the size, as described in "{N}-bit Unsigned
* Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is
- * somewhat complicated and the result is still slower than other options:
- *
- * Instead we treat the 32-bit random number as a Q32 fixed-point number in the range
- * [0, 1) and simply multiply it by the size. Then we just shift the result down by
- * 32-bits to get our bucket number. The result has non-uniformity the same as a
- * mod, but it is much faster to compute. More about this technique can be found at
- * https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ .
+ * somewhat complicated and the result is still slower than an even simpler option:
+ * see the FastRange32 function in util/fastrange.h.
*
* The resulting non-uniformity is also more equally distributed which would be
* advantageous for something like linear probing, though it shouldn't matter
@@ -241,14 +238,14 @@ private:
*/
inline std::array<uint32_t, 8> compute_hashes(const Element& e) const
{
- return {{(uint32_t)(((uint64_t)hash_function.template operator()<0>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<1>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<2>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<3>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<4>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<5>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<6>(e) * (uint64_t)size) >> 32),
- (uint32_t)(((uint64_t)hash_function.template operator()<7>(e) * (uint64_t)size) >> 32)}};
+ return {{FastRange32(hash_function.template operator()<0>(e), size),
+ FastRange32(hash_function.template operator()<1>(e), size),
+ FastRange32(hash_function.template operator()<2>(e), size),
+ FastRange32(hash_function.template operator()<3>(e), size),
+ FastRange32(hash_function.template operator()<4>(e), size),
+ FastRange32(hash_function.template operator()<5>(e), size),
+ FastRange32(hash_function.template operator()<6>(e), size),
+ FastRange32(hash_function.template operator()<7>(e), size)}};
}
/** invalid returns a special index that can never be inserted to
diff --git a/src/util/fastrange.h b/src/util/fastrange.h
index 963d21c03a..77cb883ce0 100644
--- a/src/util/fastrange.h
+++ b/src/util/fastrange.h
@@ -7,11 +7,21 @@
#include <cstdint>
-// Map a value x that is uniformly distributed in the range [0, 2^64) to a
-// value uniformly distributed in [0, n) by returning the upper 64 bits of
-// x * n.
-//
-// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+/* This file offers implementations of the fast range reduction technique described
+ * in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+ *
+ * In short, they take an integer x and a range n, and return the upper bits of
+ * (x * n). If x is uniformly distributed over its domain, the result is as close to
+ * uniformly distributed over [0, n) as (x mod n) would be, but much faster to compute.
+ */
+
+/**
+ * Fast range reduction with 32-bit input and 32-bit range.
+ *
+ * Widens x to 64 bits, multiplies it by n, and returns the upper 32 bits of
+ * that 64-bit product, i.e. floor(x * n / 2^32). Since x is below 2^32, the
+ * result is always smaller than n (and is 0 when n is 0).
+ *
+ * The result is only close to uniformly distributed over [0, n) when x is
+ * itself uniformly distributed over the full 32-bit range (e.g. a hash).
+ * Only the upper bits of x meaningfully influence the result.
+ */
+static inline uint32_t FastRange32(uint32_t x, uint32_t n)
+{
+ return (uint64_t{x} * n) >> 32;
+}
+
+/** Fast range reduction with 64-bit input and 64-bit range. */
static inline uint64_t FastRange64(uint64_t x, uint64_t n)
{
#ifdef __SIZEOF_INT128__