aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2018-05-18 18:39:46 +0200
committerWladimir J. van der Laan <laanwj@gmail.com>2018-05-18 18:46:35 +0200
commitd9ebb63919fb311ace0ae977e3183ccb80ed7d3c (patch)
treeb70e82a08a22fb810bf6879c31c8d768a1251512
parent1a8b12c69c0158e6440c53a34b03a32fc0580878 (diff)
parent9aac9f90d5e56752cc6cbfac48063ad29a01143c (diff)
Merge #13176: Improve CRollingBloomFilter performance: replace modulus with FastMod
9aac9f90d5e56752cc6cbfac48063ad29a01143c replace modulus with FastMod (Martin Ankerl) Pull request description: Not sure if this is optimization is necessary, but anyway I have some spare time so here it is. This replaces the slow modulo operation with a much faster 64bit multiplication & shift. This works when the hash is uniformly distributed between 0 and 2^32-1. This speeds up the benchmark by a factor of about 1.3: ``` RollingBloom, 5, 1500000, 3.73733, 4.97569e-07, 4.99002e-07, 4.98372e-07 # before RollingBloom, 5, 1500000, 2.86842, 3.81630e-07, 3.83730e-07, 3.82473e-07 # FastMod ``` Be aware that this changes the internal data of the filter, so this should probably not be used for CBloomFilter because of interoperability problems. Tree-SHA512: 04104f3fb09f56c9d14458a6aad919aeb0a5af944e8ee6a31f00e93c753e22004648c1cd65bf36752b6addec528d19fb665c27b955ce1666a85a928e17afa47a
-rw-r--r--src/bloom.cpp13
1 files changed, 11 insertions, 2 deletions
diff --git a/src/bloom.cpp b/src/bloom.cpp
index f07b5b6066..f8e28edded 100644
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@@ -245,6 +245,14 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
}
+
+// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
+// which should be the case for a good hash.
+// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+static inline uint32_t FastMod(uint32_t x, size_t n) {
+ return ((uint64_t)x * (uint64_t)n) >> 32;
+}
+
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
{
if (nEntriesThisGeneration == nEntriesPerGeneration) {
@@ -268,7 +276,8 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
- uint32_t pos = (h >> 6) % data.size();
+ /* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
+ uint32_t pos = FastMod(h, data.size());
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@@ -286,7 +295,7 @@ bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
- uint32_t pos = (h >> 6) % data.size();
+ uint32_t pos = FastMod(h, data.size());
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
return false;