diff options
author | Pieter Wuille <pieter.wuille@gmail.com> | 2015-11-27 13:20:29 +0100 |
---|---|---|
committer | Pieter Wuille <pieter.wuille@gmail.com> | 2015-11-28 18:53:55 +0100 |
commit | 086ee67d839b33bf475177f680fcc848a0625266 (patch) | |
tree | 068fc2942de60c7c058652bc1d57d99f74908e25 /src/bloom.h | |
parent | 92aa7311d64cb1a0109d63d6bf7406c119bf94cd (diff) |
Switch to a more efficient rolling Bloom filter
For each 'bit' in the filter we really maintain 2 bits, which store either:
0: not set
1-3: set in generation N
After (nElements / 2) insertions, we switch to a new generation, and wipe
entries which already had the new generation number, effectively switching
from the last 1.5 * nElements set to the last 1.0 * nElements set.
This is 25% more space efficient than the previous implementation, and can
(at peak) store 1.5 times the requested amount of history (though only
1.0 times the requested history is guaranteed).
The existing unit tests should be sufficient.
Diffstat (limited to 'src/bloom.h')
-rw-r--r-- | src/bloom.h | 26 |
1 files changed, 21 insertions, 5 deletions
diff --git a/src/bloom.h b/src/bloom.h index a4dba8cb4f..98cfbdb833 100644 --- a/src/bloom.h +++ b/src/bloom.h @@ -110,8 +110,11 @@ public: * reset() is provided, which also changes nTweak to decrease the impact of * false-positives. * - * contains(item) will always return true if item was one of the last N things + * contains(item) will always return true if item was one of the last N to 1.5*N * insert()'ed ... but may also return true for items that were not inserted. + * + * It needs around 1.8 bytes per element per factor 0.1 of false positive rate. + * (More accurately: 3/(log(256)*log(2)) * log(1/fpRate) * nElements bytes) */ class CRollingBloomFilter { @@ -129,10 +132,23 @@ public: void reset(); private: - unsigned int nBloomSize; - unsigned int nInsertions; - CBloomFilter b1, b2; -}; + int nEntriesPerGeneration; + int nEntriesThisGeneration; + int nGeneration; + std::vector<uint32_t> data; + unsigned int nTweak; + int nHashFuncs; + + unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const; + inline int get(uint32_t position) const { + return (data[(position >> 4) % data.size()] >> (2 * (position & 0xF))) & 0x3; + } + + inline void put(uint32_t position, uint32_t val) { + uint32_t& cell = data[(position >> 4) % data.size()]; + cell = (cell & ~(((uint32_t)3) << (2 * (position & 0xF)))) | (val << (2 * (position & 0xF))); + } +}; #endif // BITCOIN_BLOOM_H |