diff options
author | Gavin Andresen <gavinandresen@gmail.com> | 2015-04-24 13:14:45 -0400 |
---|---|---|
committer | Pieter Wuille <pieter.wuille@gmail.com> | 2015-04-30 07:58:29 -0700 |
commit | 69a5f8be0abda1e462f8ef44acadd2cbfaa850fb (patch) | |
tree | 0369fcb002ab0d63aba1522b151ff89248c44818 /src/bloom.cpp | |
parent | 8a10000222cb49eb253b41802ecf312adaf79439 (diff) |
Rolling bloom filter class

For when you need to keep track of the last N items
you've seen, and can tolerate some false-positives.

Rebased-by: Pieter Wuille <pieter.wuille@gmail.com>
Diffstat (limited to 'src/bloom.cpp')
-rw-r--r-- | src/bloom.cpp | 83 |
1 files changed, 67 insertions, 16 deletions
```diff
diff --git a/src/bloom.cpp b/src/bloom.cpp
index e60576f4b4..36cba491c4 100644
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@@ -21,22 +21,33 @@
 using namespace std;
 
 CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
-/**
- * The ideal size for a bloom filter with a given number of elements and false positive rate is:
- * - nElements * log(fp rate) / ln(2)^2
- * We ignore filter parameters which will create a bloom filter larger than the protocol limits
- */
-vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
-/**
- * The ideal number of hash functions is filter size * ln(2) / number of elements
- * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
- * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
- */
-isFull(false),
-isEmpty(false),
-nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
-nTweak(nTweakIn),
-nFlags(nFlagsIn)
+    /**
+     * The ideal size for a bloom filter with a given number of elements and false positive rate is:
+     * - nElements * log(fp rate) / ln(2)^2
+     * We ignore filter parameters which will create a bloom filter larger than the protocol limits
+     */
+    vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
+    /**
+     * The ideal number of hash functions is filter size * ln(2) / number of elements
+     * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
+     * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
+     */
+    isFull(false),
+    isEmpty(false),
+    nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
+    nTweak(nTweakIn),
+    nFlags(nFlagsIn)
+{
+}
+
+// Private constructor used by CRollingBloomFilter
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
+    vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
+    isFull(false),
+    isEmpty(true),
+    nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
+    nTweak(nTweakIn),
+    nFlags(BLOOM_UPDATE_NONE)
 {
 }
 
@@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
     isFull = full;
     isEmpty = empty;
 }
+
+CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
+    b1(nElements * 2, fpRate, nTweak), b2(nElements * 2, fpRate, nTweak)
+{
+    // Implemented using two bloom filters of 2 * nElements each.
+    // We fill them up, and clear them, staggered, every nElements
+    // inserted, so at least one always contains the last nElements
+    // inserted.
+    nBloomSize = nElements * 2;
+    nInsertions = 0;
+}
+
+void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
+{
+    if (nInsertions == 0) {
+        b1.clear();
+    } else if (nInsertions == nBloomSize / 2) {
+        b2.clear();
+    }
+    b1.insert(vKey);
+    b2.insert(vKey);
+    if (++nInsertions == nBloomSize) {
+        nInsertions = 0;
+    }
+}
+
+bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
+{
+    if (nInsertions < nBloomSize / 2) {
+        return b2.contains(vKey);
+    }
+    return b1.contains(vKey);
+}
+
+void CRollingBloomFilter::clear()
+{
+    b1.clear();
+    b2.clear();
+    nInsertions = 0;
+}
```