Rolling bloom filter class

For when you need to keep track of the last N items you've seen, and can tolerate some false positives.

Rebased-by: Pieter Wuille <pieter.wuille@gmail.com>
author: Gavin Andresen <gavinandresen@gmail.com> 2015-04-24 13:14:45 -0400
committer: Pieter Wuille <pieter.wuille@gmail.com> 2015-04-30 07:58:29 -0700
commit: 69a5f8be0abda1e462f8ef44acadd2cbfaa850fb (patch)
tree: 0369fcb002ab0d63aba1522b151ff89248c44818 /src/bloom.cpp
parent: 8a10000222cb49eb253b41802ecf312adaf79439 (diff)
1 files changed, 67 insertions, 16 deletions
diff --git a/src/bloom.cpp b/src/bloom.cpp
index e60576f4b4..36cba491c4 100644
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@@ -21,22 +21,33 @@
 using namespace std;
 
 CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
-/**
- * The ideal size for a bloom filter with a given number of elements and false positive rate is:
- * - nElements * log(fp rate) / ln(2)^2
- * We ignore filter parameters which will create a bloom filter larger than the protocol limits
- */
-vData(min((unsigned int)(-1  / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
-/**
- * The ideal number of hash functions is filter size * ln(2) / number of elements
- * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
- * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
- */
-isFull(false),
-isEmpty(false),
-nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
-nTweak(nTweakIn),
-nFlags(nFlagsIn)
+    /**
+     * The ideal size for a bloom filter with a given number of elements and false positive rate is:
+     * - nElements * log(fp rate) / ln(2)^2
+     * We ignore filter parameters which will create a bloom filter larger than the protocol limits
+     */
+    vData(min((unsigned int)(-1  / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
+    /**
+     * The ideal number of hash functions is filter size * ln(2) / number of elements
+     * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
+     * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
+     */
+    isFull(false),
+    isEmpty(false),
+    nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
+    nTweak(nTweakIn),
+    nFlags(nFlagsIn)
+{
+}
+
+// Private constructor used by CRollingBloomFilter
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
+    vData((unsigned int)(-1  / LN2SQUARED * nElements * log(nFPRate)) / 8),
+    isFull(false),
+    isEmpty(true),
+    nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
+    nTweak(nTweakIn),
+    nFlags(BLOOM_UPDATE_NONE)
 {
 }
 
@@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
     isFull = full;
     isEmpty = empty;
 }
+
+CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
+    b1(nElements * 2, fpRate, nTweak), b2(nElements * 2, fpRate, nTweak)
+{
+    // Implemented using two bloom filters of 2 * nElements each.
+    // We fill them up, and clear them, staggered, every nElements
+    // inserted, so at least one always contains the last nElements
+    // inserted.
+    nBloomSize = nElements * 2;
+    nInsertions = 0;
+}
+
+void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
+{
+    if (nInsertions == 0) {
+        b1.clear();
+    } else if (nInsertions == nBloomSize / 2) {
+        b2.clear();
+    }
+    b1.insert(vKey);
+    b2.insert(vKey);
+    if (++nInsertions == nBloomSize) {
+        nInsertions = 0;
+    }
+}
+
+bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
+{
+    if (nInsertions < nBloomSize / 2) {
+        return b2.contains(vKey);
+    }
+    return b1.contains(vKey);
+}
+
+void CRollingBloomFilter::clear()
+{
+    b1.clear();
+    b2.clear();
+    nInsertions = 0;
+}
author	Gavin Andresen <gavinandresen@gmail.com>	2015-04-24 13:14:45 -0400
committer	Pieter Wuille <pieter.wuille@gmail.com>	2015-04-30 07:58:29 -0700
commit	69a5f8be0abda1e462f8ef44acadd2cbfaa850fb (patch)
tree	0369fcb002ab0d63aba1522b151ff89248c44818 /src/bloom.cpp
parent	8a10000222cb49eb253b41802ecf312adaf79439 (diff)