From 69a5f8be0abda1e462f8ef44acadd2cbfaa850fb Mon Sep 17 00:00:00 2001 From: Gavin Andresen Date: Fri, 24 Apr 2015 13:14:45 -0400 Subject: Rolling bloom filter class For when you need to keep track of the last N items you've seen, and can tolerate some false-positives. Rebased-by: Pieter Wuille --- src/test/bloom_tests.cpp | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'src/test/bloom_tests.cpp') diff --git a/src/test/bloom_tests.cpp b/src/test/bloom_tests.cpp index 73a146f05c..1bda8a7ea1 100644 --- a/src/test/bloom_tests.cpp +++ b/src/test/bloom_tests.cpp @@ -8,6 +8,7 @@ #include "clientversion.h" #include "key.h" #include "merkleblock.h" +#include "random.h" #include "serialize.h" #include "streams.h" #include "uint256.h" @@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none) BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0))); } +static std::vector RandomData() +{ + uint256 r = GetRandHash(); + return std::vector(r.begin(), r.end()); +} + +BOOST_AUTO_TEST_CASE(rolling_bloom) +{ + // last-100-entry, 1% false positive: + CRollingBloomFilter rb1(100, 0.01, 0); + + // Overfill: + static const int DATASIZE=399; + std::vector data[DATASIZE]; + for (int i = 0; i < DATASIZE; i++) { + data[i] = RandomData(); + rb1.insert(data[i]); + } + // Last 100 guaranteed to be remembered: + for (int i = 299; i < DATASIZE; i++) { + BOOST_CHECK(rb1.contains(data[i])); + } + + // false positive rate is 1%, so we should get about 100 hits if + // testing 10,000 random keys. We get worst-case false positive + // behavior when the filter is as full as possible, which is + // when we've inserted one minus an integer multiple of nElement*2. + unsigned int nHits = 0; + for (int i = 0; i < 10000; i++) { + if (rb1.contains(RandomData())) + ++nHits; + } + // Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs: + BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)"); + + // Insanely unlikely to get a fp count outside this range: + BOOST_CHECK(nHits > 25); + BOOST_CHECK(nHits < 175); + + BOOST_CHECK(rb1.contains(data[DATASIZE-1])); + rb1.clear(); + BOOST_CHECK(!rb1.contains(data[DATASIZE-1])); + + // Now roll through data, make sure last 100 entries + // are always remembered: + for (int i = 0; i < DATASIZE; i++) { + if (i >= 100) + BOOST_CHECK(rb1.contains(data[i-100])); + rb1.insert(data[i]); + } + + // Insert 999 more random entries: + for (int i = 0; i < 999; i++) { + rb1.insert(RandomData()); + } + // Sanity check to make sure the filter isn't just filling up: + nHits = 0; + for (int i = 0; i < DATASIZE; i++) { + if (rb1.contains(data[i])) + ++nHits; + } + // Expect about 5 false positives, more than 100 means + // something is definitely broken. + BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)"); + BOOST_CHECK(nHits < 100); + + // last-1000-entry, 0.01% false positive: + CRollingBloomFilter rb2(1000, 0.001, 0); + for (int i = 0; i < DATASIZE; i++) { + rb2.insert(data[i]); + } + // ... room for all of them: + for (int i = 0; i < DATASIZE; i++) { + BOOST_CHECK(rb2.contains(data[i])); + } +} + BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.3