aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2015-05-01 11:52:09 +0200
committerWladimir J. van der Laan <laanwj@gmail.com>2015-05-01 12:52:18 +0200
commitb46e7c24e58efd06f60ae0f9ab85a53283e23059 (patch)
tree1e02d1aa11957b3f40361a9994feaa567c970404
parent8a10000222cb49eb253b41802ecf312adaf79439 (diff)
parentf46a680f423ed1de5316d176e2292edefd916a95 (diff)
Merge pull request #6064
f46a680 Better mruset unit test (Pieter Wuille) d4d5022 Use ring buffer of set iterators instead of deque of copies in mruset (Pieter Wuille) d81cff3 Replace mruset setAddrKnown with CRollingBloomFilter addrKnown (Gavin Andresen) 69a5f8b Rolling bloom filter class (Gavin Andresen)
-rw-r--r--src/bloom.cpp83
-rw-r--r--src/bloom.h28
-rw-r--r--src/main.cpp10
-rw-r--r--src/mruset.h36
-rw-r--r--src/net.cpp6
-rw-r--r--src/net.h6
-rw-r--r--src/test/bloom_tests.cpp78
-rw-r--r--src/test/mruset_tests.cpp126
8 files changed, 255 insertions, 118 deletions
diff --git a/src/bloom.cpp b/src/bloom.cpp
index e60576f4b4..36cba491c4 100644
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@@ -21,22 +21,33 @@
using namespace std;
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
-/**
- * The ideal size for a bloom filter with a given number of elements and false positive rate is:
- * - nElements * log(fp rate) / ln(2)^2
- * We ignore filter parameters which will create a bloom filter larger than the protocol limits
- */
-vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
-/**
- * The ideal number of hash functions is filter size * ln(2) / number of elements
- * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
- * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
- */
-isFull(false),
-isEmpty(false),
-nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
-nTweak(nTweakIn),
-nFlags(nFlagsIn)
+ /**
+ * The ideal size for a bloom filter with a given number of elements and false positive rate is:
+ * - nElements * log(fp rate) / ln(2)^2
+ * We ignore filter parameters which will create a bloom filter larger than the protocol limits
+ */
+ vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
+ /**
+ * The ideal number of hash functions is filter size * ln(2) / number of elements
+ * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
+ * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
+ */
+ isFull(false),
+ isEmpty(false),
+ nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
+ nTweak(nTweakIn),
+ nFlags(nFlagsIn)
+{
+}
+
+// Private constructor used by CRollingBloomFilter
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
+ vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
+ isFull(false),
+ isEmpty(true),
+ nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
+ nTweak(nTweakIn),
+ nFlags(BLOOM_UPDATE_NONE)
{
}
@@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
isFull = full;
isEmpty = empty;
}
+
+CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
+ b1(nElements * 2, fpRate, nTweak), b2(nElements * 2, fpRate, nTweak)
+{
+ // Implemented using two bloom filters of 2 * nElements each.
+ // We fill them up, and clear them, staggered, every nElements
+ // inserted, so at least one always contains the last nElements
+ // inserted.
+ nBloomSize = nElements * 2;
+ nInsertions = 0;
+}
+
+void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
+{
+ if (nInsertions == 0) {
+ b1.clear();
+ } else if (nInsertions == nBloomSize / 2) {
+ b2.clear();
+ }
+ b1.insert(vKey);
+ b2.insert(vKey);
+ if (++nInsertions == nBloomSize) {
+ nInsertions = 0;
+ }
+}
+
+bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
+{
+ if (nInsertions < nBloomSize / 2) {
+ return b2.contains(vKey);
+ }
+ return b1.contains(vKey);
+}
+
+void CRollingBloomFilter::clear()
+{
+ b1.clear();
+ b2.clear();
+ nInsertions = 0;
+}
diff --git a/src/bloom.h b/src/bloom.h
index 191ffa19b3..7bab379a39 100644
--- a/src/bloom.h
+++ b/src/bloom.h
@@ -53,6 +53,10 @@ private:
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
+ // Private constructor for CRollingBloomFilter, no restrictions on size
+ CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
+ friend class CRollingBloomFilter;
+
public:
/**
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
@@ -97,4 +101,28 @@ public:
void UpdateEmptyFull();
};
+/**
+ * RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
+ * Construct it with the number of items to keep track of, and a false-positive rate.
+ *
+ * contains(item) will always return true if item was one of the last N things
+ * insert()'ed ... but may also return true for items that were not inserted.
+ */
+class CRollingBloomFilter
+{
+public:
+ CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
+
+ void insert(const std::vector<unsigned char>& vKey);
+ bool contains(const std::vector<unsigned char>& vKey) const;
+
+ void clear();
+
+private:
+ unsigned int nBloomSize;
+ unsigned int nInsertions;
+ CBloomFilter b1, b2;
+};
+
+
#endif // BITCOIN_BLOOM_H
diff --git a/src/main.cpp b/src/main.cpp
index e6248c6617..a6b717d57f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3995,7 +3995,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
{
LOCK(cs_vNodes);
// Use deterministic randomness to send to the same nodes for 24 hours
- // at a time so the setAddrKnowns of the chosen nodes prevent repeats
+ // at a time so the addrKnowns of the chosen nodes prevent repeats
static uint256 hashSalt;
if (hashSalt.IsNull())
hashSalt = GetRandHash();
@@ -4779,9 +4779,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
LOCK(cs_vNodes);
BOOST_FOREACH(CNode* pnode, vNodes)
{
- // Periodically clear setAddrKnown to allow refresh broadcasts
+ // Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast)
- pnode->setAddrKnown.clear();
+ pnode->addrKnown.clear();
// Rebroadcast our address
AdvertizeLocal(pnode);
@@ -4799,9 +4799,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
vAddr.reserve(pto->vAddrToSend.size());
BOOST_FOREACH(const CAddress& addr, pto->vAddrToSend)
{
- // returns true if wasn't already contained in the set
- if (pto->setAddrKnown.insert(addr).second)
+ if (!pto->addrKnown.contains(addr.GetKey()))
{
+ pto->addrKnown.insert(addr.GetKey());
vAddr.push_back(addr);
// receiver rejects addr messages larger than 1000
if (vAddr.size() >= 1000)
diff --git a/src/mruset.h b/src/mruset.h
index 1969f419cb..398aa173bf 100644
--- a/src/mruset.h
+++ b/src/mruset.h
@@ -1,12 +1,12 @@
-// Copyright (c) 2012 The Bitcoin Core developers
+// Copyright (c) 2012-2015 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_MRUSET_H
#define BITCOIN_MRUSET_H
-#include <deque>
#include <set>
+#include <vector>
#include <utility>
/** STL-like set container that only keeps the most recent N elements. */
@@ -22,11 +22,13 @@ public:
protected:
std::set<T> set;
- std::deque<T> queue;
- size_type nMaxSize;
+ std::vector<iterator> order;
+ size_type first_used;
+ size_type first_unused;
+ const size_type nMaxSize;
public:
- mruset(size_type nMaxSizeIn = 0) { nMaxSize = nMaxSizeIn; }
+ mruset(size_type nMaxSizeIn = 1) : nMaxSize(nMaxSizeIn) { clear(); }
iterator begin() const { return set.begin(); }
iterator end() const { return set.end(); }
size_type size() const { return set.size(); }
@@ -36,7 +38,9 @@ public:
void clear()
{
set.clear();
- queue.clear();
+ order.assign(nMaxSize, set.end());
+ first_used = 0;
+ first_unused = 0;
}
bool inline friend operator==(const mruset<T>& a, const mruset<T>& b) { return a.set == b.set; }
bool inline friend operator==(const mruset<T>& a, const std::set<T>& b) { return a.set == b; }
@@ -45,25 +49,17 @@ public:
{
std::pair<iterator, bool> ret = set.insert(x);
if (ret.second) {
- if (nMaxSize && queue.size() == nMaxSize) {
- set.erase(queue.front());
- queue.pop_front();
+ if (set.size() == nMaxSize + 1) {
+ set.erase(order[first_used]);
+ order[first_used] = set.end();
+ if (++first_used == nMaxSize) first_used = 0;
}
- queue.push_back(x);
+ order[first_unused] = ret.first;
+ if (++first_unused == nMaxSize) first_unused = 0;
}
return ret;
}
size_type max_size() const { return nMaxSize; }
- size_type max_size(size_type s)
- {
- if (s)
- while (queue.size() > s) {
- set.erase(queue.front());
- queue.pop_front();
- }
- nMaxSize = s;
- return nMaxSize;
- }
};
#endif // BITCOIN_MRUSET_H
diff --git a/src/net.cpp b/src/net.cpp
index 731c810935..2de04fc574 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -1905,7 +1905,10 @@ bool CAddrDB::Read(CAddrMan& addr)
unsigned int ReceiveFloodSize() { return 1000*GetArg("-maxreceivebuffer", 5*1000); }
unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
-CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) : ssSend(SER_NETWORK, INIT_PROTO_VERSION), setAddrKnown(5000)
+CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
+ ssSend(SER_NETWORK, INIT_PROTO_VERSION),
+ addrKnown(5000, 0.001, insecure_rand()),
+ setInventoryKnown(SendBufferSize() / 1000)
{
nServices = 0;
hSocket = hSocketIn;
@@ -1934,7 +1937,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn
nStartingHeight = -1;
fGetAddr = false;
fRelayTxes = false;
- setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = new CBloomFilter();
nPingNonceSent = 0;
nPingUsecStart = 0;
diff --git a/src/net.h b/src/net.h
index 9fc6ce68d0..24e927c9f6 100644
--- a/src/net.h
+++ b/src/net.h
@@ -300,7 +300,7 @@ public:
// flood relay
std::vector<CAddress> vAddrToSend;
- mruset<CAddress> setAddrKnown;
+ CRollingBloomFilter addrKnown;
bool fGetAddr;
std::set<uint256> setKnown;
@@ -380,7 +380,7 @@ public:
void AddAddressKnown(const CAddress& addr)
{
- setAddrKnown.insert(addr);
+ addrKnown.insert(addr.GetKey());
}
void PushAddress(const CAddress& addr)
@@ -388,7 +388,7 @@ public:
// Known checking here is only to save space from duplicates.
// SendMessages will filter it again for knowns that were added
// after addresses were pushed.
- if (addr.IsValid() && !setAddrKnown.count(addr)) {
+ if (addr.IsValid() && !addrKnown.contains(addr.GetKey())) {
if (vAddrToSend.size() >= MAX_ADDR_TO_SEND) {
vAddrToSend[insecure_rand() % vAddrToSend.size()] = addr;
} else {
diff --git a/src/test/bloom_tests.cpp b/src/test/bloom_tests.cpp
index 73a146f05c..1bda8a7ea1 100644
--- a/src/test/bloom_tests.cpp
+++ b/src/test/bloom_tests.cpp
@@ -8,6 +8,7 @@
#include "clientversion.h"
#include "key.h"
#include "merkleblock.h"
+#include "random.h"
#include "serialize.h"
#include "streams.h"
#include "uint256.h"
@@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none)
BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0)));
}
+static std::vector<unsigned char> RandomData()
+{
+ uint256 r = GetRandHash();
+ return std::vector<unsigned char>(r.begin(), r.end());
+}
+
+BOOST_AUTO_TEST_CASE(rolling_bloom)
+{
+ // last-100-entry, 1% false positive:
+ CRollingBloomFilter rb1(100, 0.01, 0);
+
+ // Overfill:
+ static const int DATASIZE=399;
+ std::vector<unsigned char> data[DATASIZE];
+ for (int i = 0; i < DATASIZE; i++) {
+ data[i] = RandomData();
+ rb1.insert(data[i]);
+ }
+ // Last 100 guaranteed to be remembered:
+ for (int i = 299; i < DATASIZE; i++) {
+ BOOST_CHECK(rb1.contains(data[i]));
+ }
+
+ // false positive rate is 1%, so we should get about 100 hits if
+ // testing 10,000 random keys. We get worst-case false positive
+ // behavior when the filter is as full as possible, which is
+ // when we've inserted one minus an integer multiple of nElement*2.
+ unsigned int nHits = 0;
+ for (int i = 0; i < 10000; i++) {
+ if (rb1.contains(RandomData()))
+ ++nHits;
+ }
+ // Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs:
+ BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)");
+
+ // Insanely unlikely to get a fp count outside this range:
+ BOOST_CHECK(nHits > 25);
+ BOOST_CHECK(nHits < 175);
+
+ BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
+ rb1.clear();
+ BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));
+
+ // Now roll through data, make sure last 100 entries
+ // are always remembered:
+ for (int i = 0; i < DATASIZE; i++) {
+ if (i >= 100)
+ BOOST_CHECK(rb1.contains(data[i-100]));
+ rb1.insert(data[i]);
+ }
+
+ // Insert 999 more random entries:
+ for (int i = 0; i < 999; i++) {
+ rb1.insert(RandomData());
+ }
+ // Sanity check to make sure the filter isn't just filling up:
+ nHits = 0;
+ for (int i = 0; i < DATASIZE; i++) {
+ if (rb1.contains(data[i]))
+ ++nHits;
+ }
+ // Expect about 5 false positives, more than 100 means
+ // something is definitely broken.
+ BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)");
+ BOOST_CHECK(nHits < 100);
+
+ // last-1000-entry, 0.01% false positive:
+ CRollingBloomFilter rb2(1000, 0.001, 0);
+ for (int i = 0; i < DATASIZE; i++) {
+ rb2.insert(data[i]);
+ }
+ // ... room for all of them:
+ for (int i = 0; i < DATASIZE; i++) {
+ BOOST_CHECK(rb2.contains(data[i]));
+ }
+}
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/test/mruset_tests.cpp b/src/test/mruset_tests.cpp
index bd4e9c1d38..2b68f8899e 100644
--- a/src/test/mruset_tests.cpp
+++ b/src/test/mruset_tests.cpp
@@ -17,83 +17,65 @@
using namespace std;
-class mrutester
-{
-private:
- mruset<int> mru;
- std::set<int> set;
-
-public:
- mrutester() { mru.max_size(MAX_SIZE); }
- int size() const { return set.size(); }
-
- void insert(int n)
- {
- mru.insert(n);
- set.insert(n);
- BOOST_CHECK(mru == set);
- }
-};
-
BOOST_FIXTURE_TEST_SUITE(mruset_tests, BasicTestingSetup)
-// Test that an mruset behaves like a set, as long as no more than MAX_SIZE elements are in it
-BOOST_AUTO_TEST_CASE(mruset_like_set)
-{
-
- for (int nTest=0; nTest<NUM_TESTS; nTest++)
- {
- mrutester tester;
- while (tester.size() < MAX_SIZE)
- tester.insert(GetRandInt(2 * MAX_SIZE));
- }
-
-}
-
-// Test that an mruset's size never exceeds its max_size
-BOOST_AUTO_TEST_CASE(mruset_limited_size)
+BOOST_AUTO_TEST_CASE(mruset_test)
{
- for (int nTest=0; nTest<NUM_TESTS; nTest++)
- {
- mruset<int> mru(MAX_SIZE);
- for (int nAction=0; nAction<3*MAX_SIZE; nAction++)
- {
- int n = GetRandInt(2 * MAX_SIZE);
- mru.insert(n);
- BOOST_CHECK(mru.size() <= MAX_SIZE);
+ // The mruset being tested.
+ mruset<int> mru(5000);
+
+ // Run the test 10 times.
+ for (int test = 0; test < 10; test++) {
+ // Reset mru.
+ mru.clear();
+
+ // A deque + set to simulate the mruset.
+ std::deque<int> rep;
+ std::set<int> all;
+
+ // Insert 10000 random integers below 15000.
+ for (int j=0; j<10000; j++) {
+ int add = GetRandInt(15000);
+ mru.insert(add);
+
+ // Add the number to rep/all as well.
+ if (all.count(add) == 0) {
+ all.insert(add);
+ rep.push_back(add);
+ if (all.size() == 5001) {
+ all.erase(rep.front());
+ rep.pop_front();
+ }
+ }
+
+ // Do a full comparison between mru and the simulated mru every 1000 and every 5001 elements.
+ if (j % 1000 == 0 || j % 5001 == 0) {
+ mruset<int> mru2 = mru; // Also try making a copy
+
+ // Check that all elements that should be in there, are in there.
+ BOOST_FOREACH(int x, rep) {
+ BOOST_CHECK(mru.count(x));
+ BOOST_CHECK(mru2.count(x));
+ }
+
+ // Check that all elements that are in there, should be in there.
+ BOOST_FOREACH(int x, mru) {
+ BOOST_CHECK(all.count(x));
+ }
+
+ // Check that all elements that are in there, should be in there.
+ BOOST_FOREACH(int x, mru2) {
+ BOOST_CHECK(all.count(x));
+ }
+
+ for (int t = 0; t < 10; t++) {
+ int r = GetRandInt(15000);
+ BOOST_CHECK(all.count(r) == mru.count(r));
+ BOOST_CHECK(all.count(r) == mru2.count(r));
+ }
+ }
}
}
}
-// 16-bit permutation function
-int static permute(int n)
-{
- // hexadecimals of pi; verified to be linearly independent
- static const int table[16] = {0x243F, 0x6A88, 0x85A3, 0x08D3, 0x1319, 0x8A2E, 0x0370, 0x7344,
- 0xA409, 0x3822, 0x299F, 0x31D0, 0x082E, 0xFA98, 0xEC4E, 0x6C89};
-
- int ret = 0;
- for (int bit=0; bit<16; bit++)
- if (n & (1<<bit))
- ret ^= table[bit];
-
- return ret;
-}
-
-// Test that an mruset acts like a moving window, if no duplicate elements are added
-BOOST_AUTO_TEST_CASE(mruset_window)
-{
- mruset<int> mru(MAX_SIZE);
- for (int n=0; n<10*MAX_SIZE; n++)
- {
- mru.insert(permute(n));
-
- set<int> tester;
- for (int m=max(0,n-MAX_SIZE+1); m<=n; m++)
- tester.insert(permute(m));
-
- BOOST_CHECK(mru == tester);
- }
-}
-
BOOST_AUTO_TEST_SUITE_END()