aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/bloom.cpp83
-rw-r--r--src/bloom.h28
-rw-r--r--src/main.cpp10
-rw-r--r--src/mruset.h36
-rw-r--r--src/net.cpp6
-rw-r--r--src/net.h6
-rw-r--r--src/test/bloom_tests.cpp78
-rw-r--r--src/test/mruset_tests.cpp126
8 files changed, 255 insertions, 118 deletions
diff --git a/src/bloom.cpp b/src/bloom.cpp
index e60576f4b4..36cba491c4 100644
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@@ -21,22 +21,33 @@
using namespace std;
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
-/**
- * The ideal size for a bloom filter with a given number of elements and false positive rate is:
- * - nElements * log(fp rate) / ln(2)^2
- * We ignore filter parameters which will create a bloom filter larger than the protocol limits
- */
-vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
-/**
- * The ideal number of hash functions is filter size * ln(2) / number of elements
- * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
- * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
- */
-isFull(false),
-isEmpty(false),
-nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
-nTweak(nTweakIn),
-nFlags(nFlagsIn)
+ /**
+ * The ideal size for a bloom filter with a given number of elements and false positive rate is:
+ * - nElements * log(fp rate) / ln(2)^2
+ * We ignore filter parameters which will create a bloom filter larger than the protocol limits
+ */
+ vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
+ /**
+ * The ideal number of hash functions is filter size * ln(2) / number of elements
+ * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
+ * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
+ */
+ isFull(false),
+ isEmpty(false),
+ nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
+ nTweak(nTweakIn),
+ nFlags(nFlagsIn)
+{
+}
+
+// Private constructor used by CRollingBloomFilter
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
+ vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
+ isFull(false),
+ isEmpty(true),
+ nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
+ nTweak(nTweakIn),
+ nFlags(BLOOM_UPDATE_NONE)
{
}
@@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
isFull = full;
isEmpty = empty;
}
+
+CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
+ b1(nElements * 2, fpRate, nTweak), b2(nElements * 2, fpRate, nTweak)
+{
+ // Implemented using two bloom filters of 2 * nElements each.
+ // We fill them up, and clear them, staggered, every nElements
+ // inserted, so at least one always contains the last nElements
+ // inserted.
+ nBloomSize = nElements * 2;
+ nInsertions = 0;
+}
+
+void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
+{
+ if (nInsertions == 0) {
+ b1.clear();
+ } else if (nInsertions == nBloomSize / 2) {
+ b2.clear();
+ }
+ b1.insert(vKey);
+ b2.insert(vKey);
+ if (++nInsertions == nBloomSize) {
+ nInsertions = 0;
+ }
+}
+
+bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
+{
+ if (nInsertions < nBloomSize / 2) {
+ return b2.contains(vKey);
+ }
+ return b1.contains(vKey);
+}
+
+void CRollingBloomFilter::clear()
+{
+ b1.clear();
+ b2.clear();
+ nInsertions = 0;
+}
diff --git a/src/bloom.h b/src/bloom.h
index 191ffa19b3..7bab379a39 100644
--- a/src/bloom.h
+++ b/src/bloom.h
@@ -53,6 +53,10 @@ private:
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
+ // Private constructor for CRollingBloomFilter, no restrictions on size
+ CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
+ friend class CRollingBloomFilter;
+
public:
/**
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
@@ -97,4 +101,28 @@ public:
void UpdateEmptyFull();
};
+/**
+ * RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
+ * Construct it with the number of items to keep track of, and a false-positive rate.
+ *
+ * contains(item) will always return true if item was one of the last N things
+ * insert()'ed ... but may also return true for items that were not inserted.
+ */
+class CRollingBloomFilter
+{
+public:
+ CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
+
+ void insert(const std::vector<unsigned char>& vKey);
+ bool contains(const std::vector<unsigned char>& vKey) const;
+
+ void clear();
+
+private:
+ unsigned int nBloomSize;
+ unsigned int nInsertions;
+ CBloomFilter b1, b2;
+};
+
+
#endif // BITCOIN_BLOOM_H
diff --git a/src/main.cpp b/src/main.cpp
index e6248c6617..a6b717d57f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3995,7 +3995,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
{
LOCK(cs_vNodes);
// Use deterministic randomness to send to the same nodes for 24 hours
- // at a time so the setAddrKnowns of the chosen nodes prevent repeats
+ // at a time so the addrKnowns of the chosen nodes prevent repeats
static uint256 hashSalt;
if (hashSalt.IsNull())
hashSalt = GetRandHash();
@@ -4779,9 +4779,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
LOCK(cs_vNodes);
BOOST_FOREACH(CNode* pnode, vNodes)
{
- // Periodically clear setAddrKnown to allow refresh broadcasts
+ // Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast)
- pnode->setAddrKnown.clear();
+ pnode->addrKnown.clear();
// Rebroadcast our address
AdvertizeLocal(pnode);
@@ -4799,9 +4799,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
vAddr.reserve(pto->vAddrToSend.size());
BOOST_FOREACH(const CAddress& addr, pto->vAddrToSend)
{
- // returns true if wasn't already contained in the set
- if (pto->setAddrKnown.insert(addr).second)
+ if (!pto->addrKnown.contains(addr.GetKey()))
{
+ pto->addrKnown.insert(addr.GetKey());
vAddr.push_back(addr);
// receiver rejects addr messages larger than 1000
if (vAddr.size() >= 1000)
diff --git a/src/mruset.h b/src/mruset.h
index 1969f419cb..398aa173bf 100644
--- a/src/mruset.h
+++ b/src/mruset.h
@@ -1,12 +1,12 @@
-// Copyright (c) 2012 The Bitcoin Core developers
+// Copyright (c) 2012-2015 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_MRUSET_H
#define BITCOIN_MRUSET_H
-#include <deque>
#include <set>
+#include <vector>
#include <utility>
/** STL-like set container that only keeps the most recent N elements. */
@@ -22,11 +22,13 @@ public:
protected:
std::set<T> set;
- std::deque<T> queue;
- size_type nMaxSize;
+ std::vector<iterator> order;
+ size_type first_used;
+ size_type first_unused;
+ const size_type nMaxSize;
public:
- mruset(size_type nMaxSizeIn = 0) { nMaxSize = nMaxSizeIn; }
+ mruset(size_type nMaxSizeIn = 1) : nMaxSize(nMaxSizeIn) { clear(); }
iterator begin() const { return set.begin(); }
iterator end() const { return set.end(); }
size_type size() const { return set.size(); }
@@ -36,7 +38,9 @@ public:
void clear()
{
set.clear();
- queue.clear();
+ order.assign(nMaxSize, set.end());
+ first_used = 0;
+ first_unused = 0;
}
bool inline friend operator==(const mruset<T>& a, const mruset<T>& b) { return a.set == b.set; }
bool inline friend operator==(const mruset<T>& a, const std::set<T>& b) { return a.set == b; }
@@ -45,25 +49,17 @@ public:
{
std::pair<iterator, bool> ret = set.insert(x);
if (ret.second) {
- if (nMaxSize && queue.size() == nMaxSize) {
- set.erase(queue.front());
- queue.pop_front();
+ if (set.size() == nMaxSize + 1) {
+ set.erase(order[first_used]);
+ order[first_used] = set.end();
+ if (++first_used == nMaxSize) first_used = 0;
}
- queue.push_back(x);
+ order[first_unused] = ret.first;
+ if (++first_unused == nMaxSize) first_unused = 0;
}
return ret;
}
size_type max_size() const { return nMaxSize; }
- size_type max_size(size_type s)
- {
- if (s)
- while (queue.size() > s) {
- set.erase(queue.front());
- queue.pop_front();
- }
- nMaxSize = s;
- return nMaxSize;
- }
};
#endif // BITCOIN_MRUSET_H
diff --git a/src/net.cpp b/src/net.cpp
index 731c810935..2de04fc574 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -1905,7 +1905,10 @@ bool CAddrDB::Read(CAddrMan& addr)
unsigned int ReceiveFloodSize() { return 1000*GetArg("-maxreceivebuffer", 5*1000); }
unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
-CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) : ssSend(SER_NETWORK, INIT_PROTO_VERSION), setAddrKnown(5000)
+CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
+ ssSend(SER_NETWORK, INIT_PROTO_VERSION),
+ addrKnown(5000, 0.001, insecure_rand()),
+ setInventoryKnown(SendBufferSize() / 1000)
{
nServices = 0;
hSocket = hSocketIn;
@@ -1934,7 +1937,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn
nStartingHeight = -1;
fGetAddr = false;
fRelayTxes = false;
- setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = new CBloomFilter();
nPingNonceSent = 0;
nPingUsecStart = 0;
diff --git a/src/net.h b/src/net.h
index 9fc6ce68d0..24e927c9f6 100644
--- a/src/net.h
+++ b/src/net.h
@@ -300,7 +300,7 @@ public:
// flood relay
std::vector<CAddress> vAddrToSend;
- mruset<CAddress> setAddrKnown;
+ CRollingBloomFilter addrKnown;
bool fGetAddr;
std::set<uint256> setKnown;
@@ -380,7 +380,7 @@ public:
void AddAddressKnown(const CAddress& addr)
{
- setAddrKnown.insert(addr);
+ addrKnown.insert(addr.GetKey());
}
void PushAddress(const CAddress& addr)
@@ -388,7 +388,7 @@ public:
// Known checking here is only to save space from duplicates.
// SendMessages will filter it again for knowns that were added
// after addresses were pushed.
- if (addr.IsValid() && !setAddrKnown.count(addr)) {
+ if (addr.IsValid() && !addrKnown.contains(addr.GetKey())) {
if (vAddrToSend.size() >= MAX_ADDR_TO_SEND) {
vAddrToSend[insecure_rand() % vAddrToSend.size()] = addr;
} else {
diff --git a/src/test/bloom_tests.cpp b/src/test/bloom_tests.cpp
index 73a146f05c..1bda8a7ea1 100644
--- a/src/test/bloom_tests.cpp
+++ b/src/test/bloom_tests.cpp
@@ -8,6 +8,7 @@
#include "clientversion.h"
#include "key.h"
#include "merkleblock.h"
+#include "random.h"
#include "serialize.h"
#include "streams.h"
#include "uint256.h"
@@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none)
BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0)));
}
+static std::vector<unsigned char> RandomData()
+{
+ uint256 r = GetRandHash();
+ return std::vector<unsigned char>(r.begin(), r.end());
+}
+
+BOOST_AUTO_TEST_CASE(rolling_bloom)
+{
+ // last-100-entry, 1% false positive:
+ CRollingBloomFilter rb1(100, 0.01, 0);
+
+ // Overfill:
+ static const int DATASIZE=399;
+ std::vector<unsigned char> data[DATASIZE];
+ for (int i = 0; i < DATASIZE; i++) {
+ data[i] = RandomData();
+ rb1.insert(data[i]);
+ }
+ // Last 100 guaranteed to be remembered:
+ for (int i = 299; i < DATASIZE; i++) {
+ BOOST_CHECK(rb1.contains(data[i]));
+ }
+
+ // false positive rate is 1%, so we should get about 100 hits if
+ // testing 10,000 random keys. We get worst-case false positive
+ // behavior when the filter is as full as possible, which is
+ // when we've inserted one minus an integer multiple of nElement*2.
+ unsigned int nHits = 0;
+ for (int i = 0; i < 10000; i++) {
+ if (rb1.contains(RandomData()))
+ ++nHits;
+ }
+ // Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs:
+ BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)");
+
+ // Insanely unlikely to get a fp count outside this range:
+ BOOST_CHECK(nHits > 25);
+ BOOST_CHECK(nHits < 175);
+
+ BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
+ rb1.clear();
+ BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));
+
+ // Now roll through data, make sure last 100 entries
+ // are always remembered:
+ for (int i = 0; i < DATASIZE; i++) {
+ if (i >= 100)
+ BOOST_CHECK(rb1.contains(data[i-100]));
+ rb1.insert(data[i]);
+ }
+
+ // Insert 999 more random entries:
+ for (int i = 0; i < 999; i++) {
+ rb1.insert(RandomData());
+ }
+ // Sanity check to make sure the filter isn't just filling up:
+ nHits = 0;
+ for (int i = 0; i < DATASIZE; i++) {
+ if (rb1.contains(data[i]))
+ ++nHits;
+ }
+ // Expect about 5 false positives, more than 100 means
+ // something is definitely broken.
+ BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)");
+ BOOST_CHECK(nHits < 100);
+
+ // last-1000-entry, 0.01% false positive:
+ CRollingBloomFilter rb2(1000, 0.001, 0);
+ for (int i = 0; i < DATASIZE; i++) {
+ rb2.insert(data[i]);
+ }
+ // ... room for all of them:
+ for (int i = 0; i < DATASIZE; i++) {
+ BOOST_CHECK(rb2.contains(data[i]));
+ }
+}
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/test/mruset_tests.cpp b/src/test/mruset_tests.cpp
index bd4e9c1d38..2b68f8899e 100644
--- a/src/test/mruset_tests.cpp
+++ b/src/test/mruset_tests.cpp
@@ -17,83 +17,65 @@
using namespace std;
-class mrutester
-{
-private:
- mruset<int> mru;
- std::set<int> set;
-
-public:
- mrutester() { mru.max_size(MAX_SIZE); }
- int size() const { return set.size(); }
-
- void insert(int n)
- {
- mru.insert(n);
- set.insert(n);
- BOOST_CHECK(mru == set);
- }
-};
-
BOOST_FIXTURE_TEST_SUITE(mruset_tests, BasicTestingSetup)
-// Test that an mruset behaves like a set, as long as no more than MAX_SIZE elements are in it
-BOOST_AUTO_TEST_CASE(mruset_like_set)
-{
-
- for (int nTest=0; nTest<NUM_TESTS; nTest++)
- {
- mrutester tester;
- while (tester.size() < MAX_SIZE)
- tester.insert(GetRandInt(2 * MAX_SIZE));
- }
-
-}
-
-// Test that an mruset's size never exceeds its max_size
-BOOST_AUTO_TEST_CASE(mruset_limited_size)
+BOOST_AUTO_TEST_CASE(mruset_test)
{
- for (int nTest=0; nTest<NUM_TESTS; nTest++)
- {
- mruset<int> mru(MAX_SIZE);
- for (int nAction=0; nAction<3*MAX_SIZE; nAction++)
- {
- int n = GetRandInt(2 * MAX_SIZE);
- mru.insert(n);
- BOOST_CHECK(mru.size() <= MAX_SIZE);
+ // The mruset being tested.
+ mruset<int> mru(5000);
+
+ // Run the test 10 times.
+ for (int test = 0; test < 10; test++) {
+ // Reset mru.
+ mru.clear();
+
+ // A deque + set to simulate the mruset.
+ std::deque<int> rep;
+ std::set<int> all;
+
+ // Insert 10000 random integers below 15000.
+ for (int j=0; j<10000; j++) {
+ int add = GetRandInt(15000);
+ mru.insert(add);
+
+ // Add the number to rep/all as well.
+ if (all.count(add) == 0) {
+ all.insert(add);
+ rep.push_back(add);
+ if (all.size() == 5001) {
+ all.erase(rep.front());
+ rep.pop_front();
+ }
+ }
+
+ // Do a full comparison between mru and the simulated mru every 1000 and every 5001 elements.
+ if (j % 1000 == 0 || j % 5001 == 0) {
+ mruset<int> mru2 = mru; // Also try making a copy
+
+ // Check that all elements that should be in there, are in there.
+ BOOST_FOREACH(int x, rep) {
+ BOOST_CHECK(mru.count(x));
+ BOOST_CHECK(mru2.count(x));
+ }
+
+ // Check that all elements that are in there, should be in there.
+ BOOST_FOREACH(int x, mru) {
+ BOOST_CHECK(all.count(x));
+ }
+
+ // Check that all elements that are in there, should be in there.
+ BOOST_FOREACH(int x, mru2) {
+ BOOST_CHECK(all.count(x));
+ }
+
+ for (int t = 0; t < 10; t++) {
+ int r = GetRandInt(15000);
+ BOOST_CHECK(all.count(r) == mru.count(r));
+ BOOST_CHECK(all.count(r) == mru2.count(r));
+ }
+ }
}
}
}
-// 16-bit permutation function
-int static permute(int n)
-{
- // hexadecimals of pi; verified to be linearly independent
- static const int table[16] = {0x243F, 0x6A88, 0x85A3, 0x08D3, 0x1319, 0x8A2E, 0x0370, 0x7344,
- 0xA409, 0x3822, 0x299F, 0x31D0, 0x082E, 0xFA98, 0xEC4E, 0x6C89};
-
- int ret = 0;
- for (int bit=0; bit<16; bit++)
- if (n & (1<<bit))
- ret ^= table[bit];
-
- return ret;
-}
-
-// Test that an mruset acts like a moving window, if no duplicate elements are added
-BOOST_AUTO_TEST_CASE(mruset_window)
-{
- mruset<int> mru(MAX_SIZE);
- for (int n=0; n<10*MAX_SIZE; n++)
- {
- mru.insert(permute(n));
-
- set<int> tester;
- for (int m=max(0,n-MAX_SIZE+1); m<=n; m++)
- tester.insert(permute(m));
-
- BOOST_CHECK(mru == tester);
- }
-}
-
BOOST_AUTO_TEST_SUITE_END()