aboutsummaryrefslogtreecommitdiff
path: root/src/addrman.h
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@protonmail.com>2020-01-29 13:53:46 +0100
committerWladimir J. van der Laan <laanwj@protonmail.com>2020-01-29 13:55:43 +0100
commit01fc5891fb5702a744184af362da2850fd38038e (patch)
tree3f15d5327701c596c3bfe3271631b0ff3aaca231 /src/addrman.h
parentc434282d2cb816fd92f7e229385169e783c84fdd (diff)
parent3c1bc40205a3fcab606e70b0e3c13d68b2860e34 (diff)
downloadbitcoin-01fc5891fb5702a744184af362da2850fd38038e.tar.xz
Merge #16702: p2p: supplying and using asmap to improve IP bucketing in addrman
3c1bc40205a3fcab606e70b0e3c13d68b2860e34 Add extra logging of asmap use and bucketing (Gleb Naumenko) e4658aa8eaf1629dd5af8cf7b9717a8e72028251 Return mapped AS in RPC call getpeerinfo (Gleb Naumenko) ec45646de9e62b3d42c85716bfeb06d8f2b507dc Integrate ASN bucketing in Addrman and add tests (Gleb Naumenko) 8feb4e4b667361bf23344149c01594abebd56fdb Add asmap utility which queries a mapping (Gleb Naumenko) Pull request description: This PR attempts to solve the problem explained in #16599. A particular attack which encouraged us to work on this issue is explained here [[Erebus Attack against Bitcoin Peer-to-Peer Network](https://erebus-attack.comp.nus.edu.sg/)] (by @muoitranduc) Instead of relying on /16 prefix to diversify the connections every node creates, we would instead rely on the (ip -> ASN) mapping, if this mapping is provided. A .map file can be created by every user independently based on a router dump, or provided along with the Bitcoin release. Currently we use the python scripts written by @sipa to create a .map file, which is no larger than 2MB (awesome!). Here I suggest adding a field to peers.dat which would represent a hash of asmap file used while serializing addrman (or 0 for /16 prefix legacy approach). In this case, every time the file is updated (or grouping method changed), all buckets will be re-computed. I believe that alternative selective re-bucketing for only updated ranges would require substantial changes. TODO: - ~~more unit tests~~ - ~~find a way to test the code without including >1 MB mapping file in the repo.~~ - find a way to check that mapping file is not corrupted (checksum?) - comments and separate tests for asmap.cpp - make python code for .map generation public - figure out asmap distribution (?) ~Interesting corner case: I’m using std::hash to compute a fingerprint of asmap, and std::hash returns size_t. I guess if a user updates the OS to 64-bit, then the hash of asap will change? Does it even matter?~ ACKs for top commit: laanwj: re-ACK 3c1bc40205a3fcab606e70b0e3c13d68b2860e34 jamesob: ACK 3c1bc40205a3fcab606e70b0e3c13d68b2860e34 ([`jamesob/ackr/16702.3.naumenkogs.p2p_supplying_and_using`](https://github.com/jamesob/bitcoin/tree/ackr/16702.3.naumenkogs.p2p_supplying_and_using)) jonatack: ACK 3c1bc40205a3fcab606e70b0e3c13d68b2860e34 Tree-SHA512: e2dc6171188d5cdc2ab2c022fa49ed73a14a0acb8ae4c5ffa970172a0365942a249ad3d57e5fb134bc156a3492662c983f74bd21e78d316629dcadf71576800c
Diffstat (limited to 'src/addrman.h')
-rw-r--r--src/addrman.h101
1 files changed, 76 insertions, 25 deletions
diff --git a/src/addrman.h b/src/addrman.h
index 8685ea5049..5901611bee 100644
--- a/src/addrman.h
+++ b/src/addrman.h
@@ -12,11 +12,17 @@
#include <sync.h>
#include <timedata.h>
#include <util/system.h>
+#include <clientversion.h>
#include <map>
#include <set>
#include <stdint.h>
#include <vector>
+#include <iostream>
+#include <streams.h>
+#include <fs.h>
+#include <hash.h>
+
/**
* Extended statistics about a CAddress
@@ -68,15 +74,15 @@ public:
}
//! Calculate in which "tried" bucket this entry belongs
- int GetTriedBucket(const uint256 &nKey) const;
+ int GetTriedBucket(const uint256 &nKey, const std::vector<bool> &asmap) const;
//! Calculate in which "new" bucket this entry belongs, given a certain source
- int GetNewBucket(const uint256 &nKey, const CNetAddr& src) const;
+ int GetNewBucket(const uint256 &nKey, const CNetAddr& src, const std::vector<bool> &asmap) const;
//! Calculate in which "new" bucket this entry belongs, using its default source
- int GetNewBucket(const uint256 &nKey) const
+ int GetNewBucket(const uint256 &nKey, const std::vector<bool> &asmap) const
{
- return GetNewBucket(nKey, source);
+ return GetNewBucket(nKey, source, asmap);
}
//! Calculate in which position of a bucket to store this entry.
@@ -170,6 +176,7 @@ static const int64_t ADDRMAN_TEST_WINDOW = 40*60; // 40 minutes
*/
class CAddrMan
{
+friend class CAddrManTest;
protected:
//! critical section to protect the inner data structures
mutable RecursiveMutex cs;
@@ -264,9 +271,29 @@ protected:
void SetServices_(const CService &addr, ServiceFlags nServices) EXCLUSIVE_LOCKS_REQUIRED(cs);
public:
+ // Compressed IP->ASN mapping, loaded from a file when a node starts.
+ // Should be always empty if no file was provided.
+ // This mapping is then used for bucketing nodes in Addrman.
+ //
+ // If asmap is provided, nodes will be bucketed by
+ // AS they belong to, in order to make impossible for a node
+ // to connect to several nodes hosted in a single AS.
+ // This is done in response to Erebus attack, but also to generally
+ // diversify the connections every node creates,
+ // especially useful when a large fraction of nodes
+ // operate under a couple of cloud providers.
+ //
+ // If a new asmap was provided, the existing records
+ // would be re-bucketed accordingly.
+ std::vector<bool> m_asmap;
+
+ // Read asmap from provided binary file
+ static std::vector<bool> DecodeAsmap(fs::path path);
+
+
/**
* serialized format:
- * * version byte (currently 1)
+ * * version byte (1 for pre-asmap files, 2 for files including asmap version)
* * 0x20 + nKey (serialized as if it were a vector, for backward compatibility)
* * nNew
* * nTried
@@ -298,7 +325,7 @@ public:
{
LOCK(cs);
- unsigned char nVersion = 1;
+ unsigned char nVersion = 2;
s << nVersion;
s << ((unsigned char)32);
s << nKey;
@@ -341,6 +368,13 @@ public:
}
}
}
+ // Store asmap version after bucket entries so that it
+ // can be ignored by older clients for backward compatibility.
+ uint256 asmap_version;
+ if (m_asmap.size() != 0) {
+ asmap_version = SerializeHash(m_asmap);
+ }
+ s << asmap_version;
}
template<typename Stream>
@@ -349,7 +383,6 @@ public:
LOCK(cs);
Clear();
-
unsigned char nVersion;
s >> nVersion;
unsigned char nKeySize;
@@ -379,16 +412,6 @@ public:
mapAddr[info] = n;
info.nRandomPos = vRandom.size();
vRandom.push_back(n);
- if (nVersion != 1 || nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) {
- // In case the new table data cannot be used (nVersion unknown, or bucket count wrong),
- // immediately try to give them a reference based on their primary source address.
- int nUBucket = info.GetNewBucket(nKey);
- int nUBucketPos = info.GetBucketPosition(nKey, true, nUBucket);
- if (vvNew[nUBucket][nUBucketPos] == -1) {
- vvNew[nUBucket][nUBucketPos] = n;
- info.nRefCount++;
- }
- }
}
nIdCount = nNew;
@@ -397,7 +420,7 @@ public:
for (int n = 0; n < nTried; n++) {
CAddrInfo info;
s >> info;
- int nKBucket = info.GetTriedBucket(nKey);
+ int nKBucket = info.GetTriedBucket(nKey, m_asmap);
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
if (vvTried[nKBucket][nKBucketPos] == -1) {
info.nRandomPos = vRandom.size();
@@ -413,7 +436,9 @@ public:
}
nTried -= nLost;
- // Deserialize positions in the new table (if possible).
+ // Store positions in the new table buckets to apply later (if possible).
+ std::map<int, int> entryToBucket; // Represents which entry belonged to which bucket when serializing
+
for (int bucket = 0; bucket < nUBuckets; bucket++) {
int nSize = 0;
s >> nSize;
@@ -421,12 +446,38 @@ public:
int nIndex = 0;
s >> nIndex;
if (nIndex >= 0 && nIndex < nNew) {
- CAddrInfo &info = mapInfo[nIndex];
- int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
- if (nVersion == 1 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) {
- info.nRefCount++;
- vvNew[bucket][nUBucketPos] = nIndex;
- }
+ entryToBucket[nIndex] = bucket;
+ }
+ }
+ }
+
+ uint256 supplied_asmap_version;
+ if (m_asmap.size() != 0) {
+ supplied_asmap_version = SerializeHash(m_asmap);
+ }
+ uint256 serialized_asmap_version;
+ if (nVersion > 1) {
+ s >> serialized_asmap_version;
+ }
+
+ for (int n = 0; n < nNew; n++) {
+ CAddrInfo &info = mapInfo[n];
+ int bucket = entryToBucket[n];
+ int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
+ if (nVersion == 2 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 &&
+ info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) {
+ // Bucketing has not changed, using existing bucket positions for the new table
+ vvNew[bucket][nUBucketPos] = n;
+ info.nRefCount++;
+ } else {
+ // In case the new table data cannot be used (nVersion unknown, bucket count wrong or new asmap),
+ // try to give them a reference based on their primary source address.
+ LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
+ bucket = info.GetNewBucket(nKey, m_asmap);
+ nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
+ if (vvNew[bucket][nUBucketPos] == -1) {
+ vvNew[bucket][nUBucketPos] = n;
+ info.nRefCount++;
}
}
}