From ec45646de9e62b3d42c85716bfeb06d8f2b507dc Mon Sep 17 00:00:00 2001 From: Gleb Naumenko Date: Tue, 24 Dec 2019 13:18:44 -0500 Subject: Integrate ASN bucketing in Addrman and add tests Instead of using /16 netgroups to bucket nodes in Addrman for connection diversification, ASN, which better represents an actor in terms of network-layer infrastructure, is used. For testing, asmap.raw is used. It represents a minimal asmap needed for testing purposes. --- src/addrman.h | 101 +++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 25 deletions(-) (limited to 'src/addrman.h') diff --git a/src/addrman.h b/src/addrman.h index e54184ce35..40ecef4625 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -12,11 +12,17 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include + /** * Extended statistics about a CAddress @@ -72,15 +78,15 @@ public: } //! Calculate in which "tried" bucket this entry belongs - int GetTriedBucket(const uint256 &nKey) const; + int GetTriedBucket(const uint256 &nKey, const std::vector &asmap) const; //! Calculate in which "new" bucket this entry belongs, given a certain source - int GetNewBucket(const uint256 &nKey, const CNetAddr& src) const; + int GetNewBucket(const uint256 &nKey, const CNetAddr& src, const std::vector &asmap) const; //! Calculate in which "new" bucket this entry belongs, using its default source - int GetNewBucket(const uint256 &nKey) const + int GetNewBucket(const uint256 &nKey, const std::vector &asmap) const { - return GetNewBucket(nKey, source); + return GetNewBucket(nKey, source, asmap); } //! Calculate in which position of a bucket to store this entry. @@ -174,6 +180,7 @@ static const int64_t ADDRMAN_TEST_WINDOW = 40*60; // 40 minutes */ class CAddrMan { +friend class CAddrManTest; protected: //! 
critical section to protect the inner data structures mutable CCriticalSection cs; @@ -268,9 +275,29 @@ protected: void SetServices_(const CService &addr, ServiceFlags nServices) EXCLUSIVE_LOCKS_REQUIRED(cs); public: + // Compressed IP->ASN mapping, loaded from a file when a node starts. + // Should always be empty if no file was provided. + // This mapping is then used for bucketing nodes in Addrman. + // + // If asmap is provided, nodes will be bucketed by + // AS they belong to, in order to make it impossible for a node + // to connect to several nodes hosted in a single AS. + // This is done in response to the Erebus attack, but also to generally + // diversify the connections every node creates, + // especially useful when a large fraction of nodes + // operate under a couple of cloud providers. + // + // If a new asmap was provided, the existing records + // would be re-bucketed accordingly. + std::vector m_asmap; + + // Read asmap from provided binary file + static std::vector DecodeAsmap(fs::path path); + + /** * serialized format: - * * version byte (currently 1) + * * version byte (1 for pre-asmap files, 2 for files including asmap version) * * 0x20 + nKey (serialized as if it were a vector, for backward compatibility) * * nNew * * nTried @@ -302,7 +329,7 @@ public: { LOCK(cs); - unsigned char nVersion = 1; + unsigned char nVersion = 2; s << nVersion; s << ((unsigned char)32); s << nKey; @@ -345,6 +372,13 @@ public: } } } + // Store asmap version after bucket entries so that it + // can be ignored by older clients for backward compatibility. 
+ uint256 asmap_version; + if (m_asmap.size() != 0) { + asmap_version = SerializeHash(m_asmap); + } + s << asmap_version; } template @@ -353,7 +387,6 @@ public: LOCK(cs); Clear(); - unsigned char nVersion; s >> nVersion; unsigned char nKeySize; @@ -383,16 +416,6 @@ public: mapAddr[info] = n; info.nRandomPos = vRandom.size(); vRandom.push_back(n); - if (nVersion != 1 || nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) { - // In case the new table data cannot be used (nVersion unknown, or bucket count wrong), - // immediately try to give them a reference based on their primary source address. - int nUBucket = info.GetNewBucket(nKey); - int nUBucketPos = info.GetBucketPosition(nKey, true, nUBucket); - if (vvNew[nUBucket][nUBucketPos] == -1) { - vvNew[nUBucket][nUBucketPos] = n; - info.nRefCount++; - } - } } nIdCount = nNew; @@ -401,7 +424,7 @@ public: for (int n = 0; n < nTried; n++) { CAddrInfo info; s >> info; - int nKBucket = info.GetTriedBucket(nKey); + int nKBucket = info.GetTriedBucket(nKey, m_asmap); int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket); if (vvTried[nKBucket][nKBucketPos] == -1) { info.nRandomPos = vRandom.size(); @@ -417,7 +440,9 @@ public: } nTried -= nLost; - // Deserialize positions in the new table (if possible). + // Store positions in the new table buckets to apply later (if possible). 
+ std::map entryToBucket; // Represents which entry belonged to which bucket when serializing + for (int bucket = 0; bucket < nUBuckets; bucket++) { int nSize = 0; s >> nSize; @@ -425,12 +450,38 @@ public: int nIndex = 0; s >> nIndex; if (nIndex >= 0 && nIndex < nNew) { - CAddrInfo &info = mapInfo[nIndex]; - int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); - if (nVersion == 1 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) { - info.nRefCount++; - vvNew[bucket][nUBucketPos] = nIndex; - } + entryToBucket[nIndex] = bucket; + } + } + } + + uint256 supplied_asmap_version; + if (m_asmap.size() != 0) { + supplied_asmap_version = SerializeHash(m_asmap); + } + uint256 serialized_asmap_version; + if (nVersion > 1) { + s >> serialized_asmap_version; + } + + for (int n = 0; n < nNew; n++) { + CAddrInfo &info = mapInfo[n]; + int bucket = entryToBucket[n]; + int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); + if (nVersion == 2 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && + info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) { + // Bucketing has not changed, using existing bucket positions for the new table + vvNew[bucket][nUBucketPos] = n; + info.nRefCount++; + } else { + // In case the new table data cannot be used (nVersion unknown, bucket count wrong or new asmap), + // try to give them a reference based on their primary source address. + LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); + bucket = info.GetNewBucket(nKey, m_asmap); + nUBucketPos = info.GetBucketPosition(nKey, true, bucket); + if (vvNew[bucket][nUBucketPos] == -1) { + vvNew[bucket][nUBucketPos] = n; + info.nRefCount++; } } } -- cgit v1.2.3