1 files changed, 131 insertions, 75 deletions
diff --git a/src/addrman.h b/src/addrman.h
index d47217683c..8116d0b763 100644
--- a/src/addrman.h
+++ b/src/addrman.h
@@ -79,17 +79,20 @@ public:
     }
 
     //! Calculate in which "tried" bucket this entry belongs
-    int GetTriedBucket(const std::vector<unsigned char> &nKey) const;
+    int GetTriedBucket(const uint256 &nKey) const;
 
     //! Calculate in which "new" bucket this entry belongs, given a certain source
-    int GetNewBucket(const std::vector<unsigned char> &nKey, const CNetAddr& src) const;
+    int GetNewBucket(const uint256 &nKey, const CNetAddr& src) const;
 
     //! Calculate in which "new" bucket this entry belongs, using its default source
-    int GetNewBucket(const std::vector<unsigned char> &nKey) const
+    int GetNewBucket(const uint256 &nKey) const
     {
         return GetNewBucket(nKey, source);
     }
 
+    //! Calculate in which position of a bucket to store this entry.
+    int GetBucketPosition(const uint256 &nKey, bool fNew, int nBucket) const;
+
     //! Determine whether the statistics about this entry are bad enough so that it can just be deleted
     bool IsTerrible(int64_t nNow = GetAdjustedTime()) const;
 
@@ -106,15 +109,15 @@ public:
  *
  * To that end:
  *  * Addresses are organized into buckets.
- *    * Address that have not yet been tried go into 256 "new" buckets.
- *      * Based on the address range (/16 for IPv4) of source of the information, 32 buckets are selected at random
+ *    * Address that have not yet been tried go into 1024 "new" buckets.
+ *      * Based on the address range (/16 for IPv4) of source of the information, 64 buckets are selected at random
  *      * The actual bucket is chosen from one of these, based on the range the address itself is located.
- *      * One single address can occur in up to 4 different buckets, to increase selection chances for addresses that
+ *      * One single address can occur in up to 8 different buckets, to increase selection chances for addresses that
  *        are seen frequently. The chance for increasing this multiplicity decreases exponentially.
  *      * When adding a new address to a full bucket, a randomly chosen entry (with a bias favoring less recently seen
  *        ones) is removed from it first.
- *    * Addresses of nodes that are known to be accessible go into 64 "tried" buckets.
- *      * Each address range selects at random 4 of these buckets.
+ *    * Addresses of nodes that are known to be accessible go into 256 "tried" buckets.
+ *      * Each address range selects at random 8 of these buckets.
  *      * The actual bucket is chosen from one of these, based on the full address.
  *      * When adding a new good address to a full bucket, a randomly chosen entry (with a bias favoring less recently
  *        tried ones) is evicted from it, back to the "new" buckets.
@@ -125,28 +128,22 @@ public:
  */
 
 //! total number of buckets for tried addresses
-#define ADDRMAN_TRIED_BUCKET_COUNT 64
-
-//! maximum allowed number of entries in buckets for tried addresses
-#define ADDRMAN_TRIED_BUCKET_SIZE 64
+#define ADDRMAN_TRIED_BUCKET_COUNT 256
 
 //! total number of buckets for new addresses
-#define ADDRMAN_NEW_BUCKET_COUNT 256
+#define ADDRMAN_NEW_BUCKET_COUNT 1024
 
-//! maximum allowed number of entries in buckets for new addresses
-#define ADDRMAN_NEW_BUCKET_SIZE 64
+//! maximum allowed number of entries in buckets for new and tried addresses
+#define ADDRMAN_BUCKET_SIZE 64
 
 //! over how many buckets entries with tried addresses from a single group (/16 for IPv4) are spread
-#define ADDRMAN_TRIED_BUCKETS_PER_GROUP 4
+#define ADDRMAN_TRIED_BUCKETS_PER_GROUP 8
 
 //! over how many buckets entries with new addresses originating from a single group are spread
-#define ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP 32
+#define ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP 64
 
 //! in how many buckets for entries with new addresses a single address may occur
-#define ADDRMAN_NEW_BUCKETS_PER_ADDRESS 4
-
-//! how many entries in a bucket with tried addresses are inspected, when selecting one to replace
-#define ADDRMAN_TRIED_ENTRIES_INSPECT_ON_EVICT 4
+#define ADDRMAN_NEW_BUCKETS_PER_ADDRESS 8
 
 //! how old addresses can maximally be
 #define ADDRMAN_HORIZON_DAYS 30
@@ -176,7 +173,7 @@ private:
     mutable CCriticalSection cs;
 
     //! secret key to randomize bucket select with
-    std::vector<unsigned char> nKey;
+    uint256 nKey;
 
     //! last used nId
     int nIdCount;
@@ -194,13 +191,13 @@ private:
     int nTried;
 
     //! list of "tried" buckets
-    std::vector<std::vector<int> > vvTried;
+    int vvTried[ADDRMAN_TRIED_BUCKET_COUNT][ADDRMAN_BUCKET_SIZE];
 
     //! number of (unique) "new" entries
     int nNew;
 
     //! list of "new" buckets
-    std::vector<std::set<int> > vvNew;
+    int vvNew[ADDRMAN_NEW_BUCKET_COUNT][ADDRMAN_BUCKET_SIZE];
 
 protected:
 
@@ -214,17 +211,14 @@ protected:
     //! Swap two elements in vRandom.
     void SwapRandom(unsigned int nRandomPos1, unsigned int nRandomPos2);
 
-    //! Return position in given bucket to replace.
-    int SelectTried(int nKBucket);
+    //! Move an entry from the "new" table(s) to the "tried" table
+    void MakeTried(CAddrInfo& info, int nId);
 
-    //! Remove an element from a "new" bucket.
-    //! This is the only place where actual deletions occur.
-    //! Elements are never deleted while in the "tried" table, only possibly evicted back to the "new" table.
-    int ShrinkNew(int nUBucket);
+    //! Delete an entry. It must not be in tried, and have refcount 0.
+    void Delete(int nId);
 
-    //! Move an entry from the "new" table(s) to the "tried" table
-    //! @pre vvUnkown[nOrigin].count(nId) != 0
-    void MakeTried(CAddrInfo& info, int nId, int nOrigin);
+    //! Clear a position in a "new" table. This is the only place where entries are actually deleted.
+    void ClearNew(int nUBucket, int nUBucketPos);
 
     //! Mark an entry "good", possibly moving it from "new" to "tried".
     void Good_(const CService &addr, int64_t nTime);
@@ -237,7 +231,7 @@ protected:
 
     //! Select an address to connect to.
     //! nUnkBias determines how much to favor new addresses over tried ones (min=0, max=100)
-    CAddress Select_(int nUnkBias);
+    CAddress Select_();
 
 #ifdef DEBUG_ADDRMAN
     //! Perform consistency check. Returns an error code or zero.
@@ -253,17 +247,21 @@ protected:
 public:
     /**
      * serialized format:
-     * * version byte (currently 0)
-     * * nKey
+     * * version byte (currently 1)
+     * * 0x20 + nKey (serialized as if it were a vector, for backward compatibility)
      * * nNew
      * * nTried
-     * * number of "new" buckets
+     * * number of "new" buckets XOR 2**30
      * * all nNew addrinfos in vvNew
      * * all nTried addrinfos in vvTried
      * * for each bucket:
      *   * number of elements
      *   * for each element: index
      *
+     * 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it
+     * as incompatible. This is necessary because it did not check the version number on
+     * deserialization.
+     *
      * Notice that vvTried, mapAddr and vVector are never encoded explicitly;
      * they are instead reconstructed from the other information.
      *
@@ -275,48 +273,53 @@ public:
      *
      * We don't use ADD_SERIALIZE_METHODS since the serialization and deserialization code has
      * very little in common.
-     *
      */
     template<typename Stream>
     void Serialize(Stream &s, int nType, int nVersionDummy) const
     {
         LOCK(cs);
 
-        unsigned char nVersion = 0;
+        unsigned char nVersion = 1;
         s << nVersion;
+        s << ((unsigned char)32);
         s << nKey;
         s << nNew;
         s << nTried;
 
-        int nUBuckets = ADDRMAN_NEW_BUCKET_COUNT;
+        int nUBuckets = ADDRMAN_NEW_BUCKET_COUNT ^ (1 << 30);
         s << nUBuckets;
         std::map<int, int> mapUnkIds;
         int nIds = 0;
         for (std::map<int, CAddrInfo>::const_iterator it = mapInfo.begin(); it != mapInfo.end(); it++) {
-            if (nIds == nNew) break; // this means nNew was wrong, oh ow
             mapUnkIds[(*it).first] = nIds;
             const CAddrInfo &info = (*it).second;
             if (info.nRefCount) {
+                assert(nIds != nNew); // this means nNew was wrong, oh ow
                 s << info;
                 nIds++;
             }
         }
         nIds = 0;
         for (std::map<int, CAddrInfo>::const_iterator it = mapInfo.begin(); it != mapInfo.end(); it++) {
-            if (nIds == nTried) break; // this means nTried was wrong, oh ow
             const CAddrInfo &info = (*it).second;
             if (info.fInTried) {
+                assert(nIds != nTried); // this means nTried was wrong, oh ow
                 s << info;
                 nIds++;
             }
         }
-        for (std::vector<std::set<int> >::const_iterator it = vvNew.begin(); it != vvNew.end(); it++) {
-            const std::set<int> &vNew = (*it);
-            int nSize = vNew.size();
+        for (int bucket = 0; bucket < ADDRMAN_NEW_BUCKET_COUNT; bucket++) {
+            int nSize = 0;
+            for (int i = 0; i < ADDRMAN_BUCKET_SIZE; i++) {
+                if (vvNew[bucket][i] != -1)
+                    nSize++;
+            }
             s << nSize;
-            for (std::set<int>::const_iterator it2 = vNew.begin(); it2 != vNew.end(); it2++) {
-                int nIndex = mapUnkIds[*it2];
-                s << nIndex;
+            for (int i = 0; i < ADDRMAN_BUCKET_SIZE; i++) {
+                if (vvNew[bucket][i] != -1) {
+                    int nIndex = mapUnkIds[vvNew[bucket][i]];
+                    s << nIndex;
+                }
             }
         }
     }
@@ -326,64 +329,97 @@ public:
     {
         LOCK(cs);
 
+        Clear();
+
         unsigned char nVersion;
         s >> nVersion;
+        unsigned char nKeySize;
+        s >> nKeySize;
+        if (nKeySize != 32) throw std::ios_base::failure("Incorrect keysize in addrman deserialization");
         s >> nKey;
         s >> nNew;
         s >> nTried;
-
         int nUBuckets = 0;
         s >> nUBuckets;
-        nIdCount = 0;
-        mapInfo.clear();
-        mapAddr.clear();
-        vRandom.clear();
-        vvTried = std::vector<std::vector<int> >(ADDRMAN_TRIED_BUCKET_COUNT, std::vector<int>(0));
-        vvNew = std::vector<std::set<int> >(ADDRMAN_NEW_BUCKET_COUNT, std::set<int>());
+        if (nVersion != 0) {
+            nUBuckets ^= (1 << 30);
+        }
+
+        // Deserialize entries from the new table.
         for (int n = 0; n < nNew; n++) {
             CAddrInfo &info = mapInfo[n];
             s >> info;
             mapAddr[info] = n;
             info.nRandomPos = vRandom.size();
             vRandom.push_back(n);
-            if (nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) {
-                vvNew[info.GetNewBucket(nKey)].insert(n);
-                info.nRefCount++;
+            if (nVersion != 1 || nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) {
+                // In case the new table data cannot be used (nVersion unknown, or bucket count wrong),
+                // immediately try to give them a reference based on their primary source address.
+                int nUBucket = info.GetNewBucket(nKey);
+                int nUBucketPos = info.GetBucketPosition(nKey, true, nUBucket);
+                if (vvNew[nUBucket][nUBucketPos] == -1) {
+                    vvNew[nUBucket][nUBucketPos] = n;
+                    info.nRefCount++;
+                }
             }
         }
         nIdCount = nNew;
+
+        // Deserialize entries from the tried table.
         int nLost = 0;
         for (int n = 0; n < nTried; n++) {
             CAddrInfo info;
             s >> info;
-            std::vector<int> &vTried = vvTried[info.GetTriedBucket(nKey)];
-            if (vTried.size() < ADDRMAN_TRIED_BUCKET_SIZE) {
+            int nKBucket = info.GetTriedBucket(nKey);
+            int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
+            if (vvTried[nKBucket][nKBucketPos] == -1) {
                 info.nRandomPos = vRandom.size();
                 info.fInTried = true;
                 vRandom.push_back(nIdCount);
                 mapInfo[nIdCount] = info;
                 mapAddr[info] = nIdCount;
-                vTried.push_back(nIdCount);
+                vvTried[nKBucket][nKBucketPos] = nIdCount;
                 nIdCount++;
             } else {
                 nLost++;
             }
         }
         nTried -= nLost;
-        for (int b = 0; b < nUBuckets; b++) {
-            std::set<int> &vNew = vvNew[b];
+
+        // Deserialize positions in the new table (if possible).
+        for (int bucket = 0; bucket < nUBuckets; bucket++) {
             int nSize = 0;
             s >> nSize;
             for (int n = 0; n < nSize; n++) {
                 int nIndex = 0;
                 s >> nIndex;
-                CAddrInfo &info = mapInfo[nIndex];
-                if (nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) {
-                    info.nRefCount++;
-                    vNew.insert(nIndex);
+                if (nIndex >= 0 && nIndex < nNew) {
+                    CAddrInfo &info = mapInfo[nIndex];
+                    int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
+                    if (nVersion == 1 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) {
+                        info.nRefCount++;
+                        vvNew[bucket][nUBucketPos] = nIndex;
+                    }
                 }
             }
         }
+
+        // Prune new entries with refcount 0 (as a result of collisions).
+        int nLostUnk = 0;
+        for (std::map<int, CAddrInfo>::const_iterator it = mapInfo.begin(); it != mapInfo.end(); ) {
+            if (it->second.fInTried == false && it->second.nRefCount == 0) {
+                std::map<int, CAddrInfo>::const_iterator itCopy = it++;
+                Delete(itCopy->first);
+                nLostUnk++;
+            } else {
+                it++;
+            }
+        }
+        if (nLost + nLostUnk > 0) {
+            LogPrint("addrman", "addrman lost %i new and %i tried addresses due to collisions\n", nLostUnk, nLost);
+        }
+
+        Check();
     }
 
     unsigned int GetSerializeSize(int nType, int nVersion) const
@@ -391,14 +427,34 @@ public:
         return (CSizeComputer(nType, nVersion) << *this).size();
     }
 
-    CAddrMan() : vRandom(0), vvTried(ADDRMAN_TRIED_BUCKET_COUNT, std::vector<int>(0)), vvNew(ADDRMAN_NEW_BUCKET_COUNT, std::set<int>())
+    void Clear()
     {
-         nKey.resize(32);
-         GetRandBytes(&nKey[0], 32);
+        std::vector<int>().swap(vRandom);
+        nKey = GetRandHash();
+        for (size_t bucket = 0; bucket < ADDRMAN_NEW_BUCKET_COUNT; bucket++) {
+            for (size_t entry = 0; entry < ADDRMAN_BUCKET_SIZE; entry++) {
+                vvNew[bucket][entry] = -1;
+            }
+        }
+        for (size_t bucket = 0; bucket < ADDRMAN_TRIED_BUCKET_COUNT; bucket++) {
+            for (size_t entry = 0; entry < ADDRMAN_BUCKET_SIZE; entry++) {
+                vvTried[bucket][entry] = -1;
+            }
+        }
+
+        nIdCount = 0;
+        nTried = 0;
+        nNew = 0;
+    }
 
-         nIdCount = 0;
-         nTried = 0;
-         nNew = 0;
+    CAddrMan()
+    {
+        Clear();
+    }
+
+    ~CAddrMan()
+    {
+        nKey.SetNull();
     }
 
     //! Return the number of (unique) addresses in all tables.
@@ -477,13 +533,13 @@ public:
      * Choose an address to connect to.
      * nUnkBias determines how much "new" entries are favored over "tried" ones (0-100).
      */
-    CAddress Select(int nUnkBias = 50)
+    CAddress Select()
     {
         CAddress addrRet;
         {
             LOCK(cs);
             Check();
-            addrRet = Select_(nUnkBias);
+            addrRet = Select_();
             Check();
         }
         return addrRet;