aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorW. J. van der Laan <laanwj@protonmail.com>2021-09-20 19:35:52 +0200
committerW. J. van der Laan <laanwj@protonmail.com>2021-09-20 19:47:55 +0200
commit7f7bd3111cdc673fec867af9a849afb85b90c3fa (patch)
treea717a700b8e30209ce3a6ee934e217c67a694022 /src
parent488e7455608dd075c017c1c9bb6ed34b1706e45e (diff)
parent57ce20307e604530f78ef4f0f8d9fb94f80ca81b (diff)
Merge bitcoin/bitcoin#22974: addrman: Improve performance of Good
57ce20307e604530f78ef4f0f8d9fb94f80ca81b fuzz: allow lower number of sources (Martin Zumsande) acf656d540a82e6fc30421590305cfe295eabbb5 fuzz: Use public interface to fill addrman tried tables (Martin Zumsande) eb2e113df13c7b1ede279878f5cbad877af49f8e addrman: Improve performance of Good (Martin Zumsande) Pull request description: Currently, `CAddrman::Good()` is rather slow because the process of moving an addr from new to tried involves looping over the new tables twice: 1) In `Good_()`, there is a loop searching for a new bucket the addr is currently in, but this information is never used except for aborting if it is not found anywhere (since [this commit](https://github.com/bitcoin/bitcoin/commit/e6b343d880f50d52390c5af8623afa15fcbc65a2#diff-49d1faa58beca1ee1509a247e0331bb91f8604e30a483a7b2dea813e6cea02e2R263) it is no longer passed to `MakeTried`) This is unnecessary because in a non-corrupted addrman, an address that is not in New must be either in Tried or not at all in addrman, both cases in which we'd return early in `Good_()` and never get to this point. I removed this loop (and left a check for `nRefCount` as a belt-and-suspenders check). 2) In `MakeTried()`, which is called from `Good_()`, another loop removes all instances of this address from new. This can be spedup by stopping the search at `nRefCount==0`. Further reductions in `nRefCount` would only lead to an assert anyway. Moreover, the search can be started at the bucket determined by the source of the addr for which `Good` was called, so that if it is present just once in New, no further buckets need to be checked. While calls to `Good()` are not that frequent normally, the performance gain is clearly seen in the fuzz target `addman_serdeser`, where, because of the slowness in creating a decently filled addrman, a shortcut was created that would directly populate the tried tables by reaching into addrman's internals, bypassing `Good()` (#21129). I removed this workaround in the second commit: Using `Good()` is still slower by a factor of 2 (down from a factor of ~60 before), but I think that this compensated by the advantages of not having to reach into the internal structures of addrman (see https://github.com/jnewbery/bitcoin/pull/18#issuecomment-775218676). [Edit]: For benchmark results see https://github.com/bitcoin/bitcoin/pull/22974#issuecomment-919435266 and https://github.com/bitcoin/bitcoin/pull/22974#issuecomment-920445700 - the benchmark `AddrManGood` shows a significant speedup by a factor >100. ACKs for top commit: naumenkogs: ACK 57ce20307e604530f78ef4f0f8d9fb94f80ca81b jnewbery: ACK 57ce20307e laanwj: Code review ACK 57ce20307e604530f78ef4f0f8d9fb94f80ca81b theStack: ACK 57ce20307e604530f78ef4f0f8d9fb94f80ca81b vasild: ACK 57ce20307e604530f78ef4f0f8d9fb94f80ca81b Tree-SHA512: fb6dfc198f2e28bdbb41cef9709828f22d83b4be0e640a3155ca42e771b6f58466de1468f54d773e794f780a79113f9f7d522032e87fdd75bdc4d99330445198
Diffstat (limited to 'src')
-rw-r--r--src/addrman.cpp26
-rw-r--r--src/test/fuzz/addrman.cpp25
2 files changed, 12 insertions, 39 deletions
diff --git a/src/addrman.cpp b/src/addrman.cpp
index 772c34ae77..a1e8cb1bf1 100644
--- a/src/addrman.cpp
+++ b/src/addrman.cpp
@@ -11,6 +11,7 @@
#include <netaddress.h>
#include <serialize.h>
#include <streams.h>
+#include <util/check.h>
#include <cmath>
#include <optional>
@@ -488,11 +489,14 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
AssertLockHeld(cs);
// remove the entry from all new buckets
- for (int bucket = 0; bucket < ADDRMAN_NEW_BUCKET_COUNT; bucket++) {
- int pos = info.GetBucketPosition(nKey, true, bucket);
+ const int start_bucket{info.GetNewBucket(nKey, m_asmap)};
+ for (int n = 0; n < ADDRMAN_NEW_BUCKET_COUNT; ++n) {
+ const int bucket{(start_bucket + n) % ADDRMAN_NEW_BUCKET_COUNT};
+ const int pos{info.GetBucketPosition(nKey, true, bucket)};
if (vvNew[bucket][pos] == nId) {
vvNew[bucket][pos] = -1;
info.nRefCount--;
+ if (info.nRefCount == 0) break;
}
}
nNew--;
@@ -564,22 +568,10 @@ void CAddrMan::Good_(const CService& addr, bool test_before_evict, int64_t nTime
if (info.fInTried)
return;
- // find a bucket it is in now
- int nRnd = insecure_rand.randrange(ADDRMAN_NEW_BUCKET_COUNT);
- int nUBucket = -1;
- for (unsigned int n = 0; n < ADDRMAN_NEW_BUCKET_COUNT; n++) {
- int nB = (n + nRnd) % ADDRMAN_NEW_BUCKET_COUNT;
- int nBpos = info.GetBucketPosition(nKey, true, nB);
- if (vvNew[nB][nBpos] == nId) {
- nUBucket = nB;
- break;
- }
- }
-
- // if no bucket is found, something bad happened;
- // TODO: maybe re-add the node, but for now, just bail out
- if (nUBucket == -1)
+ // if it is not in new, something bad happened
+ if (!Assume(info.nRefCount > 0)) {
return;
+ }
// which tried bucket to move the entry to
int tried_bucket = info.GetTriedBucket(nKey, m_asmap);
diff --git a/src/test/fuzz/addrman.cpp b/src/test/fuzz/addrman.cpp
index fdbfb3b93b..95c5a99c1b 100644
--- a/src/test/fuzz/addrman.cpp
+++ b/src/test/fuzz/addrman.cpp
@@ -85,7 +85,7 @@ public:
// 0, 1, 2, 3 corresponding to 0%, 100%, 50%, 33%
const size_t n = m_fuzzed_data_provider.ConsumeIntegralInRange<size_t>(0, 3);
- const size_t num_sources = m_fuzzed_data_provider.ConsumeIntegralInRange<size_t>(10, 50);
+ const size_t num_sources = m_fuzzed_data_provider.ConsumeIntegralInRange<size_t>(1, 50);
CNetAddr prev_source;
// Use insecure_rand inside the loops instead of m_fuzzed_data_provider because when
// the latter is exhausted it just returns 0.
@@ -96,31 +96,12 @@ public:
for (size_t j = 0; j < num_addresses; ++j) {
const auto addr = CAddress{CService{RandAddr(), 8333}, NODE_NETWORK};
const auto time_penalty = insecure_rand.randrange(100000001);
-#if 1
- // 2.83 sec to fill.
- if (n > 0 && mapInfo.size() % n == 0 && mapAddr.find(addr) == mapAddr.end()) {
- // Add to the "tried" table (if the bucket slot is free).
- const CAddrInfo dummy{addr, source};
- const int bucket = dummy.GetTriedBucket(nKey, m_asmap);
- const int bucket_pos = dummy.GetBucketPosition(nKey, false, bucket);
- if (vvTried[bucket][bucket_pos] == -1) {
- int id;
- CAddrInfo* addr_info = Create(addr, source, &id);
- vvTried[bucket][bucket_pos] = id;
- addr_info->fInTried = true;
- ++nTried;
- }
- } else {
- // Add to the "new" table.
- Add_(addr, source, time_penalty);
- }
-#else
- // 261.91 sec to fill.
Add_(addr, source, time_penalty);
+
if (n > 0 && mapInfo.size() % n == 0) {
Good_(addr, false, GetTime());
}
-#endif
+
// Add 10% of the addresses from more than one source.
if (insecure_rand.randrange(10) == 0 && prev_source.IsValid()) {
Add_(addr, prev_source, time_penalty);