Merge bitcoin/bitcoin#25970: Add headerssync tuning parameters optimization script to repo

3d420d8f28f2d351abf8b0afe90848110e15d50c Add instructions for headerssync-params.py to release-process.md (Pieter Wuille) 53d7d35b5899685cd1577156250068e0cab502f4 Update parameters in headerssync.cpp (Pieter Wuille) 7899402cff708319b1c5181242a97557eefe1ae7 Add headerssync-params.py script to the repository (Pieter Wuille) Pull request description: Builds upon #25946, as it incorporates changes based on the selected values there. This adds the headerssync tuning parameters optimization script from https://gist.github.com/sipa/016ae445c132cdf65a2791534dfb7ae1 to the repository, updates the parameters based on its output, and adds release process instructions for doing this update in the future. A few considerations: * It would be a bit cleaner to have these parameters be part of `CChainParams`, but due to the nature of the approach, it really only applies to chains with unforgeable proof-of-work, which we really can only reasonably expect from mainnet, so I think it's fine to keep them local to `headerssync.cpp`. Keeping them as compile-time evaluatable constants also has a (likely negligible) performance impact (avoiding runtime modulo operations). * If we want to make sure the chainparams and headerssync params don't go out of date, it could be possible to run the script in CI, and and possibly even have the parameters be generated automatically at build time. I think that's overkill for how unfrequently these need to change, and running the script has non-trivial cost (~minutes in the normal python interpreter). * A viable alternative is just leaving this out-of-repo entirely, and just do ad-hoc updating from time to time. Having it in the repo and release notes does make sure it's not forgotten, though adds a cost to contributors/maintainers who follow the process. ACKs for top commit: ajtowns: reACK 3d420d8f28f2d351abf8b0afe90848110e15d50c Tree-SHA512: 03188301c20423c72c1cbd008ccce89b93e2898edcbeecc561b2928a0d64e9a829ab0744dc3b017c23de8b02f3c107ae31e694302d3931f4dc3540e184de1963
author: fanquake <fanquake@gmail.com> 2023-10-05 11:25:15 +0100
committer: fanquake <fanquake@gmail.com> 2023-10-05 11:28:29 +0100
commit: 2eacc61ad74c767f716cff65e958c7ff45f54aed (patch)
tree: 4c5b197f6472d637e96c7dfefcbfeae42c33423b /contrib/devtools
parent: 0e3de3b83e99c29d4e84ae92894e3a9fedfe7bea (diff)
parent: 3d420d8f28f2d351abf8b0afe90848110e15d50c (diff)
2 files changed, 367 insertions, 0 deletions
diff --git a/contrib/devtools/README.md b/contrib/devtools/README.md
index 54b1a85588..8bbf39b67f 100644
--- a/contrib/devtools/README.md
+++ b/contrib/devtools/README.md
@@ -90,6 +90,16 @@ example:
 BUILDDIR=$PWD/build contrib/devtools/gen-manpages.py
 ```
 
+headerssync-params.py
+=====================
+
+A script to generate optimal parameters for the headerssync module (src/headerssync.cpp). It takes no command-line
+options, as all its configuration is set at the top of the file. It runs many times faster inside PyPy. Invocation:
+
+```bash
+pypy3 contrib/devtools/headerssync-params.py
+```
+
 gen-bitcoin-conf.sh
 ===================
 
diff --git a/contrib/devtools/headerssync-params.py b/contrib/devtools/headerssync-params.py
new file mode 100644
index 0000000000..f0088d6cb9
--- /dev/null
+++ b/contrib/devtools/headerssync-params.py
@@ -0,0 +1,357 @@
+#!/usr/bin/env python3
+# Copyright (c) 2022 Pieter Wuille
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+"""Script to find the optimal parameters for the headerssync module through simulation."""
+
+from math import log, exp, sqrt
+from datetime import datetime, timedelta
+import random
+
+# Parameters:
+
+# Aim for still working fine at some point in the future. [datetime]
+TIME = datetime(2026, 5, 25)
+
+# Expected block interval. [timedelta]
+BLOCK_INTERVAL = timedelta(seconds=600)
+
+# The number of headers corresponding to the minchainwork parameter. [headers]
+MINCHAINWORK_HEADERS = 784000
+
+# Combined processing bandwidth from all attackers to one victim. [bit/s]
+# 6 Gbit/s is approximately the speed at which a single thread of a Ryzen 5950X CPU thread can hash
+# headers. In practice, the victim's network bandwidth and network processing overheads probably
+# impose a far lower number, but it's a useful upper bound.
+ATTACK_BANDWIDTH = 6000000000
+
+# How much additional permanent memory usage are attackers (jointly) allowed to cause in the victim,
+# expressed as fraction of the normal memory usage due to mainchain growth, for the duration the
+# attack is sustained. [unitless]
+# 0.2 means that attackers, while they keep up the attack, can cause permanent memory usage due to
+# headers storage to grow at 1.2 header per BLOCK_INTERVAL.
+ATTACK_FRACTION = 0.2
+
+# When this is set, the mapping from period size to memory usage (at optimal buffer size for that
+# period) is assumed to be convex. This greatly speeds up the computation, and does not appear
+# to influence the outcome. Set to False for a stronger guarantee to get the optimal result.
+ASSUME_CONVEX = True
+
+# Explanation:
+#
+#  The headerssync module implements a DoS protection against low-difficulty header spam which does
+#  not rely on checkpoints. In short it works as follows:
+#
+#  - (initial) header synchronization is split into two phases:
+#    - A commitment phase, in which headers are downloaded from the peer, and a very compact
+#      commitment to them is remembered in per-peer memory. The commitment phase ends when the
+#      received chain's combined work reaches a predetermined threshold.
+#    - A redownload phase, during which the headers are downloaded a second time from the same peer,
+#      and compared against the commitment constructed in the first phase. If there is a match, the
+#      redownloaded headers are fed to validation and accepted into permanent storage.
+#
+#    This separation guarantees that no headers are accepted into permanent storage without
+#    requiring the peer to first prove the chain actually has sufficient work.
+#
+#  - To actually implement this commitment mechanism, the following approach is used:
+#    - Keep a *1 bit* commitment (constructed using a salted hash function), for every block whose
+#      height is a multiple of {period} plus an offset value. If RANDOMIZE_OFFSET, the offset,
+#      like the salt, is chosen randomly when the synchronization starts and kept fixed afterwards.
+#    - When redownloading, headers are fed through a per-peer queue that holds {bufsize} headers,
+#      before passing them to validation. All the headers in this queue are verified against the
+#      commitment bits created in the first phase before any header is released from it. This means
+#      {bufsize/period} bits are checked "on top of" each header before actually processing it,
+#      which results in a commitment structure with roughly {bufsize/period} bits of security, as
+#      once a header is modified, due to the prevhash inclusion, all future headers necessarily
+#      change as well.
+#
+#  The question is what these {period} and {bufsize} parameters need to be set to. This program
+#  exhaustively tests a range of values to find the optimal choice, taking into account:
+#
+#  - Minimizing the (maximum of) two scenarios that trigger per-peer memory usage:
+#
+#    - When downloading a (likely honest) chain that reaches the chainwork threshold after {n}
+#      blocks, and then redownloads them, we will consume per-peer memory that is sufficient to
+#      store {n/period} commitment bits and {bufsize} headers. We only consider attackers without
+#      sufficient hashpower (as otherwise they are from a PoW perspective not attackers), which
+#      means {n} is restricted to the honest chain's length before reaching minchainwork.
+#
+#    - When downloading a (likely false) chain of {n} headers that never reaches the chainwork
+#      threshold, we will consume per-peer memory that is sufficient to store {n/period}
+#      commitment bits. Such a chain may be very long, by exploiting the timewarp bug to avoid
+#      ramping up difficulty. There is however an absolute limit on how long such a chain can be: 6
+#      blocks per second since genesis, due to the increasing MTP consensus rule.
+#
+#  - Not gratuitously preventing synchronizing any valid chain, however difficult such a chain may
+#    be to construct. In particular, the above scenario with an enormous timewarp-expoiting chain
+#    cannot simply be ignored, as it is legal that the honest main chain is like that. We however
+#    do not bother minimizing the memory usage in that case (because a billion-header long honest
+#    chain will inevitably use far larger amounts of memory than designed for).
+#
+#  - Keep the rate at which attackers can get low-difficulty headers accepted to the block index
+#    negligible. Specifically, the possibility exists for an attacker to send the honest main
+#    chain's headers during the commitment phase, but then start deviating at an attacker-chosen
+#    point by sending novel low-difficulty headers instead. Depending on how high we set the
+#    {bufsize/period} ratio, we can make the probability that such a header makes it in
+#    arbitrarily small, but at the cost of higher memory during the redownload phase. It turns out,
+#    some rate of memory usage growth is expected anyway due to chain growth, so permitting the
+#    attacker to increase that rate by a small factor isn't concerning. The attacker may start
+#    somewhat later than genesis, as long as the difficulty doesn't get too high. This reduces
+#    the attacker bandwidth required at the cost of higher PoW needed for constructing the
+#    alternate chain. This trade-off is ignored here, as it results in at most a small constant
+#    factor in attack rate.
+
+
+# System properties:
+
+# Headers in the redownload buffer are stored without prevhash. [bits]
+COMPACT_HEADER_SIZE = 48 * 8
+
+# How many bits a header uses in P2P protocol. [bits]
+NET_HEADER_SIZE = 81 * 8
+
+# How many headers are sent at once. [headers]
+HEADER_BATCH_COUNT = 2000
+
+# Whether or not the offset of which blocks heights get checksummed is randomized.
+RANDOMIZE_OFFSET = True
+
+# Timestamp of the genesis block
+GENESIS_TIME = datetime(2009, 1, 3)
+
+# Derived values:
+
+# What rate of headers worth of RAM attackers are allowed to cause in the victim. [headers/s]
+LIMIT_HEADERRATE = ATTACK_FRACTION / BLOCK_INTERVAL.total_seconds()
+
+# How many headers can attackers (jointly) send a victim per second. [headers/s]
+NET_HEADERRATE = ATTACK_BANDWIDTH / NET_HEADER_SIZE
+
+# What fraction of headers sent by attackers can at most be accepted by a victim [unitless]
+LIMIT_FRACTION = LIMIT_HEADERRATE / NET_HEADERRATE
+
+# How many headers we permit attackers to cause being accepted per attack. [headers/attack]
+ATTACK_HEADERS = LIMIT_FRACTION * MINCHAINWORK_HEADERS
+
+
+def find_max_headers(when):
+    """Compute the maximum number of headers a valid Bitcoin chain can have at given time."""
+    # When exploiting the timewarp attack, this can be up to 6 per second since genesis.
+    return 6 * ((when - GENESIS_TIME) // timedelta(seconds=1))
+
+
+def lambert_w(value):
+    """Solve the equation x*exp(x)=value (x > 0, value > 0)."""
+    # Initial approximation.
+    approx = max(log(value), 0.0)
+    for _ in range(10):
+        # Newton-Rhapson iteration steps.
+        approx += (value * exp(-approx) - approx) / (approx + 1.0)
+    return approx
+
+
+def attack_rate(period, bufsize, limit=None):
+    """Compute maximal accepted headers per attack in (period, bufsize) configuration.
+
+    If limit is provided, the computation is stopped early when the result is known to exceed the
+    value in limit.
+    """
+
+    max_rate = None
+    max_honest = None
+    # Let the current batch 0 being received be the first one in which the attacker starts lying.
+    # They will only ever start doing so right after a commitment block, but where that is can be
+    # in a number of places. Let honest be the number of honest headers in this current batch,
+    # preceding the forged ones.
+    for honest in range(HEADER_BATCH_COUNT):
+        # The number of headers the attack under consideration will on average get accepted.
+        # This is the number being computed.
+        rate = 0
+
+        # Iterate over the possible alignments of commitments w.r.t. the first batch. In case
+        # the alignments are randomized, try all values. If not, the attacker can know/choose
+        # the alignment, and will always start forging right after a commitment.
+        if RANDOMIZE_OFFSET:
+            align_choices = list(range(period))
+        else:
+            align_choices = [(honest - 1) % period]
+        # Now loop over those possible alignment values, computing the average attack rate
+        # over them by dividing each contribution by len(align_choices).
+        for align in align_choices:
+            # These state variables capture the situation after receiving the first batch.
+            # - The number of headers received after the last commitment for an honest block:
+            after_good_commit = HEADER_BATCH_COUNT - honest + ((honest - align - 1) % period)
+            # - The number of forged headers in the redownload buffer:
+            forged_in_buf = HEADER_BATCH_COUNT - honest
+
+            # Now iterate over the next batches of headers received, adding contributions to the
+            # rate variable.
+            while True:
+                # Process the first HEADER_BATCH_COUNT headers in the buffer:
+                accept_forged_headers = max(forged_in_buf - bufsize, 0)
+                forged_in_buf -= accept_forged_headers
+                if accept_forged_headers:
+                    # The probability the attack has not been detected yet at this point:
+                    prob = 0.5 ** (after_good_commit // period)
+                    # Update attack rate, divided by align_choices to average over the alignments.
+                    rate += accept_forged_headers * prob / len(align_choices)
+                    # If this means we exceed limit, bail out early (performance optimization).
+                    if limit is not None and rate >= limit:
+                        return rate, None
+                    # If the maximal term being added is negligible compared to rate, stop
+                    # iterating.
+                    if HEADER_BATCH_COUNT * prob < 1.0e-16 * rate * len(align_choices):
+                        break
+                # Update state from a new incoming batch (which is all forged)
+                after_good_commit += HEADER_BATCH_COUNT
+                forged_in_buf += HEADER_BATCH_COUNT
+
+        if max_rate is None or rate > max_rate:
+            max_rate = rate
+            max_honest = honest
+
+    return max_rate, max_honest
+
+
+def memory_usage(period, bufsize, when):
+    """How much memory (max,mainchain,timewarp) does the (period,bufsize) configuration need?"""
+
+    # Per-peer memory usage for a timewarp chain that never meets minchainwork
+    mem_timewarp = find_max_headers(when) // period
+    # Per-peer memory usage for being fed the main chain
+    mem_mainchain = (MINCHAINWORK_HEADERS // period) + bufsize * COMPACT_HEADER_SIZE
+    # Maximum per-peer memory usage
+    max_mem = max(mem_timewarp, mem_mainchain)
+
+    return max_mem, mem_mainchain, mem_timewarp
+
+def find_bufsize(period, attack_headers, when, max_mem=None, min_bufsize=1):
+    """Determine how big bufsize needs to be given a specific period length.
+
+    Given a period, find the smallest value of bufsize such that the attack rate against the
+    (period, bufsize) configuration is below attack_headers. If max_mem is provided, and no
+    such bufsize exists that needs less than max_mem bits of memory, None is returned.
+    min_bufsize is the minimal result to be considered."""
+
+    if max_mem is None:
+        succ_buf = min_bufsize - 1
+        fail_buf = min_bufsize
+        # First double iteratively until an upper bound for failure is found.
+        while True:
+            if attack_rate(period, fail_buf, attack_headers)[0] < attack_headers:
+                break
+            succ_buf, fail_buf = fail_buf, 3 * fail_buf - 2 * succ_buf
+    else:
+        # If a long low-work header chain exists that exceeds max_mem already, give up.
+        if find_max_headers(when) // period > max_mem:
+            return None
+        # Otherwise, verify that the maximal buffer size that permits a mainchain sync with less
+        # than max_mem memory is sufficient to get the attack rate below attack_headers. If not,
+        # also give up.
+        max_buf = (max_mem - (MINCHAINWORK_HEADERS // period)) // COMPACT_HEADER_SIZE
+        if max_buf < min_bufsize:
+            return None
+        if attack_rate(period, max_buf, attack_headers)[0] >= attack_headers:
+            return None
+        # If it is sufficient, that's an upper bound to start our search.
+        succ_buf = min_bufsize - 1
+        fail_buf = max_buf
+
+    # Then perform a bisection search to narrow it down.
+    while fail_buf > succ_buf + 1:
+        try_buf = (succ_buf + fail_buf) // 2
+        if attack_rate(period, try_buf, attack_headers)[0] >= attack_headers:
+            succ_buf = try_buf
+        else:
+            fail_buf = try_buf
+    return fail_buf
+
+
+def optimize(when):
+    """Find the best (period, bufsize) configuration."""
+
+    # When period*bufsize = memory_scale, the per-peer memory for a mainchain sync and a maximally
+    # long low-difficulty header sync are equal.
+    memory_scale = (find_max_headers(when) - MINCHAINWORK_HEADERS) / COMPACT_HEADER_SIZE
+    # Compute approximation for {bufsize/period}, using a formula for a simplified problem.
+    approx_ratio = lambert_w(log(4) * memory_scale / ATTACK_HEADERS**2) / log(4)
+    # Use those for a first attempt.
+    print("Searching configurations:")
+    period = int(sqrt(memory_scale / approx_ratio) + 0.5)
+    bufsize = find_bufsize(period, ATTACK_HEADERS, when)
+    mem = memory_usage(period, bufsize, when)
+    best = (period, bufsize, mem)
+    maps = [(period, bufsize), (MINCHAINWORK_HEADERS + 1, None)]
+    print(f"- Initial: period={period}, buffer={bufsize}, mem={mem[0] / 8192:.3f} KiB")
+
+    # Consider all period values between 1 and MINCHAINWORK_HEADERS, except the one just tried.
+    periods = [iv for iv in range(1, MINCHAINWORK_HEADERS + 1) if iv != period]
+    # Iterate, picking a random element from periods, computing its corresponding bufsize, and
+    # then using the result to shrink the period.
+    while True:
+        # Remove all periods whose memory usage for low-work long chain sync exceed the best
+        # memory usage we've found so far.
+        periods = [p for p in periods if find_max_headers(when) // p < best[2][0]]
+        # Stop if there is nothing left to try.
+        if len(periods) == 0:
+            break
+        # Pick a random remaining option for period size, and compute corresponding bufsize.
+        period = periods.pop(random.randrange(len(periods)))
+        # The buffer size (at a given attack level) cannot shrink as the period grows. Find the
+        # largest period smaller than the selected one we know the buffer size for, and use that
+        # as a lower bound to find_bufsize.
+        min_bufsize = max([(p, b) for p, b in maps if p < period] + [(0,0)])[1]
+        bufsize = find_bufsize(period, ATTACK_HEADERS, when, best[2][0], min_bufsize)
+        if bufsize is not None:
+            # We found a (period, bufsize) configuration with better memory usage than our best
+            # so far. Remember it for future lower bounds.
+            maps.append((period, bufsize))
+            mem = memory_usage(period, bufsize, when)
+            assert mem[0] <= best[2][0]
+            if ASSUME_CONVEX:
+                # Remove all periods that are on the other side of the former best as the new
+                # best.
+                periods = [p for p in periods if (p < best[0]) == (period < best[0])]
+            best = (period, bufsize, mem)
+            print(f"- New best: period={period}, buffer={bufsize}, mem={mem[0] / 8192:.3f} KiB")
+        else:
+            # The (period, bufsize) configuration we found is worse than what we already had.
+            if ASSUME_CONVEX:
+                # Remove all periods that are on the other side of the tried configuration as the
+                # best one.
+                periods = [p for p in periods if (p < period) == (best[0] < period)]
+
+    # Return the result.
+    period, bufsize, _ = best
+    return period, bufsize
+
+
+def analyze(when):
+    """Find the best configuration and print it out."""
+
+    period, bufsize = optimize(when)
+    # Compute accurate statistics for the best found configuration.
+    _, mem_mainchain, mem_timewarp = memory_usage(period, bufsize, when)
+    headers_per_attack, _ = attack_rate(period, bufsize)
+    attack_volume = NET_HEADER_SIZE * MINCHAINWORK_HEADERS
+    # And report them.
+    print()
+    print("Optimal configuration:")
+    print()
+    print("//! Store one header commitment per HEADER_COMMITMENT_PERIOD blocks.")
+    print(f"constexpr size_t HEADER_COMMITMENT_PERIOD{{{period}}};")
+    print()
+    print("//! Only feed headers to validation once this many headers on top have been")
+    print("//! received and validated against commitments.")
+    print(f"constexpr size_t REDOWNLOAD_BUFFER_SIZE{{{bufsize}}};"
+          f" // {bufsize}/{period} = ~{bufsize/period:.1f} commitments")
+    print()
+    print("Properties:")
+    print(f"- Per-peer memory for mainchain sync: {mem_mainchain / 8192:.3f} KiB")
+    print(f"- Per-peer memory for timewarp attack: {mem_timewarp / 8192:.3f} KiB")
+    print(f"- Attack rate: {1/headers_per_attack:.1f} attacks for 1 header of memory growth")
+    print(f"  (where each attack costs {attack_volume / 8388608:.3f} MiB bandwidth)")
+
+
+analyze(TIME)
author	fanquake <fanquake@gmail.com>	2023-10-05 11:25:15 +0100
committer	fanquake <fanquake@gmail.com>	2023-10-05 11:28:29 +0100
commit	2eacc61ad74c767f716cff65e958c7ff45f54aed (patch)
tree	4c5b197f6472d637e96c7dfefcbfeae42c33423b /contrib/devtools
parent	0e3de3b83e99c29d4e84ae92894e3a9fedfe7bea (diff)
parent	3d420d8f28f2d351abf8b0afe90848110e15d50c (diff)