diff options
author | W. J. van der Laan <laanwj@protonmail.com> | 2021-11-30 20:50:05 +0100 |
---|---|---|
committer | W. J. van der Laan <laanwj@protonmail.com> | 2021-11-30 20:50:11 +0100 |
commit | aef8c7cf82cff24ddd88fab0a3c5fb93de467886 (patch) | |
tree | 1225c14e1d6db98de1a2509954801a01323f93ce /test | |
parent | c5712d172e92e231b8dc2828d4cf4bbbb8e86a16 (diff) | |
parent | d9803f7a0a33688f7429cf10384244f4770851ca (diff) |
Merge bitcoin/bitcoin#23289: test: add stress tests for initialization
d9803f7a0a33688f7429cf10384244f4770851ca test: add stress tests for initialization (James O'Beirne)
23f85616a8d9c9a1b054e492eca4d199028f34dc test: add node.chain_path and node.debug_log_path (James O'Beirne)
Pull request description:
In the course of coming up with a test plan for #23280, I thought it would be neat to include a Python snippet showing how I tested the initialization process. I quickly realized I was reinventing the functional test framework... so here's a new test.
This change bangs init around like the Fonz hitting a jukebox. It adds some interesting (read: lazy and random) coverage for the initialization process by
- interrupting init with SIGTERM after certain log statements,
- interrupting init at random points, and
- starting init with some essential data missing (block files, block indices, etc.) to test init error paths.
As far as I can tell, some of these code paths are uncovered otherwise (namely the startup errors).
---
Incidentally, I think I may have uncovered some kind of a bug or race condition with indexing initialization based on an intermittent failure in this testcase. This test sometimes fails after shutting down immediately after `loadblk` thread start:
```
2021-10-15T21:14:51.295000Z TestFramework (INFO): Starting node and will exit after line 'loadblk thread start'
36 │ 2021-10-15T21:14:51.296000Z TestFramework.node0 (DEBUG): bitcoind started, waiting for RPC to come up
37 │ 2021-10-15T21:14:51.493000Z TestFramework (INFO): terminating node after 110 log lines seen
38 │ 2021-10-15T21:14:51.625000Z TestFramework (INFO): Starting node and will exit after line 'txindex thread start'
39 │ 2021-10-15T21:14:51.625000Z TestFramework.node0 (DEBUG): bitcoind started, waiting for RPC to come up
------> [[ FAILURE HERE ]] 2021-10-15T21:15:21.626000Z TestFramework (WARNING): missed line {bail_line}; bailing now after {num_lines} lines
```
and then fails to start up afterwards. Combined logs showing `Error: txindex best block of the index goes beyond pruned data`, when the node under test is not pruned:
```
node0 2021-10-15T21:16:51.848439Z [shutoff] [validationinterface.cpp:244] [ChainStateFlushed] Enqueuing ChainStateFlushed: block hash=1014bc4ff4917602ae53d10e9dfe230af4b7d52a6cdaa8a47798b9c288180907
node0 2021-10-15T21:16:51.848954Z [shutoff] [init.cpp:302] [Shutdown] Shutdown: done
test 2021-10-15T21:16:51.882000Z TestFramework (ERROR): Unexpected exception caught during testing
Traceback (most recent call last):
File "/home/james/src/bitcoin/test/functional/test_framework/test_framework.py", line 132, in main
self.run_test()
File "/home/james/src/bitcoin/./test/functional/stress_init.py", line 87, in run_test
check_clean_start()
File "/home/james/src/bitcoin/./test/functional/stress_init.py", line 60, in check_clean_start
node.wait_for_rpc_connection()
File "/home/james/src/bitcoin/test/functional/test_framework/test_node.py", line 224, in wait_for_rpc_connection
raise FailedToStartError(self._node_msg(
test_framework.test_node.FailedToStartError: [node 0] bitcoind exited with status 1 during initialization
test 2021-10-15T21:16:51.882000Z TestFramework (DEBUG): Closing down network thread
test 2021-10-15T21:16:51.933000Z TestFramework (INFO): Stopping nodes
test 2021-10-15T21:16:51.933000Z TestFramework.node0 (DEBUG): Stopping node
node0 stderr Error: txindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)
node0 stderr Error: txindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)
node0 stderr Error: txindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)
node0 stderr Error: txindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)
node0 stderr Error: txindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)
```
ACKs for top commit:
laanwj:
Code review ACK d9803f7a0a33688f7429cf10384244f4770851ca
Tree-SHA512: 4d80dc399daf199a6222e81e47d12d830dc7af07355eddbb7f52479a676a645b8d3d45093ff54a9295f01a163b2f4fe0e038e83fc269969e03d4cfda69eaf111
Diffstat (limited to 'test')
-rwxr-xr-x | test/functional/feature_init.py | 182 | ||||
-rwxr-xr-x | test/functional/test_framework/test_node.py | 14 | ||||
-rwxr-xr-x | test/functional/test_runner.py | 1 |
3 files changed, 194 insertions, 3 deletions
diff --git a/test/functional/feature_init.py b/test/functional/feature_init.py new file mode 100755 index 0000000000..cffbd40271 --- /dev/null +++ b/test/functional/feature_init.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +# Copyright (c) 2021 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +"""Stress tests related to node initialization.""" +import random +import time +import os +from pathlib import Path + +from test_framework.test_framework import BitcoinTestFramework, SkipTest +from test_framework.test_node import ErrorMatch +from test_framework.util import assert_equal + + +class InitStressTest(BitcoinTestFramework): + """ + Ensure that initialization can be interrupted at a number of points and not impair + subsequent starts. + """ + + def set_test_params(self): + self.setup_clean_chain = False + self.num_nodes = 1 + + def run_test(self): + """ + - test terminating initialization after seeing a certain log line. + - test terminating init after seeing a random number of log lines. + - test removing certain essential files to test startup error paths. + """ + # TODO: skip Windows for now since it isn't clear how to SIGTERM. + # + # Windows doesn't support `process.terminate()`. + # and other approaches (like below) don't work: + # + # os.kill(node.process.pid, signal.CTRL_C_EVENT) + if os.name == 'nt': + raise SkipTest("can't SIGTERM on Windows") + + self.stop_node(0) + node = self.nodes[0] + + def sigterm_node(): + node.process.terminate() + node.process.wait() + node.debug_log_path.unlink() + node.debug_log_path.touch() + + def check_clean_start(): + """Ensure that node restarts successfully after various interrupts.""" + # TODO: add -txindex=1 to fully test index initiatlization. + # See https://github.com/bitcoin/bitcoin/pull/23289#discussion_r735159180 for + # a discussion of the related bug. + node.start() + node.wait_for_rpc_connection() + assert_equal(200, node.getblockcount()) + + lines_to_terminate_after = [ + 'scheduler thread start', + 'Loading P2P addresses', + 'Loading banlist', + 'Loading block index', + 'Switching active chainstate', + 'Loaded best chain:', + 'init message: Verifying blocks', + 'loadblk thread start', + # TODO: reenable - see above TODO + # 'txindex thread start', + 'net thread start', + 'addcon thread start', + 'msghand thread start', + ] + if self.is_wallet_compiled(): + lines_to_terminate_after.append('Verifying wallet') + + for terminate_line in lines_to_terminate_after: + self.log.info(f"Starting node and will exit after line '{terminate_line}'") + node.start( + # TODO: add -txindex=1 to fully test index initiatlization. + # extra_args=['-txindex=1'], + ) + logfile = open(node.debug_log_path, 'r', encoding='utf8') + + MAX_SECS_TO_WAIT = 30 + start = time.time() + num_lines = 0 + + while True: + line = logfile.readline() + if line: + num_lines += 1 + + if line and terminate_line.lower() in line.lower(): + self.log.debug(f"Terminating node after {num_lines} log lines seen") + sigterm_node() + break + + if (time.time() - start) > MAX_SECS_TO_WAIT: + raise AssertionError( + f"missed line {terminate_line}; terminating now after {num_lines} lines") + + if node.process.poll() is not None: + raise AssertionError(f"node failed to start (line: '{terminate_line}')") + + check_clean_start() + num_total_logs = len(node.debug_log_path.read_text().splitlines()) + self.stop_node(0) + + self.log.info( + f"Terminate at some random point in the init process (max logs: {num_total_logs})") + + for _ in range(40): + terminate_after = random.randint(1, num_total_logs) + self.log.debug(f"Starting node and will exit after {terminate_after} lines") + node.start( + # TODO: add -txindex=1 to fully test index initiatlization. + # extra_args=['-txindex=1'], + ) + logfile = open(node.debug_log_path, 'r', encoding='utf8') + + MAX_SECS_TO_WAIT = 10 + start = time.time() + num_lines = 0 + + while True: + line = logfile.readline() + if line: + num_lines += 1 + + if num_lines >= terminate_after or (time.time() - start) > MAX_SECS_TO_WAIT: + self.log.debug(f"Terminating node after {num_lines} log lines seen") + sigterm_node() + break + + if node.process.poll() is not None: + raise AssertionError("node failed to start") + + check_clean_start() + self.stop_node(0) + + self.log.info("Test startup errors after removing certain essential files") + + files_to_disturb = { + 'blocks/index/*.ldb': 'Error opening block database.', + 'chainstate/*.ldb': 'Error opening block database.', + 'blocks/blk*.dat': 'Error loading block database.', + } + + for file_patt, err_fragment in files_to_disturb.items(): + target_file = list(node.chain_path.glob(file_patt))[0] + + self.log.info(f"Tweaking file to ensure failure {target_file}") + bak_path = str(target_file) + ".bak" + target_file.rename(bak_path) + + # TODO: at some point, we should test perturbing the files instead of removing + # them, e.g. + # + # contents = target_file.read_bytes() + # tweaked_contents = bytearray(contents) + # tweaked_contents[50:250] = b'1' * 200 + # target_file.write_bytes(bytes(tweaked_contents)) + # + # At the moment I can't get this to work (bitcoind loads successfully?) so + # investigate doing this later. + + node.assert_start_raises_init_error( + # TODO: add -txindex=1 to fully test index initiatlization. + # extra_args=['-txindex=1'], + expected_msg=err_fragment, + match=ErrorMatch.PARTIAL_REGEX, + ) + + self.log.info(f"Restoring file from {bak_path} and restarting") + Path(bak_path).rename(target_file) + check_clean_start() + self.stop_node(0) + + +if __name__ == '__main__': + InitStressTest().main() diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 269f2442a9..b3279666b2 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -20,6 +20,7 @@ import urllib.parse import collections import shlex import sys +from pathlib import Path from .authproxy import JSONRPCException from .descriptors import descsum_create @@ -380,13 +381,20 @@ class TestNode(): def wait_until_stopped(self, timeout=BITCOIND_PROC_WAIT_TIMEOUT): wait_until_helper(self.is_node_stopped, timeout=timeout, timeout_factor=self.timeout_factor) + @property + def chain_path(self) -> Path: + return Path(self.datadir) / self.chain + + @property + def debug_log_path(self) -> Path: + return self.chain_path / 'debug.log' + @contextlib.contextmanager def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2): if unexpected_msgs is None: unexpected_msgs = [] time_end = time.time() + timeout * self.timeout_factor - debug_log = os.path.join(self.datadir, self.chain, 'debug.log') - with open(debug_log, encoding='utf-8') as dl: + with open(self.debug_log_path, encoding='utf-8') as dl: dl.seek(0, 2) prev_size = dl.tell() @@ -394,7 +402,7 @@ class TestNode(): while True: found = True - with open(debug_log, encoding='utf-8') as dl: + with open(self.debug_log_path, encoding='utf-8') as dl: dl.seek(prev_size) log = dl.read() print_log = " - " + "\n - ".join(log.splitlines()) diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py index b9b4edf0ba..35cab8f6ec 100755 --- a/test/functional/test_runner.py +++ b/test/functional/test_runner.py @@ -278,6 +278,7 @@ BASE_SCRIPTS = [ 'wallet_taproot.py', 'p2p_fingerprint.py', 'feature_uacomment.py', + 'feature_init.py', 'wallet_coinbase_category.py --legacy-wallet', 'wallet_coinbase_category.py --descriptors', 'feature_filelock.py', |