diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/node/chainstate.cpp | 122 | ||||
-rw-r--r-- | src/test/validation_chainstatemanager_tests.cpp | 171 | ||||
-rw-r--r-- | src/validation.cpp | 356 | ||||
-rw-r--r-- | src/validation.h | 119 |
4 files changed, 707 insertions, 61 deletions
diff --git a/src/node/chainstate.cpp b/src/node/chainstate.cpp index 626010d26f..125d6de5a5 100644 --- a/src/node/chainstate.cpp +++ b/src/node/chainstate.cpp @@ -28,38 +28,13 @@ #include <vector> namespace node { -ChainstateLoadResult LoadChainstate(ChainstateManager& chainman, const CacheSizes& cache_sizes, - const ChainstateLoadOptions& options) +// Complete initialization of chainstates after the initial call has been made +// to ChainstateManager::InitializeChainstate(). +static ChainstateLoadResult CompleteChainstateInitialization( + ChainstateManager& chainman, + const CacheSizes& cache_sizes, + const ChainstateLoadOptions& options) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { - auto is_coinsview_empty = [&](Chainstate* chainstate) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { - return options.reindex || options.reindex_chainstate || chainstate->CoinsTip().GetBestBlock().IsNull(); - }; - - if (!chainman.AssumedValidBlock().IsNull()) { - LogPrintf("Assuming ancestors of block %s have valid signatures.\n", chainman.AssumedValidBlock().GetHex()); - } else { - LogPrintf("Validating signatures for all blocks.\n"); - } - LogPrintf("Setting nMinimumChainWork=%s\n", chainman.MinimumChainWork().GetHex()); - if (chainman.MinimumChainWork() < UintToArith256(chainman.GetConsensus().nMinimumChainWork)) { - LogPrintf("Warning: nMinimumChainWork set below default value of %s\n", chainman.GetConsensus().nMinimumChainWork.GetHex()); - } - if (chainman.m_blockman.GetPruneTarget() == std::numeric_limits<uint64_t>::max()) { - LogPrintf("Block pruning enabled. Use RPC call pruneblockchain(height) to manually prune block and undo files.\n"); - } else if (chainman.m_blockman.GetPruneTarget()) { - LogPrintf("Prune configured to target %u MiB on disk for block and undo files.\n", chainman.m_blockman.GetPruneTarget() / 1024 / 1024); - } - - LOCK(cs_main); - chainman.m_total_coinstip_cache = cache_sizes.coins; - chainman.m_total_coinsdb_cache = cache_sizes.coins_db; - - // Load the fully validated chainstate. - chainman.InitializeChainstate(options.mempool); - - // Load a chain created from a UTXO snapshot, if any exist. - chainman.DetectSnapshotChainstate(options.mempool); - auto& pblocktree{chainman.m_blockman.m_block_tree_db}; // new CBlockTreeDB tries to delete the existing file, which // fails if it's still open from the previous loop. Close it first: @@ -111,6 +86,13 @@ ChainstateLoadResult LoadChainstate(ChainstateManager& chainman, const CacheSize return {ChainstateLoadStatus::FAILURE, _("Error initializing block database")}; } + auto is_coinsview_empty = [&](Chainstate* chainstate) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { + return options.reindex || options.reindex_chainstate || chainstate->CoinsTip().GetBestBlock().IsNull(); + }; + + assert(chainman.m_total_coinstip_cache > 0); + assert(chainman.m_total_coinsdb_cache > 0); + // Conservative value which is arbitrarily chosen, as it will ultimately be changed // by a call to `chainman.MaybeRebalanceCaches()`. We just need to make sure // that the sum of the two caches (40%) does not exceed the allowable amount @@ -175,6 +157,84 @@ ChainstateLoadResult LoadChainstate(ChainstateManager& chainman, const CacheSize return {ChainstateLoadStatus::SUCCESS, {}}; } +ChainstateLoadResult LoadChainstate(ChainstateManager& chainman, const CacheSizes& cache_sizes, + const ChainstateLoadOptions& options) +{ + if (!chainman.AssumedValidBlock().IsNull()) { + LogPrintf("Assuming ancestors of block %s have valid signatures.\n", chainman.AssumedValidBlock().GetHex()); + } else { + LogPrintf("Validating signatures for all blocks.\n"); + } + LogPrintf("Setting nMinimumChainWork=%s\n", chainman.MinimumChainWork().GetHex()); + if (chainman.MinimumChainWork() < UintToArith256(chainman.GetConsensus().nMinimumChainWork)) { + LogPrintf("Warning: nMinimumChainWork set below default value of %s\n", chainman.GetConsensus().nMinimumChainWork.GetHex()); + } + if (chainman.m_blockman.GetPruneTarget() == std::numeric_limits<uint64_t>::max()) { + LogPrintf("Block pruning enabled. Use RPC call pruneblockchain(height) to manually prune block and undo files.\n"); + } else if (chainman.m_blockman.GetPruneTarget()) { + LogPrintf("Prune configured to target %u MiB on disk for block and undo files.\n", chainman.m_blockman.GetPruneTarget() / 1024 / 1024); + } + + LOCK(cs_main); + + chainman.m_total_coinstip_cache = cache_sizes.coins; + chainman.m_total_coinsdb_cache = cache_sizes.coins_db; + + // Load the fully validated chainstate. + chainman.InitializeChainstate(options.mempool); + + // Load a chain created from a UTXO snapshot, if any exist. + chainman.DetectSnapshotChainstate(options.mempool); + + auto [init_status, init_error] = CompleteChainstateInitialization(chainman, cache_sizes, options); + if (init_status != ChainstateLoadStatus::SUCCESS) { + return {init_status, init_error}; + } + + // If a snapshot chainstate was fully validated by a background chainstate during + // the last run, detect it here and clean up the now-unneeded background + // chainstate. + // + // Why is this cleanup done here (on subsequent restart) and not just when the + // snapshot is actually validated? Because this entails unusual + // filesystem operations to move leveldb data directories around, and that seems + // too risky to do in the middle of normal runtime. + auto snapshot_completion = chainman.MaybeCompleteSnapshotValidation(); + + if (snapshot_completion == SnapshotCompletionResult::SKIPPED) { + // do nothing; expected case + } else if (snapshot_completion == SnapshotCompletionResult::SUCCESS) { + LogPrintf("[snapshot] cleaning up unneeded background chainstate, then reinitializing\n"); + if (!chainman.ValidatedSnapshotCleanup()) { + AbortNode("Background chainstate cleanup failed unexpectedly."); + } + + // Because ValidatedSnapshotCleanup() has torn down chainstates with + // ChainstateManager::ResetChainstates(), reinitialize them here without + // duplicating the blockindex work above. + assert(chainman.GetAll().empty()); + assert(!chainman.IsSnapshotActive()); + assert(!chainman.IsSnapshotValidated()); + + chainman.InitializeChainstate(options.mempool); + + // A reload of the block index is required to recompute setBlockIndexCandidates + // for the fully validated chainstate. + chainman.ActiveChainstate().UnloadBlockIndex(); + + auto [init_status, init_error] = CompleteChainstateInitialization(chainman, cache_sizes, options); + if (init_status != ChainstateLoadStatus::SUCCESS) { + return {init_status, init_error}; + } + } else { + return {ChainstateLoadStatus::FAILURE, _( + "UTXO snapshot failed to validate. " + "Restart to resume normal initial block download, or try loading a different snapshot.")}; + } + + return {ChainstateLoadStatus::SUCCESS, {}}; +} + ChainstateLoadResult VerifyLoadedChainstate(ChainstateManager& chainman, const ChainstateLoadOptions& options) { auto is_coinsview_empty = [&](Chainstate* chainstate) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { diff --git a/src/test/validation_chainstatemanager_tests.cpp b/src/test/validation_chainstatemanager_tests.cpp index 78301c7c14..6fc9d0fa51 100644 --- a/src/test/validation_chainstatemanager_tests.cpp +++ b/src/test/validation_chainstatemanager_tests.cpp @@ -474,9 +474,10 @@ BOOST_FIXTURE_TEST_CASE(chainstatemanager_loadblockindex, TestChain100Setup) //! Ensure that snapshot chainstates initialize properly when found on disk. BOOST_FIXTURE_TEST_CASE(chainstatemanager_snapshot_init, SnapshotTestSetup) { - this->SetupSnapshot(); - ChainstateManager& chainman = *Assert(m_node.chainman); + Chainstate& bg_chainstate = chainman.ActiveChainstate(); + + this->SetupSnapshot(); fs::path snapshot_chainstate_dir = *node::FindSnapshotChainstateDir(); BOOST_CHECK(fs::exists(snapshot_chainstate_dir)); @@ -489,6 +490,20 @@ BOOST_FIXTURE_TEST_CASE(chainstatemanager_snapshot_init, SnapshotTestSetup) auto all_chainstates = chainman.GetAll(); BOOST_CHECK_EQUAL(all_chainstates.size(), 2); + // "Rewind" the background chainstate so that its tip is not at the + // base block of the snapshot - this is so after simulating a node restart, + // it will initialize instead of attempting to complete validation. + // + // Note that this is not a realistic use of DisconnectTip(). + DisconnectedBlockTransactions unused_pool; + BlockValidationState unused_state; + { + LOCK2(::cs_main, bg_chainstate.MempoolMutex()); + BOOST_CHECK(bg_chainstate.DisconnectTip(unused_state, &unused_pool)); + unused_pool.clear(); // to avoid queuedTx assertion errors on teardown + } + BOOST_CHECK_EQUAL(bg_chainstate.m_chain.Height(), 109); + // Test that simulating a shutdown (resetting ChainstateManager) and then performing // chainstate reinitializing successfully cleans up the background-validation // chainstate data, and we end up with a single chainstate that is at tip. @@ -520,10 +535,160 @@ BOOST_FIXTURE_TEST_CASE(chainstatemanager_snapshot_init, SnapshotTestSetup) // chainstate. for (Chainstate* cs : chainman_restarted.GetAll()) { if (cs != &chainman_restarted.ActiveChainstate()) { - BOOST_CHECK_EQUAL(cs->m_chain.Height(), 110); + BOOST_CHECK_EQUAL(cs->m_chain.Height(), 109); } } } } +BOOST_FIXTURE_TEST_CASE(chainstatemanager_snapshot_completion, SnapshotTestSetup) +{ + this->SetupSnapshot(); + + ChainstateManager& chainman = *Assert(m_node.chainman); + Chainstate& active_cs = chainman.ActiveChainstate(); + auto tip_cache_before_complete = active_cs.m_coinstip_cache_size_bytes; + auto db_cache_before_complete = active_cs.m_coinsdb_cache_size_bytes; + + SnapshotCompletionResult res; + auto mock_shutdown = [](bilingual_str msg) {}; + + fs::path snapshot_chainstate_dir = *node::FindSnapshotChainstateDir(); + BOOST_CHECK(fs::exists(snapshot_chainstate_dir)); + BOOST_CHECK_EQUAL(snapshot_chainstate_dir, gArgs.GetDataDirNet() / "chainstate_snapshot"); + + BOOST_CHECK(chainman.IsSnapshotActive()); + const uint256 snapshot_tip_hash = WITH_LOCK(chainman.GetMutex(), + return chainman.ActiveTip()->GetBlockHash()); + + res = WITH_LOCK(::cs_main, + return chainman.MaybeCompleteSnapshotValidation(mock_shutdown)); + BOOST_CHECK_EQUAL(res, SnapshotCompletionResult::SUCCESS); + + WITH_LOCK(::cs_main, BOOST_CHECK(chainman.IsSnapshotValidated())); + BOOST_CHECK(chainman.IsSnapshotActive()); + + // Cache should have been rebalanced and reallocated to the "only" remaining + // chainstate. + BOOST_CHECK(active_cs.m_coinstip_cache_size_bytes > tip_cache_before_complete); + BOOST_CHECK(active_cs.m_coinsdb_cache_size_bytes > db_cache_before_complete); + + auto all_chainstates = chainman.GetAll(); + BOOST_CHECK_EQUAL(all_chainstates.size(), 1); + BOOST_CHECK_EQUAL(all_chainstates[0], &active_cs); + + // Trying completion again should return false. + res = WITH_LOCK(::cs_main, + return chainman.MaybeCompleteSnapshotValidation(mock_shutdown)); + BOOST_CHECK_EQUAL(res, SnapshotCompletionResult::SKIPPED); + + // The invalid snapshot path should not have been used. + fs::path snapshot_invalid_dir = gArgs.GetDataDirNet() / "chainstate_snapshot_INVALID"; + BOOST_CHECK(!fs::exists(snapshot_invalid_dir)); + // chainstate_snapshot should still exist. + BOOST_CHECK(fs::exists(snapshot_chainstate_dir)); + + // Test that simulating a shutdown (reseting ChainstateManager) and then performing + // chainstate reinitializing successfully cleans up the background-validation + // chainstate data, and we end up with a single chainstate that is at tip. + ChainstateManager& chainman_restarted = this->SimulateNodeRestart(); + + BOOST_TEST_MESSAGE("Performing Load/Verify/Activate of chainstate"); + + // This call reinitializes the chainstates, and should clean up the now unnecessary + // background-validation leveldb contents. + this->LoadVerifyActivateChainstate(); + + BOOST_CHECK(!fs::exists(snapshot_invalid_dir)); + // chainstate_snapshot should now *not* exist. + BOOST_CHECK(!fs::exists(snapshot_chainstate_dir)); + + const Chainstate& active_cs2 = chainman_restarted.ActiveChainstate(); + + { + LOCK(chainman_restarted.GetMutex()); + BOOST_CHECK_EQUAL(chainman_restarted.GetAll().size(), 1); + BOOST_CHECK(!chainman_restarted.IsSnapshotActive()); + BOOST_CHECK(!chainman_restarted.IsSnapshotValidated()); + BOOST_CHECK(active_cs2.m_coinstip_cache_size_bytes > tip_cache_before_complete); + BOOST_CHECK(active_cs2.m_coinsdb_cache_size_bytes > db_cache_before_complete); + + BOOST_CHECK_EQUAL(chainman_restarted.ActiveTip()->GetBlockHash(), snapshot_tip_hash); + BOOST_CHECK_EQUAL(chainman_restarted.ActiveHeight(), 210); + } + + BOOST_TEST_MESSAGE( + "Ensure we can mine blocks on top of the \"new\" IBD chainstate"); + mineBlocks(10); + { + LOCK(chainman_restarted.GetMutex()); + BOOST_CHECK_EQUAL(chainman_restarted.ActiveHeight(), 220); + } +} + +BOOST_FIXTURE_TEST_CASE(chainstatemanager_snapshot_completion_hash_mismatch, SnapshotTestSetup) +{ + auto chainstates = this->SetupSnapshot(); + Chainstate& validation_chainstate = *std::get<0>(chainstates); + ChainstateManager& chainman = *Assert(m_node.chainman); + SnapshotCompletionResult res; + auto mock_shutdown = [](bilingual_str msg) {}; + + // Test tampering with the IBD UTXO set with an extra coin to ensure it causes + // snapshot completion to fail. + CCoinsViewCache& ibd_coins = WITH_LOCK(::cs_main, + return validation_chainstate.CoinsTip()); + Coin badcoin; + badcoin.out.nValue = InsecureRand32(); + badcoin.nHeight = 1; + badcoin.out.scriptPubKey.assign(InsecureRandBits(6), 0); + uint256 txid = InsecureRand256(); + ibd_coins.AddCoin(COutPoint(txid, 0), std::move(badcoin), false); + + fs::path snapshot_chainstate_dir = gArgs.GetDataDirNet() / "chainstate_snapshot"; + BOOST_CHECK(fs::exists(snapshot_chainstate_dir)); + + res = WITH_LOCK(::cs_main, + return chainman.MaybeCompleteSnapshotValidation(mock_shutdown)); + BOOST_CHECK_EQUAL(res, SnapshotCompletionResult::HASH_MISMATCH); + + auto all_chainstates = chainman.GetAll(); + BOOST_CHECK_EQUAL(all_chainstates.size(), 1); + BOOST_CHECK_EQUAL(all_chainstates[0], &validation_chainstate); + BOOST_CHECK_EQUAL(&chainman.ActiveChainstate(), &validation_chainstate); + + fs::path snapshot_invalid_dir = gArgs.GetDataDirNet() / "chainstate_snapshot_INVALID"; + BOOST_CHECK(fs::exists(snapshot_invalid_dir)); + + // Test that simulating a shutdown (reseting ChainstateManager) and then performing + // chainstate reinitializing successfully loads only the fully-validated + // chainstate data, and we end up with a single chainstate that is at tip. + ChainstateManager& chainman_restarted = this->SimulateNodeRestart(); + + BOOST_TEST_MESSAGE("Performing Load/Verify/Activate of chainstate"); + + // This call reinitializes the chainstates, and should clean up the now unnecessary + // background-validation leveldb contents. + this->LoadVerifyActivateChainstate(); + + BOOST_CHECK(fs::exists(snapshot_invalid_dir)); + BOOST_CHECK(!fs::exists(snapshot_chainstate_dir)); + + { + LOCK(::cs_main); + BOOST_CHECK_EQUAL(chainman_restarted.GetAll().size(), 1); + BOOST_CHECK(!chainman_restarted.IsSnapshotActive()); + BOOST_CHECK(!chainman_restarted.IsSnapshotValidated()); + BOOST_CHECK_EQUAL(chainman_restarted.ActiveHeight(), 210); + } + + BOOST_TEST_MESSAGE( + "Ensure we can mine blocks on top of the \"new\" IBD chainstate"); + mineBlocks(10); + { + LOCK(::cs_main); + BOOST_CHECK_EQUAL(chainman_restarted.ActiveHeight(), 220); + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/validation.cpp b/src/validation.cpp index 0674454883..823d326d18 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2875,6 +2875,14 @@ bool Chainstate::ConnectTip(BlockValidationState& state, CBlockIndex* pindexNew, Ticks<SecondsDouble>(time_total), Ticks<MillisecondsDouble>(time_total) / num_blocks_total); + // If we are the background validation chainstate, check to see if we are done + // validating the snapshot (i.e. our tip has reached the snapshot's base block). + if (this != &m_chainman.ActiveChainstate()) { + // This call may set `m_disabled`, which is referenced immediately afterwards in + // ActivateBestChain, so that we stop connecting blocks past the snapshot base. + m_chainman.MaybeCompleteSnapshotValidation(); + } + connectTrace.BlockConnected(pindexNew, std::move(pthisBlock)); return true; } @@ -3097,6 +3105,14 @@ bool Chainstate::ActivateBestChain(BlockValidationState& state, std::shared_ptr< // we use m_chainstate_mutex to enforce mutual exclusion so that only one caller may execute this function at a time LOCK(m_chainstate_mutex); + // Belt-and-suspenders check that we aren't attempting to advance the background + // chainstate past the snapshot base block. + if (WITH_LOCK(::cs_main, return m_disabled)) { + LogPrintf("m_disabled is set - this chainstate should not be in operation. " /* Continued */ + "Please report this as a bug. %s\n", PACKAGE_BUGREPORT); + return false; + } + CBlockIndex *pindexMostWork = nullptr; CBlockIndex *pindexNewTip = nullptr; int nStopAtHeight = gArgs.GetIntArg("-stopatheight", DEFAULT_STOPATHEIGHT); @@ -3147,6 +3163,15 @@ bool Chainstate::ActivateBestChain(BlockValidationState& state, std::shared_ptr< assert(trace.pblock && trace.pindex); GetMainSignals().BlockConnected(trace.pblock, trace.pindex); } + + // This will have been toggled in + // ActivateBestChainStep -> ConnectTip -> MaybeCompleteSnapshotValidation, + // if at all, so we should catch it here. + // + // Break this do-while to ensure we don't advance past the base snapshot. + if (m_disabled) { + break; + } } while (!m_chain.Tip() || (starting_tip && CBlockIndexWorkComparator()(m_chain.Tip(), starting_tip))); if (!blocks_connected) return true; @@ -3167,6 +3192,11 @@ bool Chainstate::ActivateBestChain(BlockValidationState& state, std::shared_ptr< if (nStopAtHeight && pindexNewTip && pindexNewTip->nHeight >= nStopAtHeight) StartShutdown(); + if (WITH_LOCK(::cs_main, return m_disabled)) { + // Background chainstate has reached the snapshot base block, so exit. + break; + } + // We check shutdown only after giving ActivateBestChainStep a chance to run once so that we // never shutdown before connecting the genesis block during LoadChainTip(). Previously this // caused an assert() failure during shutdown in such cases as the UTXO DB flushing checks @@ -4372,6 +4402,8 @@ bool ChainstateManager::LoadBlockIndex() assert(any_chain([](auto chainstate) { return !chainstate->reliesOnAssumedValid(); })); first_assumed_valid_height = block->nHeight; + LogPrintf("Saw first assumedvalid block at height %d (%s)\n", + first_assumed_valid_height, block->ToString()); break; } } @@ -4908,12 +4940,8 @@ std::vector<Chainstate*> ChainstateManager::GetAll() LOCK(::cs_main); std::vector<Chainstate*> out; - if (!IsSnapshotValidated() && m_ibd_chainstate) { - out.push_back(m_ibd_chainstate.get()); - } - - if (m_snapshot_chainstate) { - out.push_back(m_snapshot_chainstate.get()); + for (Chainstate* cs : {m_ibd_chainstate.get(), m_snapshot_chainstate.get()}) { + if (this->IsUsable(cs)) out.push_back(cs); } return out; @@ -5099,6 +5127,19 @@ static void FlushSnapshotToDisk(CCoinsViewCache& coins_cache, bool snapshot_load coins_cache.Flush(); } +struct StopHashingException : public std::exception +{ + const char* what() const throw() override + { + return "ComputeUTXOStats interrupted by shutdown."; + } +}; + +static void SnapshotUTXOHashBreakpoint() +{ + if (ShutdownRequested()) throw StopHashingException(); +} + bool ChainstateManager::PopulateAndValidateSnapshot( Chainstate& snapshot_chainstate, AutoFile& coins_file, @@ -5222,13 +5263,18 @@ bool ChainstateManager::PopulateAndValidateSnapshot( assert(coins_cache.GetBestBlock() == base_blockhash); - auto breakpoint_fnc = [] { /* TODO insert breakpoint here? */ }; - // As above, okay to immediately release cs_main here since no other context knows // about the snapshot_chainstate. CCoinsViewDB* snapshot_coinsdb = WITH_LOCK(::cs_main, return &snapshot_chainstate.CoinsDB()); - const std::optional<CCoinsStats> maybe_stats = ComputeUTXOStats(CoinStatsHashType::HASH_SERIALIZED, snapshot_coinsdb, m_blockman, breakpoint_fnc); + std::optional<CCoinsStats> maybe_stats; + + try { + maybe_stats = ComputeUTXOStats( + CoinStatsHashType::HASH_SERIALIZED, snapshot_coinsdb, m_blockman, SnapshotUTXOHashBreakpoint); + } catch (StopHashingException const&) { + return false; + } if (!maybe_stats.has_value()) { LogPrintf("[snapshot] failed to generate coins stats\n"); return false; @@ -5296,6 +5342,149 @@ bool ChainstateManager::PopulateAndValidateSnapshot( return true; } +// Currently, this function holds cs_main for its duration, which could be for +// multiple minutes due to the ComputeUTXOStats call. This hold is necessary +// because we need to avoid advancing the background validation chainstate +// farther than the snapshot base block - and this function is also invoked +// from within ConnectTip, i.e. from within ActivateBestChain, so cs_main is +// held anyway. +// +// Eventually (TODO), we could somehow separate this function's runtime from +// maintenance of the active chain, but that will either require +// +// (i) setting `m_disabled` immediately and ensuring all chainstate accesses go +// through IsUsable() checks, or +// +// (ii) giving each chainstate its own lock instead of using cs_main for everything. +SnapshotCompletionResult ChainstateManager::MaybeCompleteSnapshotValidation( + std::function<void(bilingual_str)> shutdown_fnc) +{ + AssertLockHeld(cs_main); + if (m_ibd_chainstate.get() == &this->ActiveChainstate() || + !this->IsUsable(m_snapshot_chainstate.get()) || + !this->IsUsable(m_ibd_chainstate.get()) || + !m_ibd_chainstate->m_chain.Tip()) { + // Nothing to do - this function only applies to the background + // validation chainstate. + return SnapshotCompletionResult::SKIPPED; + } + const int snapshot_tip_height = this->ActiveHeight(); + const int snapshot_base_height = *Assert(this->GetSnapshotBaseHeight()); + const CBlockIndex& index_new = *Assert(m_ibd_chainstate->m_chain.Tip()); + + if (index_new.nHeight < snapshot_base_height) { + // Background IBD not complete yet. + return SnapshotCompletionResult::SKIPPED; + } + + assert(SnapshotBlockhash()); + uint256 snapshot_blockhash = *Assert(SnapshotBlockhash()); + + auto handle_invalid_snapshot = [&]() EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { + bilingual_str user_error = strprintf(_( + "%s failed to validate the -assumeutxo snapshot state. " + "This indicates a hardware problem, or a bug in the software, or a " + "bad software modification that allowed an invalid snapshot to be " + "loaded. As a result of this, the node will shut down and stop using any " + "state that was built on the snapshot, resetting the chain height " + "from %d to %d. On the next " + "restart, the node will resume syncing from %d " + "without using any snapshot data. " + "Please report this incident to %s, including how you obtained the snapshot. " + "The invalid snapshot chainstate has been left on disk in case it is " + "helpful in diagnosing the issue that caused this error."), + PACKAGE_NAME, snapshot_tip_height, snapshot_base_height, snapshot_base_height, PACKAGE_BUGREPORT + ); + + LogPrintf("[snapshot] !!! %s\n", user_error.original); + LogPrintf("[snapshot] deleting snapshot, reverting to validated chain, and stopping node\n"); + + m_active_chainstate = m_ibd_chainstate.get(); + m_snapshot_chainstate->m_disabled = true; + assert(!this->IsUsable(m_snapshot_chainstate.get())); + assert(this->IsUsable(m_ibd_chainstate.get())); + + m_snapshot_chainstate->InvalidateCoinsDBOnDisk(); + + shutdown_fnc(user_error); + }; + + if (index_new.GetBlockHash() != snapshot_blockhash) { + LogPrintf("[snapshot] supposed base block %s does not match the " /* Continued */ + "snapshot base block %s (height %d). Snapshot is not valid.", + index_new.ToString(), snapshot_blockhash.ToString(), snapshot_base_height); + handle_invalid_snapshot(); + return SnapshotCompletionResult::BASE_BLOCKHASH_MISMATCH; + } + + assert(index_new.nHeight == snapshot_base_height); + + int curr_height = m_ibd_chainstate->m_chain.Height(); + + assert(snapshot_base_height == curr_height); + assert(snapshot_base_height == index_new.nHeight); + assert(this->IsUsable(m_snapshot_chainstate.get())); + assert(this->GetAll().size() == 2); + + CCoinsViewDB& ibd_coins_db = m_ibd_chainstate->CoinsDB(); + m_ibd_chainstate->ForceFlushStateToDisk(); + + auto maybe_au_data = ExpectedAssumeutxo(curr_height, ::Params()); + if (!maybe_au_data) { + LogPrintf("[snapshot] assumeutxo data not found for height " /* Continued */ + "(%d) - refusing to validate snapshot\n", curr_height); + handle_invalid_snapshot(); + return SnapshotCompletionResult::MISSING_CHAINPARAMS; + } + + const AssumeutxoData& au_data = *maybe_au_data; + std::optional<CCoinsStats> maybe_ibd_stats; + LogPrintf("[snapshot] computing UTXO stats for background chainstate to validate " /* Continued */ + "snapshot - this could take a few minutes\n"); + try { + maybe_ibd_stats = ComputeUTXOStats( + CoinStatsHashType::HASH_SERIALIZED, + &ibd_coins_db, + m_blockman, + SnapshotUTXOHashBreakpoint); + } catch (StopHashingException const&) { + return SnapshotCompletionResult::STATS_FAILED; + } + + // XXX note that this function is slow and will hold cs_main for potentially minutes. + if (!maybe_ibd_stats) { + LogPrintf("[snapshot] failed to generate stats for validation coins db\n"); + // While this isn't a problem with the snapshot per se, this condition + // prevents us from validating the snapshot, so we should shut down and let the + // user handle the issue manually. + handle_invalid_snapshot(); + return SnapshotCompletionResult::STATS_FAILED; + } + const auto& ibd_stats = *maybe_ibd_stats; + + // Compare the background validation chainstate's UTXO set hash against the hard-coded + // assumeutxo hash we expect. + // + // TODO: For belt-and-suspenders, we could cache the UTXO set + // hash for the snapshot when it's loaded in its chainstate's leveldb. We could then + // reference that here for an additional check. + if (AssumeutxoHash{ibd_stats.hashSerialized} != au_data.hash_serialized) { + LogPrintf("[snapshot] hash mismatch: actual=%s, expected=%s\n", + ibd_stats.hashSerialized.ToString(), + au_data.hash_serialized.ToString()); + handle_invalid_snapshot(); + return SnapshotCompletionResult::HASH_MISMATCH; + } + + LogPrintf("[snapshot] snapshot beginning at %s has been fully validated\n", + snapshot_blockhash.ToString()); + + m_ibd_chainstate->m_disabled = true; + this->MaybeRebalanceCaches(); + + return SnapshotCompletionResult::SUCCESS; +} + Chainstate& ChainstateManager::ActiveChainstate() const { LOCK(::cs_main); @@ -5312,17 +5501,22 @@ bool ChainstateManager::IsSnapshotActive() const void ChainstateManager::MaybeRebalanceCaches() { AssertLockHeld(::cs_main); - if (m_ibd_chainstate && !m_snapshot_chainstate) { + bool ibd_usable = this->IsUsable(m_ibd_chainstate.get()); + bool snapshot_usable = this->IsUsable(m_snapshot_chainstate.get()); + assert(ibd_usable || snapshot_usable); + + if (ibd_usable && !snapshot_usable) { LogPrintf("[snapshot] allocating all cache to the IBD chainstate\n"); // Allocate everything to the IBD chainstate. m_ibd_chainstate->ResizeCoinsCaches(m_total_coinstip_cache, m_total_coinsdb_cache); } - else if (m_snapshot_chainstate && !m_ibd_chainstate) { + else if (snapshot_usable && !ibd_usable) { + // If background validation has completed and snapshot is our active chain... LogPrintf("[snapshot] allocating all cache to the snapshot chainstate\n"); // Allocate everything to the snapshot chainstate. m_snapshot_chainstate->ResizeCoinsCaches(m_total_coinstip_cache, m_total_coinsdb_cache); } - else if (m_ibd_chainstate && m_snapshot_chainstate) { + else if (ibd_usable && snapshot_usable) { // If both chainstates exist, determine who needs more cache based on IBD status. // // Note: shrink caches first so that we don't inadvertently overwhelm available memory. @@ -5414,3 +5608,141 @@ bool IsBIP30Unspendable(const CBlockIndex& block_index) return (block_index.nHeight==91722 && block_index.GetBlockHash() == uint256S("0x00000000000271a2dc26e7667f8419f2e15416dc6955e5a6c6cdf3f2574dd08e")) || (block_index.nHeight==91812 && block_index.GetBlockHash() == uint256S("0x00000000000af0aed4792b1acee3d966af36cf5def14935db8de83d6f9306f2f")); } + +void Chainstate::InvalidateCoinsDBOnDisk() +{ + AssertLockHeld(::cs_main); + // Should never be called on a non-snapshot chainstate. + assert(m_from_snapshot_blockhash); + auto storage_path_maybe = this->CoinsDB().StoragePath(); + // Should never be called with a non-existent storage path. + assert(storage_path_maybe); + fs::path snapshot_datadir = *storage_path_maybe; + + // Coins views no longer usable. + m_coins_views.reset(); + + auto invalid_path = snapshot_datadir + "_INVALID"; + std::string dbpath = fs::PathToString(snapshot_datadir); + std::string target = fs::PathToString(invalid_path); + LogPrintf("[snapshot] renaming snapshot datadir %s to %s\n", dbpath, target); + + // The invalid snapshot datadir is simply moved and not deleted because we may + // want to do forensics later during issue investigation. The user is instructed + // accordingly in MaybeCompleteSnapshotValidation(). + try { + fs::rename(snapshot_datadir, invalid_path); + } catch (const fs::filesystem_error& e) { + auto src_str = fs::PathToString(snapshot_datadir); + auto dest_str = fs::PathToString(invalid_path); + + LogPrintf("%s: error renaming file '%s' -> '%s': %s\n", + __func__, src_str, dest_str, e.what()); + AbortNode(strprintf( + "Rename of '%s' -> '%s' failed. " + "You should resolve this by manually moving or deleting the invalid " + "snapshot directory %s, otherwise you will encounter the same error again " + "on the next startup.", + src_str, dest_str, src_str)); + } +} + +const CBlockIndex* ChainstateManager::GetSnapshotBaseBlock() const +{ + const auto blockhash_op = this->SnapshotBlockhash(); + if (!blockhash_op) return nullptr; + return Assert(m_blockman.LookupBlockIndex(*blockhash_op)); +} + +std::optional<int> ChainstateManager::GetSnapshotBaseHeight() const +{ + const CBlockIndex* base = this->GetSnapshotBaseBlock(); + return base ? std::make_optional(base->nHeight) : std::nullopt; +} + +bool ChainstateManager::ValidatedSnapshotCleanup() +{ + AssertLockHeld(::cs_main); + auto get_storage_path = [](auto& chainstate) EXCLUSIVE_LOCKS_REQUIRED(::cs_main) -> std::optional<fs::path> { + if (!(chainstate && chainstate->HasCoinsViews())) { + return {}; + } + return chainstate->CoinsDB().StoragePath(); + }; + std::optional<fs::path> ibd_chainstate_path_maybe = get_storage_path(m_ibd_chainstate); + std::optional<fs::path> snapshot_chainstate_path_maybe = get_storage_path(m_snapshot_chainstate); + + if (!this->IsSnapshotValidated()) { + // No need to clean up. + return false; + } + // If either path doesn't exist, that means at least one of the chainstates + // is in-memory, in which case we can't do on-disk cleanup. You'd better be + // in a unittest! + if (!ibd_chainstate_path_maybe || !snapshot_chainstate_path_maybe) { + LogPrintf("[snapshot] snapshot chainstate cleanup cannot happen with " /* Continued */ + "in-memory chainstates. You are testing, right?\n"); + return false; + } + + const auto& snapshot_chainstate_path = *snapshot_chainstate_path_maybe; + const auto& ibd_chainstate_path = *ibd_chainstate_path_maybe; + + // Since we're going to be moving around the underlying leveldb filesystem content + // for each chainstate, make sure that the chainstates (and their constituent + // CoinsViews members) have been destructed first. + // + // The caller of this method will be responsible for reinitializing chainstates + // if they want to continue operation. + this->ResetChainstates(); + + // No chainstates should be considered usable. + assert(this->GetAll().size() == 0); + + LogPrintf("[snapshot] deleting background chainstate directory (now unnecessary) (%s)\n", + fs::PathToString(ibd_chainstate_path)); + + fs::path tmp_old{ibd_chainstate_path + "_todelete"}; + + auto rename_failed_abort = []( + fs::path p_old, + fs::path p_new, + const fs::filesystem_error& err) { + LogPrintf("%s: error renaming file (%s): %s\n", + __func__, fs::PathToString(p_old), err.what()); + AbortNode(strprintf( + "Rename of '%s' -> '%s' failed. " + "Cannot clean up the background chainstate leveldb directory.", + fs::PathToString(p_old), fs::PathToString(p_new))); + }; + + try { + fs::rename(ibd_chainstate_path, tmp_old); + } catch (const fs::filesystem_error& e) { + rename_failed_abort(ibd_chainstate_path, tmp_old, e); + throw; + } + + LogPrintf("[snapshot] moving snapshot chainstate (%s) to " /* Continued */ + "default chainstate directory (%s)\n", + fs::PathToString(snapshot_chainstate_path), fs::PathToString(ibd_chainstate_path)); + + try { + fs::rename(snapshot_chainstate_path, ibd_chainstate_path); + } catch (const fs::filesystem_error& e) { + rename_failed_abort(snapshot_chainstate_path, ibd_chainstate_path, e); + throw; + } + + if (!DeleteCoinsDBFromDisk(tmp_old, /*is_snapshot=*/false)) { + // No need to AbortNode because once the unneeded bg chainstate data is + // moved, it will not interfere with subsequent initialization. + LogPrintf("Deletion of %s failed. Please remove it manually, as the " /* Continued */ + "directory is now unnecessary.\n", + fs::PathToString(tmp_old)); + } else { + LogPrintf("[snapshot] deleted background chainstate directory (%s)\n", + fs::PathToString(ibd_chainstate_path)); + } + return true; +} diff --git a/src/validation.h b/src/validation.h index 067d2ea6d2..b5ad1aff08 100644 --- a/src/validation.h +++ b/src/validation.h @@ -24,6 +24,7 @@ #include <policy/packages.h> #include <policy/policy.h> #include <script/script_error.h> +#include <shutdown.h> #include <sync.h> #include <txdb.h> #include <txmempool.h> // For CTxMemPool::cs @@ -493,6 +494,19 @@ protected: //! Manages the UTXO set, which is a reflection of the contents of `m_chain`. std::unique_ptr<CoinsViews> m_coins_views; + //! This toggle exists for use when doing background validation for UTXO + //! snapshots. + //! + //! In the expected case, it is set once the background validation chain reaches the + //! same height as the base of the snapshot and its UTXO set is found to hash to + //! the expected assumeutxo value. It signals that we should no longer connect + //! blocks to the background chainstate. When set on the background validation + //! chainstate, it signifies that we have fully validated the snapshot chainstate. + //! + //! In the unlikely case that the snapshot chainstate is found to be invalid, this + //! is set to true on the snapshot chainstate. + bool m_disabled GUARDED_BY(::cs_main) {false}; + public: //! Reference to a BlockManager instance which itself is shared across all //! Chainstate instances. @@ -560,15 +574,15 @@ public: CCoinsViewCache& CoinsTip() EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { AssertLockHeld(::cs_main); - assert(m_coins_views->m_cacheview); - return *m_coins_views->m_cacheview.get(); + Assert(m_coins_views); + return *Assert(m_coins_views->m_cacheview); } //! @returns A reference to the on-disk UTXO set database. CCoinsViewDB& CoinsDB() EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { AssertLockHeld(::cs_main); - return m_coins_views->m_dbview; + return Assert(m_coins_views)->m_dbview; } //! @returns A pointer to the mempool. @@ -582,12 +596,15 @@ public: CCoinsViewErrorCatcher& CoinsErrorCatcher() EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { AssertLockHeld(::cs_main); - return m_coins_views->m_catcherview; + return Assert(m_coins_views)->m_catcherview; } //! Destructs all objects related to accessing the UTXO set. void ResetCoinsViews() { m_coins_views.reset(); } + //! Does this chainstate have a UTXO set attached? + bool HasCoinsViews() const { return (bool)m_coins_views; } + //! The cache size of the on-disk coins view. size_t m_coinsdb_cache_size_bytes{0}; @@ -667,6 +684,12 @@ public: * May not be called with cs_main held. May not be called in a * validationinterface callback. * + * Note that if this is called while a snapshot chainstate is active, and if + * it is called on a background chainstate whose tip has reached the base block + * of the snapshot, its execution will take *MINUTES* while it hashes the + * background UTXO set to verify the assumeutxo value the snapshot was activated + * with. `cs_main` will be held during this time. + * * @returns true unless a system error occurred */ bool ActivateBestChain( @@ -745,6 +768,12 @@ public: std::string ToString() EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + //! Indirection necessary to make lock annotations work with an optional mempool. + RecursiveMutex* MempoolMutex() const LOCK_RETURNED(m_mempool->cs) + { + return m_mempool ? &m_mempool->cs : nullptr; + } + private: bool ActivateBestChainStep(BlockValidationState& state, CBlockIndex* pindexMostWork, const std::shared_ptr<const CBlock>& pblock, bool& fInvalidFound, ConnectTrace& connectTrace) EXCLUSIVE_LOCKS_REQUIRED(cs_main, m_mempool->cs); bool ConnectTip(BlockValidationState& state, CBlockIndex* pindexNew, const std::shared_ptr<const CBlock>& pblock, ConnectTrace& connectTrace, DisconnectedBlockTransactions& disconnectpool) EXCLUSIVE_LOCKS_REQUIRED(cs_main, m_mempool->cs); @@ -758,12 +787,6 @@ private: void CheckForkWarningConditions() EXCLUSIVE_LOCKS_REQUIRED(cs_main); void InvalidChainFound(CBlockIndex* pindexNew) EXCLUSIVE_LOCKS_REQUIRED(cs_main); - //! Indirection necessary to make lock annotations work with an optional mempool. - RecursiveMutex* MempoolMutex() const LOCK_RETURNED(m_mempool->cs) - { - return m_mempool ? &m_mempool->cs : nullptr; - } - /** * Make mempool consistent after a reorg, by re-adding or recursively erasing * disconnected block transactions from the mempool, and also removing any @@ -788,9 +811,37 @@ private: std::chrono::microseconds m_last_write{0}; std::chrono::microseconds m_last_flush{0}; + /** + * In case of an invalid snapshot, rename the coins leveldb directory so + * that it can be examined for issue diagnosis. + */ + void InvalidateCoinsDBOnDisk() EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + friend ChainstateManager; }; + +enum class SnapshotCompletionResult { + SUCCESS, + SKIPPED, + + // Expected assumeutxo configuration data is not found for the height of the + // base block. + MISSING_CHAINPARAMS, + + // Failed to generate UTXO statistics (to check UTXO set hash) for the background + // chainstate. + STATS_FAILED, + + // The UTXO set hash of the background validation chainstate does not match + // the one expected by assumeutxo chainparams. + HASH_MISMATCH, + + // The blockhash of the current tip of the background validation chainstate does + // not match the one expected by the snapshot chainstate. + BASE_BLOCKHASH_MISMATCH, +}; + /** * Provides an interface for creating and interacting with one or two * chainstates: an IBD chainstate generated by downloading blocks, and @@ -860,10 +911,6 @@ private: //! that call. Chainstate* m_active_chainstate GUARDED_BY(::cs_main) {nullptr}; - //! If true, the assumed-valid chainstate has been fully validated - //! by the background validation chainstate. - bool m_snapshot_validated GUARDED_BY(::cs_main){false}; - CBlockIndex* m_best_invalid GUARDED_BY(::cs_main){nullptr}; //! Internal helper for ActivateSnapshot(). @@ -889,6 +936,22 @@ private: /** Most recent headers presync progress update, for rate-limiting. */ std::chrono::time_point<std::chrono::steady_clock> m_last_presync_update GUARDED_BY(::cs_main) {}; + //! Returns nullptr if no snapshot has been loaded. + const CBlockIndex* GetSnapshotBaseBlock() const EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + + //! Return the height of the base block of the snapshot in use, if one exists, else + //! nullopt. + std::optional<int> GetSnapshotBaseHeight() const EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + + //! Return true if a chainstate is considered usable. + //! + //! This is false when a background validation chainstate has completed its + //! validation of an assumed-valid chainstate, or when a snapshot + //! chainstate has been found to be invalid. + bool IsUsable(const Chainstate* const cs) const EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { + return cs && !cs->m_disabled; + } + public: using Options = kernel::ChainstateManagerOpts; @@ -976,6 +1039,18 @@ public: [[nodiscard]] bool ActivateSnapshot( AutoFile& coins_file, const node::SnapshotMetadata& metadata, bool in_memory); + //! Once the background validation chainstate has reached the height which + //! is the base of the UTXO snapshot in use, compare its coins to ensure + //! they match those expected by the snapshot. + //! + //! If the coins match (expected), then mark the validation chainstate for + //! deletion and continue using the snapshot chainstate as active. + //! Otherwise, revert to using the ibd chainstate and shutdown. + SnapshotCompletionResult MaybeCompleteSnapshotValidation( + std::function<void(bilingual_str)> shutdown_fnc = + [](bilingual_str msg) { AbortNode(msg.original, msg); }) + EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + //! The most-work chain. Chainstate& ActiveChainstate() const; CChain& ActiveChain() const EXCLUSIVE_LOCKS_REQUIRED(GetMutex()) { return ActiveChainstate().m_chain; } @@ -1000,7 +1075,10 @@ public: std::optional<uint256> SnapshotBlockhash() const; //! Is there a snapshot in use and has it been fully validated? - bool IsSnapshotValidated() const EXCLUSIVE_LOCKS_REQUIRED(::cs_main) { return m_snapshot_validated; } + bool IsSnapshotValidated() const EXCLUSIVE_LOCKS_REQUIRED(::cs_main) + { + return m_snapshot_chainstate && m_ibd_chainstate && m_ibd_chainstate->m_disabled; + } /** * Process an incoming block. This only returns after the best known valid @@ -1080,6 +1158,17 @@ public: Chainstate& ActivateExistingSnapshot(CTxMemPool* mempool, uint256 base_blockhash) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + //! If we have validated a snapshot chain during this runtime, copy its + //! chainstate directory over to the main `chainstate` location, completing + //! validation of the snapshot. + //! + //! If the cleanup succeeds, the caller will need to ensure chainstates are + //! reinitialized, since ResetChainstates() will be called before leveldb + //! directories are moved or deleted. + //! + //! @sa node/chainstate:LoadChainstate() + bool ValidatedSnapshotCleanup() EXCLUSIVE_LOCKS_REQUIRED(::cs_main); + ~ChainstateManager(); }; |