diff options
-rwxr-xr-x | contrib/devtools/check-doc.py | 2 | ||||
-rw-r--r-- | src/chain.cpp | 19 | ||||
-rw-r--r-- | src/chain.h | 3 | ||||
-rw-r--r-- | src/coins.cpp | 9 | ||||
-rw-r--r-- | src/coins.h | 13 | ||||
-rw-r--r-- | src/init.cpp | 11 | ||||
-rw-r--r-- | src/net_processing.cpp | 19 | ||||
-rw-r--r-- | src/txdb.cpp | 51 | ||||
-rw-r--r-- | src/txdb.h | 7 | ||||
-rw-r--r-- | src/validation.cpp | 117 | ||||
-rw-r--r-- | src/validation.h | 5 | ||||
-rwxr-xr-x | test/functional/dbcrash.py | 268 | ||||
-rw-r--r-- | test/functional/test_framework/util.py | 2 | ||||
-rwxr-xr-x | test/functional/test_runner.py | 1 |
14 files changed, 484 insertions, 43 deletions
diff --git a/contrib/devtools/check-doc.py b/contrib/devtools/check-doc.py index 249214e931..150f368513 100755 --- a/contrib/devtools/check-doc.py +++ b/contrib/devtools/check-doc.py @@ -21,7 +21,7 @@ CMD_GREP_DOCS = r"egrep -r -I 'HelpMessageOpt\(\"\-[^\"=]+?(=|\")' %s" % (CMD_RO REGEX_ARG = re.compile(r'(?:map(?:Multi)?Args(?:\.count\(|\[)|Get(?:Bool)?Arg\()\"(\-[^\"]+?)\"') REGEX_DOC = re.compile(r'HelpMessageOpt\(\"(\-[^\"=]+?)(?:=|\")') # list unsupported, deprecated and duplicate args as they need no documentation -SET_DOC_OPTIONAL = set(['-rpcssl', '-benchmark', '-h', '-help', '-socks', '-tor', '-debugnet', '-whitelistalwaysrelay', '-prematurewitness', '-walletprematurewitness', '-promiscuousmempoolflags', '-blockminsize']) +SET_DOC_OPTIONAL = set(['-rpcssl', '-benchmark', '-h', '-help', '-socks', '-tor', '-debugnet', '-whitelistalwaysrelay', '-prematurewitness', '-walletprematurewitness', '-promiscuousmempoolflags', '-blockminsize', '-dbcrashratio']) def main(): used = check_output(CMD_GREP_ARGS, shell=True) diff --git a/src/chain.cpp b/src/chain.cpp index 8d4c4e7dea..ffd58d471d 100644 --- a/src/chain.cpp +++ b/src/chain.cpp @@ -148,3 +148,22 @@ int64_t GetBlockProofEquivalentTime(const CBlockIndex& to, const CBlockIndex& fr } return sign * r.GetLow64(); } + +/** Find the last common ancestor two blocks have. + * Both pa and pb must be non-NULL. */ +const CBlockIndex* LastCommonAncestor(const CBlockIndex* pa, const CBlockIndex* pb) { + if (pa->nHeight > pb->nHeight) { + pa = pa->GetAncestor(pb->nHeight); + } else if (pb->nHeight > pa->nHeight) { + pb = pb->GetAncestor(pa->nHeight); + } + + while (pa != pb && pa && pb) { + pa = pa->pprev; + pb = pb->pprev; + } + + // Eventually all chain branches meet at the genesis block. + assert(pa == pb); + return pa; +} diff --git a/src/chain.h b/src/chain.h index de120d2d75..c5304b7d6f 100644 --- a/src/chain.h +++ b/src/chain.h @@ -362,6 +362,9 @@ public: arith_uint256 GetBlockProof(const CBlockIndex& block); /** Return the time it would take to redo the work difference between from and to, assuming the current hashrate corresponds to the difficulty at tip, in seconds. */ int64_t GetBlockProofEquivalentTime(const CBlockIndex& to, const CBlockIndex& from, const CBlockIndex& tip, const Consensus::Params&); +/** Find the forking point between two chain tips. */ +const CBlockIndex* LastCommonAncestor(const CBlockIndex* pa, const CBlockIndex* pb); + /** Used to marshal pointers into hashes for db storage. */ class CDiskBlockIndex : public CBlockIndex diff --git a/src/coins.cpp b/src/coins.cpp index 3113b7755d..b5dc6197bd 100644 --- a/src/coins.cpp +++ b/src/coins.cpp @@ -12,6 +12,7 @@ bool CCoinsView::GetCoin(const COutPoint &outpoint, Coin &coin) const { return false; } uint256 CCoinsView::GetBestBlock() const { return uint256(); } +std::vector<uint256> CCoinsView::GetHeadBlocks() const { return std::vector<uint256>(); } bool CCoinsView::BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock) { return false; } CCoinsViewCursor *CCoinsView::Cursor() const { return 0; } @@ -25,6 +26,7 @@ CCoinsViewBacked::CCoinsViewBacked(CCoinsView *viewIn) : base(viewIn) { } bool CCoinsViewBacked::GetCoin(const COutPoint &outpoint, Coin &coin) const { return base->GetCoin(outpoint, coin); } bool CCoinsViewBacked::HaveCoin(const COutPoint &outpoint) const { return base->HaveCoin(outpoint); } uint256 CCoinsViewBacked::GetBestBlock() const { return base->GetBestBlock(); } +std::vector<uint256> CCoinsViewBacked::GetHeadBlocks() const { return base->GetHeadBlocks(); } void CCoinsViewBacked::SetBackend(CCoinsView &viewIn) { base = &viewIn; } bool CCoinsViewBacked::BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock) { return base->BatchWrite(mapCoins, hashBlock); } CCoinsViewCursor *CCoinsViewBacked::Cursor() const { return base->Cursor(); } @@ -85,13 +87,14 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi cachedCoinsUsage += it->second.coin.DynamicMemoryUsage(); } -void AddCoins(CCoinsViewCache& cache, const CTransaction &tx, int nHeight) { +void AddCoins(CCoinsViewCache& cache, const CTransaction &tx, int nHeight, bool check) { bool fCoinbase = tx.IsCoinBase(); const uint256& txid = tx.GetHash(); for (size_t i = 0; i < tx.vout.size(); ++i) { - // Pass fCoinbase as the possible_overwrite flag to AddCoin, in order to correctly + bool overwrite = check ? cache.HaveCoin(COutPoint(txid, i)) : fCoinbase; + // Always set the possible_overwrite flag to AddCoin for coinbase txn, in order to correctly // deal with the pre-BIP30 occurrences of duplicate coinbase transactions. - cache.AddCoin(COutPoint(txid, i), Coin(tx.vout[i], nHeight, fCoinbase), fCoinbase); + cache.AddCoin(COutPoint(txid, i), Coin(tx.vout[i], nHeight, fCoinbase), overwrite); } } diff --git a/src/coins.h b/src/coins.h index 077545a55b..ff7719b724 100644 --- a/src/coins.h +++ b/src/coins.h @@ -157,6 +157,12 @@ public: //! Retrieve the block hash whose state this CCoinsView currently represents virtual uint256 GetBestBlock() const; + //! Retrieve the range of blocks that may have been only partially written. + //! If the database is in a consistent state, the result is the empty vector. + //! Otherwise, a two-element vector is returned consisting of the new and + //! the old block hash, in that order. + virtual std::vector<uint256> GetHeadBlocks() const; + //! Do a bulk modification (multiple Coin changes + BestBlock change). //! The passed mapCoins can be modified. virtual bool BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock); @@ -183,6 +189,7 @@ public: bool GetCoin(const COutPoint &outpoint, Coin &coin) const override; bool HaveCoin(const COutPoint &outpoint) const override; uint256 GetBestBlock() const override; + std::vector<uint256> GetHeadBlocks() const override; void SetBackend(CCoinsView &viewIn); bool BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock) override; CCoinsViewCursor *Cursor() const override; @@ -291,10 +298,12 @@ private: }; //! Utility function to add all of a transaction's outputs to a cache. -// It assumes that overwrites are only possible for coinbase transactions, +// When check is false, this assumes that overwrites are only possible for coinbase transactions. +// When check is true, the underlying view may be queried to determine whether an addition is +// an overwrite. // TODO: pass in a boolean to limit these possible overwrites to known // (pre-BIP34) cases. -void AddCoins(CCoinsViewCache& cache, const CTransaction& tx, int nHeight); +void AddCoins(CCoinsViewCache& cache, const CTransaction& tx, int nHeight, bool check = false); //! Utility function to find any unspent output with a given txid. const Coin& AccessByTxid(const CCoinsViewCache& cache, const uint256& txid); diff --git a/src/init.cpp b/src/init.cpp index 57232c7df3..d59713258c 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -336,6 +336,9 @@ std::string HelpMessage(HelpMessageMode mode) #endif } strUsage += HelpMessageOpt("-datadir=<dir>", _("Specify data directory")); + if (showDebug) { + strUsage += HelpMessageOpt("-dbbatchsize", strprintf("Maximum database write batch size in bytes (default: %u)", nDefaultDbBatchSize)); + } strUsage += HelpMessageOpt("-dbcache=<n>", strprintf(_("Set database cache size in megabytes (%d to %d, default: %d)"), nMinDbCache, nMaxDbCache, nDefaultDbCache)); if (showDebug) strUsage += HelpMessageOpt("-feefilter", strprintf("Tell other nodes to filter invs to us by our mempool min fee (default: %u)", DEFAULT_FEEFILTER)); @@ -1373,7 +1376,6 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) pblocktree = new CBlockTreeDB(nBlockTreeDBCache, false, fReindex); pcoinsdbview = new CCoinsViewDB(nCoinDBCache, false, fReindex || fReindexChainState); pcoinscatcher = new CCoinsViewErrorCatcher(pcoinsdbview); - pcoinsTip = new CCoinsViewCache(pcoinscatcher); if (fReindex) { pblocktree->WriteReindexing(true); @@ -1417,6 +1419,13 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) break; } + if (!ReplayBlocks(chainparams, pcoinsdbview)) { + strLoadError = _("Unable to replay blocks. You will need to rebuild the database using -reindex-chainstate."); + break; + } + pcoinsTip = new CCoinsViewCache(pcoinscatcher); + LoadChainTip(chainparams); + if (!fReindex && chainActive.Tip() != NULL) { uiInterface.InitMessage(_("Rewinding blocks...")); if (!RewindBlockIndex(chainparams)) { diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 8fc6f6f95e..4d832f3711 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -452,25 +452,6 @@ bool PeerHasHeader(CNodeState *state, const CBlockIndex *pindex) return false; } -/** Find the last common ancestor two blocks have. - * Both pa and pb must be non-NULL. */ -const CBlockIndex* LastCommonAncestor(const CBlockIndex* pa, const CBlockIndex* pb) { - if (pa->nHeight > pb->nHeight) { - pa = pa->GetAncestor(pb->nHeight); - } else if (pb->nHeight > pa->nHeight) { - pb = pb->GetAncestor(pa->nHeight); - } - - while (pa != pb && pa && pb) { - pa = pa->pprev; - pb = pb->pprev; - } - - // Eventually all chain branches meet at the genesis block. - assert(pa == pb); - return pa; -} - /** Update pindexLastCommonBlock and add not-in-flight missing successors to vBlocks, until it has * at most count entries. */ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vector<const CBlockIndex*>& vBlocks, NodeId& nodeStaller, const Consensus::Params& consensusParams) { diff --git a/src/txdb.cpp b/src/txdb.cpp index 97e916fd22..d24162ba2d 100644 --- a/src/txdb.cpp +++ b/src/txdb.cpp @@ -7,8 +7,10 @@ #include "chainparams.h" #include "hash.h" +#include "random.h" #include "pow.h" #include "uint256.h" +#include "util.h" #include <stdint.h> @@ -21,6 +23,7 @@ static const char DB_TXINDEX = 't'; static const char DB_BLOCK_INDEX = 'b'; static const char DB_BEST_BLOCK = 'B'; +static const char DB_HEAD_BLOCKS = 'H'; static const char DB_FLAG = 'F'; static const char DB_REINDEX_FLAG = 'R'; static const char DB_LAST_BLOCK = 'l'; @@ -68,10 +71,39 @@ uint256 CCoinsViewDB::GetBestBlock() const { return hashBestChain; } +std::vector<uint256> CCoinsViewDB::GetHeadBlocks() const { + std::vector<uint256> vhashHeadBlocks; + if (!db.Read(DB_HEAD_BLOCKS, vhashHeadBlocks)) { + return std::vector<uint256>(); + } + return vhashHeadBlocks; +} + bool CCoinsViewDB::BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock) { CDBBatch batch(db); size_t count = 0; size_t changed = 0; + size_t batch_size = (size_t)GetArg("-dbbatchsize", nDefaultDbBatchSize); + int crash_simulate = GetArg("-dbcrashratio", 0); + assert(!hashBlock.IsNull()); + + uint256 old_tip = GetBestBlock(); + if (old_tip.IsNull()) { + // We may be in the middle of replaying. + std::vector<uint256> old_heads = GetHeadBlocks(); + if (old_heads.size() == 2) { + assert(old_heads[0] == hashBlock); + old_tip = old_heads[1]; + } + } + + // In the first batch, mark the database as being in the middle of a + // transition from old_tip to hashBlock. + // A vector is used for future extensibility, as we may want to support + // interrupting after partial writes from multiple independent reorgs. + batch.Erase(DB_BEST_BLOCK); + batch.Write(DB_HEAD_BLOCKS, std::vector<uint256>{hashBlock, old_tip}); + for (CCoinsMap::iterator it = mapCoins.begin(); it != mapCoins.end();) { if (it->second.flags & CCoinsCacheEntry::DIRTY) { CoinEntry entry(&it->first); @@ -84,10 +116,25 @@ bool CCoinsViewDB::BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock) { count++; CCoinsMap::iterator itOld = it++; mapCoins.erase(itOld); + if (batch.SizeEstimate() > batch_size) { + LogPrint(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0)); + db.WriteBatch(batch); + batch.Clear(); + if (crash_simulate) { + static FastRandomContext rng; + if (rng.randrange(crash_simulate) == 0) { + LogPrintf("Simulating a crash. Goodbye.\n"); + _Exit(0); + } + } + } } - if (!hashBlock.IsNull()) - batch.Write(DB_BEST_BLOCK, hashBlock); + // In the last batch, mark the database as consistent with hashBlock again. + batch.Erase(DB_HEAD_BLOCKS); + batch.Write(DB_BEST_BLOCK, hashBlock); + + LogPrint(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0)); bool ret = db.WriteBatch(batch); LogPrint(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count); return ret; diff --git a/src/txdb.h b/src/txdb.h index b14a0af147..adcbc73380 100644 --- a/src/txdb.h +++ b/src/txdb.h @@ -19,12 +19,12 @@ class CBlockIndex; class CCoinsViewDBCursor; class uint256; -//! Compensate for extra memory peak (x1.5-x1.9) at flush time. -static constexpr int DB_PEAK_USAGE_FACTOR = 2; //! No need to periodic flush if at least this much space still available. -static constexpr int MAX_BLOCK_COINSDB_USAGE = 10 * DB_PEAK_USAGE_FACTOR; +static constexpr int MAX_BLOCK_COINSDB_USAGE = 10; //! -dbcache default (MiB) static const int64_t nDefaultDbCache = 450; +//! -dbbatchsize default (bytes) +static const int64_t nDefaultDbBatchSize = 16 << 20; //! max. -dbcache (MiB) static const int64_t nMaxDbCache = sizeof(void*) > 4 ? 16384 : 1024; //! min. -dbcache (MiB) @@ -74,6 +74,7 @@ public: bool GetCoin(const COutPoint &outpoint, Coin &coin) const override; bool HaveCoin(const COutPoint &outpoint) const override; uint256 GetBestBlock() const override; + std::vector<uint256> GetHeadBlocks() const override; bool BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlock) override; CCoinsViewCursor *Cursor() const override; diff --git a/src/validation.cpp b/src/validation.cpp index 8d47b52ebe..216ba3d4a5 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -96,7 +96,7 @@ namespace { struct CBlockIndexWorkComparator { - bool operator()(CBlockIndex *pa, CBlockIndex *pb) const { + bool operator()(const CBlockIndex *pa, const CBlockIndex *pb) const { // First sort by most total work, ... if (pa->nChainWork > pb->nChainWork) return false; if (pa->nChainWork < pb->nChainWork) return true; @@ -1331,17 +1331,19 @@ int ApplyTxInUndo(Coin&& undo, CCoinsViewCache& view, const COutPoint& out) return DISCONNECT_FAILED; // adding output for transaction without known metadata } } - view.AddCoin(out, std::move(undo), undo.fCoinBase); + // The potential_overwrite parameter to AddCoin is only allowed to be false if we know for + // sure that the coin did not already exist in the cache. As we have queried for that above + // using HaveCoin, we don't need to guess. When fClean is false, a coin already existed and + // it is an overwrite. + view.AddCoin(out, std::move(undo), !fClean); return fClean ? DISCONNECT_OK : DISCONNECT_UNCLEAN; } /** Undo the effects of this block (with given index) on the UTXO set represented by coins. - * When UNCLEAN or FAILED is returned, view is left in an indeterminate state. */ + * When FAILED is returned, view is left in an indeterminate state. */ static DisconnectResult DisconnectBlock(const CBlock& block, const CBlockIndex* pindex, CCoinsViewCache& view) { - assert(pindex->GetBlockHash() == view.GetBestBlock()); - bool fClean = true; CBlockUndo blockUndo; @@ -1779,7 +1781,7 @@ bool static FlushStateToDisk(const CChainParams& chainparams, CValidationState & nLastSetChain = nNow; } int64_t nMempoolSizeMax = GetArg("-maxmempool", DEFAULT_MAX_MEMPOOL_SIZE) * 1000000; - int64_t cacheSize = pcoinsTip->DynamicMemoryUsage() * DB_PEAK_USAGE_FACTOR; + int64_t cacheSize = pcoinsTip->DynamicMemoryUsage(); int64_t nTotalSpace = nCoinCacheUsage + std::max<int64_t>(nMempoolSizeMax - nMempoolUsage, 0); // The cache is large and we're within 10% and 10 MiB of the limit, but we have time now (not in the middle of a block processing). bool fCacheLarge = mode == FLUSH_STATE_PERIODIC && cacheSize > std::max((9 * nTotalSpace) / 10, nTotalSpace - MAX_BLOCK_COINSDB_USAGE * 1024 * 1024); @@ -1946,6 +1948,7 @@ bool static DisconnectTip(CValidationState& state, const CChainParams& chainpara int64_t nStart = GetTimeMicros(); { CCoinsViewCache view(pcoinsTip); + assert(view.GetBestBlock() == pindexDelete->GetBlockHash()); if (DisconnectBlock(block, pindexDelete, view) != DISCONNECT_OK) return error("DisconnectTip(): DisconnectBlock %s failed", pindexDelete->GetBlockHash().ToString()); bool flushed = view.Flush(); @@ -3417,20 +3420,25 @@ bool static LoadBlockIndexDB(const CChainParams& chainparams) pblocktree->ReadFlag("txindex", fTxIndex); LogPrintf("%s: transaction index %s\n", __func__, fTxIndex ? "enabled" : "disabled"); + return true; +} + +void LoadChainTip(const CChainParams& chainparams) +{ + if (chainActive.Tip() && chainActive.Tip()->GetBlockHash() == pcoinsTip->GetBestBlock()) return; + // Load pointer to end of best chain BlockMap::iterator it = mapBlockIndex.find(pcoinsTip->GetBestBlock()); if (it == mapBlockIndex.end()) - return true; + return; chainActive.SetTip(it->second); PruneBlockIndexCandidates(); - LogPrintf("%s: hashBestChain=%s height=%d date=%s progress=%f\n", __func__, + LogPrintf("Loaded best chain: hashBestChain=%s height=%d date=%s progress=%f\n", chainActive.Tip()->GetBlockHash().ToString(), chainActive.Height(), DateTimeStrFormat("%Y-%m-%d %H:%M:%S", chainActive.Tip()->GetBlockTime()), GuessVerificationProgress(chainparams.TxData(), chainActive.Tip())); - - return true; } CVerifyDB::CVerifyDB() @@ -3499,6 +3507,7 @@ bool CVerifyDB::VerifyDB(const CChainParams& chainparams, CCoinsView *coinsview, } // check level 3: check for inconsistencies during memory-only disconnect of tip blocks if (nCheckLevel >= 3 && pindex == pindexState && (coins.DynamicMemoryUsage() + pcoinsTip->DynamicMemoryUsage()) <= nCoinCacheUsage) { + assert(coins.GetBestBlock() == pindex->GetBlockHash()); DisconnectResult res = DisconnectBlock(block, pindex, coins); if (res == DISCONNECT_FAILED) { return error("VerifyDB(): *** irrecoverable inconsistency in block data at %d, hash=%s", pindex->nHeight, pindex->GetBlockHash().ToString()); @@ -3538,6 +3547,92 @@ bool CVerifyDB::VerifyDB(const CChainParams& chainparams, CCoinsView *coinsview, return true; } +/** Apply the effects of a block on the utxo cache, ignoring that it may already have been applied. */ +static bool RollforwardBlock(const CBlockIndex* pindex, CCoinsViewCache& inputs, const CChainParams& params) +{ + // TODO: merge with ConnectBlock + CBlock block; + if (!ReadBlockFromDisk(block, pindex, params.GetConsensus())) { + return error("ReplayBlock(): ReadBlockFromDisk failed at %d, hash=%s", pindex->nHeight, pindex->GetBlockHash().ToString()); + } + + for (const CTransactionRef& tx : block.vtx) { + if (!tx->IsCoinBase()) { + for (const CTxIn &txin : tx->vin) { + inputs.SpendCoin(txin.prevout); + } + } + // Pass check = true as every addition may be an overwrite. + AddCoins(inputs, *tx, pindex->nHeight, true); + } + return true; +} + +bool ReplayBlocks(const CChainParams& params, CCoinsView* view) +{ + LOCK(cs_main); + + CCoinsViewCache cache(view); + + std::vector<uint256> hashHeads = view->GetHeadBlocks(); + if (hashHeads.empty()) return true; // We're already in a consistent state. + if (hashHeads.size() != 2) return error("ReplayBlocks(): unknown inconsistent state"); + + uiInterface.ShowProgress(_("Replaying blocks..."), 0); + LogPrintf("Replaying blocks\n"); + + const CBlockIndex* pindexOld = nullptr; // Old tip during the interrupted flush. + const CBlockIndex* pindexNew; // New tip during the interrupted flush. + const CBlockIndex* pindexFork = nullptr; // Latest block common to both the old and the new tip. + + if (mapBlockIndex.count(hashHeads[0]) == 0) { + return error("ReplayBlocks(): reorganization to unknown block requested"); + } + pindexNew = mapBlockIndex[hashHeads[0]]; + + if (!hashHeads[1].IsNull()) { // The old tip is allowed to be 0, indicating it's the first flush. + if (mapBlockIndex.count(hashHeads[1]) == 0) { + return error("ReplayBlocks(): reorganization from unknown block requested"); + } + pindexOld = mapBlockIndex[hashHeads[1]]; + pindexFork = LastCommonAncestor(pindexOld, pindexNew); + assert(pindexFork != nullptr); + } + + // Rollback along the old branch. + while (pindexOld != pindexFork) { + if (pindexOld->nHeight > 0) { // Never disconnect the genesis block. + CBlock block; + if (!ReadBlockFromDisk(block, pindexOld, params.GetConsensus())) { + return error("RollbackBlock(): ReadBlockFromDisk() failed at %d, hash=%s", pindexOld->nHeight, pindexOld->GetBlockHash().ToString()); + } + LogPrintf("Rolling back %s (%i)\n", pindexOld->GetBlockHash().ToString(), pindexOld->nHeight); + DisconnectResult res = DisconnectBlock(block, pindexOld, cache); + if (res == DISCONNECT_FAILED) { + return error("RollbackBlock(): DisconnectBlock failed at %d, hash=%s", pindexOld->nHeight, pindexOld->GetBlockHash().ToString()); + } + // If DISCONNECT_UNCLEAN is returned, it means a non-existing UTXO was deleted, or an existing UTXO was + // overwritten. It corresponds to cases where the block-to-be-disconnect never had all its operations + // applied to the UTXO set. However, as both writing a UTXO and deleting a UTXO are idempotent operations, + // the result is still a version of the UTXO set with the effects of that block undone. + } + pindexOld = pindexOld->pprev; + } + + // Roll forward from the forking point to the new tip. + int nForkHeight = pindexFork ? pindexFork->nHeight : 0; + for (int nHeight = nForkHeight + 1; nHeight <= pindexNew->nHeight; ++nHeight) { + const CBlockIndex* pindex = pindexNew->GetAncestor(nHeight); + LogPrintf("Rolling forward %s (%i)\n", pindex->GetBlockHash().ToString(), nHeight); + if (!RollforwardBlock(pindex, cache, params)) return false; + } + + cache.SetBestBlock(pindexNew->GetBlockHash()); + cache.Flush(); + uiInterface.ShowProgress("", 100); + return true; +} + bool RewindBlockIndex(const CChainParams& params) { LOCK(cs_main); @@ -3687,8 +3782,6 @@ bool InitBlockIndex(const CChainParams& chainparams) CBlockIndex *pindex = AddToBlockIndex(block); if (!ReceivedBlockTransactions(block, state, pindex, blockPos, chainparams.GetConsensus())) return error("LoadBlockIndex(): genesis block not accepted"); - // Force a chainstate write so that when we VerifyDB in a moment, it doesn't check stale data - return FlushStateToDisk(chainparams, state, FLUSH_STATE_ALWAYS); } catch (const std::runtime_error& e) { return error("LoadBlockIndex(): failed to initialize block database: %s", e.what()); } diff --git a/src/validation.h b/src/validation.h index 82df4cb170..8a721dd7a2 100644 --- a/src/validation.h +++ b/src/validation.h @@ -260,6 +260,8 @@ bool LoadExternalBlockFile(const CChainParams& chainparams, FILE* fileIn, CDiskB bool InitBlockIndex(const CChainParams& chainparams); /** Load the block tree and coins database from disk */ bool LoadBlockIndex(const CChainParams& chainparams); +/** Update the chain tip based on database information. */ +void LoadChainTip(const CChainParams& chainparams); /** Unload database information */ void UnloadBlockIndex(); /** Run an instance of the script checking thread */ @@ -424,6 +426,9 @@ public: bool VerifyDB(const CChainParams& chainparams, CCoinsView *coinsview, int nCheckLevel, int nCheckDepth); }; +/** Replay blocks that aren't fully applied to the database. */ +bool ReplayBlocks(const CChainParams& params, CCoinsView* view); + /** Find the last common block between the parameter chain and a locator. */ CBlockIndex* FindForkInGlobalIndex(const CChain& chain, const CBlockLocator& locator); diff --git a/test/functional/dbcrash.py b/test/functional/dbcrash.py new file mode 100755 index 0000000000..4a10743f04 --- /dev/null +++ b/test/functional/dbcrash.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +# Copyright (c) 2017 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +"""Test recovery from a crash during chainstate writing.""" + +from test_framework.test_framework import BitcoinTestFramework +from test_framework.util import * +from test_framework.script import * +from test_framework.mininode import * +import random +try: + import http.client as httplib +except ImportError: + import httplib +import errno + +''' +Test structure: + +- 4 nodes + * node0, node1, and node2 will have different dbcrash ratios, and different + dbcache sizes + * node3 will be a regular node, with no crashing. + * The nodes will not connect to each other. + +- use default test framework starting chain. initialize starting_tip_height to + tip height. + +- Main loop: + * generate lots of transactions on node3, enough to fill up a block. + * uniformly randomly pick a tip height from starting_tip_height to + tip_height; with probability 1/(height_difference+4), invalidate this block. + * mine enough blocks to overtake tip_height at start of loop. + * for each node in [node0,node1,node2]: + - for each mined block: + * submit block to node + * if node crashed on/after submitting: + - restart until recovery succeeds + - check that utxo matches node3 using gettxoutsetinfo +''' + +class ChainstateWriteCrashTest(BitcoinTestFramework): + + def __init__(self): + super().__init__() + self.num_nodes = 4 + self.setup_clean_chain = False + + # Set -maxmempool=0 to turn off mempool memory sharing with dbcache + # Set -rpcservertimeout=900 to reduce socket disconnects in this + # long-running test + self.base_args = ["-limitdescendantsize=0", "-maxmempool=0", "-rpcservertimeout=900"] + + # Set different crash ratios and cache sizes. Note that not all of + # -dbcache goes to pcoinsTip. + self.node0_args = ["-dbcrashratio=8", "-dbcache=4", "-dbbatchsize=200000"] + self.base_args + self.node1_args = ["-dbcrashratio=16", "-dbcache=8", "-dbbatchsize=200000"] + self.base_args + self.node2_args = ["-dbcrashratio=24", "-dbcache=16", "-dbbatchsize=200000"] + self.base_args + + # Node3 is a normal node with default args, except will mine full blocks + self.node3_args = ["-blockmaxweight=4000000"] + self.extra_args = [self.node0_args, self.node1_args, self.node2_args, self.node3_args] + + # We'll track some test coverage statistics + self.restart_counts = [0, 0, 0] # Track the restarts for nodes 0-2 + self.crashed_on_restart = 0 # Track count of crashes during recovery + + def setup_network(self): + self.setup_nodes() + # Leave them unconnected, we'll use submitblock directly in this test + + # Starts up a given node id, waits for the tip to reach the given block + # hash, and calculates the utxo hash. Exceptions on startup should + # indicate node crash (due to -dbcrashratio), in which case we try again. + # Give up after 60 seconds. + # Returns the utxo hash of the given node. + def restart_node(self, node_index, expected_tip): + time_start = time.time() + while time.time() - time_start < 60: + try: + # Any of these RPC calls could throw due to node crash + self.nodes[node_index] = self.start_node(node_index, self.options.tmpdir, self.extra_args[node_index]) + self.nodes[node_index].waitforblock(expected_tip) + utxo_hash = self.nodes[node_index].gettxoutsetinfo()['hash_serialized_2'] + return utxo_hash + except: + # An exception here should mean the node is about to crash. + # If bitcoind exits, then try again. wait_for_node_exit() + # should raise an exception if bitcoind doesn't exit. + wait_for_node_exit(node_index, timeout=10) + self.crashed_on_restart += 1 + time.sleep(1) + + # If we got here, bitcoind isn't coming back up on restart. Could be a + # bug in bitcoind, or we've gotten unlucky with our dbcrash ratio -- + # perhaps we generated a test case that blew up our cache? + # TODO: If this happens a lot, we should try to restart without -dbcrashratio + # and make sure that recovery happens. + raise AssertionError("Unable to successfully restart node %d in allotted time", node_index) + + # Try submitting a block to the given node. + # Catch any exceptions that indicate the node has crashed. + # Returns true if the block was submitted successfully; false otherwise. + def submit_block_catch_error(self, node_index, block): + try: + self.nodes[node_index].submitblock(block) + return True + except (httplib.CannotSendRequest, httplib.RemoteDisconnected) as e: + self.log.debug("node %d submitblock raised exception: %s", node_index, e) + return False + except OSError as e: + self.log.debug("node %d submitblock raised OSError exception: errno=%s", node_index, e.errno) + if e.errno in [errno.EPIPE, errno.ECONNREFUSED, errno.ECONNRESET]: + # The node has likely crashed + return False + else: + # Unexpected exception, raise + raise + + # Use submitblock to sync node3's chain with the other nodes + # If submitblock fails, restart the node and get the new utxo hash. + def sync_node3blocks(self, block_hashes): + # If any nodes crash while updating, we'll compare utxo hashes to + # ensure recovery was successful. + node3_utxo_hash = self.nodes[3].gettxoutsetinfo()['hash_serialized_2'] + + # Retrieve all the blocks from node3 + blocks = [] + for block_hash in block_hashes: + blocks.append([block_hash, self.nodes[3].getblock(block_hash, 0)]) + + # Deliver each block to each other node + for i in range(3): + nodei_utxo_hash = None + self.log.debug("Syncing blocks to node %d", i) + for (block_hash, block) in blocks: + # Get the block from node3, and submit to node_i + self.log.debug("submitting block %s", block_hash) + if not self.submit_block_catch_error(i, block): + # TODO: more carefully check that the crash is due to -dbcrashratio + # (change the exit code perhaps, and check that here?) + wait_for_node_exit(i, timeout=30) + self.log.debug("Restarting node %d after block hash %s", i, block_hash) + nodei_utxo_hash = self.restart_node(i, block_hash) + assert nodei_utxo_hash is not None + self.restart_counts[i] += 1 + else: + # Clear it out after successful submitblock calls -- the cached + # utxo hash will no longer be correct + nodei_utxo_hash = None + + # Check that the utxo hash matches node3's utxo set + # NOTE: we only check the utxo set if we had to restart the node + # after the last block submitted: + # - checking the utxo hash causes a cache flush, which we don't + # want to do every time; so + # - we only update the utxo cache after a node restart, since flushing + # the cache is a no-op at that point + if nodei_utxo_hash is not None: + self.log.debug("Checking txoutsetinfo matches for node %d", i) + assert_equal(nodei_utxo_hash, node3_utxo_hash) + + # Verify that the utxo hash of each node matches node3. + # Restart any nodes that crash while querying. + def verify_utxo_hash(self): + node3_utxo_hash = self.nodes[3].gettxoutsetinfo()['hash_serialized_2'] + self.log.info("Verifying utxo hash matches for all nodes") + + for i in range(3): + try: + nodei_utxo_hash = self.nodes[i].gettxoutsetinfo()['hash_serialized_2'] + except OSError: + # probably a crash on db flushing + nodei_utxo_hash = self.restart_node(i, self.nodes[3].getbestblockhash()) + assert_equal(nodei_utxo_hash, node3_utxo_hash) + + + def generate_small_transactions(self, node, count, utxo_list): + FEE = 1000 # TODO: replace this with node relay fee based calculation + num_transactions = 0 + random.shuffle(utxo_list) + while len(utxo_list) >= 2 and num_transactions < count: + tx = CTransaction() + input_amount = 0 + for i in range(2): + utxo = utxo_list.pop() + tx.vin.append(CTxIn(COutPoint(int(utxo['txid'], 16), utxo['vout']))) + input_amount += int(utxo['amount']*COIN) + output_amount = (input_amount - FEE)//3 + + if output_amount <= 0: + # Sanity check -- if we chose inputs that are too small, skip + continue + + for i in range(3): + tx.vout.append(CTxOut(output_amount, hex_str_to_bytes(utxo['scriptPubKey']))) + + # Sign and send the transaction to get into the mempool + tx_signed_hex = node.signrawtransaction(ToHex(tx))['hex'] + node.sendrawtransaction(tx_signed_hex) + num_transactions += 1 + + def run_test(self): + + # Start by creating a lot of utxos on node3 + initial_height = self.nodes[3].getblockcount() + utxo_list = create_confirmed_utxos(self.nodes[3].getnetworkinfo()['relayfee'], self.nodes[3], 5000) + self.log.info("Prepped %d utxo entries", len(utxo_list)) + + # Sync these blocks with the other nodes + block_hashes_to_sync = [] + for height in range(initial_height+1, self.nodes[3].getblockcount()+1): + block_hashes_to_sync.append(self.nodes[3].getblockhash(height)) + + self.log.debug("Syncing %d blocks with other nodes", len(block_hashes_to_sync)) + # Syncing the blocks could cause nodes to crash, so the test begins here. + self.sync_node3blocks(block_hashes_to_sync) + + starting_tip_height = self.nodes[3].getblockcount() + + # Main test loop: + # each time through the loop, generate a bunch of transactions, + # and then either mine a single new block on the tip, or some-sized reorg. + for i in range(40): + self.log.info("Iteration %d, generating 2500 transactions %s", i, self.restart_counts) + # Generate a bunch of small-ish transactions + self.generate_small_transactions(self.nodes[3], 2500, utxo_list) + # Pick a random block between current tip, and starting tip + current_height = self.nodes[3].getblockcount() + random_height = random.randint(starting_tip_height, current_height) + self.log.debug("At height %d, considering height %d", current_height, random_height) + if random_height > starting_tip_height: + # Randomly reorg from this point with some probability (1/4 for + # tip, 1/5 for tip-1, ...) + if random.random() < 1.0/(current_height + 4 - random_height): + self.log.debug("Invalidating block at height %d", random_height) + self.nodes[3].invalidateblock(self.nodes[3].getblockhash(random_height)) + + # Now generate new blocks until we pass the old tip height + self.log.debug("Mining longer tip") + block_hashes = self.nodes[3].generate(current_height+1-self.nodes[3].getblockcount()) + self.log.debug("Syncing %d new blocks...", len(block_hashes)) + self.sync_node3blocks(block_hashes) + utxo_list = self.nodes[3].listunspent() + self.log.debug("Node3 utxo count: %d", len(utxo_list)) + + # Check that the utxo hashes agree with node3 + # Useful side effect: each utxo cache gets flushed here, so that we + # won't get crashes on shutdown at the end of the test. + self.verify_utxo_hash() + + # Check the test coverage + self.log.info("Restarted nodes: %s; crashes on restart: %d", self.restart_counts, self.crashed_on_restart) + + # If no nodes were restarted, we didn't test anything. + assert self.restart_counts != [0, 0, 0] + + # Make sure we tested the case of crash-during-recovery. + assert self.crashed_on_restart > 0 + + # Warn if any of the nodes escaped restart. + for i in range(3): + if self.restart_counts[i] == 0: + self.log.warn("Node %d never crashed during utxo flush!", i) + +if __name__ == "__main__": + ChainstateWriteCrashTest().main() diff --git a/test/functional/test_framework/util.py b/test/functional/test_framework/util.py index fa6388bf96..2a4f3104aa 100644 --- a/test/functional/test_framework/util.py +++ b/test/functional/test_framework/util.py @@ -249,6 +249,8 @@ def wait_for_bitcoind_start(process, datadir, i, rpchost=None): raise time.sleep(0.25) +def wait_for_node_exit(node_index, timeout): + bitcoind_processes[node_index].wait(timeout) def _start_node(i, dirname, extra_args=None, rpchost=None, timewait=None, binary=None, stderr=None): """Start a bitcoind and return RPC connection to it diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py index 9952835951..54f625514b 100755 --- a/test/functional/test_runner.py +++ b/test/functional/test_runner.py @@ -125,6 +125,7 @@ EXTENDED_SCRIPTS = [ # vv Tests less than 5m vv 'maxuploadtarget.py', 'mempool_packages.py', + 'dbcrash.py', # vv Tests less than 2m vv 'bip68-sequence.py', 'getblocktemplate_longpoll.py', |