diff options
author | Ryan Ofsky <ryan@ofsky.org> | 2023-09-29 12:50:15 -0400 |
---|---|---|
committer | Ryan Ofsky <ryan@ofsky.org> | 2023-09-29 13:29:51 -0400 |
commit | f562856d02d60625d914012d38df3582ea6b6c1f (patch) | |
tree | ddcdb2660ac463c5225a33ab01775fb06b74dcdf /src/node | |
parent | d18a8f6f6969487021b8fd50391a2a8d1dc29844 (diff) | |
parent | d8041d4e042957660827313951b18c8dd9a99a16 (diff) | |
download | bitcoin-f562856d02d60625d914012d38df3582ea6b6c1f.tar.xz |
Merge bitcoin/bitcoin#27866: blockstorage: Return on fatal flush errors
d8041d4e042957660827313951b18c8dd9a99a16 blockstorage: Return on fatal undo file flush error (TheCharlatan)
f0207e00303a1030eca795ede231e3c0d94df061 blockstorage: Return on fatal block file flush error (TheCharlatan)
5671c15f4520c6dc20e0805fd0b06157ff94bcd7 blockstorage: Mark FindBlockPos as nodiscard (TheCharlatan)
Pull request description:
The goal of this PR is to establish that fatal blockstorage flush errors should be treated as errors at their call site.
Prior to this patch `FlushBlockFile` may have failed without returning in `Chainstate::FlushStateToDisk`, leading to a potential write from `WriteBlockIndexDB` that may refer to a block that is not fully flushed to disk yet. By returning if either `FlushUndoFile` or `FlushBlockFile` fail, we ensure that no further write operations take place that may lead to an inconsistent database when crashing. Add `[[nodiscard]]` annotations to them such that they are not ignored in future.
Functions that call either `FlushUndoFile` or `FlushBlockFile`, need to handle these extra abort cases properly. Since `Chainstate::FlushStateToDisk` already produces an abort error in case of `WriteBlockIndexDB` failing, no extra logic for functions calling `Chainstate::FlushStateToDisk` is required.
Besides `Chainstate::FlushStateToDisk`, `FlushBlockFile` is also called by `FindBlockPos`, while `FlushUndoFile` is only called by `FlushBlockFile` and `WriteUndoDataForBlock`. For both these cases, the flush error is not further bubbled up. Instead, the error is logged and a comment is provided why bubbling up an error would be less desirable in these cases.
---
This pull request is part of a larger effort towards improving the shutdown / abort / fatal error handling in validation code. It is a first step towards implementing proper fatal error return type enforcement similar as proposed by theuni in this pull request [comment](https://github.com/bitcoin/bitcoin/pull/27711#issuecomment-1563561502). For ease of review of these critical changes, a first step would be checking that `AbortNode` leads to early and error-conveying returns at its call site. Further work for enforcing returns when `AbortNode` is called is done in https://github.com/bitcoin/bitcoin/pull/27862.
ACKs for top commit:
stickies-v:
re-ACK d8041d4
ryanofsky:
Code review ACK d8041d4e042957660827313951b18c8dd9a99a16
Tree-SHA512: 47ade9b873b15e567c8f60ca538d5a0daf32163e1031be3212a3a45eb492b866664b225f2787c9e40f3e0c089140157d8fd1039abc00c7bdfeec1b52ecd7e219
Diffstat (limited to 'src/node')
-rw-r--r-- | src/node/blockstorage.cpp | 40 | ||||
-rw-r--r-- | src/node/blockstorage.h | 11 |
2 files changed, 42 insertions, 9 deletions
diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index ae63d12ef7..5c3b7f958e 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -651,16 +651,19 @@ bool BlockManager::UndoReadFromDisk(CBlockUndo& blockundo, const CBlockIndex& in return true; } -void BlockManager::FlushUndoFile(int block_file, bool finalize) +bool BlockManager::FlushUndoFile(int block_file, bool finalize) { FlatFilePos undo_pos_old(block_file, m_blockfile_info[block_file].nUndoSize); if (!UndoFileSeq().Flush(undo_pos_old, finalize)) { m_opts.notifications.flushError("Flushing undo file to disk failed. This is likely the result of an I/O error."); + return false; } + return true; } -void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) +bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) { + bool success = true; LOCK(cs_LastBlockFile); if (m_blockfile_info.size() < 1) { @@ -668,17 +671,23 @@ void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) // chainstate init, when we call ChainstateManager::MaybeRebalanceCaches() (which // then calls FlushStateToDisk()), resulting in a call to this function before we // have populated `m_blockfile_info` via LoadBlockIndexDB(). - return; + return true; } assert(static_cast<int>(m_blockfile_info.size()) > m_last_blockfile); FlatFilePos block_pos_old(m_last_blockfile, m_blockfile_info[m_last_blockfile].nSize); if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) { m_opts.notifications.flushError("Flushing block file to disk failed. This is likely the result of an I/O error."); + success = false; } // we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks, // e.g. during IBD or a sync after a node going offline - if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo); + if (!fFinalize || finalize_undo) { + if (!FlushUndoFile(m_last_blockfile, finalize_undo)) { + success = false; + } + } + return success; } uint64_t BlockManager::CalculateCurrentUsage() @@ -771,7 +780,19 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne if (!fKnown) { LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s\n", m_last_blockfile, m_blockfile_info[m_last_blockfile].ToString()); } - FlushBlockFile(!fKnown, finalize_undo); + + // Do not propagate the return code. The flush concerns a previous block + // and undo file that has already been written to. If a flush fails + // here, and we crash, there is no expected additional block data + // inconsistency arising from the flush failure here. However, the undo + // data may be inconsistent after a crash if the flush is called during + // a reindex. A flush error might also leave some of the data files + // untrimmed. + if (!FlushBlockFile(!fKnown, finalize_undo)) { + LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, + "Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n", + m_last_blockfile, !fKnown, finalize_undo, nFile); + } m_last_blockfile = nFile; m_undo_height_in_last_blockfile = 0; // No undo data yet in the new file, so reset our undo-height tracking. } @@ -862,7 +883,14 @@ bool BlockManager::WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValid // with the block writes (usually when a synced up node is getting newly mined blocks) -- this case is caught in // the FindBlockPos function if (_pos.nFile < m_last_blockfile && static_cast<uint32_t>(block.nHeight) == m_blockfile_info[_pos.nFile].nHeightLast) { - FlushUndoFile(_pos.nFile, true); + // Do not propagate the return code, a failed flush here should not + // be an indication for a failed write. If it were propagated here, + // the caller would assume the undo data not to be written, when in + // fact it is. Note though, that a failed flush might leave the data + // file untrimmed. + if (!FlushUndoFile(_pos.nFile, true)) { + LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, "Failed to flush undo file %05i\n", _pos.nFile); + } } else if (_pos.nFile == m_last_blockfile && static_cast<uint32_t>(block.nHeight) > m_undo_height_in_last_blockfile) { m_undo_height_in_last_blockfile = block.nHeight; } diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index a89fa7f76e..9a1d44cc75 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -119,9 +119,14 @@ private: */ bool LoadBlockIndex() EXCLUSIVE_LOCKS_REQUIRED(cs_main); - void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); - void FlushUndoFile(int block_file, bool finalize = false); - bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); + + /** Return false if block file or undo file flushing fails. */ + [[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); + + /** Return false if undo file flushing fails. */ + [[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false); + + [[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize); FlatFileSeq BlockFileSeq() const; |