aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: fanquake <fanquake@gmail.com> 2022-07-29 15:43:21 +0100
committer: fanquake <fanquake@gmail.com> 2022-07-29 15:47:23 +0100
commit: 5871b5b5ab57a0caf9b7514eb162c491c83281d5 (patch)
tree: b2ca982303e301ae389a0bef181432b9377e5500
parent: b1c8ea45c9e0b658c86ed0bbbf6e0fe44cd40f1c (diff)
parent: dd065dae9fcebd6806ff67703ffa8128e80b97cc (diff)
download: bitcoin-5871b5b5ab57a0caf9b7514eb162c491c83281d5.tar.xz
Merge bitcoin/bitcoin#25571: refactor: Make mapBlocksUnknownParent local, and rename it
dd065dae9fcebd6806ff67703ffa8128e80b97cc refactor: Make mapBlocksUnknownParent local, and rename it (Hennadii Stepanov)

Pull request description:

  This PR is a second attempt at #19594. This PR has two motivations:

  - Improve code hygiene by eliminating a global variable, `mapBlocksUnknownParent`
  - Fix fuzz test OOM when running too long ([see #19594 comment](https://github.com/bitcoin/bitcoin/pull/19594#issuecomment-958801638))

  A minor added advantage is to release `mapBlocksUnknownParent` memory when the reindexing phase is done. The current situation is somewhat similar to a memory leak because this map exists unused for the remaining lifetime of the process. It's true that this map should be empty of data elements after use, but its internal metadata (indexing structures, etc.) can have non-trivial size because there can be many thousands of simultaneous elements in this map.

  This PR helps our efforts to reduce the use of global variables. This variable isn't just global, it's hidden inside a function (it looks like a local variable but has the `static` attribute). This global variable exists because the `-reindex` processing code calls `LoadExternalBlockFile()` multiple times (once for each block file), but that function must preserve some state between calls (the `mapBlocksUnknownParent` map). This PR fixes this by allocating this map as a local variable in the caller's scope and passing it in on each call. When reindexing completes, the map goes out of scope and is deallocated.

  I tested this manually by reindexing on mainnet and signet. Also, the existing `feature_reindex.py` functional test passes.

ACKs for top commit:
  mzumsande:
    re-ACK dd065dae9fcebd6806ff67703ffa8128e80b97cc
  theStack:
    re-ACK dd065dae9fcebd6806ff67703ffa8128e80b97cc
  shaavan:
    reACK dd065dae9fcebd6806ff67703ffa8128e80b97cc

Tree-SHA512: 9cd20e44d2fa1096dd405bc107bc065ea8f904f5b3f63080341b08d8cf57b790df565f58815c2f331377d044d5306708b4bf6bdfc5ef8d0ed85d8e97d744732c
-rw-r--r-- src/node/blockstorage.cpp 6
-rw-r--r-- src/test/fuzz/load_external_block_file.cpp 11
-rw-r--r-- src/validation.cpp 23
-rw-r--r-- src/validation.h 32
4 files changed, 60 insertions, 12 deletions
diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index bac05f6be2..601d0bdf58 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -21,6 +21,7 @@
#include <util/system.h>
#include <validation.h>
+#include <map>
#include <unordered_map>
namespace node {
@@ -834,6 +835,9 @@ void ThreadImport(ChainstateManager& chainman, std::vector<fs::path> vImportFile
// -reindex
if (fReindex) {
int nFile = 0;
+ // Map of disk positions for blocks with unknown parent (only used for reindex);
+ // parent hash -> child disk position, multiple children can have the same parent.
+ std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
while (true) {
FlatFilePos pos(nFile, 0);
if (!fs::exists(GetBlockPosFilename(pos))) {
@@ -844,7 +848,7 @@ void ThreadImport(ChainstateManager& chainman, std::vector<fs::path> vImportFile
break; // This error is logged in OpenBlockFile
}
LogPrintf("Reindexing block file blk%05u.dat...\n", (unsigned int)nFile);
- chainman.ActiveChainstate().LoadExternalBlockFile(file, &pos);
+ chainman.ActiveChainstate().LoadExternalBlockFile(file, &pos, &blocks_with_unknown_parent);
if (ShutdownRequested()) {
LogPrintf("Shutdown requested. Exit %s\n", __func__);
return;
diff --git a/src/test/fuzz/load_external_block_file.cpp b/src/test/fuzz/load_external_block_file.cpp
index bfa977520b..f4b7dc08fd 100644
--- a/src/test/fuzz/load_external_block_file.cpp
+++ b/src/test/fuzz/load_external_block_file.cpp
@@ -31,6 +31,13 @@ FUZZ_TARGET_INIT(load_external_block_file, initialize_load_external_block_file)
if (fuzzed_block_file == nullptr) {
return;
}
- FlatFilePos flat_file_pos;
- g_setup->m_node.chainman->ActiveChainstate().LoadExternalBlockFile(fuzzed_block_file, fuzzed_data_provider.ConsumeBool() ? &flat_file_pos : nullptr);
+ if (fuzzed_data_provider.ConsumeBool()) {
+ // Corresponds to the -reindex case (track orphan blocks across files).
+ FlatFilePos flat_file_pos;
+ std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
+ g_setup->m_node.chainman->ActiveChainstate().LoadExternalBlockFile(fuzzed_block_file, &flat_file_pos, &blocks_with_unknown_parent);
+ } else {
+ // Corresponds to the -loadblock= case (orphan blocks aren't tracked across files).
+ g_setup->m_node.chainman->ActiveChainstate().LoadExternalBlockFile(fuzzed_block_file);
+ }
}
diff --git a/src/validation.cpp b/src/validation.cpp
index d64ef4df0b..17211956f5 100644
--- a/src/validation.cpp
+++ b/src/validation.cpp
@@ -57,6 +57,7 @@
#include <warnings.h>
#include <algorithm>
+#include <cassert>
#include <chrono>
#include <deque>
#include <numeric>
@@ -4256,11 +4257,16 @@ bool CChainState::LoadGenesisBlock()
return true;
}
-void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
+void CChainState::LoadExternalBlockFile(
+ FILE* fileIn,
+ FlatFilePos* dbp,
+ std::multimap<uint256, FlatFilePos>* blocks_with_unknown_parent)
{
AssertLockNotHeld(m_chainstate_mutex);
- // Map of disk positions for blocks with unknown parent (only used for reindex)
- static std::multimap<uint256, FlatFilePos> mapBlocksUnknownParent;
+
+ // Either both should be specified (-reindex), or neither (-loadblock).
+ assert(!dbp == !blocks_with_unknown_parent);
+
int64_t nStart = GetTimeMillis();
int nLoaded = 0;
@@ -4310,8 +4316,9 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
if (hash != m_params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(block.hashPrevBlock)) {
LogPrint(BCLog::REINDEX, "%s: Out of order block %s, parent %s not known\n", __func__, hash.ToString(),
block.hashPrevBlock.ToString());
- if (dbp)
- mapBlocksUnknownParent.insert(std::make_pair(block.hashPrevBlock, *dbp));
+ if (dbp && blocks_with_unknown_parent) {
+ blocks_with_unknown_parent->emplace(block.hashPrevBlock, *dbp);
+ }
continue;
}
@@ -4340,13 +4347,15 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
NotifyHeaderTip(*this);
+ if (!blocks_with_unknown_parent) continue;
+
// Recursively process earlier encountered successors of this block
std::deque<uint256> queue;
queue.push_back(hash);
while (!queue.empty()) {
uint256 head = queue.front();
queue.pop_front();
- std::pair<std::multimap<uint256, FlatFilePos>::iterator, std::multimap<uint256, FlatFilePos>::iterator> range = mapBlocksUnknownParent.equal_range(head);
+ auto range = blocks_with_unknown_parent->equal_range(head);
while (range.first != range.second) {
std::multimap<uint256, FlatFilePos>::iterator it = range.first;
std::shared_ptr<CBlock> pblockrecursive = std::make_shared<CBlock>();
@@ -4361,7 +4370,7 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
}
}
range.first++;
- mapBlocksUnknownParent.erase(it);
+ blocks_with_unknown_parent->erase(it);
NotifyHeaderTip(*this);
}
}
diff --git a/src/validation.h b/src/validation.h
index a44dbd9c7a..9fef69799b 100644
--- a/src/validation.h
+++ b/src/validation.h
@@ -575,8 +575,36 @@ public:
bool ResizeCoinsCaches(size_t coinstip_size, size_t coinsdb_size)
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
- /** Import blocks from an external file */
- void LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp = nullptr)
+ /**
+ * Import blocks from an external file
+ *
+ * During reindexing, this function is called for each block file (datadir/blocks/blk?????.dat).
+ * It reads all blocks contained in the given file and attempts to process them (add them to the
+ * block index). The blocks may be out of order within each file and across files. Often this
+ * function reads a block but finds that its parent hasn't been read yet, so the block can't be
+ * processed yet. The function will add an entry to the blocks_with_unknown_parent map (which is
+ * passed as an argument), so that when the block's parent is later read and processed, this
+ * function can re-read the child block from disk and process it.
+ *
+ * Because a block's parent may be in a later file, not just later in the same file, the
+ * blocks_with_unknown_parent map must be passed in and out with each call. It's a multimap,
+ * rather than just a map, because multiple blocks may have the same parent (when chain splits
+ * or stale blocks exist). It maps from parent-hash to child-disk-position.
+ *
+ * This function can also be used to read blocks from user-specified block files using the
+ * -loadblock= option. There's no unknown-parent tracking, so the last two arguments are omitted.
+ *
+ *
+ * @param[in] fileIn FILE handle to file containing blocks to read
+ * @param[in] dbp (optional) Disk block position (only for reindex)
+ * @param[in,out] blocks_with_unknown_parent (optional) Map of disk positions for blocks with
+ * unknown parent, key is parent block hash
+ * (only used for reindex)
+ * */
+ void LoadExternalBlockFile(
+ FILE* fileIn,
+ FlatFilePos* dbp = nullptr,
+ std::multimap<uint256, FlatFilePos>* blocks_with_unknown_parent = nullptr)
EXCLUSIVE_LOCKS_REQUIRED(!m_chainstate_mutex);
/**