diff options
author | Fabian Jahr <fjahr@protonmail.com> | 2024-03-10 01:19:04 +0100 |
---|---|---|
committer | Fabian Jahr <fjahr@protonmail.com> | 2024-05-21 13:38:07 +0200 |
commit | de95953d870c41436de67d56c93259bc66fe1434 (patch) | |
tree | 95b51a75cedb0de7f5245dabe5321993ceffeb27 | |
parent | eb0bdbdd753bca97120247b921fd29d606fea6e9 (diff) |
rpc: Optimize serialization disk space of dumptxoutset
Co-authored-by: Aurèle Oulès <aurele@oules.com>
Co-authored-by: TheCharlatan <seb.kung@gmail.com>
-rw-r--r-- | src/rpc/blockchain.cpp | 33 | ||||
-rw-r--r-- | src/test/validation_chainstatemanager_tests.cpp | 9 | ||||
-rw-r--r-- | src/validation.cpp | 111 | ||||
-rwxr-xr-x | test/functional/feature_assumeutxo.py | 13 | ||||
-rwxr-xr-x | test/functional/rpc_dumptxoutset.py | 2 |
5 files changed, 105 insertions, 63 deletions
diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp index eed004806a..6402819ce6 100644 --- a/src/rpc/blockchain.cpp +++ b/src/rpc/blockchain.cpp @@ -34,6 +34,7 @@ #include <rpc/server_util.h> #include <rpc/util.h> #include <script/descriptor.h> +#include <serialize.h> #include <streams.h> #include <sync.h> #include <txdb.h> @@ -2695,24 +2696,48 @@ UniValue CreateUTXOSnapshot( afile << metadata; COutPoint key; + Txid last_hash; Coin coin; unsigned int iter{0}; + size_t written_coins_count{0}; + std::vector<std::pair<uint32_t, Coin>> coins; + + auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) { + afile << last_hash; + WriteCompactSize(afile, coins.size()); + for (const auto& [n, coin] : coins) { + WriteCompactSize(afile, n); + afile << coin; + ++written_coins_count; + } + }; + pcursor->GetKey(key); + last_hash = key.hash; while (pcursor->Valid()) { if (iter % 5000 == 0) node.rpc_interruption_point(); ++iter; if (pcursor->GetKey(key) && pcursor->GetValue(coin)) { - afile << key; - afile << coin; + if (key.hash != last_hash) { + write_coins_to_file(afile, last_hash, coins, written_coins_count); + last_hash = key.hash; + coins.clear(); + } + coins.emplace_back(key.n, coin); } - pcursor->Next(); } + if (!coins.empty()) { + write_coins_to_file(afile, last_hash, coins, written_coins_count); + } + + CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count); + afile.fclose(); UniValue result(UniValue::VOBJ); - result.pushKV("coins_written", maybe_stats->coins_count); + result.pushKV("coins_written", written_coins_count); result.pushKV("base_hash", tip->GetBlockHash().ToString()); result.pushKV("base_height", tip->nHeight); result.pushKV("path", path.utf8string()); diff --git a/src/test/validation_chainstatemanager_tests.cpp b/src/test/validation_chainstatemanager_tests.cpp index 4bf66a55eb..1f6b7368a2 100644 --- a/src/test/validation_chainstatemanager_tests.cpp +++ b/src/test/validation_chainstatemanager_tests.cpp @@ -226,10 +226,13 @@ struct SnapshotTestSetup : TestChain100Setup { // A UTXO is missing but count is correct metadata.m_coins_count -= 1; - COutPoint outpoint; + Txid txid; + auto_infile >> txid; + // coins size + (void)ReadCompactSize(auto_infile); + // vout index + (void)ReadCompactSize(auto_infile); Coin coin; - - auto_infile >> outpoint; auto_infile >> coin; })); diff --git a/src/validation.cpp b/src/validation.cpp index f57851b4f7..e6199e5cf9 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -5660,69 +5660,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot( return false; } - COutPoint outpoint; - Coin coin; const uint64_t coins_count = metadata.m_coins_count; uint64_t coins_left = metadata.m_coins_count; - LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString()); + LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString()); int64_t coins_processed{0}; while (coins_left > 0) { try { - coins_file >> outpoint; - coins_file >> coin; - } catch (const std::ios_base::failure&) { - LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n", - coins_count - coins_left); - return false; - } - if (coin.nHeight > base_height || - outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash - ) { - LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n", - coins_count - coins_left); - return false; - } - if (!MoneyRange(coin.out.nValue)) { - LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n", - coins_count - coins_left); - return false; - } + Txid txid; + coins_file >> txid; + size_t coins_per_txid{0}; + coins_per_txid = ReadCompactSize(coins_file); + + if (coins_per_txid > coins_left) { + LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n"); + return false; + } - coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin)); + for (size_t i = 0; i < coins_per_txid; i++) { + COutPoint outpoint; + Coin coin; + outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file)); + outpoint.hash = txid; + coins_file >> coin; + if (coin.nHeight > base_height || + outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash + ) { + LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n", + coins_count - coins_left); + return false; + } + if (!MoneyRange(coin.out.nValue)) { + LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n", + coins_count - coins_left); + return false; + } + coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin)); - --coins_left; - ++coins_processed; + --coins_left; + ++coins_processed; - if (coins_processed % 1000000 == 0) { - LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n", - coins_processed, - static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count), - coins_cache.DynamicMemoryUsage() / (1000 * 1000)); - } + if (coins_processed % 1000000 == 0) { + LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n", + coins_processed, + static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count), + coins_cache.DynamicMemoryUsage() / (1000 * 1000)); + } - // Batch write and flush (if we need to) every so often. - // - // If our average Coin size is roughly 41 bytes, checking every 120,000 coins - // means <5MB of memory imprecision. - if (coins_processed % 120000 == 0) { - if (m_interrupt) { - return false; - } + // Batch write and flush (if we need to) every so often. + // + // If our average Coin size is roughly 41 bytes, checking every 120,000 coins + // means <5MB of memory imprecision. + if (coins_processed % 120000 == 0) { + if (m_interrupt) { + return false; + } - const auto snapshot_cache_state = WITH_LOCK(::cs_main, - return snapshot_chainstate.GetCoinsCacheSizeState()); + const auto snapshot_cache_state = WITH_LOCK(::cs_main, + return snapshot_chainstate.GetCoinsCacheSizeState()); - if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) { - // This is a hack - we don't know what the actual best block is, but that - // doesn't matter for the purposes of flushing the cache here. We'll set this - // to its correct value (`base_blockhash`) below after the coins are loaded. - coins_cache.SetBestBlock(GetRandHash()); + if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) { + // This is a hack - we don't know what the actual best block is, but that + // doesn't matter for the purposes of flushing the cache here. We'll set this + // to its correct value (`base_blockhash`) below after the coins are loaded. + coins_cache.SetBestBlock(GetRandHash()); - // No need to acquire cs_main since this chainstate isn't being used yet. - FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false); + // No need to acquire cs_main since this chainstate isn't being used yet. + FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false); + } + } } + } catch (const std::ios_base::failure&) { + LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n", + coins_processed); + return false; } } @@ -5735,7 +5747,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot( bool out_of_coins{false}; try { - coins_file >> outpoint; + Txid txid; + coins_file >> txid; } catch (const std::ios_base::failure&) { // We expect an exception since we should be out of coins. out_of_coins = true; diff --git a/test/functional/feature_assumeutxo.py b/test/functional/feature_assumeutxo.py index 2842d82d80..204502e395 100755 --- a/test/functional/feature_assumeutxo.py +++ b/test/functional/feature_assumeutxo.py @@ -94,15 +94,16 @@ class AssumeutxoTest(BitcoinTestFramework): f.write(valid_snapshot_contents[32 + 8:]) expected_error(log_msg=f"bad snapshot - coins left over after deserializing 298 coins" if off == -1 else f"bad snapshot format or truncated snapshot after deserializing 299 coins") - self.log.info(" - snapshot file with alternated UTXO data") + + self.log.info(" - snapshot file with alternated but parsable UTXO data results in different hash") cases = [ # (content, offset, wrong_hash, custom_message) [b"\xff" * 32, 0, "7d52155c9a9fdc4525b637ef6170568e5dad6fabd0b1fdbb9432010b8453095b", None], # wrong outpoint hash - [(1).to_bytes(4, "little"), 32, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad", None], # wrong outpoint index - [b"\x81", 36, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8", None], # wrong coin code VARINT - [b"\x80", 36, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5", None], # another wrong coin code - [b"\x84\x58", 36, None, "[snapshot] bad snapshot data after deserializing 0 coins"], # wrong coin case with height 364 and coinbase 0 - [b"\xCA\xD2\x8F\x5A", 41, None, "[snapshot] bad snapshot data after deserializing 0 coins - bad tx out value"], # Amount exceeds MAX_MONEY + [b"\x01", 33, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad", None], # wrong outpoint index + [b"\x81", 34, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8", None], # wrong coin code VARINT + [b"\x80", 34, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5", None], # another wrong coin code + [b"\x84\x58", 34, None, "[snapshot] bad snapshot data after deserializing 0 coins"], # wrong coin case with height 364 and coinbase 0 + [b"\xCA\xD2\x8F\x5A", 39, None, "[snapshot] bad snapshot data after deserializing 0 coins - bad tx out value"], # Amount exceeds MAX_MONEY ] for content, offset, wrong_hash, custom_message in cases: diff --git a/test/functional/rpc_dumptxoutset.py b/test/functional/rpc_dumptxoutset.py index 1ea6cf52d1..9853c11b75 100755 --- a/test/functional/rpc_dumptxoutset.py +++ b/test/functional/rpc_dumptxoutset.py @@ -43,7 +43,7 @@ class DumptxoutsetTest(BitcoinTestFramework): # UTXO snapshot hash should be deterministic based on mocked time. assert_equal( sha256sum_file(str(expected_path)).hex(), - 'b1bacb602eacf5fbc9a7c2ef6eeb0d229c04e98bdf0c2ea5929012cd0eae3830') + '3263fc0311ea46415b85513a59ad8fe67806b3cdce66147175ecb9da768d4a99') assert_equal( out['txoutset_hash'], 'a0b7baa3bf5ccbd3279728f230d7ca0c44a76e9923fca8f32dbfd08d65ea496a') |