diff options
author | Ava Chow <github@achow101.com> | 2024-05-23 12:31:23 -0400 |
---|---|---|
committer | Ava Chow <github@achow101.com> | 2024-05-23 12:31:23 -0400 |
commit | 413844f1c2a3d8f7cfef822f348f26df488b03c7 (patch) | |
tree | b35788e6e3895de7ec1d9a965d9acdd6a5f95208 /src/rpc/blockchain.cpp | |
parent | 915d7276e4060999bac2a42c533b6fb8bdbe5b3d (diff) | |
parent | 542e13b2937356810bda2c41be83c3b1675e2f2f (diff) | |
download | bitcoin-413844f1c2a3d8f7cfef822f348f26df488b03c7.tar.xz |
Merge bitcoin/bitcoin#29612: rpc: Optimize serialization and enhance metadata of dumptxoutset output
542e13b2937356810bda2c41be83c3b1675e2f2f rpc: Enhance metadata of the dumptxoutset output (Fabian Jahr)
4d8e5edbaa94805be41ae4c8aa2f4bf7aaa276fe assumeutxo: Add documentation on dumptxoutset serialization format (Fabian Jahr)
c14ed7f384075330361df636f40121cf25a066d6 assumeutxo: Add test for changed coin size value (Fabian Jahr)
de95953d870c41436de67d56c93259bc66fe1434 rpc: Optimize serialization disk space of dumptxoutset (Fabian Jahr)
Pull request description:
This is the second attempt at implementing the `dumptxoutset` space optimization suggested in #25675. Closes #25675.
This builds on the work done in #26045, addresses open feedback, adds some further improvements (most importantly usage of compact size), documentation, and an additional test.
The [original snapshot at height 830,000](https://github.com/bitcoin/bitcoin/pull/29551) came in at 10.82 GB. With this change, the same snapshot is 8.94 GB, a reduction of 17.4%.
This also enhances the metadata of the output file by adding the following data, which allows for better error handling and makes future upgrades easier:
- A newly introduced utxo set magic
- A version number
- The network magic
- The block height
ACKs for top commit:
achow101:
ACK 542e13b2937356810bda2c41be83c3b1675e2f2f
TheCharlatan:
Re-ACK 542e13b2937356810bda2c41be83c3b1675e2f2f
theStack:
ACK 542e13b2937356810bda2c41be83c3b1675e2f2f
Tree-SHA512: 0825d30e5c3c364062db3c6cbca4e3c680e6e6d3e259fa70c0c2b2a7020f24a47406a623582040988d5c7745b08649c31110df4c10656aa25f3f27eb35843d99
Diffstat (limited to 'src/rpc/blockchain.cpp')
-rw-r--r-- | src/rpc/blockchain.cpp | 56 |
1 file changed, 49 insertions, 7 deletions
diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp index 263d9f1e0a..a6c959797a 100644 --- a/src/rpc/blockchain.cpp +++ b/src/rpc/blockchain.cpp @@ -34,6 +34,7 @@ #include <rpc/server_util.h> #include <rpc/util.h> #include <script/descriptor.h> +#include <serialize.h> #include <streams.h> #include <sync.h> #include <txdb.h> @@ -2696,29 +2697,60 @@ UniValue CreateUTXOSnapshot( tip->nHeight, tip->GetBlockHash().ToString(), fs::PathToString(path), fs::PathToString(temppath))); - SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count}; + SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count}; afile << metadata; COutPoint key; + Txid last_hash; Coin coin; unsigned int iter{0}; + size_t written_coins_count{0}; + std::vector<std::pair<uint32_t, Coin>> coins; + + // To reduce space the serialization format of the snapshot avoids + // duplication of tx hashes. The code takes advantage of the guarantee by + // leveldb that keys are lexicographically sorted. + // In the coins vector we collect all coins that belong to a certain tx hash + // (key.hash) and when we have them all (key.hash != last_hash) we write + // them to file using the below lambda function. 
+ // See also https://github.com/bitcoin/bitcoin/issues/25675 + auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) { + afile << last_hash; + WriteCompactSize(afile, coins.size()); + for (const auto& [n, coin] : coins) { + WriteCompactSize(afile, n); + afile << coin; + ++written_coins_count; + } + }; + pcursor->GetKey(key); + last_hash = key.hash; while (pcursor->Valid()) { if (iter % 5000 == 0) node.rpc_interruption_point(); ++iter; if (pcursor->GetKey(key) && pcursor->GetValue(coin)) { - afile << key; - afile << coin; + if (key.hash != last_hash) { + write_coins_to_file(afile, last_hash, coins, written_coins_count); + last_hash = key.hash; + coins.clear(); + } + coins.emplace_back(key.n, coin); } - pcursor->Next(); } + if (!coins.empty()) { + write_coins_to_file(afile, last_hash, coins, written_coins_count); + } + + CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count); + afile.fclose(); UniValue result(UniValue::VOBJ); - result.pushKV("coins_written", maybe_stats->coins_count); + result.pushKV("coins_written", written_coins_count); result.pushKV("base_hash", tip->GetBlockHash().ToString()); result.pushKV("base_height", tip->nHeight); result.pushKV("path", path.utf8string()); @@ -2778,12 +2810,22 @@ static RPCHelpMan loadtxoutset() } SnapshotMetadata metadata; - afile >> metadata; + try { + afile >> metadata; + } catch (const std::ios_base::failure& e) { + throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what())); + } uint256 base_blockhash = metadata.m_base_blockhash; + int base_blockheight = metadata.m_base_blockheight; if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) { + auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights(); + std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); }); throw 
JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, " - "assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString())); + "assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.", + base_blockhash.ToString(), + base_blockheight, + heights_formatted)); } CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main, return chainman.m_blockman.LookupBlockIndex(base_blockhash)); |