aboutsummaryrefslogtreecommitdiff
path: root/src/rpc/blockchain.cpp
diff options
context:
space:
mode:
author: Ava Chow <github@achow101.com> 2024-05-23 12:31:23 -0400
committer: Ava Chow <github@achow101.com> 2024-05-23 12:31:23 -0400
commit: 413844f1c2a3d8f7cfef822f348f26df488b03c7 (patch)
tree: b35788e6e3895de7ec1d9a965d9acdd6a5f95208 /src/rpc/blockchain.cpp
parent: 915d7276e4060999bac2a42c533b6fb8bdbe5b3d (diff)
parent: 542e13b2937356810bda2c41be83c3b1675e2f2f (diff)
download: bitcoin-413844f1c2a3d8f7cfef822f348f26df488b03c7.tar.xz
Merge bitcoin/bitcoin#29612: rpc: Optimize serialization and enhance metadata of dumptxoutset output
542e13b2937356810bda2c41be83c3b1675e2f2f rpc: Enhance metadata of the dumptxoutset output (Fabian Jahr)
4d8e5edbaa94805be41ae4c8aa2f4bf7aaa276fe assumeutxo: Add documentation on dumptxoutset serialization format (Fabian Jahr)
c14ed7f384075330361df636f40121cf25a066d6 assumeutxo: Add test for changed coin size value (Fabian Jahr)
de95953d870c41436de67d56c93259bc66fe1434 rpc: Optimize serialization disk space of dumptxoutset (Fabian Jahr)

Pull request description:

  The second attempt at implementing the `dumptxoutset` space optimization as suggested in #25675. Closes #25675.

  This builds on the work done in #26045, addresses open feedback, adds some further improvements (most importantly usage of compact size), documentation, and an additional test.

  The [original snapshot at height 830,000](https://github.com/bitcoin/bitcoin/pull/29551) came in at 10.82 GB. With this change, the same snapshot is 8.94 GB, a reduction of 17.4%.

  This also enhances the metadata of the output file and adds the following data to allow for better error handling and make future upgrades easier:
  - A newly introduced utxo set magic
  - A version number
  - The network magic
  - The block height

ACKs for top commit:
  achow101:
    ACK 542e13b2937356810bda2c41be83c3b1675e2f2f
  TheCharlatan:
    Re-ACK 542e13b2937356810bda2c41be83c3b1675e2f2f
  theStack:
    ACK 542e13b2937356810bda2c41be83c3b1675e2f2f

Tree-SHA512: 0825d30e5c3c364062db3c6cbca4e3c680e6e6d3e259fa70c0c2b2a7020f24a47406a623582040988d5c7745b08649c31110df4c10656aa25f3f27eb35843d99
Diffstat (limited to 'src/rpc/blockchain.cpp')
-rw-r--r-- src/rpc/blockchain.cpp 56
1 files changed, 49 insertions, 7 deletions
diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp
index 263d9f1e0a..a6c959797a 100644
--- a/src/rpc/blockchain.cpp
+++ b/src/rpc/blockchain.cpp
@@ -34,6 +34,7 @@
#include <rpc/server_util.h>
#include <rpc/util.h>
#include <script/descriptor.h>
+#include <serialize.h>
#include <streams.h>
#include <sync.h>
#include <txdb.h>
@@ -2696,29 +2697,60 @@ UniValue CreateUTXOSnapshot(
tip->nHeight, tip->GetBlockHash().ToString(),
fs::PathToString(path), fs::PathToString(temppath)));
- SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count};
+ SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count};
afile << metadata;
COutPoint key;
+ Txid last_hash;
Coin coin;
unsigned int iter{0};
+ size_t written_coins_count{0};
+ std::vector<std::pair<uint32_t, Coin>> coins;
+
+ // To reduce space the serialization format of the snapshot avoids
+ // duplication of tx hashes. The code takes advantage of the guarantee by
+ // leveldb that keys are lexicographically sorted.
+ // In the coins vector we collect all coins that belong to a certain tx hash
+ // (key.hash) and when we have them all (key.hash != last_hash) we write
+ // them to file using the below lambda function.
+ // See also https://github.com/bitcoin/bitcoin/issues/25675
+ auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
+ afile << last_hash;
+ WriteCompactSize(afile, coins.size());
+ for (const auto& [n, coin] : coins) {
+ WriteCompactSize(afile, n);
+ afile << coin;
+ ++written_coins_count;
+ }
+ };
+ pcursor->GetKey(key);
+ last_hash = key.hash;
while (pcursor->Valid()) {
if (iter % 5000 == 0) node.rpc_interruption_point();
++iter;
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
- afile << key;
- afile << coin;
+ if (key.hash != last_hash) {
+ write_coins_to_file(afile, last_hash, coins, written_coins_count);
+ last_hash = key.hash;
+ coins.clear();
+ }
+ coins.emplace_back(key.n, coin);
}
-
pcursor->Next();
}
+ if (!coins.empty()) {
+ write_coins_to_file(afile, last_hash, coins, written_coins_count);
+ }
+
+ CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);
+
afile.fclose();
UniValue result(UniValue::VOBJ);
- result.pushKV("coins_written", maybe_stats->coins_count);
+ result.pushKV("coins_written", written_coins_count);
result.pushKV("base_hash", tip->GetBlockHash().ToString());
result.pushKV("base_height", tip->nHeight);
result.pushKV("path", path.utf8string());
@@ -2778,12 +2810,22 @@ static RPCHelpMan loadtxoutset()
}
SnapshotMetadata metadata;
- afile >> metadata;
+ try {
+ afile >> metadata;
+ } catch (const std::ios_base::failure& e) {
+ throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what()));
+ }
uint256 base_blockhash = metadata.m_base_blockhash;
+ int base_blockheight = metadata.m_base_blockheight;
if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) {
+ auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights();
+ std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); });
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, "
- "assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString()));
+ "assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.",
+ base_blockhash.ToString(),
+ base_blockheight,
+ heights_formatted));
}
CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main,
return chainman.m_blockman.LookupBlockIndex(base_blockhash));