aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@protonmail.com>2021-02-12 10:37:35 +0100
committerWladimir J. van der Laan <laanwj@protonmail.com>2021-02-12 10:47:41 +0100
commit8d82eddee640ee7b4658d1df15a0002dbdfcaff6 (patch)
treeb2faecd271ca47a56a71cdcd06c114869a2dd591
parent937dfa8398736b3138e3e667679c4878566f2866 (diff)
parente987ae5a554c9952812746c29f2766bacea4b727 (diff)
downloadbitcoin-8d82eddee640ee7b4658d1df15a0002dbdfcaff6.tar.xz
Merge #19145: Add hash_type MUHASH for gettxoutsetinfo
e987ae5a554c9952812746c29f2766bacea4b727 test: Add test for deterministic UTXO set hash results (Fabian Jahr) 6ccc8fc067bf516cda7bc5d7d721945be5ac2003 test: Add test for gettxoutsetinfo RPC with MuHash (Fabian Jahr) 0d3b2f643d7da3202c0a0e757539208c4aa7c450 rpc: Add hash_type MUHASH to gettxoutsetinfo (Fabian Jahr) 2474645f3b15687e7f196b89eb935d6e6a98a9da refactor: Separate hash and stats calculation in coinstats (Fabian Jahr) a1fcceac69097a8e6540a6fd8121a5d53022528f refactor: Improve encapsulation between MuHash3072 and Num3072 (Fabian Jahr) Pull request description: This is another Pr in the series PRs for Coinstatsindex (see overview in #18000). This PR adds the `hash_type` option `muhash` to `gettxoutsetinfo` through which the user can calculate the serialized muhash of the utxo set. This PR does not use the index yet. ACKs for top commit: Sjors: tACK e987ae5 achow101: ACK e987ae5a554c9952812746c29f2766bacea4b727 jonatack: Tested re-ACK e987ae5a554c9952812746c29f2766bacea4b727 per `git diff 3506d90 e987ae5`, reviewed diff, debug built, ran gettxoutsetinfo -signet and help on this branch vs master, at height 23127 both returned `hash_serialized_2` of `2b72d65f3b6efb2311f58374ea2b939abf49684d44f4bafda45faa3b5452a454` and this branch returned `muhash` of `c9f1ff12d345ccf9939c6bbf087e6f7399b6115adee1569287e9c5c43dbb475c` ryanofsky: Code review ACK e987ae5a554c9952812746c29f2766bacea4b727. Looks very good. I left one suggestion to simplify code, but feel free to ignore it here and maybe consider it for later since PR has already had a lot of review. Tree-SHA512: 9a739ce375e73749fa69a467262b60d3e5314ef384e2d7150b3bbc8e4125cd9fd1db95306623bb9a632fcbaf5d9d2bf2f5cc43bf717d4ff5e2c9c4b52dd9296c
-rw-r--r--src/crypto/muhash.cpp42
-rw-r--r--src/crypto/muhash.h7
-rw-r--r--src/node/coinstats.cpp63
-rw-r--r--src/node/coinstats.h1
-rw-r--r--src/rpc/blockchain.cpp23
-rw-r--r--src/rpc/util.cpp17
-rw-r--r--src/rpc/util.h2
-rwxr-xr-xtest/functional/feature_utxo_set_hash.py86
-rwxr-xr-xtest/functional/rpc_blockchain.py12
-rwxr-xr-xtest/functional/test_runner.py1
10 files changed, 195 insertions, 59 deletions
diff --git a/src/crypto/muhash.cpp b/src/crypto/muhash.cpp
index fbd14f9325..e5a0d4cb9c 100644
--- a/src/crypto/muhash.cpp
+++ b/src/crypto/muhash.cpp
@@ -17,7 +17,6 @@ namespace {
using limb_t = Num3072::limb_t;
using double_limb_t = Num3072::double_limb_t;
constexpr int LIMB_SIZE = Num3072::LIMB_SIZE;
-constexpr int LIMBS = Num3072::LIMBS;
/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */
constexpr limb_t MAX_PRIME_DIFF = 1103717;
@@ -123,7 +122,7 @@ inline void square_n_mul(Num3072& in_out, const int sq, const Num3072& mul)
} // namespace
-/** Indicates wether d is larger than the modulus. */
+/** Indicates whether d is larger than the modulus. */
bool Num3072::IsOverflow() const
{
if (this->limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) return false;
@@ -276,18 +275,33 @@ void Num3072::Divide(const Num3072& a)
if (this->IsOverflow()) this->FullReduce();
}
-Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
- Num3072 out{};
- uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
- unsigned char tmp[BYTE_SIZE];
- ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, BYTE_SIZE);
+Num3072::Num3072(const unsigned char (&data)[BYTE_SIZE]) {
+ for (int i = 0; i < LIMBS; ++i) {
+ if (sizeof(limb_t) == 4) {
+ this->limbs[i] = ReadLE32(data + 4 * i);
+ } else if (sizeof(limb_t) == 8) {
+ this->limbs[i] = ReadLE64(data + 8 * i);
+ }
+ }
+}
+
+void Num3072::ToBytes(unsigned char (&out)[BYTE_SIZE]) {
for (int i = 0; i < LIMBS; ++i) {
if (sizeof(limb_t) == 4) {
- out.limbs[i] = ReadLE32(tmp + 4 * i);
+ WriteLE32(out + i * 4, this->limbs[i]);
} else if (sizeof(limb_t) == 8) {
- out.limbs[i] = ReadLE64(tmp + 8 * i);
+ WriteLE64(out + i * 8, this->limbs[i]);
}
}
+}
+
+Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
+ unsigned char tmp[Num3072::BYTE_SIZE];
+
+ uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
+ ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, Num3072::BYTE_SIZE);
+ Num3072 out{tmp};
+
return out;
}
@@ -301,14 +315,8 @@ void MuHash3072::Finalize(uint256& out) noexcept
m_numerator.Divide(m_denominator);
m_denominator.SetToOne(); // Needed to keep the MuHash object valid
- unsigned char data[384];
- for (int i = 0; i < LIMBS; ++i) {
- if (sizeof(limb_t) == 4) {
- WriteLE32(data + i * 4, m_numerator.limbs[i]);
- } else if (sizeof(limb_t) == 8) {
- WriteLE64(data + i * 8, m_numerator.limbs[i]);
- }
- }
+ unsigned char data[Num3072::BYTE_SIZE];
+ m_numerator.ToBytes(data);
out = (CHashWriter(SER_DISK, 0) << data).GetSHA256();
}
diff --git a/src/crypto/muhash.h b/src/crypto/muhash.h
index 0c710007c4..c023a8b9d3 100644
--- a/src/crypto/muhash.h
+++ b/src/crypto/muhash.h
@@ -22,6 +22,7 @@ private:
Num3072 GetInverse() const;
public:
+ static constexpr size_t BYTE_SIZE = 384;
#ifdef HAVE___INT128
typedef unsigned __int128 double_limb_t;
@@ -48,8 +49,10 @@ public:
void Divide(const Num3072& a);
void SetToOne();
void Square();
+ void ToBytes(unsigned char (&out)[BYTE_SIZE]);
Num3072() { this->SetToOne(); };
+ Num3072(const unsigned char (&data)[BYTE_SIZE]);
SERIALIZE_METHODS(Num3072, obj)
{
@@ -78,7 +81,7 @@ public:
* arbitrary subset of the update operations, allowing them to be
* efficiently combined later.
*
- * Muhash does not support checking if an element is already part of the
+ * MuHash does not support checking if an element is already part of the
* set. That is why this class does not enforce the use of a set as the
* data it represents because there is no efficient way to do so.
* It is possible to add elements more than once and also to remove
@@ -91,8 +94,6 @@ public:
class MuHash3072
{
private:
- static constexpr size_t BYTE_SIZE = 384;
-
Num3072 m_numerator;
Num3072 m_denominator;
diff --git a/src/node/coinstats.cpp b/src/node/coinstats.cpp
index 56113cb731..b994e79391 100644
--- a/src/node/coinstats.cpp
+++ b/src/node/coinstats.cpp
@@ -6,6 +6,7 @@
#include <node/coinstats.h>
#include <coins.h>
+#include <crypto/muhash.h>
#include <hash.h>
#include <serialize.h>
#include <uint256.h>
@@ -24,31 +25,47 @@ static uint64_t GetBogoSize(const CScript& scriptPubKey)
scriptPubKey.size() /* scriptPubKey */;
}
-static void ApplyStats(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
+static void ApplyHash(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it)
{
- assert(!outputs.empty());
- ss << hash;
- ss << VARINT(outputs.begin()->second.nHeight * 2 + outputs.begin()->second.fCoinBase ? 1u : 0u);
- stats.nTransactions++;
- for (const auto& output : outputs) {
- ss << VARINT(output.first + 1);
- ss << output.second.out.scriptPubKey;
- ss << VARINT_MODE(output.second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED);
- stats.nTransactionOutputs++;
- stats.nTotalAmount += output.second.out.nValue;
- stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey);
+ if (it == outputs.begin()) {
+ ss << hash;
+ ss << VARINT(it->second.nHeight * 2 + it->second.fCoinBase ? 1u : 0u);
+ }
+
+ ss << VARINT(it->first + 1);
+ ss << it->second.out.scriptPubKey;
+ ss << VARINT_MODE(it->second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED);
+
+ if (it == std::prev(outputs.end())) {
+ ss << VARINT(0u);
}
- ss << VARINT(0u);
}
-static void ApplyStats(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
+static void ApplyHash(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it) {}
+
+static void ApplyHash(CCoinsStats& stats, MuHash3072& muhash, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it)
+{
+ COutPoint outpoint = COutPoint(hash, it->first);
+ Coin coin = it->second;
+
+ CDataStream ss(SER_DISK, PROTOCOL_VERSION);
+ ss << outpoint;
+ ss << static_cast<uint32_t>(coin.nHeight * 2 + coin.fCoinBase);
+ ss << coin.out;
+ muhash.Insert(MakeUCharSpan(ss));
+}
+
+template <typename T>
+static void ApplyStats(CCoinsStats& stats, T& hash_obj, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
{
assert(!outputs.empty());
stats.nTransactions++;
- for (const auto& output : outputs) {
+ for (auto it = outputs.begin(); it != outputs.end(); ++it) {
+ ApplyHash(stats, hash_obj, hash, outputs, it);
+
stats.nTransactionOutputs++;
- stats.nTotalAmount += output.second.out.nValue;
- stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey);
+ stats.nTotalAmount += it->second.out.nValue;
+ stats.nBogoSize += GetBogoSize(it->second.out.scriptPubKey);
}
}
@@ -104,6 +121,10 @@ bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats, CoinStatsHashType hash_t
CHashWriter ss(SER_GETHASH, PROTOCOL_VERSION);
return GetUTXOStats(view, stats, ss, interruption_point);
}
+ case(CoinStatsHashType::MUHASH): {
+ MuHash3072 muhash;
+ return GetUTXOStats(view, stats, muhash, interruption_point);
+ }
case(CoinStatsHashType::NONE): {
return GetUTXOStats(view, stats, nullptr, interruption_point);
}
@@ -116,10 +137,18 @@ static void PrepareHash(CHashWriter& ss, const CCoinsStats& stats)
{
ss << stats.hashBlock;
}
+// MuHash does not need the prepare step
+static void PrepareHash(MuHash3072& muhash, CCoinsStats& stats) {}
static void PrepareHash(std::nullptr_t, CCoinsStats& stats) {}
static void FinalizeHash(CHashWriter& ss, CCoinsStats& stats)
{
stats.hashSerialized = ss.GetHash();
}
+static void FinalizeHash(MuHash3072& muhash, CCoinsStats& stats)
+{
+ uint256 out;
+ muhash.Finalize(out);
+ stats.hashSerialized = out;
+}
static void FinalizeHash(std::nullptr_t, CCoinsStats& stats) {}
diff --git a/src/node/coinstats.h b/src/node/coinstats.h
index 7c56bfc2ad..f02b95235f 100644
--- a/src/node/coinstats.h
+++ b/src/node/coinstats.h
@@ -16,6 +16,7 @@ class CCoinsView;
enum class CoinStatsHashType {
HASH_SERIALIZED,
+ MUHASH,
NONE,
};
diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp
index 6e2a7c330e..5be02b1e4e 100644
--- a/src/rpc/blockchain.cpp
+++ b/src/rpc/blockchain.cpp
@@ -1026,13 +1026,26 @@ static RPCHelpMan pruneblockchain()
};
}
+CoinStatsHashType ParseHashType(const std::string& hash_type_input)
+{
+ if (hash_type_input == "hash_serialized_2") {
+ return CoinStatsHashType::HASH_SERIALIZED;
+ } else if (hash_type_input == "muhash") {
+ return CoinStatsHashType::MUHASH;
+ } else if (hash_type_input == "none") {
+ return CoinStatsHashType::NONE;
+ } else {
+ throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%s is not a valid hash_type", hash_type_input));
+ }
+}
+
static RPCHelpMan gettxoutsetinfo()
{
return RPCHelpMan{"gettxoutsetinfo",
"\nReturns statistics about the unspent transaction output set.\n"
"Note this call may take some time.\n",
{
- {"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'none'."},
+ {"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'muhash', 'none'."},
},
RPCResult{
RPCResult::Type::OBJ, "", "",
@@ -1042,7 +1055,8 @@ static RPCHelpMan gettxoutsetinfo()
{RPCResult::Type::NUM, "transactions", "The number of transactions with unspent outputs"},
{RPCResult::Type::NUM, "txouts", "The number of unspent transaction outputs"},
{RPCResult::Type::NUM, "bogosize", "A meaningless metric for UTXO set size"},
- {RPCResult::Type::STR_HEX, "hash_serialized_2", "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"},
+ {RPCResult::Type::STR_HEX, "hash_serialized_2", /* optional */ true, "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"},
+ {RPCResult::Type::STR_HEX, "muhash", /* optional */ true, "The serialized hash (only present if 'muhash' hash_type is chosen)"},
{RPCResult::Type::NUM, "disk_size", "The estimated size of the chainstate on disk"},
{RPCResult::Type::STR_AMOUNT, "total_amount", "The total amount"},
}},
@@ -1057,7 +1071,7 @@ static RPCHelpMan gettxoutsetinfo()
CCoinsStats stats;
::ChainstateActive().ForceFlushStateToDisk();
- const CoinStatsHashType hash_type = ParseHashType(request.params[0], CoinStatsHashType::HASH_SERIALIZED);
+ const CoinStatsHashType hash_type{request.params[0].isNull() ? CoinStatsHashType::HASH_SERIALIZED : ParseHashType(request.params[0].get_str())};
CCoinsView* coins_view = WITH_LOCK(cs_main, return &ChainstateActive().CoinsDB());
NodeContext& node = EnsureNodeContext(request.context);
@@ -1070,6 +1084,9 @@ static RPCHelpMan gettxoutsetinfo()
if (hash_type == CoinStatsHashType::HASH_SERIALIZED) {
ret.pushKV("hash_serialized_2", stats.hashSerialized.GetHex());
}
+ if (hash_type == CoinStatsHashType::MUHASH) {
+ ret.pushKV("muhash", stats.hashSerialized.GetHex());
+ }
ret.pushKV("disk_size", stats.nDiskSize);
ret.pushKV("total_amount", ValueFromAmount(stats.nTotalAmount));
} else {
diff --git a/src/rpc/util.cpp b/src/rpc/util.cpp
index bfdba5253c..e890c0108a 100644
--- a/src/rpc/util.cpp
+++ b/src/rpc/util.cpp
@@ -113,23 +113,6 @@ std::vector<unsigned char> ParseHexO(const UniValue& o, std::string strKey)
return ParseHexV(find_value(o, strKey), strKey);
}
-CoinStatsHashType ParseHashType(const UniValue& param, const CoinStatsHashType default_type)
-{
- if (param.isNull()) {
- return default_type;
- } else {
- std::string hash_type_input = param.get_str();
-
- if (hash_type_input == "hash_serialized_2") {
- return CoinStatsHashType::HASH_SERIALIZED;
- } else if (hash_type_input == "none") {
- return CoinStatsHashType::NONE;
- } else {
- throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%d is not a valid hash_type", hash_type_input));
- }
- }
-}
-
std::string HelpExampleCli(const std::string& methodname, const std::string& args)
{
return "> bitcoin-cli " + methodname + " " + args + "\n";
diff --git a/src/rpc/util.h b/src/rpc/util.h
index 444a013ca1..c54ce85f60 100644
--- a/src/rpc/util.h
+++ b/src/rpc/util.h
@@ -77,8 +77,6 @@ extern uint256 ParseHashO(const UniValue& o, std::string strKey);
extern std::vector<unsigned char> ParseHexV(const UniValue& v, std::string strName);
extern std::vector<unsigned char> ParseHexO(const UniValue& o, std::string strKey);
-CoinStatsHashType ParseHashType(const UniValue& param, const CoinStatsHashType default_type);
-
extern CAmount AmountFromValue(const UniValue& value);
extern std::string HelpExampleCli(const std::string& methodname, const std::string& args);
extern std::string HelpExampleRpc(const std::string& methodname, const std::string& args);
diff --git a/test/functional/feature_utxo_set_hash.py b/test/functional/feature_utxo_set_hash.py
new file mode 100755
index 0000000000..6e6046d84d
--- /dev/null
+++ b/test/functional/feature_utxo_set_hash.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020-2021 The Bitcoin Core developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+"""Test UTXO set hash value calculation in gettxoutsetinfo."""
+
+import struct
+
+from test_framework.blocktools import create_transaction
+from test_framework.messages import (
+ CBlock,
+ COutPoint,
+ FromHex,
+)
+from test_framework.muhash import MuHash3072
+from test_framework.test_framework import BitcoinTestFramework
+from test_framework.util import assert_equal
+
+class UTXOSetHashTest(BitcoinTestFramework):
+ def set_test_params(self):
+ self.num_nodes = 1
+ self.setup_clean_chain = True
+
+ def skip_test_if_missing_module(self):
+ self.skip_if_no_wallet()
+
+ def test_deterministic_hash_results(self):
+ self.log.info("Test deterministic UTXO set hash results")
+
+ # These depend on the setup_clean_chain option, the chain loaded from the cache
+ assert_equal(self.nodes[0].gettxoutsetinfo()['hash_serialized_2'], "b32ec1dda5a53cd025b95387aad344a801825fe46a60ff952ce26528f01d3be8")
+ assert_equal(self.nodes[0].gettxoutsetinfo("muhash")['muhash'], "dd5ad2a105c2d29495f577245c357409002329b9f4d6182c0af3dc2f462555c8")
+
+ def test_muhash_implementation(self):
+ self.log.info("Test MuHash implementation consistency")
+
+ node = self.nodes[0]
+
+ # Generate 100 blocks and remove the first since we plan to spend its
+ # coinbase
+ block_hashes = node.generate(100)
+ blocks = list(map(lambda block: FromHex(CBlock(), node.getblock(block, False)), block_hashes))
+ spending = blocks.pop(0)
+
+ # Create a spending transaction and mine a block which includes it
+ tx = create_transaction(node, spending.vtx[0].rehash(), node.getnewaddress(), amount=49)
+ txid = node.sendrawtransaction(hexstring=tx.serialize_with_witness().hex(), maxfeerate=0)
+
+ tx_block = node.generateblock(output=node.getnewaddress(), transactions=[txid])
+ blocks.append(FromHex(CBlock(), node.getblock(tx_block['hash'], False)))
+
+ # Serialize the outputs that should be in the UTXO set and add them to
+ # a MuHash object
+ muhash = MuHash3072()
+
+ for height, block in enumerate(blocks):
+ # The Genesis block coinbase is not part of the UTXO set and we
+ # spent the first mined block
+ height += 2
+
+ for tx in block.vtx:
+ for n, tx_out in enumerate(tx.vout):
+ coinbase = 1 if not tx.vin[0].prevout.hash else 0
+
+ # Skip witness commitment
+ if (coinbase and n > 0):
+ continue
+
+ data = COutPoint(int(tx.rehash(), 16), n).serialize()
+ data += struct.pack("<i", height * 2 + coinbase)
+ data += tx_out.serialize()
+
+ muhash.insert(data)
+
+ finalized = muhash.digest()
+ node_muhash = node.gettxoutsetinfo("muhash")['muhash']
+
+ assert_equal(finalized[::-1].hex(), node_muhash)
+
+ def run_test(self):
+ self.test_deterministic_hash_results()
+ self.test_muhash_implementation()
+
+
+if __name__ == '__main__':
+ UTXOSetHashTest().main()
diff --git a/test/functional/rpc_blockchain.py b/test/functional/rpc_blockchain.py
index 99be6b7b8e..84ca1b99c2 100755
--- a/test/functional/rpc_blockchain.py
+++ b/test/functional/rpc_blockchain.py
@@ -268,6 +268,18 @@ class BlockchainTest(BitcoinTestFramework):
res5 = node.gettxoutsetinfo(hash_type='none')
assert 'hash_serialized_2' not in res5
+ # hash_type muhash should return a different UTXO set hash.
+ res6 = node.gettxoutsetinfo(hash_type='muhash')
+ assert 'muhash' in res6
+ assert(res['hash_serialized_2'] != res6['muhash'])
+
+ # muhash should not be included in gettxoutset unless requested.
+ for r in [res, res2, res3, res4, res5]:
+ assert 'muhash' not in r
+
+ # Unknown hash_type raises an error
+ assert_raises_rpc_error(-8, "foohash is not a valid hash_type", node.gettxoutsetinfo, "foohash")
+
def _test_getblockheader(self):
node = self.nodes[0]
diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py
index 1e7d9e4b0d..354cacf95a 100755
--- a/test/functional/test_runner.py
+++ b/test/functional/test_runner.py
@@ -203,6 +203,7 @@ BASE_SCRIPTS = [
'feature_notifications.py',
'rpc_getblockfilter.py',
'rpc_invalidateblock.py',
+ 'feature_utxo_set_hash.py',
'feature_rbf.py',
'mempool_packages.py',
'mempool_package_onemore.py',