From ee60e5625bf8a11c8e5509b9cea8b6465056c448 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 17 Nov 2015 17:35:40 +0100 Subject: Add merkle.{h,cpp}, generic merkle root/branch algorithm --- src/consensus/merkle.cpp | 152 +++++++++++++++++++++++++++++++++++++++++++++++ src/consensus/merkle.h | 17 ++++++ 2 files changed, 169 insertions(+) create mode 100644 src/consensus/merkle.cpp create mode 100644 src/consensus/merkle.h (limited to 'src/consensus') diff --git a/src/consensus/merkle.cpp b/src/consensus/merkle.cpp new file mode 100644 index 0000000000..6be9c26df2 --- /dev/null +++ b/src/consensus/merkle.cpp @@ -0,0 +1,152 @@ +#include "merkle.h" +#include "hash.h" +#include "utilstrencodings.h" + +/* WARNING! If you're reading this because you're learning about crypto + and/or designing a new system that will use merkle trees, keep in mind + that the following merkle tree algorithm has a serious flaw related to + duplicate txids, resulting in a vulnerability (CVE-2012-2459). + + The reason is that if the number of hashes in the list at a given time + is odd, the last one is duplicated before computing the next level (which + is unusual in Merkle trees). This results in certain sequences of + transactions leading to the same merkle root. For example, these two + trees: + + A A + / \ / \ + B C B C + / \ | / \ / \ + D E F D E F F + / \ / \ / \ / \ / \ / \ / \ + 1 2 3 4 5 6 1 2 3 4 5 6 5 6 + + for transaction lists [1,2,3,4,5,6] and [1,2,3,4,5,6,5,6] (where 5 and + 6 are repeated) result in the same root hash A (because the hash of both + of (F) and (F,F) is C). + + The vulnerability results from being able to send a block with such a + transaction list, with the same merkle root, and the same block hash as + the original without duplication, resulting in failed validation. If the + receiving node proceeds to mark that block as permanently invalid + however, it will fail to accept further unmodified (and thus potentially + valid) versions of the same block. We defend against this by detecting + the case where we would hash two identical hashes at the end of the list + together, and treating that identically to the block having an invalid + merkle root. Assuming no double-SHA256 collisions, this will detect all + known ways of changing the transactions without affecting the merkle + root. +*/ + +/* This implements a constant-space merkle root/path calculator, limited to 2^32 leaves. */ +static void MerkleComputation(const std::vector& leaves, uint256* proot, bool* pmutated, uint32_t branchpos, std::vector* pbranch) { + if (pbranch) pbranch->clear(); + if (leaves.size() == 0) { + if (pmutated) *pmutated = false; + if (proot) *proot = uint256(); + return; + } + bool mutated = false; + // count is the number of leaves processed so far. + uint32_t count = 0; + // inner is an array of eagerly computed subtree hashes, indexed by tree + // level (0 being the leaves). + // For example, when count is 25 (11001 in binary), inner[4] is the hash of + // the first 16 leaves, inner[3] of the next 8 leaves, and inner[0] equal to + // the last leaf. The other inner entries are undefined. + uint256 inner[32]; + // Which position in inner is a hash that depends on the matching leaf. + int matchlevel = -1; + // First process all leaves into 'inner' values. + while (count < leaves.size()) { + uint256 h = leaves[count]; + bool matchh = count == branchpos; + count++; + int level; + // For each of the lower bits in count that are 0, do 1 step. Each + // corresponds to an inner value that existed before processing the + // current leaf, and each needs a hash to combine it. + for (level = 0; !(count & (((uint32_t)1) << level)); level++) { + if (pbranch) { + if (matchh) { + pbranch->push_back(inner[level]); + } else if (matchlevel == level) { + pbranch->push_back(h); + matchh = true; + } + } + mutated |= (inner[level] == h); + CHash256().Write(inner[level].begin(), 32).Write(h.begin(), 32).Finalize(h.begin()); + } + // Store the resulting hash at inner position level. + inner[level] = h; + if (matchh) { + matchlevel = level; + } + } + // Do a final 'sweep' over the rightmost branch of the tree to process + // odd levels, and reduce everything to a single top value. + // Level is the level (counted from the bottom) up to which we've sweeped. + int level = 0; + // As long as bit number level in count is zero, skip it. It means there + // is nothing left at this level. + while (!(count & (((uint32_t)1) << level))) { + level++; + } + uint256 h = inner[level]; + bool matchh = matchlevel == level; + while (count != (((uint32_t)1) << level)) { + // If we reach this point, h is an inner value that is not the top. + // We combine it with itself (Bitcoin's special rule for odd levels in + // the tree) to produce a higher level one. + if (pbranch && matchh) { + pbranch->push_back(h); + } + CHash256().Write(h.begin(), 32).Write(h.begin(), 32).Finalize(h.begin()); + // Increment count to the value it would have if two entries at this + // level had existed. + count += (((uint32_t)1) << level); + level++; + // And propagate the result upwards accordingly. + while (!(count & (((uint32_t)1) << level))) { + if (pbranch) { + if (matchh) { + pbranch->push_back(inner[level]); + } else if (matchlevel == level) { + pbranch->push_back(h); + matchh = true; + } + } + CHash256().Write(inner[level].begin(), 32).Write(h.begin(), 32).Finalize(h.begin()); + level++; + } + } + // Return result. + if (pmutated) *pmutated = mutated; + if (proot) *proot = h; +} + +uint256 ComputeMerkleRoot(const std::vector& leaves, bool* mutated) { + uint256 hash; + MerkleComputation(leaves, &hash, mutated, -1, NULL); + return hash; +} + +std::vector ComputeMerkleBranch(const std::vector& leaves, uint32_t position) { + std::vector ret; + MerkleComputation(leaves, NULL, NULL, position, &ret); + return ret; +} + +uint256 ComputeMerkleRootFromBranch(const uint256& leaf, const std::vector& vMerkleBranch, uint32_t nIndex) { + uint256 hash = leaf; + for (std::vector::const_iterator it = vMerkleBranch.begin(); it != vMerkleBranch.end(); ++it) { + if (nIndex & 1) { + hash = Hash(BEGIN(*it), END(*it), BEGIN(hash), END(hash)); + } else { + hash = Hash(BEGIN(hash), END(hash), BEGIN(*it), END(*it)); + } + nIndex >>= 1; + } + return hash; +} diff --git a/src/consensus/merkle.h b/src/consensus/merkle.h new file mode 100644 index 0000000000..7fd13d3e43 --- /dev/null +++ b/src/consensus/merkle.h @@ -0,0 +1,17 @@ +// Copyright (c) 2015 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_MERKLE +#define BITCOIN_MERKLE + +#include +#include + +#include "uint256.h" + +uint256 ComputeMerkleRoot(const std::vector& leaves, bool* mutated = NULL); +std::vector ComputeMerkleBranch(const std::vector& leaves, uint32_t position); +uint256 ComputeMerkleRootFromBranch(const uint256& leaf, const std::vector& branch, uint32_t position); + +#endif -- cgit v1.2.3