Merge #13191: Specialized double-SHA256 with 64 byte inputs with SSE4.1 and AVX2

4defdfab94504018f822dc34a313ad26cedc8255 [MOVEONLY] Move unused Merkle branch code to tests (Pieter Wuille) 4437d6e1f3107a20a8c7b66be8b4b972a82e3b28 8-way AVX2 implementation for double SHA256 on 64-byte inputs (Pieter Wuille) 230294bf5fdeba7213471cd0b795fb7aa36e5717 4-way SSE4.1 implementation for double SHA256 on 64-byte inputs (Pieter Wuille) 1f0e7ca09c9d7c5787c218156fa5096a1bdf2ea8 Use SHA256D64 in Merkle root computation (Pieter Wuille) d0c96328833127284574bfef26f96aa2e4afc91a Specialized double sha256 for 64 byte inputs (Pieter Wuille) 57f34630fb6c3e218bd19535ac607008cb894173 Refactor SHA256 code (Pieter Wuille) 0df017889b4f61860092e1d54e271092cce55f62 Benchmark Merkle root computation (Pieter Wuille) Pull request description: This introduces a framework for specialized double-SHA256 with 64 byte inputs. 4 different implementations are provided: * Generic C++ (reusing the normal SHA256 code) * Specialized C++ for 64-byte inputs, but no special instructions * 4-way using SSE4.1 intrinsics * 8-way using AVX2 intrinsics On my own system (AVX2 capable), I get these benchmarks for computing the Merkle root of 9001 leaves (supported lengths / special instructions / parallellism): * 7.2 ms with varsize/naive/1way (master, non-SSE4 hardware) * 5.8 ms with size64/naive/1way (this PR, non-SSE4 capable systems) * 4.8 ms with varsize/SSE4/1way (master, SSE4 hardware) * 2.9 ms with size64/SSE4/4way (this PR, SSE4 hardware) * 1.1 ms with size64/AVX2/8way (this PR, AVX2 hardware) Tree-SHA512: efa32d48b32820d9ce788ead4eb583949265be8c2e5f538c94bc914e92d131a57f8c1ee26c6f998e81fb0e30675d4e2eddc3360bcf632676249036018cff343e
author: Wladimir J. van der Laan <laanwj@gmail.com> 2018-06-04 09:11:18 +0200
committer: Wladimir J. van der Laan <laanwj@gmail.com> 2018-06-04 12:11:53 +0200
commit: 0de7cc848e07d942e09bb3bb60189985e53d2bb5 (patch)
tree: c92cb5451129c63e4caa52f2aa3829a5f312a44f /src/consensus
parent: 2722a1f8e935df86e698d717a35e7ffef65e6e02 (diff)
parent: 4defdfab94504018f822dc34a313ad26cedc8255 (diff)
download: bitcoin-0de7cc848e07d942e09bb3bb60189985e53d2bb5.tar.xz
2 files changed, 17 insertions, 127 deletions
diff --git a/src/consensus/merkle.cpp b/src/consensus/merkle.cpp
index 74a9ebb2e3..07cd109cc1 100644
--- a/src/consensus/merkle.cpp
+++ b/src/consensus/merkle.cpp
@@ -42,118 +42,26 @@
        root.
 */
 
-/* This implements a constant-space merkle root/path calculator, limited to 2^32 leaves. */
-static void MerkleComputation(const std::vector<uint256>& leaves, uint256* proot, bool* pmutated, uint32_t branchpos, std::vector<uint256>* pbranch) {
-    if (pbranch) pbranch->clear();
-    if (leaves.size() == 0) {
-        if (pmutated) *pmutated = false;
-        if (proot) *proot = uint256();
-        return;
-    }
-    bool mutated = false;
-    // count is the number of leaves processed so far.
-    uint32_t count = 0;
-    // inner is an array of eagerly computed subtree hashes, indexed by tree
-    // level (0 being the leaves).
-    // For example, when count is 25 (11001 in binary), inner[4] is the hash of
-    // the first 16 leaves, inner[3] of the next 8 leaves, and inner[0] equal to
-    // the last leaf. The other inner entries are undefined.
-    uint256 inner[32];
-    // Which position in inner is a hash that depends on the matching leaf.
-    int matchlevel = -1;
-    // First process all leaves into 'inner' values.
-    while (count < leaves.size()) {
-        uint256 h = leaves[count];
-        bool matchh = count == branchpos;
-        count++;
-        int level;
-        // For each of the lower bits in count that are 0, do 1 step. Each
-        // corresponds to an inner value that existed before processing the
-        // current leaf, and each needs a hash to combine it.
-        for (level = 0; !(count & (((uint32_t)1) << level)); level++) {
-            if (pbranch) {
-                if (matchh) {
-                    pbranch->push_back(inner[level]);
-                } else if (matchlevel == level) {
-                    pbranch->push_back(h);
-                    matchh = true;
-                }
+
+uint256 ComputeMerkleRoot(std::vector<uint256> hashes, bool* mutated) {
+    bool mutation = false;
+    while (hashes.size() > 1) {
+        if (mutated) {
+            for (size_t pos = 0; pos + 1 < hashes.size(); pos += 2) {
+                if (hashes[pos] == hashes[pos + 1]) mutation = true;
             }
-            mutated |= (inner[level] == h);
-            CHash256().Write(inner[level].begin(), 32).Write(h.begin(), 32).Finalize(h.begin());
         }
-        // Store the resulting hash at inner position level.
-        inner[level] = h;
-        if (matchh) {
-            matchlevel = level;
+        if (hashes.size() & 1) {
+            hashes.push_back(hashes.back());
         }
+        SHA256D64(hashes[0].begin(), hashes[0].begin(), hashes.size() / 2);
+        hashes.resize(hashes.size() / 2);
     }
-    // Do a final 'sweep' over the rightmost branch of the tree to process
-    // odd levels, and reduce everything to a single top value.
-    // Level is the level (counted from the bottom) up to which we've sweeped.
-    int level = 0;
-    // As long as bit number level in count is zero, skip it. It means there
-    // is nothing left at this level.
-    while (!(count & (((uint32_t)1) << level))) {
-        level++;
-    }
-    uint256 h = inner[level];
-    bool matchh = matchlevel == level;
-    while (count != (((uint32_t)1) << level)) {
-        // If we reach this point, h is an inner value that is not the top.
-        // We combine it with itself (Bitcoin's special rule for odd levels in
-        // the tree) to produce a higher level one.
-        if (pbranch && matchh) {
-            pbranch->push_back(h);
-        }
-        CHash256().Write(h.begin(), 32).Write(h.begin(), 32).Finalize(h.begin());
-        // Increment count to the value it would have if two entries at this
-        // level had existed.
-        count += (((uint32_t)1) << level);
-        level++;
-        // And propagate the result upwards accordingly.
-        while (!(count & (((uint32_t)1) << level))) {
-            if (pbranch) {
-                if (matchh) {
-                    pbranch->push_back(inner[level]);
-                } else if (matchlevel == level) {
-                    pbranch->push_back(h);
-                    matchh = true;
-                }
-            }
-            CHash256().Write(inner[level].begin(), 32).Write(h.begin(), 32).Finalize(h.begin());
-            level++;
-        }
-    }
-    // Return result.
-    if (pmutated) *pmutated = mutated;
-    if (proot) *proot = h;
+    if (mutated) *mutated = mutation;
+    if (hashes.size() == 0) return uint256();
+    return hashes[0];
 }
 
-uint256 ComputeMerkleRoot(const std::vector<uint256>& leaves, bool* mutated) {
-    uint256 hash;
-    MerkleComputation(leaves, &hash, mutated, -1, nullptr);
-    return hash;
-}
-
-std::vector<uint256> ComputeMerkleBranch(const std::vector<uint256>& leaves, uint32_t position) {
-    std::vector<uint256> ret;
-    MerkleComputation(leaves, nullptr, nullptr, position, &ret);
-    return ret;
-}
-
-uint256 ComputeMerkleRootFromBranch(const uint256& leaf, const std::vector<uint256>& vMerkleBranch, uint32_t nIndex) {
-    uint256 hash = leaf;
-    for (std::vector<uint256>::const_iterator it = vMerkleBranch.begin(); it != vMerkleBranch.end(); ++it) {
-        if (nIndex & 1) {
-            hash = Hash(BEGIN(*it), END(*it), BEGIN(hash), END(hash));
-        } else {
-            hash = Hash(BEGIN(hash), END(hash), BEGIN(*it), END(*it));
-        }
-        nIndex >>= 1;
-    }
-    return hash;
-}
 
 uint256 BlockMerkleRoot(const CBlock& block, bool* mutated)
 {
@@ -162,7 +70,7 @@ uint256 BlockMerkleRoot(const CBlock& block, bool* mutated)
     for (size_t s = 0; s < block.vtx.size(); s++) {
         leaves[s] = block.vtx[s]->GetHash();
     }
-    return ComputeMerkleRoot(leaves, mutated);
+    return ComputeMerkleRoot(std::move(leaves), mutated);
 }
 
 uint256 BlockWitnessMerkleRoot(const CBlock& block, bool* mutated)
@@ -173,15 +81,6 @@ uint256 BlockWitnessMerkleRoot(const CBlock& block, bool* mutated)
     for (size_t s = 1; s < block.vtx.size(); s++) {
         leaves[s] = block.vtx[s]->GetWitnessHash();
     }
-    return ComputeMerkleRoot(leaves, mutated);
+    return ComputeMerkleRoot(std::move(leaves), mutated);
 }
 
-std::vector<uint256> BlockMerkleBranch(const CBlock& block, uint32_t position)
-{
-    std::vector<uint256> leaves;
-    leaves.resize(block.vtx.size());
-    for (size_t s = 0; s < block.vtx.size(); s++) {
-        leaves[s] = block.vtx[s]->GetHash();
-    }
-    return ComputeMerkleBranch(leaves, position);
-}
diff --git a/src/consensus/merkle.h b/src/consensus/merkle.h
index 0afb73adb5..01d75b1329 100644
--- a/src/consensus/merkle.h
+++ b/src/consensus/merkle.h
@@ -12,9 +12,7 @@
 #include <primitives/block.h>
 #include <uint256.h>
 
-uint256 ComputeMerkleRoot(const std::vector<uint256>& leaves, bool* mutated = nullptr);
-std::vector<uint256> ComputeMerkleBranch(const std::vector<uint256>& leaves, uint32_t position);
-uint256 ComputeMerkleRootFromBranch(const uint256& leaf, const std::vector<uint256>& branch, uint32_t position);
+uint256 ComputeMerkleRoot(std::vector<uint256> hashes, bool* mutated = nullptr);
 
 /*
  * Compute the Merkle root of the transactions in a block.
@@ -28,11 +26,4 @@ uint256 BlockMerkleRoot(const CBlock& block, bool* mutated = nullptr);
  */
 uint256 BlockWitnessMerkleRoot(const CBlock& block, bool* mutated = nullptr);
 
-/*
- * Compute the Merkle branch for the tree of transactions in a block, for a
- * given position.
- * This can be verified using ComputeMerkleRootFromBranch.
- */
-std::vector<uint256> BlockMerkleBranch(const CBlock& block, uint32_t position);
-
 #endif // BITCOIN_CONSENSUS_MERKLE_H
author	Wladimir J. van der Laan <laanwj@gmail.com>	2018-06-04 09:11:18 +0200
committer	Wladimir J. van der Laan <laanwj@gmail.com>	2018-06-04 12:11:53 +0200
commit	0de7cc848e07d942e09bb3bb60189985e53d2bb5 (patch)
tree	c92cb5451129c63e4caa52f2aa3829a5f312a44f /src/consensus
parent	2722a1f8e935df86e698d717a35e7ffef65e6e02 (diff)
parent	4defdfab94504018f822dc34a313ad26cedc8255 (diff)
download	bitcoin-0de7cc848e07d942e09bb3bb60189985e53d2bb5.tar.xz