aboutsummaryrefslogtreecommitdiff
path: root/src/Makefile.am
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2018-06-04 09:11:18 +0200
committerWladimir J. van der Laan <laanwj@gmail.com>2018-06-04 12:11:53 +0200
commit0de7cc848e07d942e09bb3bb60189985e53d2bb5 (patch)
treec92cb5451129c63e4caa52f2aa3829a5f312a44f /src/Makefile.am
parent2722a1f8e935df86e698d717a35e7ffef65e6e02 (diff)
parent4defdfab94504018f822dc34a313ad26cedc8255 (diff)
downloadbitcoin-0de7cc848e07d942e09bb3bb60189985e53d2bb5.tar.xz
Merge #13191: Specialized double-SHA256 with 64 byte inputs with SSE4.1 and AVX2
4defdfab94504018f822dc34a313ad26cedc8255 [MOVEONLY] Move unused Merkle branch code to tests (Pieter Wuille) 4437d6e1f3107a20a8c7b66be8b4b972a82e3b28 8-way AVX2 implementation for double SHA256 on 64-byte inputs (Pieter Wuille) 230294bf5fdeba7213471cd0b795fb7aa36e5717 4-way SSE4.1 implementation for double SHA256 on 64-byte inputs (Pieter Wuille) 1f0e7ca09c9d7c5787c218156fa5096a1bdf2ea8 Use SHA256D64 in Merkle root computation (Pieter Wuille) d0c96328833127284574bfef26f96aa2e4afc91a Specialized double sha256 for 64 byte inputs (Pieter Wuille) 57f34630fb6c3e218bd19535ac607008cb894173 Refactor SHA256 code (Pieter Wuille) 0df017889b4f61860092e1d54e271092cce55f62 Benchmark Merkle root computation (Pieter Wuille) Pull request description: This introduces a framework for specialized double-SHA256 with 64 byte inputs. 4 different implementations are provided: * Generic C++ (reusing the normal SHA256 code) * Specialized C++ for 64-byte inputs, but no special instructions * 4-way using SSE4.1 intrinsics * 8-way using AVX2 intrinsics On my own system (AVX2 capable), I get these benchmarks for computing the Merkle root of 9001 leaves (supported lengths / special instructions / parallellism): * 7.2 ms with varsize/naive/1way (master, non-SSE4 hardware) * 5.8 ms with size64/naive/1way (this PR, non-SSE4 capable systems) * 4.8 ms with varsize/SSE4/1way (master, SSE4 hardware) * 2.9 ms with size64/SSE4/4way (this PR, SSE4 hardware) * 1.1 ms with size64/AVX2/8way (this PR, AVX2 hardware) Tree-SHA512: efa32d48b32820d9ce788ead4eb583949265be8c2e5f538c94bc914e92d131a57f8c1ee26c6f998e81fb0e30675d4e2eddc3360bcf632676249036018cff343e
Diffstat (limited to 'src/Makefile.am')
-rw-r--r--src/Makefile.am28
1 files changed, 27 insertions, 1 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 9b2ae36f6a..96e56915a6 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -30,6 +30,8 @@ LIBBITCOIN_CONSENSUS=libbitcoin_consensus.a
LIBBITCOIN_CLI=libbitcoin_cli.a
LIBBITCOIN_UTIL=libbitcoin_util.a
LIBBITCOIN_CRYPTO=crypto/libbitcoin_crypto.a
+LIBBITCOIN_CRYPTO_SSE41=crypto/libbitcoin_crypto_sse41.a
+LIBBITCOIN_CRYPTO_AVX2=crypto/libbitcoin_crypto_avx2.a
LIBBITCOINQT=qt/libbitcoinqt.a
LIBSECP256K1=secp256k1/libsecp256k1.la
@@ -50,6 +52,8 @@ $(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
# But to build the less dependent modules first, we manually select their order here:
EXTRA_LIBRARIES += \
$(LIBBITCOIN_CRYPTO) \
+ $(LIBBITCOIN_CRYPTO_SSE41) \
+ $(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_COMMON) \
$(LIBBITCOIN_CONSENSUS) \
@@ -289,6 +293,22 @@ if USE_ASM
crypto_libbitcoin_crypto_a_SOURCES += crypto/sha256_sse4.cpp
endif
+crypto_libbitcoin_crypto_sse41_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
+crypto_libbitcoin_crypto_sse41_a_CPPFLAGS = $(AM_CPPFLAGS)
+if ENABLE_SSE41
+crypto_libbitcoin_crypto_sse41_a_CXXFLAGS += $(SSE41_CXXFLAGS)
+crypto_libbitcoin_crypto_sse41_a_CPPFLAGS += -DENABLE_SSE41
+endif
+crypto_libbitcoin_crypto_sse41_a_SOURCES = crypto/sha256_sse41.cpp
+
+crypto_libbitcoin_crypto_avx2_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
+crypto_libbitcoin_crypto_avx2_a_CPPFLAGS = $(AM_CPPFLAGS)
+if ENABLE_AVX2
+crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
+crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
+endif
+crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp
+
# consensus: shared between all executables that validate any consensus rules.
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
@@ -411,6 +431,8 @@ bitcoind_LDADD = \
$(LIBBITCOIN_ZMQ) \
$(LIBBITCOIN_CONSENSUS) \
$(LIBBITCOIN_CRYPTO) \
+ $(LIBBITCOIN_CRYPTO_SSE41) \
+ $(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBLEVELDB) \
$(LIBLEVELDB_SSE42) \
$(LIBMEMENV) \
@@ -432,7 +454,9 @@ bitcoin_cli_LDADD = \
$(LIBBITCOIN_CLI) \
$(LIBUNIVALUE) \
$(LIBBITCOIN_UTIL) \
- $(LIBBITCOIN_CRYPTO)
+ $(LIBBITCOIN_CRYPTO) \
+ $(LIBBITCOIN_CRYPTO_SSE41) \
+ $(LIBBITCOIN_CRYPTO_AVX2)
bitcoin_cli_LDADD += $(BOOST_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(EVENT_LIBS)
#
@@ -453,6 +477,8 @@ bitcoin_tx_LDADD = \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_CONSENSUS) \
$(LIBBITCOIN_CRYPTO) \
+ $(LIBBITCOIN_CRYPTO_SSE41) \
+ $(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBSECP256K1)
bitcoin_tx_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)