aboutsummaryrefslogtreecommitdiff
path: root/src/Makefile.am
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2018-07-09 20:33:41 +0200
committerWladimir J. van der Laan <laanwj@gmail.com>2018-07-09 21:17:18 +0200
commit3a3eabef40979b5b136b8bd81a65c228c8b8895d (patch)
treef4afb0a04507acf4b330965b8f65f27b39eea77e /src/Makefile.am
parent7e74c54fed364a2974b6033da12de65abc07df93 (diff)
parent66b2cf1ccfad545a8ec3f2a854e23f647322bf30 (diff)
downloadbitcoin-3a3eabef40979b5b136b8bd81a65c228c8b8895d.tar.xz
Merge #13386: SHA256 implementations based on Intel SHA Extensions
66b2cf1ccfad545a8ec3f2a854e23f647322bf30 Use immintrin.h everywhere for intrinsics (Pieter Wuille) 4c935e2eee456ff66cdfb908b0edffdd1e8a6c04 Add SHA256 implementation using using Intel SHA intrinsics (Pieter Wuille) 268400d3188200c9e3dcd3482c4853354388a721 [Refactor] CPU feature detection logic for SHA256 (Pieter Wuille) Pull request description: Based on #13191. This adds SHA256 implementations that use Intel's SHA Extension instructions (using intrinsics). This needs GCC 4.9 or Clang 3.4. In addition to #13191, two extra implementations are provided: * (a) A variable-length SHA256 implementation using SHA extensions. * (b) A 2-way 64-byte input double-SHA256 implementation using SHA extensions. Benchmarks for 9001-element Merkle tree root computation on an AMD Ryzen 1800X system: * Using generic C++ code (pre-#10821): 6.1ms * Using SSE4 (master, #10821): 4.6ms * Using 4-way SSE4 specialized for 64-byte inputs (#13191): 2.8ms * Using 8-way AVX2 specialized for 64-byte inputs (#13191): 2.1ms * Using 2-way SHA-NI specialized for 64-byte inputs (this PR): 0.56ms Benchmarks for 32-byte SHA256 on the same system: * Using SSE4 (master, #10821): 190ns * Using SHA-NI (this PR): 53ns Benchmarks for 1000000-byte SHA256 on the same system: * Using SSE4 (master, #10821): 2.5ms * Using SHA-NI (this PR): 0.51ms Tree-SHA512: 2b319e33b22579f815d91f9daf7994a5e1e799c4f73c13e15070dd54ba71f3f6438ccf77ae9cbd1ce76f972d9cbeb5f0edfea3d86f101bbc1055db70e42743b7
Diffstat (limited to 'src/Makefile.am')
-rw-r--r--src/Makefile.am10
1 files changed, 10 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 70f59c6e84..458721293e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -52,6 +52,10 @@ if ENABLE_AVX2
LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.a
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2)
endif
+if ENABLE_SHANI
+LIBBITCOIN_CRYPTO_SHANI = crypto/libbitcoin_crypto_shani.a
+LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SHANI)
+endif
$(LIBSECP256K1): $(wildcard secp256k1/src/*.h) $(wildcard secp256k1/src/*.c) $(wildcard secp256k1/include/*)
$(AM_V_at)$(MAKE) $(AM_MAKEFLAGS) -C $(@D) $(@F)
@@ -318,6 +322,12 @@ crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp
+crypto_libbitcoin_crypto_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
+crypto_libbitcoin_crypto_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
+crypto_libbitcoin_crypto_shani_a_CXXFLAGS += $(SHANI_CXXFLAGS)
+crypto_libbitcoin_crypto_shani_a_CPPFLAGS += -DENABLE_SHANI
+crypto_libbitcoin_crypto_shani_a_SOURCES = crypto/sha256_shani.cpp
+
# consensus: shared between all executables that validate any consensus rules.
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)