diff options
author | fanquake <fanquake@gmail.com> | 2023-10-04 16:00:06 +0100 |
---|---|---|
committer | fanquake <fanquake@gmail.com> | 2023-10-04 16:04:07 +0100 |
commit | 058488276f8dc244fe534ba45ec8dd2b4b198a2e (patch) | |
tree | 622549fc32aa55b2e6637917b728faa3ea967d71 /src/bench | |
parent | db7b5dfcc502a8a81c51f56fe753990ae8b3a202 (diff) | |
parent | ce6df7df9bab2405cfe7d6e382f5682cf30de476 (diff) |
Merge bitcoin/bitcoin#27598: bench: Add SHA256 implementation specific benchmarks
ce6df7df9bab2405cfe7d6e382f5682cf30de476 bench: Add SHA256 implementation specific benchmarks (Hennadii Stepanov)
5f72417176cfffece9a5aa11e97d5a6599c51e7a Add ability to specify SHA256 implementation for benchmark purposes (Hennadii Stepanov)
Pull request description:
On the master branch, only the best available `SHA256` implementation is being benchmarked. This PR makes `bench_bitcoin` benchmark all `SHA256` implementations that are available on the system.
For example:
- on Linux:
```
$ ./src/bench/bench_bitcoin -filter=SHA.*
Using the 'x86_shani(1way,2way)' SHA256 implementation
| ns/byte | byte/s | err% | total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
| 1.00 | 1,002,545,462.93 | 0.4% | 0.01 | `SHA1`
| 2.91 | 344,117,991.18 | 0.1% | 0.03 | `SHA256 using the 'standard' SHA256 implementation`
| 2.21 | 453,081,794.40 | 0.1% | 0.02 | `SHA256 using the 'sse4(1way),sse41(4way)' SHA256 implementation`
| 2.21 | 453,396,506.58 | 0.1% | 0.02 | `SHA256 using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation`
| 0.53 | 1,870,520,687.49 | 0.1% | 0.01 | `SHA256 using the 'x86_shani(1way,2way)' SHA256 implementation`
| 7.90 | 126,627,134.33 | 0.0% | 0.01 | `SHA256D64_1024 using the 'standard' SHA256 implementation`
| 3.94 | 253,850,206.07 | 0.0% | 0.01 | `SHA256D64_1024 using the 'sse4(1way),sse41(4way)' SHA256 implementation`
| 1.40 | 716,247,553.38 | 0.4% | 0.01 | `SHA256D64_1024 using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation`
| 1.26 | 792,706,270.13 | 0.9% | 0.01 | `SHA256D64_1024 using the 'x86_shani(1way,2way)' SHA256 implementation`
| 6.75 | 148,172,097.64 | 0.2% | 0.01 | `SHA256_32b using the 'standard' SHA256 implementation`
| 4.90 | 204,156,289.96 | 0.1% | 0.01 | `SHA256_32b using the 'sse4(1way),sse41(4way)' SHA256 implementation`
| 4.90 | 204,101,274.22 | 0.1% | 0.01 | `SHA256_32b using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation`
| 1.70 | 589,052,595.35 | 0.4% | 0.01 | `SHA256_32b using the 'x86_shani(1way,2way)' SHA256 implementation`
| 2.21 | 453,441,736.14 | 1.0% | 0.02 | `SHA3_256_1M`
| 1.92 | 521,807,101.48 | 1.0% | 0.02 | `SHA512`
```
- on macOS (M1):
```
% ./src/bench/bench_bitcoin -filter=SHA.\*
Using the 'arm_shani(1way,2way)' SHA256 implementation
| ns/byte | byte/s | err% | total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
| 1.36 | 737,644,274.00 | 0.6% | 0.02 | `SHA1`
| 3.08 | 324,556,777.15 | 0.2% | 0.03 | `SHA256 using the 'standard' SHA256 implementation`
| 0.45 | 2,198,104,135.18 | 0.3% | 0.01 | `SHA256 using the 'arm_shani(1way,2way)' SHA256 implementation`
| 8.84 | 113,131,299.18 | 0.0% | 0.01 | `SHA256D64_1024 using the 'standard' SHA256 implementation`
| 0.94 | 1,059,406,239.36 | 0.0% | 0.01 | `SHA256D64_1024 using the 'arm_shani(1way,2way)' SHA256 implementation`
| 6.17 | 162,050,659.51 | 0.2% | 0.01 | `SHA256_32b using the 'standard' SHA256 implementation`
| 1.15 | 866,637,155.98 | 0.0% | 0.01 | `SHA256_32b using the 'arm_shani(1way,2way)' SHA256 implementation`
| 1.69 | 592,636,491.59 | 0.2% | 0.02 | `SHA3_256_1M`
| 1.89 | 528,785,775.66 | 0.0% | 0.02 | `SHA512`
```
I found this useful while working on https://github.com/bitcoin/bitcoin/pull/24773.
ACKs for top commit:
martinus:
ACK ce6df7df9bab2405cfe7d6e382f5682cf30de476. I would have created a helper function in the test to avoid the code duplication for each test, but that's just me nitpicking. Here are results from my Ryzen 7950X, with `./src/bench/bench_bitcoin -filter="SHA256.*" -min-time=1000` (the results table itself is not included in this merge commit message).
MarcoFalke:
review ACK ce6df7df9bab2405cfe7d6e382f5682cf30de476 🏵
sipa:
ACK ce6df7df9bab2405cfe7d6e382f5682cf30de476
Tree-SHA512: e3de50e11b9a3a0d1e05583786041d4dc9afa2022e2115d75d6d1f63b11f62f6336f093001e53a631431d558c4dae29c596755c9e2d6aa78c382270116cc1f7f
Diffstat (limited to 'src/bench')
-rw-r--r-- | src/bench/crypto_hash.cpp | 127 |
1 file changed, 121 insertions, 6 deletions
diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp index cf8d807d7b..1685a120b4 100644 --- a/src/bench/crypto_hash.cpp +++ b/src/bench/crypto_hash.cpp @@ -13,6 +13,7 @@ #include <crypto/siphash.h> #include <hash.h> #include <random.h> +#include <tinyformat.h> #include <uint256.h> /* Number of bytes to hash per iteration */ @@ -36,13 +37,48 @@ static void SHA1(benchmark::Bench& bench) }); } -static void SHA256(benchmark::Bench& bench) +static void SHA256_STANDARD(benchmark::Bench& bench) { + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::STANDARD))); uint8_t hash[CSHA256::OUTPUT_SIZE]; std::vector<uint8_t> in(BUFFER_SIZE,0); bench.batch(in.size()).unit("byte").run([&] { CSHA256().Write(in.data(), in.size()).Finalize(hash); }); + SHA256AutoDetect(); +} + +static void SHA256_SSE4(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4))); + uint8_t hash[CSHA256::OUTPUT_SIZE]; + std::vector<uint8_t> in(BUFFER_SIZE,0); + bench.batch(in.size()).unit("byte").run([&] { + CSHA256().Write(in.data(), in.size()).Finalize(hash); + }); + SHA256AutoDetect(); +} + +static void SHA256_AVX2(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2))); + uint8_t hash[CSHA256::OUTPUT_SIZE]; + std::vector<uint8_t> in(BUFFER_SIZE,0); + bench.batch(in.size()).unit("byte").run([&] { + CSHA256().Write(in.data(), in.size()).Finalize(hash); + }); + SHA256AutoDetect(); +} + +static void SHA256_SHANI(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI))); + uint8_t hash[CSHA256::OUTPUT_SIZE]; + std::vector<uint8_t> in(BUFFER_SIZE,0); + bench.batch(in.size()).unit("byte").run([&] { + 
CSHA256().Write(in.data(), in.size()).Finalize(hash); + }); + SHA256AutoDetect(); } static void SHA3_256_1M(benchmark::Bench& bench) @@ -54,22 +90,92 @@ static void SHA3_256_1M(benchmark::Bench& bench) }); } -static void SHA256_32b(benchmark::Bench& bench) +static void SHA256_32b_STANDARD(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::STANDARD))); + std::vector<uint8_t> in(32,0); + bench.batch(in.size()).unit("byte").run([&] { + CSHA256() + .Write(in.data(), in.size()) + .Finalize(in.data()); + }); + SHA256AutoDetect(); +} + +static void SHA256_32b_SSE4(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4))); + std::vector<uint8_t> in(32,0); + bench.batch(in.size()).unit("byte").run([&] { + CSHA256() + .Write(in.data(), in.size()) + .Finalize(in.data()); + }); + SHA256AutoDetect(); +} + +static void SHA256_32b_AVX2(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2))); + std::vector<uint8_t> in(32,0); + bench.batch(in.size()).unit("byte").run([&] { + CSHA256() + .Write(in.data(), in.size()) + .Finalize(in.data()); + }); + SHA256AutoDetect(); +} + +static void SHA256_32b_SHANI(benchmark::Bench& bench) { + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI))); std::vector<uint8_t> in(32,0); bench.batch(in.size()).unit("byte").run([&] { CSHA256() .Write(in.data(), in.size()) .Finalize(in.data()); }); + SHA256AutoDetect(); +} + +static void SHA256D64_1024_STANDARD(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::STANDARD))); + std::vector<uint8_t> in(64 * 1024, 0); + 
bench.batch(in.size()).unit("byte").run([&] { + SHA256D64(in.data(), in.data(), 1024); + }); + SHA256AutoDetect(); +} + +static void SHA256D64_1024_SSE4(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4))); + std::vector<uint8_t> in(64 * 1024, 0); + bench.batch(in.size()).unit("byte").run([&] { + SHA256D64(in.data(), in.data(), 1024); + }); + SHA256AutoDetect(); +} + +static void SHA256D64_1024_AVX2(benchmark::Bench& bench) +{ + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2))); + std::vector<uint8_t> in(64 * 1024, 0); + bench.batch(in.size()).unit("byte").run([&] { + SHA256D64(in.data(), in.data(), 1024); + }); + SHA256AutoDetect(); } -static void SHA256D64_1024(benchmark::Bench& bench) +static void SHA256D64_1024_SHANI(benchmark::Bench& bench) { + bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI))); std::vector<uint8_t> in(64 * 1024, 0); bench.batch(in.size()).unit("byte").run([&] { SHA256D64(in.data(), in.data(), 1024); }); + SHA256AutoDetect(); } static void SHA512(benchmark::Bench& bench) @@ -152,13 +258,22 @@ static void MuHashPrecompute(benchmark::Bench& bench) BENCHMARK(BenchRIPEMD160, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA1, benchmark::PriorityLevel::HIGH); -BENCHMARK(SHA256, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_STANDARD, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_SSE4, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_AVX2, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_SHANI, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA512, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA3_256_1M, benchmark::PriorityLevel::HIGH); -BENCHMARK(SHA256_32b, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_32b_STANDARD, benchmark::PriorityLevel::HIGH); 
+BENCHMARK(SHA256_32b_SSE4, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_32b_AVX2, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256_32b_SHANI, benchmark::PriorityLevel::HIGH); BENCHMARK(SipHash_32b, benchmark::PriorityLevel::HIGH); -BENCHMARK(SHA256D64_1024, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256D64_1024_STANDARD, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256D64_1024_SSE4, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256D64_1024_AVX2, benchmark::PriorityLevel::HIGH); +BENCHMARK(SHA256D64_1024_SHANI, benchmark::PriorityLevel::HIGH); BENCHMARK(FastRandom_32bit, benchmark::PriorityLevel::HIGH); BENCHMARK(FastRandom_1bit, benchmark::PriorityLevel::HIGH); |