aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPieter Wuille <pieter@wuille.net>2022-04-22 12:28:28 -0400
committerPieter Wuille <pieter@wuille.net>2022-05-04 14:53:46 -0400
commit81c09ee45caecf8d9daf6766b94cebf54f3f08cd (patch)
treea369e53ad3d26375c5cc601b74cf3ddd8dfa845e
parent33aaf434af2740d33abc0f45f613a666092630c3 (diff)
Unroll the ChaCha20 inner loop for performance
-rw-r--r--src/crypto/chacha20.cpp48
1 files changed, 28 insertions, 20 deletions
diff --git a/src/crypto/chacha20.cpp b/src/crypto/chacha20.cpp
index f3ff4268ee..c7e12b0612 100644
--- a/src/crypto/chacha20.cpp
+++ b/src/crypto/chacha20.cpp
@@ -18,6 +18,8 @@ constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << c) | (
a += b; d = rotl32(d ^ a, 8); \
c += d; b = rotl32(b ^ c, 7);
+#define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0)
+
static const unsigned char sigma[] = "expand 32-byte k";
static const unsigned char tau[] = "expand 16-byte k";
@@ -119,16 +121,19 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
x13 = j13;
x14 = j14;
x15 = j15;
- for (i = 20;i > 0;i -= 2) {
- QUARTERROUND( x0, x4, x8,x12)
- QUARTERROUND( x1, x5, x9,x13)
- QUARTERROUND( x2, x6,x10,x14)
- QUARTERROUND( x3, x7,x11,x15)
- QUARTERROUND( x0, x5,x10,x15)
- QUARTERROUND( x1, x6,x11,x12)
- QUARTERROUND( x2, x7, x8,x13)
- QUARTERROUND( x3, x4, x9,x14)
- }
+
+ // The 20 inner ChaCha20 rounds are unrolled here for performance.
+ REPEAT10(
+ QUARTERROUND( x0, x4, x8,x12);
+ QUARTERROUND( x1, x5, x9,x13);
+ QUARTERROUND( x2, x6,x10,x14);
+ QUARTERROUND( x3, x7,x11,x15);
+ QUARTERROUND( x0, x5,x10,x15);
+ QUARTERROUND( x1, x6,x11,x12);
+ QUARTERROUND( x2, x7, x8,x13);
+ QUARTERROUND( x3, x4, x9,x14);
+ );
+
x0 += j0;
x1 += j1;
x2 += j2;
@@ -231,16 +236,19 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
x13 = j13;
x14 = j14;
x15 = j15;
- for (i = 20;i > 0;i -= 2) {
- QUARTERROUND( x0, x4, x8,x12)
- QUARTERROUND( x1, x5, x9,x13)
- QUARTERROUND( x2, x6,x10,x14)
- QUARTERROUND( x3, x7,x11,x15)
- QUARTERROUND( x0, x5,x10,x15)
- QUARTERROUND( x1, x6,x11,x12)
- QUARTERROUND( x2, x7, x8,x13)
- QUARTERROUND( x3, x4, x9,x14)
- }
+
+ // The 20 inner ChaCha20 rounds are unrolled here for performance.
+ REPEAT10(
+ QUARTERROUND( x0, x4, x8,x12);
+ QUARTERROUND( x1, x5, x9,x13);
+ QUARTERROUND( x2, x6,x10,x14);
+ QUARTERROUND( x3, x7,x11,x15);
+ QUARTERROUND( x0, x5,x10,x15);
+ QUARTERROUND( x1, x6,x11,x12);
+ QUARTERROUND( x2, x7, x8,x13);
+ QUARTERROUND( x3, x4, x9,x14);
+ );
+
x0 += j0;
x1 += j1;
x2 += j2;