Diffstat (limited to 'src/crypto')
-rw-r--r-- | src/crypto/aes.cpp | 216
-rw-r--r-- | src/crypto/aes.h | 118
-rw-r--r-- | src/crypto/chacha20.cpp | 180
-rw-r--r-- | src/crypto/chacha20.h | 26
-rw-r--r-- | src/crypto/common.h | 103
-rw-r--r-- | src/crypto/ctaes/COPYING | 21
-rw-r--r-- | src/crypto/ctaes/README.md | 41
-rw-r--r-- | src/crypto/ctaes/bench.c | 170
-rw-r--r-- | src/crypto/ctaes/ctaes.c | 556
-rw-r--r-- | src/crypto/ctaes/ctaes.h | 41
-rw-r--r-- | src/crypto/ctaes/test.c | 110
-rw-r--r-- | src/crypto/hmac_sha256.cpp | 34
-rw-r--r-- | src/crypto/hmac_sha256.h | 32
-rw-r--r-- | src/crypto/hmac_sha512.cpp | 34
-rw-r--r-- | src/crypto/hmac_sha512.h | 32
-rw-r--r-- | src/crypto/ripemd160.cpp | 292
-rw-r--r-- | src/crypto/ripemd160.h | 28
-rw-r--r-- | src/crypto/sha1.cpp | 199
-rw-r--r-- | src/crypto/sha1.h | 28
-rw-r--r-- | src/crypto/sha256.cpp | 249
-rw-r--r-- | src/crypto/sha256.h | 34
-rw-r--r-- | src/crypto/sha256_sse4.cpp | 1506
-rw-r--r-- | src/crypto/sha512.cpp | 207
-rw-r--r-- | src/crypto/sha512.h | 28
24 files changed, 4285 insertions, 0 deletions
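Before the file diffs, a minimal round-trip sketch (not part of the patch) of how the CBC wrapper classes added in src/crypto/aes.h and src/crypto/aes.cpp below are driven. The all-zero key and IV and the sample message are placeholders for illustration; with padding enabled, Encrypt emits up to one extra block of PKCS#7-style padding, and Decrypt returns the unpadded length, or 0 if the input or padding is malformed.

    #include <crypto/aes.h>

    #include <cassert>
    #include <cstring>

    int main()
    {
        unsigned char key[AES256_KEYSIZE] = {0}; // placeholder key
        unsigned char iv[AES_BLOCKSIZE] = {0};   // placeholder IV
        const unsigned char msg[] = "attack at dawn";
        unsigned char cipher[sizeof(msg) + AES_BLOCKSIZE];
        unsigned char plain[sizeof(msg) + AES_BLOCKSIZE];

        AES256CBCEncrypt enc(key, iv, true /* pad */);
        int clen = enc.Encrypt(msg, sizeof(msg), cipher); // always a multiple of 16
        assert(clen > 0);

        AES256CBCDecrypt dec(key, iv, true /* pad */);
        int plen = dec.Decrypt(cipher, clen, plain); // strips the padding again
        assert(plen == (int)sizeof(msg));
        assert(memcmp(plain, msg, plen) == 0);
        return 0;
    }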
diff --git a/src/crypto/aes.cpp b/src/crypto/aes.cpp new file mode 100644 index 0000000000..bf7a252349 --- /dev/null +++ b/src/crypto/aes.cpp @@ -0,0 +1,216 @@ +// Copyright (c) 2016-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <crypto/aes.h> +#include <crypto/common.h> + +#include <assert.h> +#include <string.h> + +extern "C" { +#include <crypto/ctaes/ctaes.c> +} + +AES128Encrypt::AES128Encrypt(const unsigned char key[16]) +{ + AES128_init(&ctx, key); +} + +AES128Encrypt::~AES128Encrypt() +{ + memset(&ctx, 0, sizeof(ctx)); +} + +void AES128Encrypt::Encrypt(unsigned char ciphertext[16], const unsigned char plaintext[16]) const +{ + AES128_encrypt(&ctx, 1, ciphertext, plaintext); +} + +AES128Decrypt::AES128Decrypt(const unsigned char key[16]) +{ + AES128_init(&ctx, key); +} + +AES128Decrypt::~AES128Decrypt() +{ + memset(&ctx, 0, sizeof(ctx)); +} + +void AES128Decrypt::Decrypt(unsigned char plaintext[16], const unsigned char ciphertext[16]) const +{ + AES128_decrypt(&ctx, 1, plaintext, ciphertext); +} + +AES256Encrypt::AES256Encrypt(const unsigned char key[32]) +{ + AES256_init(&ctx, key); +} + +AES256Encrypt::~AES256Encrypt() +{ + memset(&ctx, 0, sizeof(ctx)); +} + +void AES256Encrypt::Encrypt(unsigned char ciphertext[16], const unsigned char plaintext[16]) const +{ + AES256_encrypt(&ctx, 1, ciphertext, plaintext); +} + +AES256Decrypt::AES256Decrypt(const unsigned char key[32]) +{ + AES256_init(&ctx, key); +} + +AES256Decrypt::~AES256Decrypt() +{ + memset(&ctx, 0, sizeof(ctx)); +} + +void AES256Decrypt::Decrypt(unsigned char plaintext[16], const unsigned char ciphertext[16]) const +{ + AES256_decrypt(&ctx, 1, plaintext, ciphertext); +} + + +template <typename T> +static int CBCEncrypt(const T& enc, const unsigned char iv[AES_BLOCKSIZE], const unsigned char* data, int size, bool pad, unsigned char* out) +{ + int written = 0; + int padsize = size % AES_BLOCKSIZE; + unsigned char mixed[AES_BLOCKSIZE]; + + if (!data || !size || !out) + return 0; + + if (!pad && padsize != 0) + return 0; + + memcpy(mixed, iv, AES_BLOCKSIZE); + + // Write all but the last block + while (written + AES_BLOCKSIZE <= size) { + for (int i = 0; i != AES_BLOCKSIZE; i++) + mixed[i] ^= *data++; + enc.Encrypt(out + written, mixed); + memcpy(mixed, out + written, AES_BLOCKSIZE); + written += AES_BLOCKSIZE; + } + if (pad) { + // For all that remains, pad each byte with the value of the remaining + // space. If there is none, pad by a full block. + for (int i = 0; i != padsize; i++) + mixed[i] ^= *data++; + for (int i = padsize; i != AES_BLOCKSIZE; i++) + mixed[i] ^= AES_BLOCKSIZE - padsize; + enc.Encrypt(out + written, mixed); + written += AES_BLOCKSIZE; + } + return written; +} + +template <typename T> +static int CBCDecrypt(const T& dec, const unsigned char iv[AES_BLOCKSIZE], const unsigned char* data, int size, bool pad, unsigned char* out) +{ + int written = 0; + bool fail = false; + const unsigned char* prev = iv; + + if (!data || !size || !out) + return 0; + + if (size % AES_BLOCKSIZE != 0) + return 0; + + // Decrypt all data. Padding will be checked in the output. 
+ while (written != size) { + dec.Decrypt(out, data + written); + for (int i = 0; i != AES_BLOCKSIZE; i++) + *out++ ^= prev[i]; + prev = data + written; + written += AES_BLOCKSIZE; + } + + // When decrypting padding, attempt to run in constant-time + if (pad) { + // If used, padding size is the value of the last decrypted byte. For + // it to be valid, it must be between 1 and AES_BLOCKSIZE. + unsigned char padsize = *--out; + fail = !padsize | (padsize > AES_BLOCKSIZE); + + // If not well-formed, treat it as though there's no padding. + padsize *= !fail; + + // All padding must equal the last byte, otherwise it's not well-formed + for (int i = AES_BLOCKSIZE; i != 0; i--) + fail |= ((i > AES_BLOCKSIZE - padsize) & (*out-- != padsize)); + + written -= padsize; + } + return written * !fail; +} + +AES256CBCEncrypt::AES256CBCEncrypt(const unsigned char key[AES256_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn) + : enc(key), pad(padIn) +{ + memcpy(iv, ivIn, AES_BLOCKSIZE); +} + +int AES256CBCEncrypt::Encrypt(const unsigned char* data, int size, unsigned char* out) const +{ + return CBCEncrypt(enc, iv, data, size, pad, out); +} + +AES256CBCEncrypt::~AES256CBCEncrypt() +{ + memset(iv, 0, sizeof(iv)); +} + +AES256CBCDecrypt::AES256CBCDecrypt(const unsigned char key[AES256_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn) + : dec(key), pad(padIn) +{ + memcpy(iv, ivIn, AES_BLOCKSIZE); +} + + +int AES256CBCDecrypt::Decrypt(const unsigned char* data, int size, unsigned char* out) const +{ + return CBCDecrypt(dec, iv, data, size, pad, out); +} + +AES256CBCDecrypt::~AES256CBCDecrypt() +{ + memset(iv, 0, sizeof(iv)); +} + +AES128CBCEncrypt::AES128CBCEncrypt(const unsigned char key[AES128_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn) + : enc(key), pad(padIn) +{ + memcpy(iv, ivIn, AES_BLOCKSIZE); +} + +AES128CBCEncrypt::~AES128CBCEncrypt() +{ + memset(iv, 0, AES_BLOCKSIZE); +} + +int AES128CBCEncrypt::Encrypt(const unsigned char* data, int size, unsigned char* out) const +{ + return CBCEncrypt(enc, iv, data, size, pad, out); +} + +AES128CBCDecrypt::AES128CBCDecrypt(const unsigned char key[AES128_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn) + : dec(key), pad(padIn) +{ + memcpy(iv, ivIn, AES_BLOCKSIZE); +} + +AES128CBCDecrypt::~AES128CBCDecrypt() +{ + memset(iv, 0, AES_BLOCKSIZE); +} + +int AES128CBCDecrypt::Decrypt(const unsigned char* data, int size, unsigned char* out) const +{ + return CBCDecrypt(dec, iv, data, size, pad, out); +} diff --git a/src/crypto/aes.h b/src/crypto/aes.h new file mode 100644 index 0000000000..2dec8d9558 --- /dev/null +++ b/src/crypto/aes.h @@ -0,0 +1,118 @@ +// Copyright (c) 2015-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// +// C++ wrapper around ctaes, a constant-time AES implementation + +#ifndef BITCOIN_CRYPTO_AES_H +#define BITCOIN_CRYPTO_AES_H + +extern "C" { +#include <crypto/ctaes/ctaes.h> +} + +static const int AES_BLOCKSIZE = 16; +static const int AES128_KEYSIZE = 16; +static const int AES256_KEYSIZE = 32; + +/** An encryption class for AES-128. */ +class AES128Encrypt +{ +private: + AES128_ctx ctx; + +public: + explicit AES128Encrypt(const unsigned char key[16]); + ~AES128Encrypt(); + void Encrypt(unsigned char ciphertext[16], const unsigned char plaintext[16]) const; +}; + +/** A decryption class for AES-128.
*/ +class AES128Decrypt +{ +private: + AES128_ctx ctx; + +public: + explicit AES128Decrypt(const unsigned char key[16]); + ~AES128Decrypt(); + void Decrypt(unsigned char plaintext[16], const unsigned char ciphertext[16]) const; +}; + +/** An encryption class for AES-256. */ +class AES256Encrypt +{ +private: + AES256_ctx ctx; + +public: + explicit AES256Encrypt(const unsigned char key[32]); + ~AES256Encrypt(); + void Encrypt(unsigned char ciphertext[16], const unsigned char plaintext[16]) const; +}; + +/** A decryption class for AES-256. */ +class AES256Decrypt +{ +private: + AES256_ctx ctx; + +public: + explicit AES256Decrypt(const unsigned char key[32]); + ~AES256Decrypt(); + void Decrypt(unsigned char plaintext[16], const unsigned char ciphertext[16]) const; +}; + +class AES256CBCEncrypt +{ +public: + AES256CBCEncrypt(const unsigned char key[AES256_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn); + ~AES256CBCEncrypt(); + int Encrypt(const unsigned char* data, int size, unsigned char* out) const; + +private: + const AES256Encrypt enc; + const bool pad; + unsigned char iv[AES_BLOCKSIZE]; +}; + +class AES256CBCDecrypt +{ +public: + AES256CBCDecrypt(const unsigned char key[AES256_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn); + ~AES256CBCDecrypt(); + int Decrypt(const unsigned char* data, int size, unsigned char* out) const; + +private: + const AES256Decrypt dec; + const bool pad; + unsigned char iv[AES_BLOCKSIZE]; +}; + +class AES128CBCEncrypt +{ +public: + AES128CBCEncrypt(const unsigned char key[AES128_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn); + ~AES128CBCEncrypt(); + int Encrypt(const unsigned char* data, int size, unsigned char* out) const; + +private: + const AES128Encrypt enc; + const bool pad; + unsigned char iv[AES_BLOCKSIZE]; +}; + +class AES128CBCDecrypt +{ +public: + AES128CBCDecrypt(const unsigned char key[AES128_KEYSIZE], const unsigned char ivIn[AES_BLOCKSIZE], bool padIn); + ~AES128CBCDecrypt(); + int Decrypt(const unsigned char* data, int size, unsigned char* out) const; + +private: + const AES128Decrypt dec; + const bool pad; + unsigned char iv[AES_BLOCKSIZE]; +}; + +#endif // BITCOIN_CRYPTO_AES_H diff --git a/src/crypto/chacha20.cpp b/src/crypto/chacha20.cpp new file mode 100644 index 0000000000..ac4470f04f --- /dev/null +++ b/src/crypto/chacha20.cpp @@ -0,0 +1,180 @@ +// Copyright (c) 2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +// Based on the public domain implementation 'merged' by D. J. Bernstein +// See https://cr.yp.to/chacha.html. 
+ +#include <crypto/common.h> +#include <crypto/chacha20.h> + +#include <string.h> + +constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << c) | (v >> (32 - c)); } + +#define QUARTERROUND(a,b,c,d) \ + a += b; d = rotl32(d ^ a, 16); \ + c += d; b = rotl32(b ^ c, 12); \ + a += b; d = rotl32(d ^ a, 8); \ + c += d; b = rotl32(b ^ c, 7); + +static const unsigned char sigma[] = "expand 32-byte k"; +static const unsigned char tau[] = "expand 16-byte k"; + +void ChaCha20::SetKey(const unsigned char* k, size_t keylen) +{ + const unsigned char *constants; + + input[4] = ReadLE32(k + 0); + input[5] = ReadLE32(k + 4); + input[6] = ReadLE32(k + 8); + input[7] = ReadLE32(k + 12); + if (keylen == 32) { /* recommended */ + k += 16; + constants = sigma; + } else { /* keylen == 16 */ + constants = tau; + } + input[8] = ReadLE32(k + 0); + input[9] = ReadLE32(k + 4); + input[10] = ReadLE32(k + 8); + input[11] = ReadLE32(k + 12); + input[0] = ReadLE32(constants + 0); + input[1] = ReadLE32(constants + 4); + input[2] = ReadLE32(constants + 8); + input[3] = ReadLE32(constants + 12); + input[12] = 0; + input[13] = 0; + input[14] = 0; + input[15] = 0; +} + +ChaCha20::ChaCha20() +{ + memset(input, 0, sizeof(input)); +} + +ChaCha20::ChaCha20(const unsigned char* k, size_t keylen) +{ + SetKey(k, keylen); +} + +void ChaCha20::SetIV(uint64_t iv) +{ + input[14] = iv; + input[15] = iv >> 32; +} + +void ChaCha20::Seek(uint64_t pos) +{ + input[12] = pos; + input[13] = pos >> 32; +} + +void ChaCha20::Output(unsigned char* c, size_t bytes) +{ + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + unsigned char *ctarget = nullptr; + unsigned char tmp[64]; + unsigned int i; + + if (!bytes) return; + + j0 = input[0]; + j1 = input[1]; + j2 = input[2]; + j3 = input[3]; + j4 = input[4]; + j5 = input[5]; + j6 = input[6]; + j7 = input[7]; + j8 = input[8]; + j9 = input[9]; + j10 = input[10]; + j11 = input[11]; + j12 = input[12]; + j13 = input[13]; + j14 = input[14]; + j15 = input[15]; + + for (;;) { + if (bytes < 64) { + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + QUARTERROUND( x0, x4, x8,x12) + QUARTERROUND( x1, x5, x9,x13) + QUARTERROUND( x2, x6,x10,x14) + QUARTERROUND( x3, x7,x11,x15) + QUARTERROUND( x0, x5,x10,x15) + QUARTERROUND( x1, x6,x11,x12) + QUARTERROUND( x2, x7, x8,x13) + QUARTERROUND( x3, x4, x9,x14) + } + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + ++j12; + if (!j12) ++j13; + + WriteLE32(c + 0, x0); + WriteLE32(c + 4, x1); + WriteLE32(c + 8, x2); + WriteLE32(c + 12, x3); + WriteLE32(c + 16, x4); + WriteLE32(c + 20, x5); + WriteLE32(c + 24, x6); + WriteLE32(c + 28, x7); + WriteLE32(c + 32, x8); + WriteLE32(c + 36, x9); + WriteLE32(c + 40, x10); + WriteLE32(c + 44, x11); + WriteLE32(c + 48, x12); + WriteLE32(c + 52, x13); + WriteLE32(c + 56, x14); + WriteLE32(c + 60, x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + input[12] = j12; + input[13] = j13; + return; + } + bytes -= 64; + c += 64; + } +} diff --git a/src/crypto/chacha20.h b/src/crypto/chacha20.h new file mode 100644 index 
0000000000..a305977bcd --- /dev/null +++ b/src/crypto/chacha20.h @@ -0,0 +1,26 @@ +// Copyright (c) 2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_CHACHA20_H +#define BITCOIN_CRYPTO_CHACHA20_H + +#include <stdint.h> +#include <stdlib.h> + +/** A PRNG class for ChaCha20. */ +class ChaCha20 +{ +private: + uint32_t input[16]; + +public: + ChaCha20(); + ChaCha20(const unsigned char* key, size_t keylen); + void SetKey(const unsigned char* key, size_t keylen); + void SetIV(uint64_t iv); + void Seek(uint64_t pos); + void Output(unsigned char* output, size_t bytes); +}; + +#endif // BITCOIN_CRYPTO_CHACHA20_H diff --git a/src/crypto/common.h b/src/crypto/common.h new file mode 100644 index 0000000000..825b430978 --- /dev/null +++ b/src/crypto/common.h @@ -0,0 +1,103 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_COMMON_H +#define BITCOIN_CRYPTO_COMMON_H + +#if defined(HAVE_CONFIG_H) +#include <config/bitcoin-config.h> +#endif + +#include <stdint.h> +#include <string.h> + +#include <compat/endian.h> + +uint16_t static inline ReadLE16(const unsigned char* ptr) +{ + uint16_t x; + memcpy((char*)&x, ptr, 2); + return le16toh(x); +} + +uint32_t static inline ReadLE32(const unsigned char* ptr) +{ + uint32_t x; + memcpy((char*)&x, ptr, 4); + return le32toh(x); +} + +uint64_t static inline ReadLE64(const unsigned char* ptr) +{ + uint64_t x; + memcpy((char*)&x, ptr, 8); + return le64toh(x); +} + +void static inline WriteLE16(unsigned char* ptr, uint16_t x) +{ + uint16_t v = htole16(x); + memcpy(ptr, (char*)&v, 2); +} + +void static inline WriteLE32(unsigned char* ptr, uint32_t x) +{ + uint32_t v = htole32(x); + memcpy(ptr, (char*)&v, 4); +} + +void static inline WriteLE64(unsigned char* ptr, uint64_t x) +{ + uint64_t v = htole64(x); + memcpy(ptr, (char*)&v, 8); +} + +uint32_t static inline ReadBE32(const unsigned char* ptr) +{ + uint32_t x; + memcpy((char*)&x, ptr, 4); + return be32toh(x); +} + +uint64_t static inline ReadBE64(const unsigned char* ptr) +{ + uint64_t x; + memcpy((char*)&x, ptr, 8); + return be64toh(x); +} + +void static inline WriteBE32(unsigned char* ptr, uint32_t x) +{ + uint32_t v = htobe32(x); + memcpy(ptr, (char*)&v, 4); +} + +void static inline WriteBE64(unsigned char* ptr, uint64_t x) +{ + uint64_t v = htobe64(x); + memcpy(ptr, (char*)&v, 8); +} + +/** Return the smallest number n such that (x >> n) == 0 (or 64 if the highest bit in x is set). */ +uint64_t static inline CountBits(uint64_t x) +{ +#ifdef HAVE_DECL___BUILTIN_CLZL + if (sizeof(unsigned long) >= sizeof(uint64_t)) { + return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; + } +#endif +#ifdef HAVE_DECL___BUILTIN_CLZLL + if (sizeof(unsigned long long) >= sizeof(uint64_t)) { + return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0; + } +#endif + int ret = 0; + while (x) { + x >>= 1; + ++ret; + } + return ret; +} + +#endif // BITCOIN_CRYPTO_COMMON_H diff --git a/src/crypto/ctaes/COPYING b/src/crypto/ctaes/COPYING new file mode 100644 index 0000000000..415b202a2a --- /dev/null +++ b/src/crypto/ctaes/COPYING @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Pieter Wuille + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/src/crypto/ctaes/README.md b/src/crypto/ctaes/README.md new file mode 100644 index 0000000000..0e7fe17751 --- /dev/null +++ b/src/crypto/ctaes/README.md @@ -0,0 +1,41 @@ +ctaes +===== + +Simple C module for constant-time AES encryption and decryption. + +Features: +* Simple, pure C code without any dependencies. +* No tables or data-dependent branches whatsoever, but using a bitsliced approach from https://eprint.iacr.org/2009/129.pdf. +* Very small object code: slightly over 4k of executable code when compiled with -Os. +* Slower than implementations based on precomputed tables or specialized instructions, but can do ~15 MB/s on modern CPUs.
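A minimal usage sketch (an illustration based on the declarations in ctaes.h, not taken from the README itself; the all-zero key and block are placeholders): the key is expanded once with AES128_init, after which any number of 16-byte blocks can be processed:

    #include <string.h>

    #include "ctaes.h"

    int main(void) {
        static const unsigned char key[16] = {0}; /* placeholder key */
        unsigned char block[16] = {0};            /* one 16-byte AES block */
        unsigned char ct[16], pt[16];

        AES128_ctx ctx;
        AES128_init(&ctx, key);             /* constant-time key schedule */
        AES128_encrypt(&ctx, 1, ct, block); /* 1 block: ct = E_key(block) */
        AES128_decrypt(&ctx, 1, pt, ct);    /* invert: pt == block again */
        return memcmp(block, pt, 16) != 0;  /* 0 on success */
    }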
+ +Performance +----------- + +Compiled with GCC 5.3.1 with -O3, on an Intel(R) Core(TM) i7-4800MQ CPU, numbers in CPU cycles: + +| Algorithm | Key schedule | Encryption per byte | Decryption per byte | +| --------- | ------------:| -------------------:| -------------------:| +| AES-128 | 2.8k | 154 | 161 | +| AES-192 | 3.1k | 169 | 181 | +| AES-256 | 4.0k | 191 | 203 | + +Build steps +----------- + +Object code: + + $ gcc -O3 ctaes.c -c -o ctaes.o + +Tests: + + $ gcc -O3 ctaes.c test.c -o test + +Benchmark: + + $ gcc -O3 ctaes.c bench.c -o bench + +Review +------ + +Results of a formal review of the code can be found in http://bitcoin.sipa.be/ctaes/review.zip diff --git a/src/crypto/ctaes/bench.c b/src/crypto/ctaes/bench.c new file mode 100644 index 0000000000..a86df496c8 --- /dev/null +++ b/src/crypto/ctaes/bench.c @@ -0,0 +1,170 @@ +#include <stdio.h> +#include <math.h> +#include "sys/time.h" + +#include "ctaes.h" + +static double gettimedouble(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec * 0.000001 + tv.tv_sec; +} + +static void print_number(double x) { + double y = x; + int c = 0; + if (y < 0.0) { + y = -y; + } + while (y < 100.0) { + y *= 10.0; + c++; + } + printf("%.*f", c, x); +} + +static void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), void (*teardown)(void*), void* data, int count, int iter) { + int i; + double min = HUGE_VAL; + double sum = 0.0; + double max = 0.0; + for (i = 0; i < count; i++) { + double begin, total; + if (setup != NULL) { + setup(data); + } + begin = gettimedouble(); + benchmark(data); + total = gettimedouble() - begin; + if (teardown != NULL) { + teardown(data); + } + if (total < min) { + min = total; + } + if (total > max) { + max = total; + } + sum += total; + } + printf("%s: min ", name); + print_number(min * 1000000000.0 / iter); + printf("ns / avg "); + print_number((sum / count) * 1000000000.0 / iter); + printf("ns / max "); + print_number(max * 1000000000.0 / iter); + printf("ns\n"); +} + +static void bench_AES128_init(void* data) { + AES128_ctx* ctx = (AES128_ctx*)data; + int i; + for (i = 0; i < 50000; i++) { + AES128_init(ctx, (unsigned char*)ctx); + } +} + +static void bench_AES128_encrypt_setup(void* data) { + AES128_ctx* ctx = (AES128_ctx*)data; + static const unsigned char key[16] = {0}; + AES128_init(ctx, key); +} + +static void bench_AES128_encrypt(void* data) { + const AES128_ctx* ctx = (const AES128_ctx*)data; + unsigned char scratch[16] = {0}; + int i; + for (i = 0; i < 4000000 / 16; i++) { + AES128_encrypt(ctx, 1, scratch, scratch); + } +} + +static void bench_AES128_decrypt(void* data) { + const AES128_ctx* ctx = (const AES128_ctx*)data; + unsigned char scratch[16] = {0}; + int i; + for (i = 0; i < 4000000 / 16; i++) { + AES128_decrypt(ctx, 1, scratch, scratch); + } +} + +static void bench_AES192_init(void* data) { + AES192_ctx* ctx = (AES192_ctx*)data; + int i; + for (i = 0; i < 50000; i++) { + AES192_init(ctx, (unsigned char*)ctx); + } +} + +static void bench_AES192_encrypt_setup(void* data) { + AES192_ctx* ctx = (AES192_ctx*)data; + static const unsigned char key[24] = {0}; + AES192_init(ctx, key); +} + +static void bench_AES192_encrypt(void* data) { + const AES192_ctx* ctx = (const AES192_ctx*)data; + unsigned char scratch[16] = {0}; + int i; + for (i = 0; i < 4000000 / 16; i++) { + AES192_encrypt(ctx, 1, scratch, scratch); + } +} + +static void bench_AES192_decrypt(void* data) { + const AES192_ctx* ctx = (const AES192_ctx*)data; + unsigned char scratch[16] = {0}; + int i; + for (i = 0; i < 4000000 / 16; i++) { + AES192_decrypt(ctx, 1, scratch, scratch); + } +} + +static void bench_AES256_init(void* data) { + AES256_ctx* ctx = (AES256_ctx*)data; + int i; + for (i = 0; i < 50000; i++) { + AES256_init(ctx, (unsigned char*)ctx); + } +} + + +static void bench_AES256_encrypt_setup(void* data) { + AES256_ctx* ctx = (AES256_ctx*)data; + static const unsigned char key[32] = {0}; + AES256_init(ctx, key); +} + +static void bench_AES256_encrypt(void* data) { + const AES256_ctx* ctx = (const AES256_ctx*)data; + unsigned char scratch[16] = {0}; + int i; + for (i = 0; i < 4000000 / 16; i++) { + AES256_encrypt(ctx, 1, scratch, scratch); + } +} + +static void bench_AES256_decrypt(void* data) { + const AES256_ctx* ctx = (const AES256_ctx*)data; + unsigned char scratch[16] = {0}; + int i; + for (i = 0; i < 4000000 / 16; i++) { + AES256_decrypt(ctx, 1, scratch, scratch); + } +} + +int main(void) { + AES128_ctx ctx128; + AES192_ctx ctx192; + AES256_ctx ctx256; + run_benchmark("aes128_init", bench_AES128_init, NULL, NULL, &ctx128, 20, 50000); + run_benchmark("aes128_encrypt_byte", bench_AES128_encrypt, bench_AES128_encrypt_setup, NULL, &ctx128, 20, 4000000); + run_benchmark("aes128_decrypt_byte", bench_AES128_decrypt, bench_AES128_encrypt_setup, NULL, &ctx128, 20, 4000000); + run_benchmark("aes192_init", bench_AES192_init, NULL, NULL, &ctx192, 20, 50000); + run_benchmark("aes192_encrypt_byte", bench_AES192_encrypt, bench_AES192_encrypt_setup, NULL, &ctx192, 20, 4000000); + run_benchmark("aes192_decrypt_byte", bench_AES192_decrypt, bench_AES192_encrypt_setup, NULL, &ctx192, 20, 4000000); + run_benchmark("aes256_init", bench_AES256_init, NULL, NULL, &ctx256, 20, 50000); + run_benchmark("aes256_encrypt_byte", bench_AES256_encrypt, bench_AES256_encrypt_setup, NULL, &ctx256, 20, 4000000); + run_benchmark("aes256_decrypt_byte", bench_AES256_decrypt, bench_AES256_encrypt_setup, NULL, &ctx256, 20, 4000000); + return 0; +} diff --git a/src/crypto/ctaes/ctaes.c b/src/crypto/ctaes/ctaes.c new file mode 100644 index 0000000000..55962bf252 --- /dev/null +++ b/src/crypto/ctaes/ctaes.c @@ -0,0 +1,556 @@ + /********************************************************************* + * Copyright (c) 2016 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +/* Constant time, unoptimized, concise, plain C, AES implementation + * Based On: + * Emilia Kasper and Peter Schwabe, Faster and Timing-Attack Resistant AES-GCM + * http://www.iacr.org/archive/ches2009/57470001/57470001.pdf + * But using 8 16-bit integers representing a single AES state rather than 8 128-bit + * integers representing 8 AES states.
+ */ + +#include "ctaes.h" + +/* Slice variable slice_i contains the i'th bit of the 16 state variables in this order: + * 0 1 2 3 + * 4 5 6 7 + * 8 9 10 11 + * 12 13 14 15 + */ + +/** Convert a byte to sliced form, storing it corresponding to given row and column in s */ +static void LoadByte(AES_state* s, unsigned char byte, int r, int c) { + int i; + for (i = 0; i < 8; i++) { + s->slice[i] |= (byte & 1) << (r * 4 + c); + byte >>= 1; + } +} + +/** Load 16 bytes of data into 8 sliced integers */ +static void LoadBytes(AES_state *s, const unsigned char* data16) { + int c; + for (c = 0; c < 4; c++) { + int r; + for (r = 0; r < 4; r++) { + LoadByte(s, *(data16++), r, c); + } + } +} + +/** Convert 8 sliced integers into 16 bytes of data */ +static void SaveBytes(unsigned char* data16, const AES_state *s) { + int c; + for (c = 0; c < 4; c++) { + int r; + for (r = 0; r < 4; r++) { + int b; + uint8_t v = 0; + for (b = 0; b < 8; b++) { + v |= ((s->slice[b] >> (r * 4 + c)) & 1) << b; + } + *(data16++) = v; + } + } +} + +/* S-box implementation based on the gate logic from: + * Joan Boyar and Rene Peralta, A depth-16 circuit for the AES S-box. + * https://eprint.iacr.org/2011/332.pdf +*/ +static void SubBytes(AES_state *s, int inv) { + /* Load the bit slices */ + uint16_t U0 = s->slice[7], U1 = s->slice[6], U2 = s->slice[5], U3 = s->slice[4]; + uint16_t U4 = s->slice[3], U5 = s->slice[2], U6 = s->slice[1], U7 = s->slice[0]; + + uint16_t T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16; + uint16_t T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, D; + uint16_t M1, M6, M11, M13, M15, M20, M21, M22, M23, M25, M37, M38, M39, M40; + uint16_t M41, M42, M43, M44, M45, M46, M47, M48, M49, M50, M51, M52, M53, M54; + uint16_t M55, M56, M57, M58, M59, M60, M61, M62, M63; + + if (inv) { + uint16_t R5, R13, R17, R18, R19; + /* Undo linear postprocessing */ + T23 = U0 ^ U3; + T22 = ~(U1 ^ U3); + T2 = ~(U0 ^ U1); + T1 = U3 ^ U4; + T24 = ~(U4 ^ U7); + R5 = U6 ^ U7; + T8 = ~(U1 ^ T23); + T19 = T22 ^ R5; + T9 = ~(U7 ^ T1); + T10 = T2 ^ T24; + T13 = T2 ^ R5; + T3 = T1 ^ R5; + T25 = ~(U2 ^ T1); + R13 = U1 ^ U6; + T17 = ~(U2 ^ T19); + T20 = T24 ^ R13; + T4 = U4 ^ T8; + R17 = ~(U2 ^ U5); + R18 = ~(U5 ^ U6); + R19 = ~(U2 ^ U4); + D = U0 ^ R17; + T6 = T22 ^ R17; + T16 = R13 ^ R19; + T27 = T1 ^ R18; + T15 = T10 ^ T27; + T14 = T10 ^ R18; + T26 = T3 ^ T16; + } else { + /* Linear preprocessing. 
*/ + T1 = U0 ^ U3; + T2 = U0 ^ U5; + T3 = U0 ^ U6; + T4 = U3 ^ U5; + T5 = U4 ^ U6; + T6 = T1 ^ T5; + T7 = U1 ^ U2; + T8 = U7 ^ T6; + T9 = U7 ^ T7; + T10 = T6 ^ T7; + T11 = U1 ^ U5; + T12 = U2 ^ U5; + T13 = T3 ^ T4; + T14 = T6 ^ T11; + T15 = T5 ^ T11; + T16 = T5 ^ T12; + T17 = T9 ^ T16; + T18 = U3 ^ U7; + T19 = T7 ^ T18; + T20 = T1 ^ T19; + T21 = U6 ^ U7; + T22 = T7 ^ T21; + T23 = T2 ^ T22; + T24 = T2 ^ T10; + T25 = T20 ^ T17; + T26 = T3 ^ T16; + T27 = T1 ^ T12; + D = U7; + } + + /* Non-linear transformation (shared between the forward and backward case) */ + M1 = T13 & T6; + M6 = T3 & T16; + M11 = T1 & T15; + M13 = (T4 & T27) ^ M11; + M15 = (T2 & T10) ^ M11; + M20 = T14 ^ M1 ^ (T23 & T8) ^ M13; + M21 = (T19 & D) ^ M1 ^ T24 ^ M15; + M22 = T26 ^ M6 ^ (T22 & T9) ^ M13; + M23 = (T20 & T17) ^ M6 ^ M15 ^ T25; + M25 = M22 & M20; + M37 = M21 ^ ((M20 ^ M21) & (M23 ^ M25)); + M38 = M20 ^ M25 ^ (M21 | (M20 & M23)); + M39 = M23 ^ ((M22 ^ M23) & (M21 ^ M25)); + M40 = M22 ^ M25 ^ (M23 | (M21 & M22)); + M41 = M38 ^ M40; + M42 = M37 ^ M39; + M43 = M37 ^ M38; + M44 = M39 ^ M40; + M45 = M42 ^ M41; + M46 = M44 & T6; + M47 = M40 & T8; + M48 = M39 & D; + M49 = M43 & T16; + M50 = M38 & T9; + M51 = M37 & T17; + M52 = M42 & T15; + M53 = M45 & T27; + M54 = M41 & T10; + M55 = M44 & T13; + M56 = M40 & T23; + M57 = M39 & T19; + M58 = M43 & T3; + M59 = M38 & T22; + M60 = M37 & T20; + M61 = M42 & T1; + M62 = M45 & T4; + M63 = M41 & T2; + + if (inv){ + /* Undo linear preprocessing */ + uint16_t P0 = M52 ^ M61; + uint16_t P1 = M58 ^ M59; + uint16_t P2 = M54 ^ M62; + uint16_t P3 = M47 ^ M50; + uint16_t P4 = M48 ^ M56; + uint16_t P5 = M46 ^ M51; + uint16_t P6 = M49 ^ M60; + uint16_t P7 = P0 ^ P1; + uint16_t P8 = M50 ^ M53; + uint16_t P9 = M55 ^ M63; + uint16_t P10 = M57 ^ P4; + uint16_t P11 = P0 ^ P3; + uint16_t P12 = M46 ^ M48; + uint16_t P13 = M49 ^ M51; + uint16_t P14 = M49 ^ M62; + uint16_t P15 = M54 ^ M59; + uint16_t P16 = M57 ^ M61; + uint16_t P17 = M58 ^ P2; + uint16_t P18 = M63 ^ P5; + uint16_t P19 = P2 ^ P3; + uint16_t P20 = P4 ^ P6; + uint16_t P22 = P2 ^ P7; + uint16_t P23 = P7 ^ P8; + uint16_t P24 = P5 ^ P7; + uint16_t P25 = P6 ^ P10; + uint16_t P26 = P9 ^ P11; + uint16_t P27 = P10 ^ P18; + uint16_t P28 = P11 ^ P25; + uint16_t P29 = P15 ^ P20; + s->slice[7] = P13 ^ P22; + s->slice[6] = P26 ^ P29; + s->slice[5] = P17 ^ P28; + s->slice[4] = P12 ^ P22; + s->slice[3] = P23 ^ P27; + s->slice[2] = P19 ^ P24; + s->slice[1] = P14 ^ P23; + s->slice[0] = P9 ^ P16; + } else { + /* Linear postprocessing */ + uint16_t L0 = M61 ^ M62; + uint16_t L1 = M50 ^ M56; + uint16_t L2 = M46 ^ M48; + uint16_t L3 = M47 ^ M55; + uint16_t L4 = M54 ^ M58; + uint16_t L5 = M49 ^ M61; + uint16_t L6 = M62 ^ L5; + uint16_t L7 = M46 ^ L3; + uint16_t L8 = M51 ^ M59; + uint16_t L9 = M52 ^ M53; + uint16_t L10 = M53 ^ L4; + uint16_t L11 = M60 ^ L2; + uint16_t L12 = M48 ^ M51; + uint16_t L13 = M50 ^ L0; + uint16_t L14 = M52 ^ M61; + uint16_t L15 = M55 ^ L1; + uint16_t L16 = M56 ^ L0; + uint16_t L17 = M57 ^ L1; + uint16_t L18 = M58 ^ L8; + uint16_t L19 = M63 ^ L4; + uint16_t L20 = L0 ^ L1; + uint16_t L21 = L1 ^ L7; + uint16_t L22 = L3 ^ L12; + uint16_t L23 = L18 ^ L2; + uint16_t L24 = L15 ^ L9; + uint16_t L25 = L6 ^ L10; + uint16_t L26 = L7 ^ L9; + uint16_t L27 = L8 ^ L10; + uint16_t L28 = L11 ^ L14; + uint16_t L29 = L11 ^ L17; + s->slice[7] = L6 ^ L24; + s->slice[6] = ~(L16 ^ L26); + s->slice[5] = ~(L19 ^ L28); + s->slice[4] = L6 ^ L21; + s->slice[3] = L20 ^ L22; + s->slice[2] = L25 ^ L29; + s->slice[1] = ~(L13 ^ L27); + s->slice[0] = ~(L6 ^ L23); + } 
+} + +#define BIT_RANGE(from,to) (((1 << ((to) - (from))) - 1) << (from)) + +#define BIT_RANGE_LEFT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) << (shift)) +#define BIT_RANGE_RIGHT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) >> (shift)) + +static void ShiftRows(AES_state* s) { + int i; + for (i = 0; i < 8; i++) { + uint16_t v = s->slice[i]; + s->slice[i] = + (v & BIT_RANGE(0, 4)) | + BIT_RANGE_LEFT(v, 4, 5, 3) | BIT_RANGE_RIGHT(v, 5, 8, 1) | + BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | + BIT_RANGE_LEFT(v, 12, 15, 1) | BIT_RANGE_RIGHT(v, 15, 16, 3); + } +} + +static void InvShiftRows(AES_state* s) { + int i; + for (i = 0; i < 8; i++) { + uint16_t v = s->slice[i]; + s->slice[i] = + (v & BIT_RANGE(0, 4)) | + BIT_RANGE_LEFT(v, 4, 7, 1) | BIT_RANGE_RIGHT(v, 7, 8, 3) | + BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) | + BIT_RANGE_LEFT(v, 12, 13, 3) | BIT_RANGE_RIGHT(v, 13, 16, 1); + } +} + +#define ROT(x,b) (((x) >> ((b) * 4)) | ((x) << ((4-(b)) * 4))) + +static void MixColumns(AES_state* s, int inv) { + /* The MixColumns transform treats the bytes of the columns of the state as + * coefficients of a 3rd degree polynomial over GF(2^8) and multiplies them + * by the fixed polynomial a(x) = {03}x^3 + {01}x^2 + {01}x + {02}, modulo + * x^4 + {01}. + * + * In the inverse transform, we multiply by the inverse of a(x), + * a^-1(x) = {0b}x^3 + {0d}x^2 + {09}x + {0e}. This is equal to + * a(x) * ({04}x^2 + {05}), so we can reuse the forward transform's code + * (found in OpenSSL's bsaes-x86_64.pl, attributed to Jussi Kivilinna) + * + * In the bitsliced representation, a multiplication of every column by x + * mod x^4 + 1 is simply a right rotation. + */ + + /* Shared for both directions is a multiplication by a(x), which can be + * rewritten as (x^3 + x^2 + x) + {02}*(x^3 + {01}). + * + * First compute s into the s? variables, (x^3 + {01}) * s into the s?_01 + * variables and (x^3 + x^2 + x)*s into the s?_123 variables. + */ + uint16_t s0 = s->slice[0], s1 = s->slice[1], s2 = s->slice[2], s3 = s->slice[3]; + uint16_t s4 = s->slice[4], s5 = s->slice[5], s6 = s->slice[6], s7 = s->slice[7]; + uint16_t s0_01 = s0 ^ ROT(s0, 1), s0_123 = ROT(s0_01, 1) ^ ROT(s0, 3); + uint16_t s1_01 = s1 ^ ROT(s1, 1), s1_123 = ROT(s1_01, 1) ^ ROT(s1, 3); + uint16_t s2_01 = s2 ^ ROT(s2, 1), s2_123 = ROT(s2_01, 1) ^ ROT(s2, 3); + uint16_t s3_01 = s3 ^ ROT(s3, 1), s3_123 = ROT(s3_01, 1) ^ ROT(s3, 3); + uint16_t s4_01 = s4 ^ ROT(s4, 1), s4_123 = ROT(s4_01, 1) ^ ROT(s4, 3); + uint16_t s5_01 = s5 ^ ROT(s5, 1), s5_123 = ROT(s5_01, 1) ^ ROT(s5, 3); + uint16_t s6_01 = s6 ^ ROT(s6, 1), s6_123 = ROT(s6_01, 1) ^ ROT(s6, 3); + uint16_t s7_01 = s7 ^ ROT(s7, 1), s7_123 = ROT(s7_01, 1) ^ ROT(s7, 3); + /* Now compute s = s?_123 + {02} * s?_01. */ + s->slice[0] = s7_01 ^ s0_123; + s->slice[1] = s7_01 ^ s0_01 ^ s1_123; + s->slice[2] = s1_01 ^ s2_123; + s->slice[3] = s7_01 ^ s2_01 ^ s3_123; + s->slice[4] = s7_01 ^ s3_01 ^ s4_123; + s->slice[5] = s4_01 ^ s5_123; + s->slice[6] = s5_01 ^ s6_123; + s->slice[7] = s6_01 ^ s7_123; + if (inv) { + /* In the reverse direction, we further need to multiply by + * {04}x^2 + {05}, which can be written as {04} * (x^2 + {01}) + {01}. 
+ * + * First compute (x^2 + {01}) * s into the t?_02 variables: */ + uint16_t t0_02 = s->slice[0] ^ ROT(s->slice[0], 2); + uint16_t t1_02 = s->slice[1] ^ ROT(s->slice[1], 2); + uint16_t t2_02 = s->slice[2] ^ ROT(s->slice[2], 2); + uint16_t t3_02 = s->slice[3] ^ ROT(s->slice[3], 2); + uint16_t t4_02 = s->slice[4] ^ ROT(s->slice[4], 2); + uint16_t t5_02 = s->slice[5] ^ ROT(s->slice[5], 2); + uint16_t t6_02 = s->slice[6] ^ ROT(s->slice[6], 2); + uint16_t t7_02 = s->slice[7] ^ ROT(s->slice[7], 2); + /* And then update s += {04} * t?_02 */ + s->slice[0] ^= t6_02; + s->slice[1] ^= t6_02 ^ t7_02; + s->slice[2] ^= t0_02 ^ t7_02; + s->slice[3] ^= t1_02 ^ t6_02; + s->slice[4] ^= t2_02 ^ t6_02 ^ t7_02; + s->slice[5] ^= t3_02 ^ t7_02; + s->slice[6] ^= t4_02; + s->slice[7] ^= t5_02; + } +} + +static void AddRoundKey(AES_state* s, const AES_state* round) { + int b; + for (b = 0; b < 8; b++) { + s->slice[b] ^= round->slice[b]; + } +} + +/** column_0(s) = column_c(a) */ +static void GetOneColumn(AES_state* s, const AES_state* a, int c) { + int b; + for (b = 0; b < 8; b++) { + s->slice[b] = (a->slice[b] >> c) & 0x1111; + } +} + +/** column_c1(r) |= (column_0(s) ^= column_c2(a)) */ +static void KeySetupColumnMix(AES_state* s, AES_state* r, const AES_state* a, int c1, int c2) { + int b; + for (b = 0; b < 8; b++) { + r->slice[b] |= ((s->slice[b] ^= ((a->slice[b] >> c2) & 0x1111)) & 0x1111) << c1; + } +} + +/** Rotate the rows in s one position upwards, and xor in r */ +static void KeySetupTransform(AES_state* s, const AES_state* r) { + int b; + for (b = 0; b < 8; b++) { + s->slice[b] = ((s->slice[b] >> 4) | (s->slice[b] << 12)) ^ r->slice[b]; + } +} + +/* Multiply the cells in s by x, as polynomials over GF(2) mod x^8 + x^4 + x^3 + x + 1 */ +static void MultX(AES_state* s) { + uint16_t top = s->slice[7]; + s->slice[7] = s->slice[6]; + s->slice[6] = s->slice[5]; + s->slice[5] = s->slice[4]; + s->slice[4] = s->slice[3] ^ top; + s->slice[3] = s->slice[2] ^ top; + s->slice[2] = s->slice[1]; + s->slice[1] = s->slice[0] ^ top; + s->slice[0] = top; +} + +/** Expand the cipher key into the key schedule. + * + * state must be a pointer to an array of size nrounds + 1. + * key must be a pointer to 4 * nkeywords bytes. + * + * AES128 uses nkeywords = 4, nrounds = 10 + * AES192 uses nkeywords = 6, nrounds = 12 + * AES256 uses nkeywords = 8, nrounds = 14 + */ +static void AES_setup(AES_state* rounds, const uint8_t* key, int nkeywords, int nrounds) +{ + int i; + + /* The one-byte round constant */ + AES_state rcon = {{1,0,0,0,0,0,0,0}}; + /* The number of the word being generated, modulo nkeywords */ + int pos = 0; + /* The column representing the word currently being processed */ + AES_state column; + + for (i = 0; i < nrounds + 1; i++) { + int b; + for (b = 0; b < 8; b++) { + rounds[i].slice[b] = 0; + } + } + + /* The first nkeywords round columns are just taken from the key directly. 
*/ + for (i = 0; i < nkeywords; i++) { + int r; + for (r = 0; r < 4; r++) { + LoadByte(&rounds[i >> 2], *(key++), r, i & 3); + } + } + + GetOneColumn(&column, &rounds[(nkeywords - 1) >> 2], (nkeywords - 1) & 3); + + for (i = nkeywords; i < 4 * (nrounds + 1); i++) { + /* Transform column */ + if (pos == 0) { + SubBytes(&column, 0); + KeySetupTransform(&column, &rcon); + MultX(&rcon); + } else if (nkeywords > 6 && pos == 4) { + SubBytes(&column, 0); + } + if (++pos == nkeywords) pos = 0; + KeySetupColumnMix(&column, &rounds[i >> 2], &rounds[(i - nkeywords) >> 2], i & 3, (i - nkeywords) & 3); + } +} + +static void AES_encrypt(const AES_state* rounds, int nrounds, unsigned char* cipher16, const unsigned char* plain16) { + AES_state s = {{0}}; + int round; + + LoadBytes(&s, plain16); + AddRoundKey(&s, rounds++); + + for (round = 1; round < nrounds; round++) { + SubBytes(&s, 0); + ShiftRows(&s); + MixColumns(&s, 0); + AddRoundKey(&s, rounds++); + } + + SubBytes(&s, 0); + ShiftRows(&s); + AddRoundKey(&s, rounds); + + SaveBytes(cipher16, &s); +} + +static void AES_decrypt(const AES_state* rounds, int nrounds, unsigned char* plain16, const unsigned char* cipher16) { + /* Most AES decryption implementations use the alternate scheme + * (the Equivalent Inverse Cipher), which allows for more code reuse between + * the encryption and decryption code, but requires separate setup for both. + */ + AES_state s = {{0}}; + int round; + + rounds += nrounds; + + LoadBytes(&s, cipher16); + AddRoundKey(&s, rounds--); + + for (round = 1; round < nrounds; round++) { + InvShiftRows(&s); + SubBytes(&s, 1); + AddRoundKey(&s, rounds--); + MixColumns(&s, 1); + } + + InvShiftRows(&s); + SubBytes(&s, 1); + AddRoundKey(&s, rounds); + + SaveBytes(plain16, &s); +} + +void AES128_init(AES128_ctx* ctx, const unsigned char* key16) { + AES_setup(ctx->rk, key16, 4, 10); +} + +void AES128_encrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { + while (blocks--) { + AES_encrypt(ctx->rk, 10, cipher16, plain16); + cipher16 += 16; + plain16 += 16; + } +} + +void AES128_decrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { + while (blocks--) { + AES_decrypt(ctx->rk, 10, plain16, cipher16); + cipher16 += 16; + plain16 += 16; + } +} + +void AES192_init(AES192_ctx* ctx, const unsigned char* key24) { + AES_setup(ctx->rk, key24, 6, 12); +} + +void AES192_encrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { + while (blocks--) { + AES_encrypt(ctx->rk, 12, cipher16, plain16); + cipher16 += 16; + plain16 += 16; + } + +} + +void AES192_decrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { + while (blocks--) { + AES_decrypt(ctx->rk, 12, plain16, cipher16); + cipher16 += 16; + plain16 += 16; + } +} + +void AES256_init(AES256_ctx* ctx, const unsigned char* key32) { + AES_setup(ctx->rk, key32, 8, 14); +} + +void AES256_encrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) { + while (blocks--) { + AES_encrypt(ctx->rk, 14, cipher16, plain16); + cipher16 += 16; + plain16 += 16; + } +} + +void AES256_decrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) { + while (blocks--) { + AES_decrypt(ctx->rk, 14, plain16, cipher16); + cipher16 += 16; + plain16 += 16; + } +} diff --git a/src/crypto/ctaes/ctaes.h b/src/crypto/ctaes/ctaes.h new file mode 100644 index 
0000000000..2f0af04216 --- /dev/null +++ b/src/crypto/ctaes/ctaes.h @@ -0,0 +1,41 @@ + /********************************************************************* + * Copyright (c) 2016 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _CTAES_H_ +#define _CTAES_H_ 1 + +#include <stdint.h> +#include <stdlib.h> + +typedef struct { + uint16_t slice[8]; +} AES_state; + +typedef struct { + AES_state rk[11]; +} AES128_ctx; + +typedef struct { + AES_state rk[13]; +} AES192_ctx; + +typedef struct { + AES_state rk[15]; +} AES256_ctx; + +void AES128_init(AES128_ctx* ctx, const unsigned char* key16); +void AES128_encrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16); +void AES128_decrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16); + +void AES192_init(AES192_ctx* ctx, const unsigned char* key24); +void AES192_encrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16); +void AES192_decrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16); + +void AES256_init(AES256_ctx* ctx, const unsigned char* key32); +void AES256_encrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16); +void AES256_decrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16); + +#endif diff --git a/src/crypto/ctaes/test.c b/src/crypto/ctaes/test.c new file mode 100644 index 0000000000..21439a16f1 --- /dev/null +++ b/src/crypto/ctaes/test.c @@ -0,0 +1,110 @@ + /********************************************************************* + * Copyright (c) 2016 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include "ctaes.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +typedef struct { + int keysize; + const char* key; + const char* plain; + const char* cipher; +} ctaes_test; + +static const ctaes_test ctaes_tests[] = { + /* AES test vectors from FIPS 197. */ + {128, "000102030405060708090a0b0c0d0e0f", "00112233445566778899aabbccddeeff", "69c4e0d86a7b0430d8cdb78070b4c55a"}, + {192, "000102030405060708090a0b0c0d0e0f1011121314151617", "00112233445566778899aabbccddeeff", "dda97ca4864cdfe06eaf70a0ec0d7191"}, + {256, "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", "00112233445566778899aabbccddeeff", "8ea2b7ca516745bfeafc49904b496089"}, + + /* AES-ECB test vectors from NIST sp800-38a. 
*/ + {128, "2b7e151628aed2a6abf7158809cf4f3c", "6bc1bee22e409f96e93d7e117393172a", "3ad77bb40d7a3660a89ecaf32466ef97"}, + {128, "2b7e151628aed2a6abf7158809cf4f3c", "ae2d8a571e03ac9c9eb76fac45af8e51", "f5d3d58503b9699de785895a96fdbaaf"}, + {128, "2b7e151628aed2a6abf7158809cf4f3c", "30c81c46a35ce411e5fbc1191a0a52ef", "43b1cd7f598ece23881b00e3ed030688"}, + {128, "2b7e151628aed2a6abf7158809cf4f3c", "f69f2445df4f9b17ad2b417be66c3710", "7b0c785e27e8ad3f8223207104725dd4"}, + {192, "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", "6bc1bee22e409f96e93d7e117393172a", "bd334f1d6e45f25ff712a214571fa5cc"}, + {192, "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", "ae2d8a571e03ac9c9eb76fac45af8e51", "974104846d0ad3ad7734ecb3ecee4eef"}, + {192, "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", "30c81c46a35ce411e5fbc1191a0a52ef", "ef7afd2270e2e60adce0ba2face6444e"}, + {192, "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", "f69f2445df4f9b17ad2b417be66c3710", "9a4b41ba738d6c72fb16691603c18e0e"}, + {256, "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", "6bc1bee22e409f96e93d7e117393172a", "f3eed1bdb5d2a03c064b5a7e3db181f8"}, + {256, "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", "ae2d8a571e03ac9c9eb76fac45af8e51", "591ccb10d410ed26dc5ba74a31362870"}, + {256, "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", "30c81c46a35ce411e5fbc1191a0a52ef", "b6ed21b99ca6f4f9f153e7b1beafed1d"}, + {256, "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", "f69f2445df4f9b17ad2b417be66c3710", "23304b7a39f9f3ff067d8d8f9e24ecc7"} +}; + +static void from_hex(unsigned char* data, int len, const char* hex) { + int p; + for (p = 0; p < len; p++) { + int v = 0; + int n; + for (n = 0; n < 2; n++) { + assert((*hex >= '0' && *hex <= '9') || (*hex >= 'a' && *hex <= 'f')); + if (*hex >= '0' && *hex <= '9') { + v |= (*hex - '0') << (4 * (1 - n)); + } else { + v |= (*hex - 'a' + 10) << (4 * (1 - n)); + } + hex++; + } + *(data++) = v; + } + assert(*hex == 0); +} + +int main(void) { + int i; + int fail = 0; + for (i = 0; i < sizeof(ctaes_tests) / sizeof(ctaes_tests[0]); i++) { + unsigned char key[32], plain[16], cipher[16], ciphered[16], deciphered[16]; + const ctaes_test* test = &ctaes_tests[i]; + assert(test->keysize == 128 || test->keysize == 192 || test->keysize == 256); + from_hex(plain, 16, test->plain); + from_hex(cipher, 16, test->cipher); + switch (test->keysize) { + case 128: { + AES128_ctx ctx; + from_hex(key, 16, test->key); + AES128_init(&ctx, key); + AES128_encrypt(&ctx, 1, ciphered, plain); + AES128_decrypt(&ctx, 1, deciphered, cipher); + break; + } + case 192: { + AES192_ctx ctx; + from_hex(key, 24, test->key); + AES192_init(&ctx, key); + AES192_encrypt(&ctx, 1, ciphered, plain); + AES192_decrypt(&ctx, 1, deciphered, cipher); + break; + } + case 256: { + AES256_ctx ctx; + from_hex(key, 32, test->key); + AES256_init(&ctx, key); + AES256_encrypt(&ctx, 1, ciphered, plain); + AES256_decrypt(&ctx, 1, deciphered, cipher); + break; + } + } + if (memcmp(cipher, ciphered, 16)) { + fprintf(stderr, "E(key=\"%s\", plain=\"%s\") != \"%s\"\n", test->key, test->plain, test->cipher); + fail++; + } + if (memcmp(plain, deciphered, 16)) { + fprintf(stderr, "D(key=\"%s\", cipher=\"%s\") != \"%s\"\n", test->key, test->cipher, test->plain); + fail++; + } + } + if (fail == 0) { + fprintf(stderr, "All tests successful\n"); + } else { + fprintf(stderr, "%i tests failed\n", fail); + } + return (fail != 0); +} diff --git a/src/crypto/hmac_sha256.cpp 
b/src/crypto/hmac_sha256.cpp new file mode 100644 index 0000000000..d4afe1439f --- /dev/null +++ b/src/crypto/hmac_sha256.cpp @@ -0,0 +1,34 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <crypto/hmac_sha256.h> + +#include <string.h> + +CHMAC_SHA256::CHMAC_SHA256(const unsigned char* key, size_t keylen) +{ + unsigned char rkey[64]; + if (keylen <= 64) { + memcpy(rkey, key, keylen); + memset(rkey + keylen, 0, 64 - keylen); + } else { + CSHA256().Write(key, keylen).Finalize(rkey); + memset(rkey + 32, 0, 32); + } + + for (int n = 0; n < 64; n++) + rkey[n] ^= 0x5c; + outer.Write(rkey, 64); + + for (int n = 0; n < 64; n++) + rkey[n] ^= 0x5c ^ 0x36; + inner.Write(rkey, 64); +} + +void CHMAC_SHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + unsigned char temp[32]; + inner.Finalize(temp); + outer.Write(temp, 32).Finalize(hash); +} diff --git a/src/crypto/hmac_sha256.h b/src/crypto/hmac_sha256.h new file mode 100644 index 0000000000..4fb30b7ac0 --- /dev/null +++ b/src/crypto/hmac_sha256.h @@ -0,0 +1,32 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_HMAC_SHA256_H +#define BITCOIN_CRYPTO_HMAC_SHA256_H + +#include <crypto/sha256.h> + +#include <stdint.h> +#include <stdlib.h> + +/** A hasher class for HMAC-SHA-256. */ +class CHMAC_SHA256 +{ +private: + CSHA256 outer; + CSHA256 inner; + +public: + static const size_t OUTPUT_SIZE = 32; + + CHMAC_SHA256(const unsigned char* key, size_t keylen); + CHMAC_SHA256& Write(const unsigned char* data, size_t len) + { + inner.Write(data, len); + return *this; + } + void Finalize(unsigned char hash[OUTPUT_SIZE]); +}; + +#endif // BITCOIN_CRYPTO_HMAC_SHA256_H diff --git a/src/crypto/hmac_sha512.cpp b/src/crypto/hmac_sha512.cpp new file mode 100644 index 0000000000..d9c4d04100 --- /dev/null +++ b/src/crypto/hmac_sha512.cpp @@ -0,0 +1,34 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <crypto/hmac_sha512.h> + +#include <string.h> + +CHMAC_SHA512::CHMAC_SHA512(const unsigned char* key, size_t keylen) +{ + unsigned char rkey[128]; + if (keylen <= 128) { + memcpy(rkey, key, keylen); + memset(rkey + keylen, 0, 128 - keylen); + } else { + CSHA512().Write(key, keylen).Finalize(rkey); + memset(rkey + 64, 0, 64); + } + + for (int n = 0; n < 128; n++) + rkey[n] ^= 0x5c; + outer.Write(rkey, 128); + + for (int n = 0; n < 128; n++) + rkey[n] ^= 0x5c ^ 0x36; + inner.Write(rkey, 128); +} + +void CHMAC_SHA512::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + unsigned char temp[64]; + inner.Finalize(temp); + outer.Write(temp, 64).Finalize(hash); +} diff --git a/src/crypto/hmac_sha512.h b/src/crypto/hmac_sha512.h new file mode 100644 index 0000000000..ab84ee7652 --- /dev/null +++ b/src/crypto/hmac_sha512.h @@ -0,0 +1,32 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#ifndef BITCOIN_CRYPTO_HMAC_SHA512_H +#define BITCOIN_CRYPTO_HMAC_SHA512_H + +#include <crypto/sha512.h> + +#include <stdint.h> +#include <stdlib.h> + +/** A hasher class for HMAC-SHA-512. */ +class CHMAC_SHA512 +{ +private: + CSHA512 outer; + CSHA512 inner; + +public: + static const size_t OUTPUT_SIZE = 64; + + CHMAC_SHA512(const unsigned char* key, size_t keylen); + CHMAC_SHA512& Write(const unsigned char* data, size_t len) + { + inner.Write(data, len); + return *this; + } + void Finalize(unsigned char hash[OUTPUT_SIZE]); +}; + +#endif // BITCOIN_CRYPTO_HMAC_SHA512_H diff --git a/src/crypto/ripemd160.cpp b/src/crypto/ripemd160.cpp new file mode 100644 index 0000000000..51468ec8d0 --- /dev/null +++ b/src/crypto/ripemd160.cpp @@ -0,0 +1,292 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <crypto/ripemd160.h> + +#include <crypto/common.h> + +#include <string.h> + +// Internal implementation code. +namespace +{ +/// Internal RIPEMD-160 implementation. +namespace ripemd160 +{ +uint32_t inline f1(uint32_t x, uint32_t y, uint32_t z) { return x ^ y ^ z; } +uint32_t inline f2(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (~x & z); } +uint32_t inline f3(uint32_t x, uint32_t y, uint32_t z) { return (x | ~y) ^ z; } +uint32_t inline f4(uint32_t x, uint32_t y, uint32_t z) { return (x & z) | (y & ~z); } +uint32_t inline f5(uint32_t x, uint32_t y, uint32_t z) { return x ^ (y | ~z); } + +/** Initialize RIPEMD-160 state. */ +void inline Initialize(uint32_t* s) +{ + s[0] = 0x67452301ul; + s[1] = 0xEFCDAB89ul; + s[2] = 0x98BADCFEul; + s[3] = 0x10325476ul; + s[4] = 0xC3D2E1F0ul; +} + +uint32_t inline rol(uint32_t x, int i) { return (x << i) | (x >> (32 - i)); } + +void inline Round(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t f, uint32_t x, uint32_t k, int r) +{ + a = rol(a + f + x + k, r) + e; + c = rol(c, 10); +} + +void inline R11(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f1(b, c, d), x, 0, r); } +void inline R21(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f2(b, c, d), x, 0x5A827999ul, r); } +void inline R31(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f3(b, c, d), x, 0x6ED9EBA1ul, r); } +void inline R41(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f4(b, c, d), x, 0x8F1BBCDCul, r); } +void inline R51(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f5(b, c, d), x, 0xA953FD4Eul, r); } + +void inline R12(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f5(b, c, d), x, 0x50A28BE6ul, r); } +void inline R22(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f4(b, c, d), x, 0x5C4DD124ul, r); } +void inline R32(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f3(b, c, d), x, 0x6D703EF3ul, r); } +void inline R42(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, e, f2(b, c, d), x, 0x7A6D76E9ul, r); } +void inline R52(uint32_t& a, uint32_t b, uint32_t& c, uint32_t d, uint32_t e, uint32_t x, int r) { Round(a, b, c, d, 
e, f1(b, c, d), x, 0, r); } + +/** Perform a RIPEMD-160 transformation, processing a 64-byte chunk. */ +void Transform(uint32_t* s, const unsigned char* chunk) +{ + uint32_t a1 = s[0], b1 = s[1], c1 = s[2], d1 = s[3], e1 = s[4]; + uint32_t a2 = a1, b2 = b1, c2 = c1, d2 = d1, e2 = e1; + uint32_t w0 = ReadLE32(chunk + 0), w1 = ReadLE32(chunk + 4), w2 = ReadLE32(chunk + 8), w3 = ReadLE32(chunk + 12); + uint32_t w4 = ReadLE32(chunk + 16), w5 = ReadLE32(chunk + 20), w6 = ReadLE32(chunk + 24), w7 = ReadLE32(chunk + 28); + uint32_t w8 = ReadLE32(chunk + 32), w9 = ReadLE32(chunk + 36), w10 = ReadLE32(chunk + 40), w11 = ReadLE32(chunk + 44); + uint32_t w12 = ReadLE32(chunk + 48), w13 = ReadLE32(chunk + 52), w14 = ReadLE32(chunk + 56), w15 = ReadLE32(chunk + 60); + + R11(a1, b1, c1, d1, e1, w0, 11); + R12(a2, b2, c2, d2, e2, w5, 8); + R11(e1, a1, b1, c1, d1, w1, 14); + R12(e2, a2, b2, c2, d2, w14, 9); + R11(d1, e1, a1, b1, c1, w2, 15); + R12(d2, e2, a2, b2, c2, w7, 9); + R11(c1, d1, e1, a1, b1, w3, 12); + R12(c2, d2, e2, a2, b2, w0, 11); + R11(b1, c1, d1, e1, a1, w4, 5); + R12(b2, c2, d2, e2, a2, w9, 13); + R11(a1, b1, c1, d1, e1, w5, 8); + R12(a2, b2, c2, d2, e2, w2, 15); + R11(e1, a1, b1, c1, d1, w6, 7); + R12(e2, a2, b2, c2, d2, w11, 15); + R11(d1, e1, a1, b1, c1, w7, 9); + R12(d2, e2, a2, b2, c2, w4, 5); + R11(c1, d1, e1, a1, b1, w8, 11); + R12(c2, d2, e2, a2, b2, w13, 7); + R11(b1, c1, d1, e1, a1, w9, 13); + R12(b2, c2, d2, e2, a2, w6, 7); + R11(a1, b1, c1, d1, e1, w10, 14); + R12(a2, b2, c2, d2, e2, w15, 8); + R11(e1, a1, b1, c1, d1, w11, 15); + R12(e2, a2, b2, c2, d2, w8, 11); + R11(d1, e1, a1, b1, c1, w12, 6); + R12(d2, e2, a2, b2, c2, w1, 14); + R11(c1, d1, e1, a1, b1, w13, 7); + R12(c2, d2, e2, a2, b2, w10, 14); + R11(b1, c1, d1, e1, a1, w14, 9); + R12(b2, c2, d2, e2, a2, w3, 12); + R11(a1, b1, c1, d1, e1, w15, 8); + R12(a2, b2, c2, d2, e2, w12, 6); + + R21(e1, a1, b1, c1, d1, w7, 7); + R22(e2, a2, b2, c2, d2, w6, 9); + R21(d1, e1, a1, b1, c1, w4, 6); + R22(d2, e2, a2, b2, c2, w11, 13); + R21(c1, d1, e1, a1, b1, w13, 8); + R22(c2, d2, e2, a2, b2, w3, 15); + R21(b1, c1, d1, e1, a1, w1, 13); + R22(b2, c2, d2, e2, a2, w7, 7); + R21(a1, b1, c1, d1, e1, w10, 11); + R22(a2, b2, c2, d2, e2, w0, 12); + R21(e1, a1, b1, c1, d1, w6, 9); + R22(e2, a2, b2, c2, d2, w13, 8); + R21(d1, e1, a1, b1, c1, w15, 7); + R22(d2, e2, a2, b2, c2, w5, 9); + R21(c1, d1, e1, a1, b1, w3, 15); + R22(c2, d2, e2, a2, b2, w10, 11); + R21(b1, c1, d1, e1, a1, w12, 7); + R22(b2, c2, d2, e2, a2, w14, 7); + R21(a1, b1, c1, d1, e1, w0, 12); + R22(a2, b2, c2, d2, e2, w15, 7); + R21(e1, a1, b1, c1, d1, w9, 15); + R22(e2, a2, b2, c2, d2, w8, 12); + R21(d1, e1, a1, b1, c1, w5, 9); + R22(d2, e2, a2, b2, c2, w12, 7); + R21(c1, d1, e1, a1, b1, w2, 11); + R22(c2, d2, e2, a2, b2, w4, 6); + R21(b1, c1, d1, e1, a1, w14, 7); + R22(b2, c2, d2, e2, a2, w9, 15); + R21(a1, b1, c1, d1, e1, w11, 13); + R22(a2, b2, c2, d2, e2, w1, 13); + R21(e1, a1, b1, c1, d1, w8, 12); + R22(e2, a2, b2, c2, d2, w2, 11); + + R31(d1, e1, a1, b1, c1, w3, 11); + R32(d2, e2, a2, b2, c2, w15, 9); + R31(c1, d1, e1, a1, b1, w10, 13); + R32(c2, d2, e2, a2, b2, w5, 7); + R31(b1, c1, d1, e1, a1, w14, 6); + R32(b2, c2, d2, e2, a2, w1, 15); + R31(a1, b1, c1, d1, e1, w4, 7); + R32(a2, b2, c2, d2, e2, w3, 11); + R31(e1, a1, b1, c1, d1, w9, 14); + R32(e2, a2, b2, c2, d2, w7, 8); + R31(d1, e1, a1, b1, c1, w15, 9); + R32(d2, e2, a2, b2, c2, w14, 6); + R31(c1, d1, e1, a1, b1, w8, 13); + R32(c2, d2, e2, a2, b2, w6, 6); + R31(b1, c1, d1, e1, a1, w1, 15); + R32(b2, c2, d2, e2, a2, w9, 
14); + R31(a1, b1, c1, d1, e1, w2, 14); + R32(a2, b2, c2, d2, e2, w11, 12); + R31(e1, a1, b1, c1, d1, w7, 8); + R32(e2, a2, b2, c2, d2, w8, 13); + R31(d1, e1, a1, b1, c1, w0, 13); + R32(d2, e2, a2, b2, c2, w12, 5); + R31(c1, d1, e1, a1, b1, w6, 6); + R32(c2, d2, e2, a2, b2, w2, 14); + R31(b1, c1, d1, e1, a1, w13, 5); + R32(b2, c2, d2, e2, a2, w10, 13); + R31(a1, b1, c1, d1, e1, w11, 12); + R32(a2, b2, c2, d2, e2, w0, 13); + R31(e1, a1, b1, c1, d1, w5, 7); + R32(e2, a2, b2, c2, d2, w4, 7); + R31(d1, e1, a1, b1, c1, w12, 5); + R32(d2, e2, a2, b2, c2, w13, 5); + + R41(c1, d1, e1, a1, b1, w1, 11); + R42(c2, d2, e2, a2, b2, w8, 15); + R41(b1, c1, d1, e1, a1, w9, 12); + R42(b2, c2, d2, e2, a2, w6, 5); + R41(a1, b1, c1, d1, e1, w11, 14); + R42(a2, b2, c2, d2, e2, w4, 8); + R41(e1, a1, b1, c1, d1, w10, 15); + R42(e2, a2, b2, c2, d2, w1, 11); + R41(d1, e1, a1, b1, c1, w0, 14); + R42(d2, e2, a2, b2, c2, w3, 14); + R41(c1, d1, e1, a1, b1, w8, 15); + R42(c2, d2, e2, a2, b2, w11, 14); + R41(b1, c1, d1, e1, a1, w12, 9); + R42(b2, c2, d2, e2, a2, w15, 6); + R41(a1, b1, c1, d1, e1, w4, 8); + R42(a2, b2, c2, d2, e2, w0, 14); + R41(e1, a1, b1, c1, d1, w13, 9); + R42(e2, a2, b2, c2, d2, w5, 6); + R41(d1, e1, a1, b1, c1, w3, 14); + R42(d2, e2, a2, b2, c2, w12, 9); + R41(c1, d1, e1, a1, b1, w7, 5); + R42(c2, d2, e2, a2, b2, w2, 12); + R41(b1, c1, d1, e1, a1, w15, 6); + R42(b2, c2, d2, e2, a2, w13, 9); + R41(a1, b1, c1, d1, e1, w14, 8); + R42(a2, b2, c2, d2, e2, w9, 12); + R41(e1, a1, b1, c1, d1, w5, 6); + R42(e2, a2, b2, c2, d2, w7, 5); + R41(d1, e1, a1, b1, c1, w6, 5); + R42(d2, e2, a2, b2, c2, w10, 15); + R41(c1, d1, e1, a1, b1, w2, 12); + R42(c2, d2, e2, a2, b2, w14, 8); + + R51(b1, c1, d1, e1, a1, w4, 9); + R52(b2, c2, d2, e2, a2, w12, 8); + R51(a1, b1, c1, d1, e1, w0, 15); + R52(a2, b2, c2, d2, e2, w15, 5); + R51(e1, a1, b1, c1, d1, w5, 5); + R52(e2, a2, b2, c2, d2, w10, 12); + R51(d1, e1, a1, b1, c1, w9, 11); + R52(d2, e2, a2, b2, c2, w4, 9); + R51(c1, d1, e1, a1, b1, w7, 6); + R52(c2, d2, e2, a2, b2, w1, 12); + R51(b1, c1, d1, e1, a1, w12, 8); + R52(b2, c2, d2, e2, a2, w5, 5); + R51(a1, b1, c1, d1, e1, w2, 13); + R52(a2, b2, c2, d2, e2, w8, 14); + R51(e1, a1, b1, c1, d1, w10, 12); + R52(e2, a2, b2, c2, d2, w7, 6); + R51(d1, e1, a1, b1, c1, w14, 5); + R52(d2, e2, a2, b2, c2, w6, 8); + R51(c1, d1, e1, a1, b1, w1, 12); + R52(c2, d2, e2, a2, b2, w2, 13); + R51(b1, c1, d1, e1, a1, w3, 13); + R52(b2, c2, d2, e2, a2, w13, 6); + R51(a1, b1, c1, d1, e1, w8, 14); + R52(a2, b2, c2, d2, e2, w14, 5); + R51(e1, a1, b1, c1, d1, w11, 11); + R52(e2, a2, b2, c2, d2, w0, 15); + R51(d1, e1, a1, b1, c1, w6, 8); + R52(d2, e2, a2, b2, c2, w3, 13); + R51(c1, d1, e1, a1, b1, w15, 5); + R52(c2, d2, e2, a2, b2, w9, 11); + R51(b1, c1, d1, e1, a1, w13, 6); + R52(b2, c2, d2, e2, a2, w11, 11); + + uint32_t t = s[0]; + s[0] = s[1] + c1 + d2; + s[1] = s[2] + d1 + e2; + s[2] = s[3] + e1 + a2; + s[3] = s[4] + a1 + b2; + s[4] = t + b1 + c2; +} + +} // namespace ripemd160 + +} // namespace + +////// RIPEMD160 + +CRIPEMD160::CRIPEMD160() : bytes(0) +{ + ripemd160::Initialize(s); +} + +CRIPEMD160& CRIPEMD160::Write(const unsigned char* data, size_t len) +{ + const unsigned char* end = data + len; + size_t bufsize = bytes % 64; + if (bufsize && bufsize + len >= 64) { + // Fill the buffer, and process it. + memcpy(buf + bufsize, data, 64 - bufsize); + bytes += 64 - bufsize; + data += 64 - bufsize; + ripemd160::Transform(s, buf); + bufsize = 0; + } + while (end >= data + 64) { + // Process full chunks directly from the source. 
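+ // (The buffer is necessarily empty at this point: it was either flushed
+ // just above, or this Write() began on a block boundary. Whole 64-byte
+ // chunks can therefore be hashed straight from the caller's data,
+ // avoiding an extra copy; only a trailing partial block is buffered.)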
+ ripemd160::Transform(s, data); + bytes += 64; + data += 64; + } + if (end > data) { + // Fill the buffer with what remains. + memcpy(buf + bufsize, data, end - data); + bytes += end - data; + } + return *this; +} + +void CRIPEMD160::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + static const unsigned char pad[64] = {0x80}; + unsigned char sizedesc[8]; + WriteLE64(sizedesc, bytes << 3); + Write(pad, 1 + ((119 - (bytes % 64)) % 64)); + Write(sizedesc, 8); + WriteLE32(hash, s[0]); + WriteLE32(hash + 4, s[1]); + WriteLE32(hash + 8, s[2]); + WriteLE32(hash + 12, s[3]); + WriteLE32(hash + 16, s[4]); +} + +CRIPEMD160& CRIPEMD160::Reset() +{ + bytes = 0; + ripemd160::Initialize(s); + return *this; +} diff --git a/src/crypto/ripemd160.h b/src/crypto/ripemd160.h new file mode 100644 index 0000000000..38ea375c1f --- /dev/null +++ b/src/crypto/ripemd160.h @@ -0,0 +1,28 @@ +// Copyright (c) 2014-2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_RIPEMD160_H +#define BITCOIN_CRYPTO_RIPEMD160_H + +#include <stdint.h> +#include <stdlib.h> + +/** A hasher class for RIPEMD-160. */ +class CRIPEMD160 +{ +private: + uint32_t s[5]; + unsigned char buf[64]; + uint64_t bytes; + +public: + static const size_t OUTPUT_SIZE = 20; + + CRIPEMD160(); + CRIPEMD160& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + CRIPEMD160& Reset(); +}; + +#endif // BITCOIN_CRYPTO_RIPEMD160_H diff --git a/src/crypto/sha1.cpp b/src/crypto/sha1.cpp new file mode 100644 index 0000000000..dc96ac507a --- /dev/null +++ b/src/crypto/sha1.cpp @@ -0,0 +1,199 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <crypto/sha1.h> + +#include <crypto/common.h> + +#include <string.h> + +// Internal implementation code. +namespace +{ +/// Internal SHA-1 implementation. +namespace sha1 +{ +/** One round of SHA-1. */ +void inline Round(uint32_t a, uint32_t& b, uint32_t c, uint32_t d, uint32_t& e, uint32_t f, uint32_t k, uint32_t w) +{ + e += ((a << 5) | (a >> 27)) + f + k + w; + b = (b << 30) | (b >> 2); +} + +uint32_t inline f1(uint32_t b, uint32_t c, uint32_t d) { return d ^ (b & (c ^ d)); } +uint32_t inline f2(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; } +uint32_t inline f3(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (d & (b | c)); } + +uint32_t inline left(uint32_t x) { return (x << 1) | (x >> 31); } + +/** Initialize SHA-1 state. */ +void inline Initialize(uint32_t* s) +{ + s[0] = 0x67452301ul; + s[1] = 0xEFCDAB89ul; + s[2] = 0x98BADCFEul; + s[3] = 0x10325476ul; + s[4] = 0xC3D2E1F0ul; +} + +const uint32_t k1 = 0x5A827999ul; +const uint32_t k2 = 0x6ED9EBA1ul; +const uint32_t k3 = 0x8F1BBCDCul; +const uint32_t k4 = 0xCA62C1D6ul; + +/** Perform a SHA-1 transformation, processing a 64-byte chunk. 
*/ +void Transform(uint32_t* s, const unsigned char* chunk) +{ + uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4]; + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; + + Round(a, b, c, d, e, f1(b, c, d), k1, w0 = ReadBE32(chunk + 0)); + Round(e, a, b, c, d, f1(a, b, c), k1, w1 = ReadBE32(chunk + 4)); + Round(d, e, a, b, c, f1(e, a, b), k1, w2 = ReadBE32(chunk + 8)); + Round(c, d, e, a, b, f1(d, e, a), k1, w3 = ReadBE32(chunk + 12)); + Round(b, c, d, e, a, f1(c, d, e), k1, w4 = ReadBE32(chunk + 16)); + Round(a, b, c, d, e, f1(b, c, d), k1, w5 = ReadBE32(chunk + 20)); + Round(e, a, b, c, d, f1(a, b, c), k1, w6 = ReadBE32(chunk + 24)); + Round(d, e, a, b, c, f1(e, a, b), k1, w7 = ReadBE32(chunk + 28)); + Round(c, d, e, a, b, f1(d, e, a), k1, w8 = ReadBE32(chunk + 32)); + Round(b, c, d, e, a, f1(c, d, e), k1, w9 = ReadBE32(chunk + 36)); + Round(a, b, c, d, e, f1(b, c, d), k1, w10 = ReadBE32(chunk + 40)); + Round(e, a, b, c, d, f1(a, b, c), k1, w11 = ReadBE32(chunk + 44)); + Round(d, e, a, b, c, f1(e, a, b), k1, w12 = ReadBE32(chunk + 48)); + Round(c, d, e, a, b, f1(d, e, a), k1, w13 = ReadBE32(chunk + 52)); + Round(b, c, d, e, a, f1(c, d, e), k1, w14 = ReadBE32(chunk + 56)); + Round(a, b, c, d, e, f1(b, c, d), k1, w15 = ReadBE32(chunk + 60)); + + Round(e, a, b, c, d, f1(a, b, c), k1, w0 = left(w0 ^ w13 ^ w8 ^ w2)); + Round(d, e, a, b, c, f1(e, a, b), k1, w1 = left(w1 ^ w14 ^ w9 ^ w3)); + Round(c, d, e, a, b, f1(d, e, a), k1, w2 = left(w2 ^ w15 ^ w10 ^ w4)); + Round(b, c, d, e, a, f1(c, d, e), k1, w3 = left(w3 ^ w0 ^ w11 ^ w5)); + Round(a, b, c, d, e, f2(b, c, d), k2, w4 = left(w4 ^ w1 ^ w12 ^ w6)); + Round(e, a, b, c, d, f2(a, b, c), k2, w5 = left(w5 ^ w2 ^ w13 ^ w7)); + Round(d, e, a, b, c, f2(e, a, b), k2, w6 = left(w6 ^ w3 ^ w14 ^ w8)); + Round(c, d, e, a, b, f2(d, e, a), k2, w7 = left(w7 ^ w4 ^ w15 ^ w9)); + Round(b, c, d, e, a, f2(c, d, e), k2, w8 = left(w8 ^ w5 ^ w0 ^ w10)); + Round(a, b, c, d, e, f2(b, c, d), k2, w9 = left(w9 ^ w6 ^ w1 ^ w11)); + Round(e, a, b, c, d, f2(a, b, c), k2, w10 = left(w10 ^ w7 ^ w2 ^ w12)); + Round(d, e, a, b, c, f2(e, a, b), k2, w11 = left(w11 ^ w8 ^ w3 ^ w13)); + Round(c, d, e, a, b, f2(d, e, a), k2, w12 = left(w12 ^ w9 ^ w4 ^ w14)); + Round(b, c, d, e, a, f2(c, d, e), k2, w13 = left(w13 ^ w10 ^ w5 ^ w15)); + Round(a, b, c, d, e, f2(b, c, d), k2, w14 = left(w14 ^ w11 ^ w6 ^ w0)); + Round(e, a, b, c, d, f2(a, b, c), k2, w15 = left(w15 ^ w12 ^ w7 ^ w1)); + + Round(d, e, a, b, c, f2(e, a, b), k2, w0 = left(w0 ^ w13 ^ w8 ^ w2)); + Round(c, d, e, a, b, f2(d, e, a), k2, w1 = left(w1 ^ w14 ^ w9 ^ w3)); + Round(b, c, d, e, a, f2(c, d, e), k2, w2 = left(w2 ^ w15 ^ w10 ^ w4)); + Round(a, b, c, d, e, f2(b, c, d), k2, w3 = left(w3 ^ w0 ^ w11 ^ w5)); + Round(e, a, b, c, d, f2(a, b, c), k2, w4 = left(w4 ^ w1 ^ w12 ^ w6)); + Round(d, e, a, b, c, f2(e, a, b), k2, w5 = left(w5 ^ w2 ^ w13 ^ w7)); + Round(c, d, e, a, b, f2(d, e, a), k2, w6 = left(w6 ^ w3 ^ w14 ^ w8)); + Round(b, c, d, e, a, f2(c, d, e), k2, w7 = left(w7 ^ w4 ^ w15 ^ w9)); + Round(a, b, c, d, e, f3(b, c, d), k3, w8 = left(w8 ^ w5 ^ w0 ^ w10)); + Round(e, a, b, c, d, f3(a, b, c), k3, w9 = left(w9 ^ w6 ^ w1 ^ w11)); + Round(d, e, a, b, c, f3(e, a, b), k3, w10 = left(w10 ^ w7 ^ w2 ^ w12)); + Round(c, d, e, a, b, f3(d, e, a), k3, w11 = left(w11 ^ w8 ^ w3 ^ w13)); + Round(b, c, d, e, a, f3(c, d, e), k3, w12 = left(w12 ^ w9 ^ w4 ^ w14)); + Round(a, b, c, d, e, f3(b, c, d), k3, w13 = left(w13 ^ w10 ^ w5 ^ w15)); + Round(e, a, b, c, d, f3(a, b, c), k3, w14 = left(w14 ^ w11 ^ w6 ^ 
w0)); + Round(d, e, a, b, c, f3(e, a, b), k3, w15 = left(w15 ^ w12 ^ w7 ^ w1)); + + Round(c, d, e, a, b, f3(d, e, a), k3, w0 = left(w0 ^ w13 ^ w8 ^ w2)); + Round(b, c, d, e, a, f3(c, d, e), k3, w1 = left(w1 ^ w14 ^ w9 ^ w3)); + Round(a, b, c, d, e, f3(b, c, d), k3, w2 = left(w2 ^ w15 ^ w10 ^ w4)); + Round(e, a, b, c, d, f3(a, b, c), k3, w3 = left(w3 ^ w0 ^ w11 ^ w5)); + Round(d, e, a, b, c, f3(e, a, b), k3, w4 = left(w4 ^ w1 ^ w12 ^ w6)); + Round(c, d, e, a, b, f3(d, e, a), k3, w5 = left(w5 ^ w2 ^ w13 ^ w7)); + Round(b, c, d, e, a, f3(c, d, e), k3, w6 = left(w6 ^ w3 ^ w14 ^ w8)); + Round(a, b, c, d, e, f3(b, c, d), k3, w7 = left(w7 ^ w4 ^ w15 ^ w9)); + Round(e, a, b, c, d, f3(a, b, c), k3, w8 = left(w8 ^ w5 ^ w0 ^ w10)); + Round(d, e, a, b, c, f3(e, a, b), k3, w9 = left(w9 ^ w6 ^ w1 ^ w11)); + Round(c, d, e, a, b, f3(d, e, a), k3, w10 = left(w10 ^ w7 ^ w2 ^ w12)); + Round(b, c, d, e, a, f3(c, d, e), k3, w11 = left(w11 ^ w8 ^ w3 ^ w13)); + Round(a, b, c, d, e, f2(b, c, d), k4, w12 = left(w12 ^ w9 ^ w4 ^ w14)); + Round(e, a, b, c, d, f2(a, b, c), k4, w13 = left(w13 ^ w10 ^ w5 ^ w15)); + Round(d, e, a, b, c, f2(e, a, b), k4, w14 = left(w14 ^ w11 ^ w6 ^ w0)); + Round(c, d, e, a, b, f2(d, e, a), k4, w15 = left(w15 ^ w12 ^ w7 ^ w1)); + + Round(b, c, d, e, a, f2(c, d, e), k4, w0 = left(w0 ^ w13 ^ w8 ^ w2)); + Round(a, b, c, d, e, f2(b, c, d), k4, w1 = left(w1 ^ w14 ^ w9 ^ w3)); + Round(e, a, b, c, d, f2(a, b, c), k4, w2 = left(w2 ^ w15 ^ w10 ^ w4)); + Round(d, e, a, b, c, f2(e, a, b), k4, w3 = left(w3 ^ w0 ^ w11 ^ w5)); + Round(c, d, e, a, b, f2(d, e, a), k4, w4 = left(w4 ^ w1 ^ w12 ^ w6)); + Round(b, c, d, e, a, f2(c, d, e), k4, w5 = left(w5 ^ w2 ^ w13 ^ w7)); + Round(a, b, c, d, e, f2(b, c, d), k4, w6 = left(w6 ^ w3 ^ w14 ^ w8)); + Round(e, a, b, c, d, f2(a, b, c), k4, w7 = left(w7 ^ w4 ^ w15 ^ w9)); + Round(d, e, a, b, c, f2(e, a, b), k4, w8 = left(w8 ^ w5 ^ w0 ^ w10)); + Round(c, d, e, a, b, f2(d, e, a), k4, w9 = left(w9 ^ w6 ^ w1 ^ w11)); + Round(b, c, d, e, a, f2(c, d, e), k4, w10 = left(w10 ^ w7 ^ w2 ^ w12)); + Round(a, b, c, d, e, f2(b, c, d), k4, w11 = left(w11 ^ w8 ^ w3 ^ w13)); + Round(e, a, b, c, d, f2(a, b, c), k4, w12 = left(w12 ^ w9 ^ w4 ^ w14)); + Round(d, e, a, b, c, f2(e, a, b), k4, left(w13 ^ w10 ^ w5 ^ w15)); + Round(c, d, e, a, b, f2(d, e, a), k4, left(w14 ^ w11 ^ w6 ^ w0)); + Round(b, c, d, e, a, f2(c, d, e), k4, left(w15 ^ w12 ^ w7 ^ w1)); + + s[0] += a; + s[1] += b; + s[2] += c; + s[3] += d; + s[4] += e; +} + +} // namespace sha1 + +} // namespace + +////// SHA1 + +CSHA1::CSHA1() : bytes(0) +{ + sha1::Initialize(s); +} + +CSHA1& CSHA1::Write(const unsigned char* data, size_t len) +{ + const unsigned char* end = data + len; + size_t bufsize = bytes % 64; + if (bufsize && bufsize + len >= 64) { + // Fill the buffer, and process it. + memcpy(buf + bufsize, data, 64 - bufsize); + bytes += 64 - bufsize; + data += 64 - bufsize; + sha1::Transform(s, buf); + bufsize = 0; + } + while (end >= data + 64) { + // Process full chunks directly from the source. + sha1::Transform(s, data); + bytes += 64; + data += 64; + } + if (end > data) { + // Fill the buffer with what remains. 
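+ // (Fewer than 64 bytes are left, so they cannot be compressed yet;
+ // they wait in buf for the next Write() or for the padding that
+ // Finalize() appends.)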
+ memcpy(buf + bufsize, data, end - data); + bytes += end - data; + } + return *this; +} + +void CSHA1::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + static const unsigned char pad[64] = {0x80}; + unsigned char sizedesc[8]; + WriteBE64(sizedesc, bytes << 3); + Write(pad, 1 + ((119 - (bytes % 64)) % 64)); + Write(sizedesc, 8); + WriteBE32(hash, s[0]); + WriteBE32(hash + 4, s[1]); + WriteBE32(hash + 8, s[2]); + WriteBE32(hash + 12, s[3]); + WriteBE32(hash + 16, s[4]); +} + +CSHA1& CSHA1::Reset() +{ + bytes = 0; + sha1::Initialize(s); + return *this; +} diff --git a/src/crypto/sha1.h b/src/crypto/sha1.h new file mode 100644 index 0000000000..8b4568ee12 --- /dev/null +++ b/src/crypto/sha1.h @@ -0,0 +1,28 @@ +// Copyright (c) 2014-2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_SHA1_H +#define BITCOIN_CRYPTO_SHA1_H + +#include <stdint.h> +#include <stdlib.h> + +/** A hasher class for SHA1. */ +class CSHA1 +{ +private: + uint32_t s[5]; + unsigned char buf[64]; + uint64_t bytes; + +public: + static const size_t OUTPUT_SIZE = 20; + + CSHA1(); + CSHA1& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + CSHA1& Reset(); +}; + +#endif // BITCOIN_CRYPTO_SHA1_H diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp new file mode 100644 index 0000000000..f3245b8dea --- /dev/null +++ b/src/crypto/sha256.cpp @@ -0,0 +1,249 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include <crypto/sha256.h> +#include <crypto/common.h> + +#include <assert.h> +#include <string.h> +#include <atomic> + +#if defined(__x86_64__) || defined(__amd64__) +#if defined(USE_ASM) +#include <cpuid.h> +namespace sha256_sse4 +{ +void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks); +} +#endif +#endif + +// Internal implementation code. +namespace +{ +/// Internal SHA-256 implementation. +namespace sha256 +{ +uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); } +uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); } +uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); } +uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); } +uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); } +uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); } + +/** One round of SHA-256. */ +void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k, uint32_t w) +{ + uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k + w; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + d += t1; + h = t1 + t2; +} + +/** Initialize SHA-256 state. */ +void inline Initialize(uint32_t* s) +{ + s[0] = 0x6a09e667ul; + s[1] = 0xbb67ae85ul; + s[2] = 0x3c6ef372ul; + s[3] = 0xa54ff53aul; + s[4] = 0x510e527ful; + s[5] = 0x9b05688cul; + s[6] = 0x1f83d9abul; + s[7] = 0x5be0cd19ul; +} + +/** Perform a number of SHA-256 transformations, processing 64-byte chunks. 
*/ +void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks) +{ + while (blocks--) { + uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; + + Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = ReadBE32(chunk + 0)); + Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = ReadBE32(chunk + 4)); + Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = ReadBE32(chunk + 8)); + Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = ReadBE32(chunk + 12)); + Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = ReadBE32(chunk + 16)); + Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = ReadBE32(chunk + 20)); + Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = ReadBE32(chunk + 24)); + Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = ReadBE32(chunk + 28)); + Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = ReadBE32(chunk + 32)); + Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = ReadBE32(chunk + 36)); + Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = ReadBE32(chunk + 40)); + Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = ReadBE32(chunk + 44)); + Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = ReadBE32(chunk + 48)); + Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = ReadBE32(chunk + 52)); + Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = ReadBE32(chunk + 56)); + Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = ReadBE32(chunk + 60)); + + Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x0fc19dc6, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x240ca1cc, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x2de92c6f, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x4a7484aa, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x76f988da, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0x983e5152, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0xa831c66d, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0xb00327c8, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0xbf597fc7, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0xc6e00bf3, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xd5a79147, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0x06ca6351, w14 += sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0x14292967, w15 += sigma1(w13) + w8 + sigma0(w0)); + + Round(a, b, c, d, e, f, g, h, 0x27b70a85, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0x2e1b2138, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x53380d13, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x650a7354, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x766a0abb, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x81c2c92e, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x92722c85, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0xa81a664b, w9 += sigma1(w7) + w2 
+ sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0xc24b8b70, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0xc76c51a3, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0xd192e819, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xd6990624, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0xf40e3585, w14 += sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0x106aa070, w15 += sigma1(w13) + w8 + sigma0(w0)); + + Round(a, b, c, d, e, f, g, h, 0x19a4c116, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0x1e376c08, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x2748774c, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x34b0bcb5, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x391c0cb3, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x5b9cca4f, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x682e6ff3, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0x748f82ee, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0x78a5636f, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0x84c87814, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0x8cc70208, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0x90befffa, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xa4506ceb, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0xbef9a3f7, w14 + sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0xc67178f2, w15 + sigma1(w13) + w8 + sigma0(w0)); + + s[0] += a; + s[1] += b; + s[2] += c; + s[3] += d; + s[4] += e; + s[5] += f; + s[6] += g; + s[7] += h; + chunk += 64; + } +} + +} // namespace sha256 + +typedef void (*TransformType)(uint32_t*, const unsigned char*, size_t); + +bool SelfTest(TransformType tr) { + static const unsigned char in1[65] = {0, 0x80}; + static const unsigned char in2[129] = { + 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0 + }; + static const uint32_t init[8] = {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul}; + static const uint32_t out1[8] = {0xe3b0c442ul, 0x98fc1c14ul, 0x9afbf4c8ul, 0x996fb924ul, 0x27ae41e4ul, 0x649b934cul, 0xa495991bul, 0x7852b855ul}; + static const uint32_t out2[8] = {0xce4153b0ul, 0x147c2a86ul, 0x3ed4298eul, 0xe0676bc8ul, 0x79fc77a1ul, 0x2abe1f49ul, 0xb2b055dful, 0x1069523eul}; + uint32_t buf[8]; + memcpy(buf, init, sizeof(buf)); + // Process nothing, and check we remain in the initial state. 
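+ // (With blocks == 0 the chunk pointer is never dereferenced, so the
+ // nullptr below is safe for any Transform. The out1 state checked next
+ // is simply SHA256("") split into state words: the familiar digest
+ // beginning e3b0c442... and ending ...7852b855.)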
+ tr(buf, nullptr, 0); + if (memcmp(buf, init, sizeof(buf))) return false; + // Process the padded empty string (unaligned) + tr(buf, in1 + 1, 1); + if (memcmp(buf, out1, sizeof(buf))) return false; + // Process 64 spaces (unaligned) + memcpy(buf, init, sizeof(buf)); + tr(buf, in2 + 1, 2); + if (memcmp(buf, out2, sizeof(buf))) return false; + return true; +} + +TransformType Transform = sha256::Transform; + +} // namespace + +std::string SHA256AutoDetect() +{ +#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__)) + uint32_t eax, ebx, ecx, edx; + if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx >> 19) & 1) { + Transform = sha256_sse4::Transform; + assert(SelfTest(Transform)); + return "sse4"; + } +#endif + + assert(SelfTest(Transform)); + return "standard"; +} + +////// SHA-256 + +CSHA256::CSHA256() : bytes(0) +{ + sha256::Initialize(s); +} + +CSHA256& CSHA256::Write(const unsigned char* data, size_t len) +{ + const unsigned char* end = data + len; + size_t bufsize = bytes % 64; + if (bufsize && bufsize + len >= 64) { + // Fill the buffer, and process it. + memcpy(buf + bufsize, data, 64 - bufsize); + bytes += 64 - bufsize; + data += 64 - bufsize; + Transform(s, buf, 1); + bufsize = 0; + } + if (end - data >= 64) { + size_t blocks = (end - data) / 64; + Transform(s, data, blocks); + data += 64 * blocks; + bytes += 64 * blocks; + } + if (end > data) { + // Fill the buffer with what remains. + memcpy(buf + bufsize, data, end - data); + bytes += end - data; + } + return *this; +} + +void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) +{ + static const unsigned char pad[64] = {0x80}; + unsigned char sizedesc[8]; + WriteBE64(sizedesc, bytes << 3); + Write(pad, 1 + ((119 - (bytes % 64)) % 64)); + Write(sizedesc, 8); + WriteBE32(hash, s[0]); + WriteBE32(hash + 4, s[1]); + WriteBE32(hash + 8, s[2]); + WriteBE32(hash + 12, s[3]); + WriteBE32(hash + 16, s[4]); + WriteBE32(hash + 20, s[5]); + WriteBE32(hash + 24, s[6]); + WriteBE32(hash + 28, s[7]); +} + +CSHA256& CSHA256::Reset() +{ + bytes = 0; + sha256::Initialize(s); + return *this; +} diff --git a/src/crypto/sha256.h b/src/crypto/sha256.h new file mode 100644 index 0000000000..dd30fe396f --- /dev/null +++ b/src/crypto/sha256.h @@ -0,0 +1,34 @@ +// Copyright (c) 2014-2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_CRYPTO_SHA256_H +#define BITCOIN_CRYPTO_SHA256_H + +#include <stdint.h> +#include <stdlib.h> +#include <string> + +/** A hasher class for SHA-256. */ +class CSHA256 +{ +private: + uint32_t s[8]; + unsigned char buf[64]; + uint64_t bytes; + +public: + static const size_t OUTPUT_SIZE = 32; + + CSHA256(); + CSHA256& Write(const unsigned char* data, size_t len); + void Finalize(unsigned char hash[OUTPUT_SIZE]); + CSHA256& Reset(); +}; + +/** Autodetect the best available SHA256 implementation. + * Returns the name of the implementation. + */ +std::string SHA256AutoDetect(); + +#endif // BITCOIN_CRYPTO_SHA256_H diff --git a/src/crypto/sha256_sse4.cpp b/src/crypto/sha256_sse4.cpp new file mode 100644 index 0000000000..89f529a3ab --- /dev/null +++ b/src/crypto/sha256_sse4.cpp @@ -0,0 +1,1506 @@ +// Copyright (c) 2017 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
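+//
+// Runtime dispatch note: this routine is only reached through the
+// Transform function pointer, which SHA256AutoDetect() points here when
+// CPUID reports SSE4.1 support (bit 19 of ECX for leaf 1) and the
+// implementation passes SelfTest().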
+// +// This is a translation to GCC extended asm syntax from YASM code by Intel +// (available at the bottom of this file). + +#include <stdint.h> +#include <stdlib.h> + +#if defined(__x86_64__) || defined(__amd64__) + +namespace sha256_sse4 +{ +void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks) +{ + static const uint32_t K256 alignas(16) [] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + static const uint32_t FLIP_MASK alignas(16) [] = {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f}; + static const uint32_t SHUF_00BA alignas(16) [] = {0x03020100, 0x0b0a0908, 0xffffffff, 0xffffffff}; + static const uint32_t SHUF_DC00 alignas(16) [] = {0xffffffff, 0xffffffff, 0x03020100, 0x0b0a0908}; + uint32_t a, b, c, d, f, g, h, y0, y1, y2; + uint64_t tbl; + uint64_t inp_end, inp; + uint32_t xfer alignas(16) [4]; + + __asm__ __volatile__( + "shl $0x6,%2;" + "je Ldone_hash_%=;" + "add %1,%2;" + "mov %2,%14;" + "mov (%0),%3;" + "mov 0x4(%0),%4;" + "mov 0x8(%0),%5;" + "mov 0xc(%0),%6;" + "mov 0x10(%0),%k2;" + "mov 0x14(%0),%7;" + "mov 0x18(%0),%8;" + "mov 0x1c(%0),%9;" + "movdqa %18,%%xmm12;" + "movdqa %19,%%xmm10;" + "movdqa %20,%%xmm11;" + + "Lloop0_%=:" + "lea %17,%13;" + "movdqu (%1),%%xmm4;" + "pshufb %%xmm12,%%xmm4;" + "movdqu 0x10(%1),%%xmm5;" + "pshufb %%xmm12,%%xmm5;" + "movdqu 0x20(%1),%%xmm6;" + "pshufb %%xmm12,%%xmm6;" + "movdqu 0x30(%1),%%xmm7;" + "pshufb %%xmm12,%%xmm7;" + "mov %1,%15;" + "mov $3,%1;" + + "Lloop1_%=:" + "movdqa 0x0(%13),%%xmm9;" + "paddd %%xmm4,%%xmm9;" + "movdqa %%xmm9,%16;" + "movdqa %%xmm7,%%xmm0;" + "mov %k2,%10;" + "ror $0xe,%10;" + "mov %3,%11;" + "palignr $0x4,%%xmm6,%%xmm0;" + "ror $0x9,%11;" + "xor %k2,%10;" + "mov %7,%12;" + "ror $0x5,%10;" + "movdqa %%xmm5,%%xmm1;" + "xor %3,%11;" + "xor %8,%12;" + "paddd %%xmm4,%%xmm0;" + "xor %k2,%10;" + "and %k2,%12;" + "ror $0xb,%11;" + "palignr $0x4,%%xmm4,%%xmm1;" + "xor %3,%11;" + "ror $0x6,%10;" + "xor %8,%12;" + "movdqa %%xmm1,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add %16,%12;" + "movdqa %%xmm1,%%xmm3;" + "mov %3,%10;" + "add %12,%9;" + "mov %3,%12;" + "pslld $0x19,%%xmm1;" + "or %5,%10;" + "add %9,%6;" + "and %5,%12;" + "psrld $0x7,%%xmm2;" + "and %4,%10;" + "add %11,%9;" + "por %%xmm2,%%xmm1;" + "or %12,%10;" + "add %10,%9;" + "movdqa %%xmm3,%%xmm2;" + "mov %6,%10;" + "mov %9,%11;" + "movdqa %%xmm3,%%xmm8;" + "ror $0xe,%10;" + "xor %6,%10;" + "mov %k2,%12;" + "ror $0x9,%11;" + "pslld $0xe,%%xmm3;" + "xor %9,%11;" + "ror $0x5,%10;" + "xor %7,%12;" + "psrld $0x12,%%xmm2;" + "ror $0xb,%11;" + "xor %6,%10;" + "and %6,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm1;" + "xor %9,%11;" + "xor %7,%12;" + "psrld $0x3,%%xmm8;" + "add %10,%12;" + "add 4+%16,%12;" + "ror $0x2,%11;" + "pxor %%xmm2,%%xmm1;" + "mov %9,%10;" + "add %12,%8;" + "mov %9,%12;" + 
"pxor %%xmm8,%%xmm1;" + "or %4,%10;" + "add %8,%5;" + "and %4,%12;" + "pshufd $0xfa,%%xmm7,%%xmm2;" + "and %3,%10;" + "add %11,%8;" + "paddd %%xmm1,%%xmm0;" + "or %12,%10;" + "add %10,%8;" + "movdqa %%xmm2,%%xmm3;" + "mov %5,%10;" + "mov %8,%11;" + "ror $0xe,%10;" + "movdqa %%xmm2,%%xmm8;" + "xor %5,%10;" + "ror $0x9,%11;" + "mov %6,%12;" + "xor %8,%11;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %k2,%12;" + "psrlq $0x13,%%xmm3;" + "xor %5,%10;" + "and %5,%12;" + "psrld $0xa,%%xmm8;" + "ror $0xb,%11;" + "xor %8,%11;" + "xor %k2,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm2;" + "add %10,%12;" + "ror $0x2,%11;" + "add 8+%16,%12;" + "pxor %%xmm2,%%xmm8;" + "mov %8,%10;" + "add %12,%7;" + "mov %8,%12;" + "pshufb %%xmm10,%%xmm8;" + "or %3,%10;" + "add %7,%4;" + "and %3,%12;" + "paddd %%xmm8,%%xmm0;" + "and %9,%10;" + "add %11,%7;" + "pshufd $0x50,%%xmm0,%%xmm2;" + "or %12,%10;" + "add %10,%7;" + "movdqa %%xmm2,%%xmm3;" + "mov %4,%10;" + "ror $0xe,%10;" + "mov %7,%11;" + "movdqa %%xmm2,%%xmm4;" + "ror $0x9,%11;" + "xor %4,%10;" + "mov %5,%12;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %7,%11;" + "xor %6,%12;" + "psrlq $0x13,%%xmm3;" + "xor %4,%10;" + "and %4,%12;" + "ror $0xb,%11;" + "psrld $0xa,%%xmm4;" + "xor %7,%11;" + "ror $0x6,%10;" + "xor %6,%12;" + "pxor %%xmm3,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add 12+%16,%12;" + "pxor %%xmm2,%%xmm4;" + "mov %7,%10;" + "add %12,%k2;" + "mov %7,%12;" + "pshufb %%xmm11,%%xmm4;" + "or %9,%10;" + "add %k2,%3;" + "and %9,%12;" + "paddd %%xmm0,%%xmm4;" + "and %8,%10;" + "add %11,%k2;" + "or %12,%10;" + "add %10,%k2;" + "movdqa 0x10(%13),%%xmm9;" + "paddd %%xmm5,%%xmm9;" + "movdqa %%xmm9,%16;" + "movdqa %%xmm4,%%xmm0;" + "mov %3,%10;" + "ror $0xe,%10;" + "mov %k2,%11;" + "palignr $0x4,%%xmm7,%%xmm0;" + "ror $0x9,%11;" + "xor %3,%10;" + "mov %4,%12;" + "ror $0x5,%10;" + "movdqa %%xmm6,%%xmm1;" + "xor %k2,%11;" + "xor %5,%12;" + "paddd %%xmm5,%%xmm0;" + "xor %3,%10;" + "and %3,%12;" + "ror $0xb,%11;" + "palignr $0x4,%%xmm5,%%xmm1;" + "xor %k2,%11;" + "ror $0x6,%10;" + "xor %5,%12;" + "movdqa %%xmm1,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add %16,%12;" + "movdqa %%xmm1,%%xmm3;" + "mov %k2,%10;" + "add %12,%6;" + "mov %k2,%12;" + "pslld $0x19,%%xmm1;" + "or %8,%10;" + "add %6,%9;" + "and %8,%12;" + "psrld $0x7,%%xmm2;" + "and %7,%10;" + "add %11,%6;" + "por %%xmm2,%%xmm1;" + "or %12,%10;" + "add %10,%6;" + "movdqa %%xmm3,%%xmm2;" + "mov %9,%10;" + "mov %6,%11;" + "movdqa %%xmm3,%%xmm8;" + "ror $0xe,%10;" + "xor %9,%10;" + "mov %3,%12;" + "ror $0x9,%11;" + "pslld $0xe,%%xmm3;" + "xor %6,%11;" + "ror $0x5,%10;" + "xor %4,%12;" + "psrld $0x12,%%xmm2;" + "ror $0xb,%11;" + "xor %9,%10;" + "and %9,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm1;" + "xor %6,%11;" + "xor %4,%12;" + "psrld $0x3,%%xmm8;" + "add %10,%12;" + "add 4+%16,%12;" + "ror $0x2,%11;" + "pxor %%xmm2,%%xmm1;" + "mov %6,%10;" + "add %12,%5;" + "mov %6,%12;" + "pxor %%xmm8,%%xmm1;" + "or %7,%10;" + "add %5,%8;" + "and %7,%12;" + "pshufd $0xfa,%%xmm4,%%xmm2;" + "and %k2,%10;" + "add %11,%5;" + "paddd %%xmm1,%%xmm0;" + "or %12,%10;" + "add %10,%5;" + "movdqa %%xmm2,%%xmm3;" + "mov %8,%10;" + "mov %5,%11;" + "ror $0xe,%10;" + "movdqa %%xmm2,%%xmm8;" + "xor %8,%10;" + "ror $0x9,%11;" + "mov %9,%12;" + "xor %5,%11;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %3,%12;" + "psrlq $0x13,%%xmm3;" + "xor %8,%10;" + "and %8,%12;" + "psrld $0xa,%%xmm8;" + "ror $0xb,%11;" + "xor %5,%11;" + "xor %3,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm2;" + "add %10,%12;" + "ror $0x2,%11;" + 
"add 8+%16,%12;" + "pxor %%xmm2,%%xmm8;" + "mov %5,%10;" + "add %12,%4;" + "mov %5,%12;" + "pshufb %%xmm10,%%xmm8;" + "or %k2,%10;" + "add %4,%7;" + "and %k2,%12;" + "paddd %%xmm8,%%xmm0;" + "and %6,%10;" + "add %11,%4;" + "pshufd $0x50,%%xmm0,%%xmm2;" + "or %12,%10;" + "add %10,%4;" + "movdqa %%xmm2,%%xmm3;" + "mov %7,%10;" + "ror $0xe,%10;" + "mov %4,%11;" + "movdqa %%xmm2,%%xmm5;" + "ror $0x9,%11;" + "xor %7,%10;" + "mov %8,%12;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %4,%11;" + "xor %9,%12;" + "psrlq $0x13,%%xmm3;" + "xor %7,%10;" + "and %7,%12;" + "ror $0xb,%11;" + "psrld $0xa,%%xmm5;" + "xor %4,%11;" + "ror $0x6,%10;" + "xor %9,%12;" + "pxor %%xmm3,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add 12+%16,%12;" + "pxor %%xmm2,%%xmm5;" + "mov %4,%10;" + "add %12,%3;" + "mov %4,%12;" + "pshufb %%xmm11,%%xmm5;" + "or %6,%10;" + "add %3,%k2;" + "and %6,%12;" + "paddd %%xmm0,%%xmm5;" + "and %5,%10;" + "add %11,%3;" + "or %12,%10;" + "add %10,%3;" + "movdqa 0x20(%13),%%xmm9;" + "paddd %%xmm6,%%xmm9;" + "movdqa %%xmm9,%16;" + "movdqa %%xmm5,%%xmm0;" + "mov %k2,%10;" + "ror $0xe,%10;" + "mov %3,%11;" + "palignr $0x4,%%xmm4,%%xmm0;" + "ror $0x9,%11;" + "xor %k2,%10;" + "mov %7,%12;" + "ror $0x5,%10;" + "movdqa %%xmm7,%%xmm1;" + "xor %3,%11;" + "xor %8,%12;" + "paddd %%xmm6,%%xmm0;" + "xor %k2,%10;" + "and %k2,%12;" + "ror $0xb,%11;" + "palignr $0x4,%%xmm6,%%xmm1;" + "xor %3,%11;" + "ror $0x6,%10;" + "xor %8,%12;" + "movdqa %%xmm1,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add %16,%12;" + "movdqa %%xmm1,%%xmm3;" + "mov %3,%10;" + "add %12,%9;" + "mov %3,%12;" + "pslld $0x19,%%xmm1;" + "or %5,%10;" + "add %9,%6;" + "and %5,%12;" + "psrld $0x7,%%xmm2;" + "and %4,%10;" + "add %11,%9;" + "por %%xmm2,%%xmm1;" + "or %12,%10;" + "add %10,%9;" + "movdqa %%xmm3,%%xmm2;" + "mov %6,%10;" + "mov %9,%11;" + "movdqa %%xmm3,%%xmm8;" + "ror $0xe,%10;" + "xor %6,%10;" + "mov %k2,%12;" + "ror $0x9,%11;" + "pslld $0xe,%%xmm3;" + "xor %9,%11;" + "ror $0x5,%10;" + "xor %7,%12;" + "psrld $0x12,%%xmm2;" + "ror $0xb,%11;" + "xor %6,%10;" + "and %6,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm1;" + "xor %9,%11;" + "xor %7,%12;" + "psrld $0x3,%%xmm8;" + "add %10,%12;" + "add 4+%16,%12;" + "ror $0x2,%11;" + "pxor %%xmm2,%%xmm1;" + "mov %9,%10;" + "add %12,%8;" + "mov %9,%12;" + "pxor %%xmm8,%%xmm1;" + "or %4,%10;" + "add %8,%5;" + "and %4,%12;" + "pshufd $0xfa,%%xmm5,%%xmm2;" + "and %3,%10;" + "add %11,%8;" + "paddd %%xmm1,%%xmm0;" + "or %12,%10;" + "add %10,%8;" + "movdqa %%xmm2,%%xmm3;" + "mov %5,%10;" + "mov %8,%11;" + "ror $0xe,%10;" + "movdqa %%xmm2,%%xmm8;" + "xor %5,%10;" + "ror $0x9,%11;" + "mov %6,%12;" + "xor %8,%11;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %k2,%12;" + "psrlq $0x13,%%xmm3;" + "xor %5,%10;" + "and %5,%12;" + "psrld $0xa,%%xmm8;" + "ror $0xb,%11;" + "xor %8,%11;" + "xor %k2,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm2;" + "add %10,%12;" + "ror $0x2,%11;" + "add 8+%16,%12;" + "pxor %%xmm2,%%xmm8;" + "mov %8,%10;" + "add %12,%7;" + "mov %8,%12;" + "pshufb %%xmm10,%%xmm8;" + "or %3,%10;" + "add %7,%4;" + "and %3,%12;" + "paddd %%xmm8,%%xmm0;" + "and %9,%10;" + "add %11,%7;" + "pshufd $0x50,%%xmm0,%%xmm2;" + "or %12,%10;" + "add %10,%7;" + "movdqa %%xmm2,%%xmm3;" + "mov %4,%10;" + "ror $0xe,%10;" + "mov %7,%11;" + "movdqa %%xmm2,%%xmm6;" + "ror $0x9,%11;" + "xor %4,%10;" + "mov %5,%12;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %7,%11;" + "xor %6,%12;" + "psrlq $0x13,%%xmm3;" + "xor %4,%10;" + "and %4,%12;" + "ror $0xb,%11;" + "psrld $0xa,%%xmm6;" + "xor %7,%11;" + "ror 
$0x6,%10;" + "xor %6,%12;" + "pxor %%xmm3,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add 12+%16,%12;" + "pxor %%xmm2,%%xmm6;" + "mov %7,%10;" + "add %12,%k2;" + "mov %7,%12;" + "pshufb %%xmm11,%%xmm6;" + "or %9,%10;" + "add %k2,%3;" + "and %9,%12;" + "paddd %%xmm0,%%xmm6;" + "and %8,%10;" + "add %11,%k2;" + "or %12,%10;" + "add %10,%k2;" + "movdqa 0x30(%13),%%xmm9;" + "paddd %%xmm7,%%xmm9;" + "movdqa %%xmm9,%16;" + "add $0x40,%13;" + "movdqa %%xmm6,%%xmm0;" + "mov %3,%10;" + "ror $0xe,%10;" + "mov %k2,%11;" + "palignr $0x4,%%xmm5,%%xmm0;" + "ror $0x9,%11;" + "xor %3,%10;" + "mov %4,%12;" + "ror $0x5,%10;" + "movdqa %%xmm4,%%xmm1;" + "xor %k2,%11;" + "xor %5,%12;" + "paddd %%xmm7,%%xmm0;" + "xor %3,%10;" + "and %3,%12;" + "ror $0xb,%11;" + "palignr $0x4,%%xmm7,%%xmm1;" + "xor %k2,%11;" + "ror $0x6,%10;" + "xor %5,%12;" + "movdqa %%xmm1,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add %16,%12;" + "movdqa %%xmm1,%%xmm3;" + "mov %k2,%10;" + "add %12,%6;" + "mov %k2,%12;" + "pslld $0x19,%%xmm1;" + "or %8,%10;" + "add %6,%9;" + "and %8,%12;" + "psrld $0x7,%%xmm2;" + "and %7,%10;" + "add %11,%6;" + "por %%xmm2,%%xmm1;" + "or %12,%10;" + "add %10,%6;" + "movdqa %%xmm3,%%xmm2;" + "mov %9,%10;" + "mov %6,%11;" + "movdqa %%xmm3,%%xmm8;" + "ror $0xe,%10;" + "xor %9,%10;" + "mov %3,%12;" + "ror $0x9,%11;" + "pslld $0xe,%%xmm3;" + "xor %6,%11;" + "ror $0x5,%10;" + "xor %4,%12;" + "psrld $0x12,%%xmm2;" + "ror $0xb,%11;" + "xor %9,%10;" + "and %9,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm1;" + "xor %6,%11;" + "xor %4,%12;" + "psrld $0x3,%%xmm8;" + "add %10,%12;" + "add 4+%16,%12;" + "ror $0x2,%11;" + "pxor %%xmm2,%%xmm1;" + "mov %6,%10;" + "add %12,%5;" + "mov %6,%12;" + "pxor %%xmm8,%%xmm1;" + "or %7,%10;" + "add %5,%8;" + "and %7,%12;" + "pshufd $0xfa,%%xmm6,%%xmm2;" + "and %k2,%10;" + "add %11,%5;" + "paddd %%xmm1,%%xmm0;" + "or %12,%10;" + "add %10,%5;" + "movdqa %%xmm2,%%xmm3;" + "mov %8,%10;" + "mov %5,%11;" + "ror $0xe,%10;" + "movdqa %%xmm2,%%xmm8;" + "xor %8,%10;" + "ror $0x9,%11;" + "mov %9,%12;" + "xor %5,%11;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %3,%12;" + "psrlq $0x13,%%xmm3;" + "xor %8,%10;" + "and %8,%12;" + "psrld $0xa,%%xmm8;" + "ror $0xb,%11;" + "xor %5,%11;" + "xor %3,%12;" + "ror $0x6,%10;" + "pxor %%xmm3,%%xmm2;" + "add %10,%12;" + "ror $0x2,%11;" + "add 8+%16,%12;" + "pxor %%xmm2,%%xmm8;" + "mov %5,%10;" + "add %12,%4;" + "mov %5,%12;" + "pshufb %%xmm10,%%xmm8;" + "or %k2,%10;" + "add %4,%7;" + "and %k2,%12;" + "paddd %%xmm8,%%xmm0;" + "and %6,%10;" + "add %11,%4;" + "pshufd $0x50,%%xmm0,%%xmm2;" + "or %12,%10;" + "add %10,%4;" + "movdqa %%xmm2,%%xmm3;" + "mov %7,%10;" + "ror $0xe,%10;" + "mov %4,%11;" + "movdqa %%xmm2,%%xmm7;" + "ror $0x9,%11;" + "xor %7,%10;" + "mov %8,%12;" + "ror $0x5,%10;" + "psrlq $0x11,%%xmm2;" + "xor %4,%11;" + "xor %9,%12;" + "psrlq $0x13,%%xmm3;" + "xor %7,%10;" + "and %7,%12;" + "ror $0xb,%11;" + "psrld $0xa,%%xmm7;" + "xor %4,%11;" + "ror $0x6,%10;" + "xor %9,%12;" + "pxor %%xmm3,%%xmm2;" + "ror $0x2,%11;" + "add %10,%12;" + "add 12+%16,%12;" + "pxor %%xmm2,%%xmm7;" + "mov %4,%10;" + "add %12,%3;" + "mov %4,%12;" + "pshufb %%xmm11,%%xmm7;" + "or %6,%10;" + "add %3,%k2;" + "and %6,%12;" + "paddd %%xmm0,%%xmm7;" + "and %5,%10;" + "add %11,%3;" + "or %12,%10;" + "add %10,%3;" + "sub $0x1,%1;" + "jne Lloop1_%=;" + "mov $0x2,%1;" + + "Lloop2_%=:" + "paddd 0x0(%13),%%xmm4;" + "movdqa %%xmm4,%16;" + "mov %k2,%10;" + "ror $0xe,%10;" + "mov %3,%11;" + "xor %k2,%10;" + "ror $0x9,%11;" + "mov %7,%12;" + "xor %3,%11;" + "ror $0x5,%10;" + "xor 
%8,%12;" + "xor %k2,%10;" + "ror $0xb,%11;" + "and %k2,%12;" + "xor %3,%11;" + "ror $0x6,%10;" + "xor %8,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add %16,%12;" + "mov %3,%10;" + "add %12,%9;" + "mov %3,%12;" + "or %5,%10;" + "add %9,%6;" + "and %5,%12;" + "and %4,%10;" + "add %11,%9;" + "or %12,%10;" + "add %10,%9;" + "mov %6,%10;" + "ror $0xe,%10;" + "mov %9,%11;" + "xor %6,%10;" + "ror $0x9,%11;" + "mov %k2,%12;" + "xor %9,%11;" + "ror $0x5,%10;" + "xor %7,%12;" + "xor %6,%10;" + "ror $0xb,%11;" + "and %6,%12;" + "xor %9,%11;" + "ror $0x6,%10;" + "xor %7,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add 4+%16,%12;" + "mov %9,%10;" + "add %12,%8;" + "mov %9,%12;" + "or %4,%10;" + "add %8,%5;" + "and %4,%12;" + "and %3,%10;" + "add %11,%8;" + "or %12,%10;" + "add %10,%8;" + "mov %5,%10;" + "ror $0xe,%10;" + "mov %8,%11;" + "xor %5,%10;" + "ror $0x9,%11;" + "mov %6,%12;" + "xor %8,%11;" + "ror $0x5,%10;" + "xor %k2,%12;" + "xor %5,%10;" + "ror $0xb,%11;" + "and %5,%12;" + "xor %8,%11;" + "ror $0x6,%10;" + "xor %k2,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add 8+%16,%12;" + "mov %8,%10;" + "add %12,%7;" + "mov %8,%12;" + "or %3,%10;" + "add %7,%4;" + "and %3,%12;" + "and %9,%10;" + "add %11,%7;" + "or %12,%10;" + "add %10,%7;" + "mov %4,%10;" + "ror $0xe,%10;" + "mov %7,%11;" + "xor %4,%10;" + "ror $0x9,%11;" + "mov %5,%12;" + "xor %7,%11;" + "ror $0x5,%10;" + "xor %6,%12;" + "xor %4,%10;" + "ror $0xb,%11;" + "and %4,%12;" + "xor %7,%11;" + "ror $0x6,%10;" + "xor %6,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add 12+%16,%12;" + "mov %7,%10;" + "add %12,%k2;" + "mov %7,%12;" + "or %9,%10;" + "add %k2,%3;" + "and %9,%12;" + "and %8,%10;" + "add %11,%k2;" + "or %12,%10;" + "add %10,%k2;" + "paddd 0x10(%13),%%xmm5;" + "movdqa %%xmm5,%16;" + "add $0x20,%13;" + "mov %3,%10;" + "ror $0xe,%10;" + "mov %k2,%11;" + "xor %3,%10;" + "ror $0x9,%11;" + "mov %4,%12;" + "xor %k2,%11;" + "ror $0x5,%10;" + "xor %5,%12;" + "xor %3,%10;" + "ror $0xb,%11;" + "and %3,%12;" + "xor %k2,%11;" + "ror $0x6,%10;" + "xor %5,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add %16,%12;" + "mov %k2,%10;" + "add %12,%6;" + "mov %k2,%12;" + "or %8,%10;" + "add %6,%9;" + "and %8,%12;" + "and %7,%10;" + "add %11,%6;" + "or %12,%10;" + "add %10,%6;" + "mov %9,%10;" + "ror $0xe,%10;" + "mov %6,%11;" + "xor %9,%10;" + "ror $0x9,%11;" + "mov %3,%12;" + "xor %6,%11;" + "ror $0x5,%10;" + "xor %4,%12;" + "xor %9,%10;" + "ror $0xb,%11;" + "and %9,%12;" + "xor %6,%11;" + "ror $0x6,%10;" + "xor %4,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add 4+%16,%12;" + "mov %6,%10;" + "add %12,%5;" + "mov %6,%12;" + "or %7,%10;" + "add %5,%8;" + "and %7,%12;" + "and %k2,%10;" + "add %11,%5;" + "or %12,%10;" + "add %10,%5;" + "mov %8,%10;" + "ror $0xe,%10;" + "mov %5,%11;" + "xor %8,%10;" + "ror $0x9,%11;" + "mov %9,%12;" + "xor %5,%11;" + "ror $0x5,%10;" + "xor %3,%12;" + "xor %8,%10;" + "ror $0xb,%11;" + "and %8,%12;" + "xor %5,%11;" + "ror $0x6,%10;" + "xor %3,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add 8+%16,%12;" + "mov %5,%10;" + "add %12,%4;" + "mov %5,%12;" + "or %k2,%10;" + "add %4,%7;" + "and %k2,%12;" + "and %6,%10;" + "add %11,%4;" + "or %12,%10;" + "add %10,%4;" + "mov %7,%10;" + "ror $0xe,%10;" + "mov %4,%11;" + "xor %7,%10;" + "ror $0x9,%11;" + "mov %8,%12;" + "xor %4,%11;" + "ror $0x5,%10;" + "xor %9,%12;" + "xor %7,%10;" + "ror $0xb,%11;" + "and %7,%12;" + "xor %4,%11;" + "ror $0x6,%10;" + "xor %9,%12;" + "add %10,%12;" + "ror $0x2,%11;" + "add 12+%16,%12;" + "mov %4,%10;" + "add %12,%3;" + "mov %4,%12;" + "or %6,%10;" + "add 
%3,%k2;" + "and %6,%12;" + "and %5,%10;" + "add %11,%3;" + "or %12,%10;" + "add %10,%3;" + "movdqa %%xmm6,%%xmm4;" + "movdqa %%xmm7,%%xmm5;" + "sub $0x1,%1;" + "jne Lloop2_%=;" + "add (%0),%3;" + "mov %3,(%0);" + "add 0x4(%0),%4;" + "mov %4,0x4(%0);" + "add 0x8(%0),%5;" + "mov %5,0x8(%0);" + "add 0xc(%0),%6;" + "mov %6,0xc(%0);" + "add 0x10(%0),%k2;" + "mov %k2,0x10(%0);" + "add 0x14(%0),%7;" + "mov %7,0x14(%0);" + "add 0x18(%0),%8;" + "mov %8,0x18(%0);" + "add 0x1c(%0),%9;" + "mov %9,0x1c(%0);" + "mov %15,%1;" + "add $0x40,%1;" + "cmp %14,%1;" + "jne Lloop0_%=;" + + "Ldone_hash_%=:" + + : "+r"(s), "+r"(chunk), "+r"(blocks), "=r"(a), "=r"(b), "=r"(c), "=r"(d), /* e = chunk */ "=r"(f), "=r"(g), "=r"(h), "=r"(y0), "=r"(y1), "=r"(y2), "=r"(tbl), "+m"(inp_end), "+m"(inp), "+m"(xfer) + : "m"(K256), "m"(FLIP_MASK), "m"(SHUF_00BA), "m"(SHUF_DC00) + : "cc", "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12" + ); +} +} + +/* +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright (c) 2012, Intel Corporation +; +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the +; distribution. +; +; * Neither the name of the Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; +; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY +; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Example YASM command lines: +; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_sse4.obj -g cv8 sha256_sse4.asm +; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_sse4.o sha256_sse4.asm +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; This code is described in an Intel White-Paper: +; "Fast SHA-256 Implementations on Intel Architecture Processors" +; +; To find it, surf to http://www.intel.com/p/en_US/embedded +; and search for that title. 
+; The paper is expected to be released roughly at the end of April, 2012 +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; This code schedules 1 blocks at a time, with 4 lanes per block +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define MOVDQ movdqu ;; assume buffers not aligned + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros + +; addm [mem], reg +; Add reg to mem using reg-mem add and store +%macro addm 2 + add %2, %1 + mov %1, %2 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask +; Load xmm with mem and byte swap each dword +%macro COPY_XMM_AND_BSWAP 3 + MOVDQ %1, %2 + pshufb %1, %3 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define X0 xmm4 +%define X1 xmm5 +%define X2 xmm6 +%define X3 xmm7 + +%define XTMP0 xmm0 +%define XTMP1 xmm1 +%define XTMP2 xmm2 +%define XTMP3 xmm3 +%define XTMP4 xmm8 +%define XFER xmm9 + +%define SHUF_00BA xmm10 ; shuffle xBxA -> 00BA +%define SHUF_DC00 xmm11 ; shuffle xDxC -> DC00 +%define BYTE_FLIP_MASK xmm12 + +%ifdef LINUX +%define NUM_BLKS rdx ; 3rd arg +%define CTX rsi ; 2nd arg +%define INP rdi ; 1st arg + +%define SRND rdi ; clobbers INP +%define c ecx +%define d r8d +%define e edx +%else +%define NUM_BLKS r8 ; 3rd arg +%define CTX rdx ; 2nd arg +%define INP rcx ; 1st arg + +%define SRND rcx ; clobbers INP +%define c edi +%define d esi +%define e r8d + +%endif +%define TBL rbp +%define a eax +%define b ebx + +%define f r9d +%define g r10d +%define h r11d + +%define y0 r13d +%define y1 r14d +%define y2 r15d + + + +_INP_END_SIZE equ 8 +_INP_SIZE equ 8 +_XFER_SIZE equ 8 +%ifdef LINUX +_XMM_SAVE_SIZE equ 0 +%else +_XMM_SAVE_SIZE equ 7*16 +%endif +; STACK_SIZE plus pushes must be an odd multiple of 8 +_ALIGN_SIZE equ 8 + +_INP_END equ 0 +_INP equ _INP_END + _INP_END_SIZE +_XFER equ _INP + _INP_SIZE +_XMM_SAVE equ _XFER + _XFER_SIZE + _ALIGN_SIZE +STACK_SIZE equ _XMM_SAVE + _XMM_SAVE_SIZE + +; rotate_Xs +; Rotate values of symbols X0...X3 +%macro rotate_Xs 0 +%xdefine X_ X0 +%xdefine X0 X1 +%xdefine X1 X2 +%xdefine X2 X3 +%xdefine X3 X_ +%endm + +; ROTATE_ARGS +; Rotate values of symbols a...h +%macro ROTATE_ARGS 0 +%xdefine TMP_ h +%xdefine h g +%xdefine g f +%xdefine f e +%xdefine e d +%xdefine d c +%xdefine c b +%xdefine b a +%xdefine a TMP_ +%endm + +%macro FOUR_ROUNDS_AND_SCHED 0 + ;; compute s0 four at a time and s1 two at a time + ;; compute W[-16] + W[-7] 4 at a time + movdqa XTMP0, X3 + mov y0, e ; y0 = e + ror y0, (25-11) ; y0 = e >> (25-11) + mov y1, a ; y1 = a + palignr XTMP0, X2, 4 ; XTMP0 = W[-7] + ror y1, (22-13) ; y1 = a >> (22-13) + xor y0, e ; y0 = e ^ (e >> (25-11)) + mov y2, f ; y2 = f + ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) + movdqa XTMP1, X1 + xor y1, a ; y1 = a ^ (a >> (22-13) + xor y2, g ; y2 = f^g + paddd XTMP0, X0 ; XTMP0 = W[-7] + W[-16] + xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and y2, e ; y2 = (f^g)&e + ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) + ;; compute s0 + palignr XTMP1, X0, 4 ; XTMP1 = W[-15] + xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ror y0, 6 ; y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor y2, g ; y2 = CH = ((f^g)&e)^g + movdqa XTMP2, XTMP1 ; XTMP2 = W[-15] + ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y2, y0 ; y2 = S1 + CH + add y2, [rsp + _XFER + 0*4] ; y2 = k + w + S1 + CH + movdqa XTMP3, XTMP1 ; XTMP3 = W[-15] + mov y0, a ; y0 = a + add h, y2 ; h = h + S1 + CH + k + w + mov y2, a ; y2 = a + pslld XTMP1, (32-7) + or y0, c ; y0 = a|c + add d, h ; d = 
d + h + S1 + CH + k + w + and y2, c ; y2 = a&c + psrld XTMP2, 7 + and y0, b ; y0 = (a|c)&b + add h, y1 ; h = h + S1 + CH + k + w + S0 + por XTMP1, XTMP2 ; XTMP1 = W[-15] ror 7 + or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c) + add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ + +ROTATE_ARGS + movdqa XTMP2, XTMP3 ; XTMP2 = W[-15] + mov y0, e ; y0 = e + mov y1, a ; y1 = a + movdqa XTMP4, XTMP3 ; XTMP4 = W[-15] + ror y0, (25-11) ; y0 = e >> (25-11) + xor y0, e ; y0 = e ^ (e >> (25-11)) + mov y2, f ; y2 = f + ror y1, (22-13) ; y1 = a >> (22-13) + pslld XTMP3, (32-18) + xor y1, a ; y1 = a ^ (a >> (22-13) + ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor y2, g ; y2 = f^g + psrld XTMP2, 18 + ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) + xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and y2, e ; y2 = (f^g)&e + ror y0, 6 ; y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + pxor XTMP1, XTMP3 + xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + xor y2, g ; y2 = CH = ((f^g)&e)^g + psrld XTMP4, 3 ; XTMP4 = W[-15] >> 3 + add y2, y0 ; y2 = S1 + CH + add y2, [rsp + _XFER + 1*4] ; y2 = k + w + S1 + CH + ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + pxor XTMP1, XTMP2 ; XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 + mov y0, a ; y0 = a + add h, y2 ; h = h + S1 + CH + k + w + mov y2, a ; y2 = a + pxor XTMP1, XTMP4 ; XTMP1 = s0 + or y0, c ; y0 = a|c + add d, h ; d = d + h + S1 + CH + k + w + and y2, c ; y2 = a&c + ;; compute low s1 + pshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA} + and y0, b ; y0 = (a|c)&b + add h, y1 ; h = h + S1 + CH + k + w + S0 + paddd XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0 + or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c) + add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ + +ROTATE_ARGS + movdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {BBAA} + mov y0, e ; y0 = e + mov y1, a ; y1 = a + ror y0, (25-11) ; y0 = e >> (25-11) + movdqa XTMP4, XTMP2 ; XTMP4 = W[-2] {BBAA} + xor y0, e ; y0 = e ^ (e >> (25-11)) + ror y1, (22-13) ; y1 = a >> (22-13) + mov y2, f ; y2 = f + xor y1, a ; y1 = a ^ (a >> (22-13) + ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) + psrlq XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xBxA} + xor y2, g ; y2 = f^g + psrlq XTMP3, 19 ; XTMP3 = W[-2] ror 19 {xBxA} + xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and y2, e ; y2 = (f^g)&e + psrld XTMP4, 10 ; XTMP4 = W[-2] >> 10 {BBAA} + ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) + xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + xor y2, g ; y2 = CH = ((f^g)&e)^g + ror y0, 6 ; y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + pxor XTMP2, XTMP3 + add y2, y0 ; y2 = S1 + CH + ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y2, [rsp + _XFER + 2*4] ; y2 = k + w + S1 + CH + pxor XTMP4, XTMP2 ; XTMP4 = s1 {xBxA} + mov y0, a ; y0 = a + add h, y2 ; h = h + S1 + CH + k + w + mov y2, a ; y2 = a + pshufb XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA} + or y0, c ; y0 = a|c + add d, h ; d = d + h + S1 + CH + k + w + and y2, c ; y2 = a&c + paddd XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]} + and y0, b ; y0 = (a|c)&b + add h, y1 ; h = h + S1 + CH + k + w + S0 + ;; compute high s1 + pshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC} + or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c) + add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ + +ROTATE_ARGS + movdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {DDCC} + mov y0, e ; y0 = e + ror y0, (25-11) ; y0 = e >> (25-11) + mov y1, a ; y1 = a + movdqa X0, XTMP2 ; X0 = W[-2] {DDCC} + ror y1, (22-13) ; y1 = a >> (22-13) + xor y0, e ; y0 = e ^ (e >> (25-11)) + mov y2, f ; y2 = f + ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) + psrlq XTMP2, 17 ; 
XTMP2 = W[-2] ror 17 {xDxC} + xor y1, a ; y1 = a ^ (a >> (22-13) + xor y2, g ; y2 = f^g + psrlq XTMP3, 19 ; XTMP3 = W[-2] ror 19 {xDxC} + xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + and y2, e ; y2 = (f^g)&e + ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) + psrld X0, 10 ; X0 = W[-2] >> 10 {DDCC} + xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ror y0, 6 ; y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor y2, g ; y2 = CH = ((f^g)&e)^g + pxor XTMP2, XTMP3 + ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y2, y0 ; y2 = S1 + CH + add y2, [rsp + _XFER + 3*4] ; y2 = k + w + S1 + CH + pxor X0, XTMP2 ; X0 = s1 {xDxC} + mov y0, a ; y0 = a + add h, y2 ; h = h + S1 + CH + k + w + mov y2, a ; y2 = a + pshufb X0, SHUF_DC00 ; X0 = s1 {DC00} + or y0, c ; y0 = a|c + add d, h ; d = d + h + S1 + CH + k + w + and y2, c ; y2 = a&c + paddd X0, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]} + and y0, b ; y0 = (a|c)&b + add h, y1 ; h = h + S1 + CH + k + w + S0 + or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c) + add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ + +ROTATE_ARGS +rotate_Xs +%endm + +;; input is [rsp + _XFER + %1 * 4] +%macro DO_ROUND 1 + mov y0, e ; y0 = e + ror y0, (25-11) ; y0 = e >> (25-11) + mov y1, a ; y1 = a + xor y0, e ; y0 = e ^ (e >> (25-11)) + ror y1, (22-13) ; y1 = a >> (22-13) + mov y2, f ; y2 = f + xor y1, a ; y1 = a ^ (a >> (22-13) + ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) + xor y2, g ; y2 = f^g + xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) + and y2, e ; y2 = (f^g)&e + xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ror y0, 6 ; y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + xor y2, g ; y2 = CH = ((f^g)&e)^g + add y2, y0 ; y2 = S1 + CH + ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + add y2, [rsp + _XFER + %1 * 4] ; y2 = k + w + S1 + CH + mov y0, a ; y0 = a + add h, y2 ; h = h + S1 + CH + k + w + mov y2, a ; y2 = a + or y0, c ; y0 = a|c + add d, h ; d = d + h + S1 + CH + k + w + and y2, c ; y2 = a&c + and y0, b ; y0 = (a|c)&b + add h, y1 ; h = h + S1 + CH + k + w + S0 + or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c) + add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ + ROTATE_ARGS +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; void sha256_sse4(void *input_data, UINT32 digest[8], UINT64 num_blks) +;; arg 1 : pointer to input data +;; arg 2 : pointer to digest +;; arg 3 : Num blocks +section .text +global sha256_sse4 +align 32 +sha256_sse4: + push rbx +%ifndef LINUX + push rsi + push rdi +%endif + push rbp + push r13 + push r14 + push r15 + + sub rsp,STACK_SIZE +%ifndef LINUX + movdqa [rsp + _XMM_SAVE + 0*16],xmm6 + movdqa [rsp + _XMM_SAVE + 1*16],xmm7 + movdqa [rsp + _XMM_SAVE + 2*16],xmm8 + movdqa [rsp + _XMM_SAVE + 3*16],xmm9 + movdqa [rsp + _XMM_SAVE + 4*16],xmm10 + movdqa [rsp + _XMM_SAVE + 5*16],xmm11 + movdqa [rsp + _XMM_SAVE + 6*16],xmm12 +%endif + + shl NUM_BLKS, 6 ; convert to bytes + jz done_hash + add NUM_BLKS, INP ; pointer to end of data + mov [rsp + _INP_END], NUM_BLKS + + ;; load initial digest + mov a,[4*0 + CTX] + mov b,[4*1 + CTX] + mov c,[4*2 + CTX] + mov d,[4*3 + CTX] + mov e,[4*4 + CTX] + mov f,[4*5 + CTX] + mov g,[4*6 + CTX] + mov h,[4*7 + CTX] + + movdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip] + movdqa SHUF_00BA, [_SHUF_00BA wrt rip] + movdqa SHUF_DC00, [_SHUF_DC00 wrt rip] + +loop0: + lea TBL,[K256 wrt rip] + + ;; byte swap first 16 dwords + COPY_XMM_AND_BSWAP X0, [INP + 0*16], 
+    COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK
+    COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK
+    COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK
+
+    mov [rsp + _INP], INP
+
+    ;; schedule 48 input dwords, by doing 3 rounds of 16 each
+    mov SRND, 3
+align 16
+loop1:
+    movdqa XFER, [TBL + 0*16]
+    paddd XFER, X0
+    movdqa [rsp + _XFER], XFER
+    FOUR_ROUNDS_AND_SCHED
+
+    movdqa XFER, [TBL + 1*16]
+    paddd XFER, X0
+    movdqa [rsp + _XFER], XFER
+    FOUR_ROUNDS_AND_SCHED
+
+    movdqa XFER, [TBL + 2*16]
+    paddd XFER, X0
+    movdqa [rsp + _XFER], XFER
+    FOUR_ROUNDS_AND_SCHED
+
+    movdqa XFER, [TBL + 3*16]
+    paddd XFER, X0
+    movdqa [rsp + _XFER], XFER
+    add TBL, 4*16
+    FOUR_ROUNDS_AND_SCHED
+
+    sub SRND, 1
+    jne loop1
+
+    mov SRND, 2
+loop2:
+    paddd X0, [TBL + 0*16]
+    movdqa [rsp + _XFER], X0
+    DO_ROUND 0
+    DO_ROUND 1
+    DO_ROUND 2
+    DO_ROUND 3
+    paddd X1, [TBL + 1*16]
+    movdqa [rsp + _XFER], X1
+    add TBL, 2*16
+    DO_ROUND 0
+    DO_ROUND 1
+    DO_ROUND 2
+    DO_ROUND 3
+
+    movdqa X0, X2
+    movdqa X1, X3
+
+    sub SRND, 1
+    jne loop2
+
+    addm [4*0 + CTX],a
+    addm [4*1 + CTX],b
+    addm [4*2 + CTX],c
+    addm [4*3 + CTX],d
+    addm [4*4 + CTX],e
+    addm [4*5 + CTX],f
+    addm [4*6 + CTX],g
+    addm [4*7 + CTX],h
+
+    mov INP, [rsp + _INP]
+    add INP, 64
+    cmp INP, [rsp + _INP_END]
+    jne loop0
+
+done_hash:
+%ifndef LINUX
+    movdqa xmm6,[rsp + _XMM_SAVE + 0*16]
+    movdqa xmm7,[rsp + _XMM_SAVE + 1*16]
+    movdqa xmm8,[rsp + _XMM_SAVE + 2*16]
+    movdqa xmm9,[rsp + _XMM_SAVE + 3*16]
+    movdqa xmm10,[rsp + _XMM_SAVE + 4*16]
+    movdqa xmm11,[rsp + _XMM_SAVE + 5*16]
+    movdqa xmm12,[rsp + _XMM_SAVE + 6*16]
+%endif
+
+    add rsp, STACK_SIZE
+
+    pop r15
+    pop r14
+    pop r13
+    pop rbp
+%ifndef LINUX
+    pop rdi
+    pop rsi
+%endif
+    pop rbx
+
+    ret
+
+
+section .data
+align 64
+K256:
+    dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+    dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+    dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+    dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+    dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+    dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+    dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+    dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+    dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+    dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+    dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+    dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+    dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+    dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+    dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+    dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+PSHUFFLE_BYTE_FLIP_MASK: ddq 0x0c0d0e0f08090a0b0405060700010203
+
+; shuffle xBxA -> 00BA
+_SHUF_00BA: ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+; shuffle xDxC -> DC00
+_SHUF_DC00: ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF
+*/
+
+#endif
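For context, the banner comment in the listing above documents the calling convention of the entry point. A hedged sketch of how a C++ dispatcher might declare and invoke it follows; the wrapper name TransformBlocks and the mapping of UINT32/UINT64 to fixed-width types are assumptions for illustration, not part of this patch:

    #include <stdint.h>

    // Prototype per the ";; void sha256_sse4(...)" header comment above.
    extern "C" void sha256_sse4(void* input_data, uint32_t digest[8], uint64_t num_blks);

    // Hypothetical wrapper: processes num_blks 64-byte chunks from data,
    // updating the eight 32-bit digest words in place. Zero blocks is a
    // no-op (the assembly jumps straight to done_hash via jz).
    void TransformBlocks(uint32_t state[8], const unsigned char* data, uint64_t blocks)
    {
        sha256_sse4(const_cast<unsigned char*>(data), state, blocks);
    }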
diff --git a/src/crypto/sha512.cpp b/src/crypto/sha512.cpp
new file mode 100644
index 0000000000..dff4d8da1a
--- /dev/null
+++ b/src/crypto/sha512.cpp
@@ -0,0 +1,207 @@
+// Copyright (c) 2014-2017 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <crypto/sha512.h>
+
+#include <crypto/common.h>
+
+#include <string.h>
+
+// Internal implementation code.
+namespace
+{
+/// Internal SHA-512 implementation.
+namespace sha512
+{
+uint64_t inline Ch(uint64_t x, uint64_t y, uint64_t z) { return z ^ (x & (y ^ z)); }
+uint64_t inline Maj(uint64_t x, uint64_t y, uint64_t z) { return (x & y) | (z & (x | y)); }
+uint64_t inline Sigma0(uint64_t x) { return (x >> 28 | x << 36) ^ (x >> 34 | x << 30) ^ (x >> 39 | x << 25); }
+uint64_t inline Sigma1(uint64_t x) { return (x >> 14 | x << 50) ^ (x >> 18 | x << 46) ^ (x >> 41 | x << 23); }
+uint64_t inline sigma0(uint64_t x) { return (x >> 1 | x << 63) ^ (x >> 8 | x << 56) ^ (x >> 7); }
+uint64_t inline sigma1(uint64_t x) { return (x >> 19 | x << 45) ^ (x >> 61 | x << 3) ^ (x >> 6); }
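+
+// Note: the shift pairs above spell out 64-bit rotations; writing
+// ROTR(x,n) = (x >> n) | (x << (64 - n)), these match the FIPS 180-4
+// definitions, e.g. Sigma0(x) = ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39) and
+// sigma1(x) = ROTR(x,19) ^ ROTR(x,61) ^ (x >> 6).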
+
+/** One round of SHA-512. */
+void inline Round(uint64_t a, uint64_t b, uint64_t c, uint64_t& d, uint64_t e, uint64_t f, uint64_t g, uint64_t& h, uint64_t k, uint64_t w)
+{
+    uint64_t t1 = h + Sigma1(e) + Ch(e, f, g) + k + w;
+    uint64_t t2 = Sigma0(a) + Maj(a, b, c);
+    d += t1;
+    h = t1 + t2;
+}
+
+/** Initialize SHA-512 state. */
+void inline Initialize(uint64_t* s)
+{
+    s[0] = 0x6a09e667f3bcc908ull;
+    s[1] = 0xbb67ae8584caa73bull;
+    s[2] = 0x3c6ef372fe94f82bull;
+    s[3] = 0xa54ff53a5f1d36f1ull;
+    s[4] = 0x510e527fade682d1ull;
+    s[5] = 0x9b05688c2b3e6c1full;
+    s[6] = 0x1f83d9abfb41bd6bull;
+    s[7] = 0x5be0cd19137e2179ull;
+}
+
+/** Perform one SHA-512 transformation, processing a 128-byte chunk. */
+void Transform(uint64_t* s, const unsigned char* chunk)
+{
+    uint64_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
+    uint64_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+
+    Round(a, b, c, d, e, f, g, h, 0x428a2f98d728ae22ull, w0 = ReadBE64(chunk + 0));
+    Round(h, a, b, c, d, e, f, g, 0x7137449123ef65cdull, w1 = ReadBE64(chunk + 8));
+    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcfec4d3b2full, w2 = ReadBE64(chunk + 16));
+    Round(f, g, h, a, b, c, d, e, 0xe9b5dba58189dbbcull, w3 = ReadBE64(chunk + 24));
+    Round(e, f, g, h, a, b, c, d, 0x3956c25bf348b538ull, w4 = ReadBE64(chunk + 32));
+    Round(d, e, f, g, h, a, b, c, 0x59f111f1b605d019ull, w5 = ReadBE64(chunk + 40));
+    Round(c, d, e, f, g, h, a, b, 0x923f82a4af194f9bull, w6 = ReadBE64(chunk + 48));
+    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5da6d8118ull, w7 = ReadBE64(chunk + 56));
+    Round(a, b, c, d, e, f, g, h, 0xd807aa98a3030242ull, w8 = ReadBE64(chunk + 64));
+    Round(h, a, b, c, d, e, f, g, 0x12835b0145706fbeull, w9 = ReadBE64(chunk + 72));
+    Round(g, h, a, b, c, d, e, f, 0x243185be4ee4b28cull, w10 = ReadBE64(chunk + 80));
+    Round(f, g, h, a, b, c, d, e, 0x550c7dc3d5ffb4e2ull, w11 = ReadBE64(chunk + 88));
+    Round(e, f, g, h, a, b, c, d, 0x72be5d74f27b896full, w12 = ReadBE64(chunk + 96));
+    Round(d, e, f, g, h, a, b, c, 0x80deb1fe3b1696b1ull, w13 = ReadBE64(chunk + 104));
+    Round(c, d, e, f, g, h, a, b, 0x9bdc06a725c71235ull, w14 = ReadBE64(chunk + 112));
+    Round(b, c, d, e, f, g, h, a, 0xc19bf174cf692694ull, w15 = ReadBE64(chunk + 120));
+
+    Round(a, b, c, d, e, f, g, h, 0xe49b69c19ef14ad2ull, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0xefbe4786384f25e3ull, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x0fc19dc68b8cd5b5ull, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x240ca1cc77ac9c65ull, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x2de92c6f592b0275ull, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x4a7484aa6ea6e483ull, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcbd41fbd4ull, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x76f988da831153b5ull, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x983e5152ee66dfabull, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0xa831c66d2db43210ull, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0xb00327c898fb213full, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0xbf597fc7beef0ee4ull, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0xc6e00bf33da88fc2ull, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xd5a79147930aa725ull, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0x06ca6351e003826full, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x142929670a0e6e70ull, w15 += sigma1(w13) + w8 + sigma0(w0));
+
+    Round(a, b, c, d, e, f, g, h, 0x27b70a8546d22ffcull, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0x2e1b21385c26c926ull, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc5ac42aedull, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x53380d139d95b3dfull, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x650a73548baf63deull, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x766a0abb3c77b2a8ull, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x81c2c92e47edaee6ull, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x92722c851482353bull, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a14cf10364ull, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0xa81a664bbc423001ull, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0xc24b8b70d0f89791ull, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0xc76c51a30654be30ull, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0xd192e819d6ef5218ull, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xd69906245565a910ull, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0xf40e35855771202aull, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x106aa07032bbd1b8ull, w15 += sigma1(w13) + w8 + sigma0(w0));
+
+    Round(a, b, c, d, e, f, g, h, 0x19a4c116b8d2d0c8ull, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0x1e376c085141ab53ull, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x2748774cdf8eeb99ull, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5e19b48a8ull, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x391c0cb3c5c95a63ull, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4ae3418acbull, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x5b9cca4f7763e373ull, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x682e6ff3d6b2b8a3ull, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x748f82ee5defb2fcull, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0x78a5636f43172f60ull, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0x84c87814a1f0ab72ull, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0x8cc702081a6439ecull, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0x90befffa23631e28ull, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xa4506cebde82bde9ull, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7b2c67915ull, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0xc67178f2e372532bull, w15 += sigma1(w13) + w8 + sigma0(w0));
+
+    Round(a, b, c, d, e, f, g, h, 0xca273eceea26619cull, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0xd186b8c721c0c207ull, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0xeada7dd6cde0eb1eull, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0xf57d4f7fee6ed178ull, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x06f067aa72176fbaull, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x0a637dc5a2c898a6ull, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x113f9804bef90daeull, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x1b710b35131c471bull, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x28db77f523047d84ull, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0x32caab7b40c72493ull, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0x3c9ebe0a15c9bebcull, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0x431d67c49c100d4cull, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0x4cc5d4becb3e42b6ull, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0x597f299cfc657e2aull, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0x5fcb6fab3ad6faecull, w14 + sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x6c44198c4a475817ull, w15 + sigma1(w13) + w8 + sigma0(w0));
+
+    s[0] += a;
+    s[1] += b;
+    s[2] += c;
+    s[3] += d;
+    s[4] += e;
+    s[5] += f;
+    s[6] += g;
+    s[7] += h;
+}
+
+} // namespace sha512
+
+} // namespace
+
+
+////// SHA-512
+
+CSHA512::CSHA512() : bytes(0)
+{
+    sha512::Initialize(s);
+}
+
+CSHA512& CSHA512::Write(const unsigned char* data, size_t len)
+{
+    const unsigned char* end = data + len;
+    size_t bufsize = bytes % 128;
+    if (bufsize && bufsize + len >= 128) {
+        // Fill the buffer, and process it.
+        memcpy(buf + bufsize, data, 128 - bufsize);
+        bytes += 128 - bufsize;
+        data += 128 - bufsize;
+        sha512::Transform(s, buf);
+        bufsize = 0;
+    }
+    while (end >= data + 128) {
+        // Process full chunks directly from the source.
+        sha512::Transform(s, data);
+        data += 128;
+        bytes += 128;
+    }
+    if (end > data) {
+        // Fill the buffer with what remains.
+        memcpy(buf + bufsize, data, end - data);
+        bytes += end - data;
+    }
+    return *this;
+}
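+
+// Padding per FIPS 180-4: one 0x80 byte, then zero bytes until the length
+// is congruent to 112 mod 128, then the 128-bit bit count. For example,
+// with bytes == 0 the expression below writes 1 + 111 = 112 pad bytes, and
+// the 16-byte length field completes a single 128-byte block.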
+
+void CSHA512::Finalize(unsigned char hash[OUTPUT_SIZE])
+{
+    static const unsigned char pad[128] = {0x80};
+    unsigned char sizedesc[16] = {0x00};
+    WriteBE64(sizedesc + 8, bytes << 3);
+    Write(pad, 1 + ((239 - (bytes % 128)) % 128));
+    Write(sizedesc, 16);
+    WriteBE64(hash, s[0]);
+    WriteBE64(hash + 8, s[1]);
+    WriteBE64(hash + 16, s[2]);
+    WriteBE64(hash + 24, s[3]);
+    WriteBE64(hash + 32, s[4]);
+    WriteBE64(hash + 40, s[5]);
+    WriteBE64(hash + 48, s[6]);
+    WriteBE64(hash + 56, s[7]);
+}
+
+CSHA512& CSHA512::Reset()
+{
+    bytes = 0;
+    sha512::Initialize(s);
+    return *this;
+}
diff --git a/src/crypto/sha512.h b/src/crypto/sha512.h
new file mode 100644
index 0000000000..cd1023bc85
--- /dev/null
+++ b/src/crypto/sha512.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2014-2016 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_CRYPTO_SHA512_H
+#define BITCOIN_CRYPTO_SHA512_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/** A hasher class for SHA-512. */
+class CSHA512
+{
+private:
+    uint64_t s[8];
+    unsigned char buf[128];
+    uint64_t bytes;
+
+public:
+    static const size_t OUTPUT_SIZE = 64;
+
+    CSHA512();
+    CSHA512& Write(const unsigned char* data, size_t len);
+    void Finalize(unsigned char hash[OUTPUT_SIZE]);
+    CSHA512& Reset();
+};
+
+#endif // BITCOIN_CRYPTO_SHA512_H
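As a usage note, the CSHA512 interface added above supports both one-shot and incremental hashing. A minimal sketch follows; the surrounding main() and variable names are illustrative, not part of the patch:

    #include <crypto/sha512.h>

    #include <cstdio>

    int main()
    {
        const unsigned char msg[] = {'a', 'b', 'c'};
        unsigned char hash[CSHA512::OUTPUT_SIZE];

        // Write() may be called any number of times before Finalize();
        // Reset() afterwards returns the hasher to its initial state.
        CSHA512 hasher;
        hasher.Write(msg, sizeof(msg));
        hasher.Finalize(hash);

        for (size_t i = 0; i < CSHA512::OUTPUT_SIZE; ++i) printf("%02x", hash[i]);
        printf("\n");
        return 0;
    }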