From adc708c98dbf03b1735edc91f813a36580781a95 Mon Sep 17 00:00:00 2001 From: Fabian Jahr Date: Sun, 20 Dec 2020 22:27:14 +0100 Subject: crypto: Add MuHash3072 implementation Co-authored-by: Pieter Wuille --- src/crypto/muhash.cpp | 61 +++++++++++++++++++++++++++++++++++++++++++++ src/crypto/muhash.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) (limited to 'src/crypto') diff --git a/src/crypto/muhash.cpp b/src/crypto/muhash.cpp index d0f59065da..fbd14f9325 100644 --- a/src/crypto/muhash.cpp +++ b/src/crypto/muhash.cpp @@ -275,3 +275,64 @@ void Num3072::Divide(const Num3072& a) this->Multiply(inv); if (this->IsOverflow()) this->FullReduce(); } + +Num3072 MuHash3072::ToNum3072(Span in) { + Num3072 out{}; + uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256(); + unsigned char tmp[BYTE_SIZE]; + ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, BYTE_SIZE); + for (int i = 0; i < LIMBS; ++i) { + if (sizeof(limb_t) == 4) { + out.limbs[i] = ReadLE32(tmp + 4 * i); + } else if (sizeof(limb_t) == 8) { + out.limbs[i] = ReadLE64(tmp + 8 * i); + } + } + return out; +} + +MuHash3072::MuHash3072(Span in) noexcept +{ + m_numerator = ToNum3072(in); +} + +void MuHash3072::Finalize(uint256& out) noexcept +{ + m_numerator.Divide(m_denominator); + m_denominator.SetToOne(); // Needed to keep the MuHash object valid + + unsigned char data[384]; + for (int i = 0; i < LIMBS; ++i) { + if (sizeof(limb_t) == 4) { + WriteLE32(data + i * 4, m_numerator.limbs[i]); + } else if (sizeof(limb_t) == 8) { + WriteLE64(data + i * 8, m_numerator.limbs[i]); + } + } + + out = (CHashWriter(SER_DISK, 0) << data).GetSHA256(); +} + +MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept +{ + m_numerator.Multiply(mul.m_numerator); + m_denominator.Multiply(mul.m_denominator); + return *this; +} + +MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept +{ + m_numerator.Multiply(div.m_denominator); + m_denominator.Multiply(div.m_numerator); + return *this; +} + +MuHash3072& MuHash3072::Insert(Span in) noexcept { + m_numerator.Multiply(ToNum3072(in)); + return *this; +} + +MuHash3072& MuHash3072::Remove(Span in) noexcept { + m_numerator.Divide(ToNum3072(in)); + return *this; +} diff --git a/src/crypto/muhash.h b/src/crypto/muhash.h index e5f6f0464f..0c710007c4 100644 --- a/src/crypto/muhash.h +++ b/src/crypto/muhash.h @@ -59,4 +59,72 @@ public: } }; +/** A class representing MuHash sets + * + * MuHash is a hashing algorithm that supports adding set elements in any + * order but also deleting in any order. As a result, it can maintain a + * running sum for a set of data as a whole, and add/remove when data + * is added to or removed from it. A downside of MuHash is that computing + * an inverse is relatively expensive. This is solved by representing + * the running value as a fraction, and multiplying added elements into + * the numerator and removed elements into the denominator. Only when the + * final hash is desired, a single modular inverse and multiplication is + * needed to combine the two. The combination is also run on serialization + * to allow for space-efficient storage on disk. + * + * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can + * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that + * all of this is perfectly parallellizable: each thread can process an + * arbitrary subset of the update operations, allowing them to be + * efficiently combined later. + * + * Muhash does not support checking if an element is already part of the + * set. That is why this class does not enforce the use of a set as the + * data it represents because there is no efficient way to do so. + * It is possible to add elements more than once and also to remove + * elements that have not been added before. However, this implementation + * is intended to represent a set of elements. + * + * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and + * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html. + */ +class MuHash3072 +{ +private: + static constexpr size_t BYTE_SIZE = 384; + + Num3072 m_numerator; + Num3072 m_denominator; + + Num3072 ToNum3072(Span in); + +public: + /* The empty set. */ + MuHash3072() noexcept {}; + + /* A singleton with variable sized data in it. */ + explicit MuHash3072(Span in) noexcept; + + /* Insert a single piece of data into the set. */ + MuHash3072& Insert(Span in) noexcept; + + /* Remove a single piece of data from the set. */ + MuHash3072& Remove(Span in) noexcept; + + /* Multiply (resulting in a hash for the union of the sets) */ + MuHash3072& operator*=(const MuHash3072& mul) noexcept; + + /* Divide (resulting in a hash for the difference of the sets) */ + MuHash3072& operator/=(const MuHash3072& div) noexcept; + + /* Finalize into a 32-byte hash. Does not change this object's value. */ + void Finalize(uint256& out) noexcept; + + SERIALIZE_METHODS(MuHash3072, obj) + { + READWRITE(obj.m_numerator); + READWRITE(obj.m_denominator); + } +}; + #endif // BITCOIN_CRYPTO_MUHASH_H -- cgit v1.2.3