diff options
Diffstat (limited to 'src/crypto/muhash.h')
-rw-r--r-- | src/crypto/muhash.h | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/src/crypto/muhash.h b/src/crypto/muhash.h index e5f6f0464f..0c710007c4 100644 --- a/src/crypto/muhash.h +++ b/src/crypto/muhash.h @@ -59,4 +59,72 @@ public: } }; +/** A class representing MuHash sets + * + * MuHash is a hashing algorithm that supports adding set elements in any + * order but also deleting in any order. As a result, it can maintain a + * running sum for a set of data as a whole, and add/remove when data + * is added to or removed from it. A downside of MuHash is that computing + * an inverse is relatively expensive. This is solved by representing + * the running value as a fraction, and multiplying added elements into + * the numerator and removed elements into the denominator. Only when the + * final hash is desired, a single modular inverse and multiplication is + * needed to combine the two. The combination is also run on serialization + * to allow for space-efficient storage on disk. + * + * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can + * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that + * all of this is perfectly parallellizable: each thread can process an + * arbitrary subset of the update operations, allowing them to be + * efficiently combined later. + * + * Muhash does not support checking if an element is already part of the + * set. That is why this class does not enforce the use of a set as the + * data it represents because there is no efficient way to do so. + * It is possible to add elements more than once and also to remove + * elements that have not been added before. However, this implementation + * is intended to represent a set of elements. + * + * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and + * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html. + */ +class MuHash3072 +{ +private: + static constexpr size_t BYTE_SIZE = 384; + + Num3072 m_numerator; + Num3072 m_denominator; + + Num3072 ToNum3072(Span<const unsigned char> in); + +public: + /* The empty set. */ + MuHash3072() noexcept {}; + + /* A singleton with variable sized data in it. */ + explicit MuHash3072(Span<const unsigned char> in) noexcept; + + /* Insert a single piece of data into the set. */ + MuHash3072& Insert(Span<const unsigned char> in) noexcept; + + /* Remove a single piece of data from the set. */ + MuHash3072& Remove(Span<const unsigned char> in) noexcept; + + /* Multiply (resulting in a hash for the union of the sets) */ + MuHash3072& operator*=(const MuHash3072& mul) noexcept; + + /* Divide (resulting in a hash for the difference of the sets) */ + MuHash3072& operator/=(const MuHash3072& div) noexcept; + + /* Finalize into a 32-byte hash. Does not change this object's value. */ + void Finalize(uint256& out) noexcept; + + SERIALIZE_METHODS(MuHash3072, obj) + { + READWRITE(obj.m_numerator); + READWRITE(obj.m_denominator); + } +}; + #endif // BITCOIN_CRYPTO_MUHASH_H |