// Copyright (c) 2017-2020 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #ifndef BITCOIN_CRYPTO_MUHASH_H #define BITCOIN_CRYPTO_MUHASH_H #if defined(HAVE_CONFIG_H) #include #endif #include #include #include class Num3072 { private: void FullReduce(); bool IsOverflow() const; Num3072 GetInverse() const; public: static constexpr size_t BYTE_SIZE = 384; #ifdef HAVE___INT128 typedef unsigned __int128 double_limb_t; typedef uint64_t limb_t; static constexpr int LIMBS = 48; static constexpr int LIMB_SIZE = 64; #else typedef uint64_t double_limb_t; typedef uint32_t limb_t; static constexpr int LIMBS = 96; static constexpr int LIMB_SIZE = 32; #endif limb_t limbs[LIMBS]; // Sanity check for Num3072 constants static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits"); static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t"); static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect"); // Hard coded values in MuHash3072 constructor and Finalize static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t"); void Multiply(const Num3072& a); void Divide(const Num3072& a); void SetToOne(); void Square(); void ToBytes(unsigned char (&out)[BYTE_SIZE]); Num3072() { this->SetToOne(); }; Num3072(const unsigned char (&data)[BYTE_SIZE]); SERIALIZE_METHODS(Num3072, obj) { for (auto& limb : obj.limbs) { READWRITE(limb); } } }; /** A class representing MuHash sets * * MuHash is a hashing algorithm that supports adding set elements in any * order but also deleting in any order. As a result, it can maintain a * running sum for a set of data as a whole, and add/remove when data * is added to or removed from it. A downside of MuHash is that computing * an inverse is relatively expensive. This is solved by representing * the running value as a fraction, and multiplying added elements into * the numerator and removed elements into the denominator. Only when the * final hash is desired, a single modular inverse and multiplication is * needed to combine the two. The combination is also run on serialization * to allow for space-efficient storage on disk. * * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that * all of this is perfectly parallellizable: each thread can process an * arbitrary subset of the update operations, allowing them to be * efficiently combined later. * * MuHash does not support checking if an element is already part of the * set. That is why this class does not enforce the use of a set as the * data it represents because there is no efficient way to do so. * It is possible to add elements more than once and also to remove * elements that have not been added before. However, this implementation * is intended to represent a set of elements. * * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html. */ class MuHash3072 { private: Num3072 m_numerator; Num3072 m_denominator; Num3072 ToNum3072(Span in); public: /* The empty set. */ MuHash3072() noexcept {}; /* A singleton with variable sized data in it. */ explicit MuHash3072(Span in) noexcept; /* Insert a single piece of data into the set. */ MuHash3072& Insert(Span in) noexcept; /* Remove a single piece of data from the set. */ MuHash3072& Remove(Span in) noexcept; /* Multiply (resulting in a hash for the union of the sets) */ MuHash3072& operator*=(const MuHash3072& mul) noexcept; /* Divide (resulting in a hash for the difference of the sets) */ MuHash3072& operator/=(const MuHash3072& div) noexcept; /* Finalize into a 32-byte hash. Does not change this object's value. */ void Finalize(uint256& out) noexcept; SERIALIZE_METHODS(MuHash3072, obj) { READWRITE(obj.m_numerator); READWRITE(obj.m_denominator); } }; #endif // BITCOIN_CRYPTO_MUHASH_H