aboutsummaryrefslogtreecommitdiff
path: root/src/crypto/muhash.h
blob: 222b866b6d47650147b3fc4b77a55ccdc67372fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright (c) 2017-2021 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef BITCOIN_CRYPTO_MUHASH_H
#define BITCOIN_CRYPTO_MUHASH_H

#include <serialize.h>
#include <uint256.h>

#include <stdint.h>

class Num3072
{
private:
    void FullReduce();
    bool IsOverflow() const;
    Num3072 GetInverse() const;

public:
    static constexpr size_t BYTE_SIZE = 384;

#ifdef __SIZEOF_INT128__
    typedef unsigned __int128 double_limb_t;
    typedef uint64_t limb_t;
    static constexpr int LIMBS = 48;
    static constexpr int LIMB_SIZE = 64;
#else
    typedef uint64_t double_limb_t;
    typedef uint32_t limb_t;
    static constexpr int LIMBS = 96;
    static constexpr int LIMB_SIZE = 32;
#endif
    limb_t limbs[LIMBS];

    // Sanity check for Num3072 constants
    static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits");
    static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t");
    static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect");

    // Hard coded values in MuHash3072 constructor and Finalize
    static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t");

    void Multiply(const Num3072& a);
    void Divide(const Num3072& a);
    void SetToOne();
    void Square();
    void ToBytes(unsigned char (&out)[BYTE_SIZE]);

    Num3072() { this->SetToOne(); };
    Num3072(const unsigned char (&data)[BYTE_SIZE]);

    SERIALIZE_METHODS(Num3072, obj)
    {
        for (auto& limb : obj.limbs) {
            READWRITE(limb);
        }
    }
};

/** A class representing MuHash sets
 *
 * MuHash is a hashing algorithm that supports adding set elements in any
 * order but also deleting in any order. As a result, it can maintain a
 * running sum for a set of data as a whole, and add/remove when data
 * is added to or removed from it. A downside of MuHash is that computing
 * an inverse is relatively expensive. This is solved by representing
 * the running value as a fraction, and multiplying added elements into
 * the numerator and removed elements into the denominator. Only when the
 * final hash is desired, a single modular inverse and multiplication is
 * needed to combine the two. The combination is also run on serialization
 * to allow for space-efficient storage on disk.
 *
 * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
 * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
 * all of this is perfectly parallellizable: each thread can process an
 * arbitrary subset of the update operations, allowing them to be
 * efficiently combined later.
 *
 * MuHash does not support checking if an element is already part of the
 * set. That is why this class does not enforce the use of a set as the
 * data it represents because there is no efficient way to do so.
 * It is possible to add elements more than once and also to remove
 * elements that have not been added before. However, this implementation
 * is intended to represent a set of elements.
 *
 * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
 * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
 */
class MuHash3072
{
private:
    Num3072 m_numerator;
    Num3072 m_denominator;

    Num3072 ToNum3072(Span<const unsigned char> in);

public:
    /* The empty set. */
    MuHash3072() noexcept = default;

    /* A singleton with variable sized data in it. */
    explicit MuHash3072(Span<const unsigned char> in) noexcept;

    /* Insert a single piece of data into the set. */
    MuHash3072& Insert(Span<const unsigned char> in) noexcept;

    /* Remove a single piece of data from the set. */
    MuHash3072& Remove(Span<const unsigned char> in) noexcept;

    /* Multiply (resulting in a hash for the union of the sets) */
    MuHash3072& operator*=(const MuHash3072& mul) noexcept;

    /* Divide (resulting in a hash for the difference of the sets) */
    MuHash3072& operator/=(const MuHash3072& div) noexcept;

    /* Finalize into a 32-byte hash. Does not change this object's value. */
    void Finalize(uint256& out) noexcept;

    SERIALIZE_METHODS(MuHash3072, obj)
    {
        READWRITE(obj.m_numerator);
        READWRITE(obj.m_denominator);
    }
};

#endif // BITCOIN_CRYPTO_MUHASH_H