aboutsummaryrefslogtreecommitdiff
path: root/src/crypto
diff options
context:
space:
mode:
authorFabian Jahr <fjahr@protonmail.com>2020-12-20 22:27:14 +0100
committerFabian Jahr <fjahr@protonmail.com>2020-12-21 19:57:04 +0100
commitadc708c98dbf03b1735edc91f813a36580781a95 (patch)
tree5c049fc8e47549747b30b7c0e001ea1fd1ab0a6a /src/crypto
parent0b4d290bf5b0a4d156c523431bf89aaa9ffe92e5 (diff)
crypto: Add MuHash3072 implementation
Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com>
Diffstat (limited to 'src/crypto')
-rw-r--r--src/crypto/muhash.cpp61
-rw-r--r--src/crypto/muhash.h68
2 files changed, 129 insertions, 0 deletions
diff --git a/src/crypto/muhash.cpp b/src/crypto/muhash.cpp
index d0f59065da..fbd14f9325 100644
--- a/src/crypto/muhash.cpp
+++ b/src/crypto/muhash.cpp
@@ -275,3 +275,64 @@ void Num3072::Divide(const Num3072& a)
this->Multiply(inv);
if (this->IsOverflow()) this->FullReduce();
}
+
+Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
+ Num3072 out{};
+ uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
+ unsigned char tmp[BYTE_SIZE];
+ ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, BYTE_SIZE);
+ for (int i = 0; i < LIMBS; ++i) {
+ if (sizeof(limb_t) == 4) {
+ out.limbs[i] = ReadLE32(tmp + 4 * i);
+ } else if (sizeof(limb_t) == 8) {
+ out.limbs[i] = ReadLE64(tmp + 8 * i);
+ }
+ }
+ return out;
+}
+
+MuHash3072::MuHash3072(Span<const unsigned char> in) noexcept
+{
+ m_numerator = ToNum3072(in);
+}
+
+void MuHash3072::Finalize(uint256& out) noexcept
+{
+ m_numerator.Divide(m_denominator);
+ m_denominator.SetToOne(); // Needed to keep the MuHash object valid
+
+ unsigned char data[384];
+ for (int i = 0; i < LIMBS; ++i) {
+ if (sizeof(limb_t) == 4) {
+ WriteLE32(data + i * 4, m_numerator.limbs[i]);
+ } else if (sizeof(limb_t) == 8) {
+ WriteLE64(data + i * 8, m_numerator.limbs[i]);
+ }
+ }
+
+ out = (CHashWriter(SER_DISK, 0) << data).GetSHA256();
+}
+
+MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept
+{
+ m_numerator.Multiply(mul.m_numerator);
+ m_denominator.Multiply(mul.m_denominator);
+ return *this;
+}
+
+MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept
+{
+ m_numerator.Multiply(div.m_denominator);
+ m_denominator.Multiply(div.m_numerator);
+ return *this;
+}
+
+MuHash3072& MuHash3072::Insert(Span<const unsigned char> in) noexcept {
+ m_numerator.Multiply(ToNum3072(in));
+ return *this;
+}
+
+MuHash3072& MuHash3072::Remove(Span<const unsigned char> in) noexcept {
+ m_numerator.Divide(ToNum3072(in));
+ return *this;
+}
diff --git a/src/crypto/muhash.h b/src/crypto/muhash.h
index e5f6f0464f..0c710007c4 100644
--- a/src/crypto/muhash.h
+++ b/src/crypto/muhash.h
@@ -59,4 +59,72 @@ public:
}
};
+/** A class representing MuHash sets
+ *
+ * MuHash is a hashing algorithm that supports adding set elements in any
+ * order but also deleting in any order. As a result, it can maintain a
+ * running sum for a set of data as a whole, and add/remove when data
+ * is added to or removed from it. A downside of MuHash is that computing
+ * an inverse is relatively expensive. This is solved by representing
+ * the running value as a fraction, and multiplying added elements into
+ * the numerator and removed elements into the denominator. Only when the
+ * final hash is desired, a single modular inverse and multiplication is
+ * needed to combine the two. The combination is also run on serialization
+ * to allow for space-efficient storage on disk.
+ *
+ * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
+ * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
+ * all of this is perfectly parallellizable: each thread can process an
+ * arbitrary subset of the update operations, allowing them to be
+ * efficiently combined later.
+ *
+ * Muhash does not support checking if an element is already part of the
+ * set. That is why this class does not enforce the use of a set as the
+ * data it represents because there is no efficient way to do so.
+ * It is possible to add elements more than once and also to remove
+ * elements that have not been added before. However, this implementation
+ * is intended to represent a set of elements.
+ *
+ * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
+ * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
+ */
+class MuHash3072
+{
+private:
+ static constexpr size_t BYTE_SIZE = 384;
+
+ Num3072 m_numerator;
+ Num3072 m_denominator;
+
+ Num3072 ToNum3072(Span<const unsigned char> in);
+
+public:
+ /* The empty set. */
+ MuHash3072() noexcept {};
+
+ /* A singleton with variable sized data in it. */
+ explicit MuHash3072(Span<const unsigned char> in) noexcept;
+
+ /* Insert a single piece of data into the set. */
+ MuHash3072& Insert(Span<const unsigned char> in) noexcept;
+
+ /* Remove a single piece of data from the set. */
+ MuHash3072& Remove(Span<const unsigned char> in) noexcept;
+
+ /* Multiply (resulting in a hash for the union of the sets) */
+ MuHash3072& operator*=(const MuHash3072& mul) noexcept;
+
+ /* Divide (resulting in a hash for the difference of the sets) */
+ MuHash3072& operator/=(const MuHash3072& div) noexcept;
+
+ /* Finalize into a 32-byte hash. Does not change this object's value. */
+ void Finalize(uint256& out) noexcept;
+
+ SERIALIZE_METHODS(MuHash3072, obj)
+ {
+ READWRITE(obj.m_numerator);
+ READWRITE(obj.m_denominator);
+ }
+};
+
#endif // BITCOIN_CRYPTO_MUHASH_H