aboutsummaryrefslogtreecommitdiff
path: root/src/minisketch/include/minisketch.h
diff options
context:
space:
mode:
authorfanquake <fanquake@gmail.com>2021-11-12 09:25:40 +0800
committerfanquake <fanquake@gmail.com>2021-11-12 10:00:49 +0800
commitc1fb30633b6dcbf32db7d53c9f538019af80d6c5 (patch)
tree0fb479ab0aa27218dc34db18cd3a31ac72b1359a /src/minisketch/include/minisketch.h
parentbc03823e26d1bf0cca679ce9330f9c6a58b33faf (diff)
parent29173d6c6ca0cc3be9fa6bf2409a509ffea1a02a (diff)
downloadbitcoin-c1fb30633b6dcbf32db7d53c9f538019af80d6c5.tar.xz
Merge bitcoin/bitcoin#23114: Add minisketch subtree and integrate into build/test
29173d6c6ca0cc3be9fa6bf2409a509ffea1a02a ubsan: add minisketch exceptions (Cory Fields) 54b5e1aeab73953c1f12ec2c041572038f6f59da Add thin Minisketch wrapper to pick best implementation (Pieter Wuille) ee9dc71c1bc16205494f2a0aebe575a3c062ff52 Add basic minisketch tests (Pieter Wuille) 0659f12b131fc5915fe7a493306af197f4fb838b Add minisketch dependency (Gleb Naumenko) 0eb7928ab8d9dcb840e4965bfa81deb752b00dfa Add MSVC build configuration for libminisketch (Pieter Wuille) 8bc166d5b179205fc56855e2b462aa273a6f8661 build: add minisketch build file and include it (Cory Fields) b2904ceb85b4d440b1f4bbd716fcb601411cc2c9 build: add configure checks for minisketch (Cory Fields) b6487dc4ef47ec9ea894eceac25f37d0b806f8aa Squashed 'src/minisketch/' content from commit 89629eb2c7 (fanquake) Pull request description: This takes over #21859, which has [recently switched](https://github.com/bitcoin/bitcoin/pull/21859#issuecomment-921899200) to my integration branch. A few more build issues came up (and have been fixed) since, and after discussing with sipa it was decided I would open a PR to shepherd any final changes through. > This adds a `src/minisketch` subtree, taken from the master branch of https://github.com/sipa/minisketch, to prepare for Erlay implementation (see #21515). It gets configured for just supporting 32-bit fields (the only ones we're interested in in the context of Erlay), and some code on top is added: > * A very basic unit test (just to make sure compilation & running works; actual correctness checking is done through minisketch's own tests). > * A wrapper in `minisketchwrapper.{cpp,h}` that runs a benchmark to determine which field implementation to use. Only changes since my last update to the branch in the previous PR have been rebasing on master and fixing an issue with a header in an introduced file. ACKs for top commit: naumenkogs: ACK 29173d6c6ca0cc3be9fa6bf2409a509ffea1a02a Tree-SHA512: 1217d3228db1dd0de12c2919314e1c3626c18a416cf6291fec99d37e34fb6eec8e28d9e9fb935f8590273b8836cbadac313a15f05b4fd9f9d3024c8ce2c80d02
Diffstat (limited to 'src/minisketch/include/minisketch.h')
-rw-r--r--src/minisketch/include/minisketch.h367
1 files changed, 367 insertions, 0 deletions
diff --git a/src/minisketch/include/minisketch.h b/src/minisketch/include/minisketch.h
new file mode 100644
index 0000000000..0b5d8372e8
--- /dev/null
+++ b/src/minisketch/include/minisketch.h
@@ -0,0 +1,367 @@
+#ifndef _MINISKETCH_H_
+#define _MINISKETCH_H_ 1
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef _MSC_VER
+# include <compat.h>
+#else
+# include <unistd.h>
+#endif
+
+#ifndef MINISKETCH_API
+# if defined(_WIN32)
+# ifdef MINISKETCH_BUILD
+# define MINISKETCH_API __declspec(dllexport)
+# else
+# define MINISKETCH_API
+# endif
+# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD)
+# define MINISKETCH_API __attribute__ ((visibility ("default")))
+# else
+# define MINISKETCH_API
+# endif
+#endif
+
+#ifdef __cplusplus
+# if __cplusplus >= 201103L
+# include <memory>
+# include <vector>
+# include <cassert>
+# if __cplusplus >= 201703L
+# include <optional>
+# endif // __cplusplus >= 201703L
+# endif // __cplusplus >= 201103L
+extern "C" {
+#endif // __cplusplus
+
+/** Opaque type for decoded sketches. */
+typedef struct minisketch minisketch;
+
+/** Determine whether support for elements of `bits` bits was compiled in. */
+MINISKETCH_API int minisketch_bits_supported(uint32_t bits);
+
+/** Determine the maximum number of implementations available.
+ *
+ * Multiple implementations may be available for a given element size, with
+ * different performance characteristics on different hardware.
+ *
+ * Each implementation is identified by a number from 0 to the output of this
+ * function call, inclusive. Note that not every combination of implementation
+ * and element size may exist (see further).
+*/
+MINISKETCH_API uint32_t minisketch_implementation_max(void);
+
+/** Determine if the a combination of bits and implementation number is available.
+ *
+ * Returns 1 if it is, 0 otherwise.
+ */
+MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation);
+
+/** Construct a sketch for a given element size, implementation and capacity.
+ *
+ * If the combination of `bits` and `implementation` is unavailable, or when
+ * OOM occurs, NULL is returned. If minisketch_implementation_supported
+ * returns 1 for the specified bits and implementation, this will always succeed
+ * (except when allocation fails).
+ *
+ * If the result is not NULL, it must be destroyed using minisketch_destroy.
+ */
+MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity);
+
+/** Get the element size of a sketch in bits. */
+MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch);
+
+/** Get the capacity of a sketch. */
+MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch);
+
+/** Get the implementation of a sketch. */
+MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch);
+
+/** Set the seed for randomizing algorithm choices to a fixed value.
+ *
+ * By default, sketches are initialized with a random seed. This is important
+ * to avoid scenarios where an attacker could force worst-case behavior.
+ *
+ * This function initializes the seed to a user-provided value (any 64-bit
+ * integer is acceptable, regardless of field size).
+ *
+ * When seed is -1, a fixed internal value with predictable behavior is
+ * used. It is only intended for testing.
+ */
+MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed);
+
+/** Clone a sketch.
+ *
+ * The result must be destroyed using minisketch_destroy.
+ */
+MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch);
+
+/** Destroy a sketch.
+ *
+ * The pointer that was passed in may not be used anymore afterwards.
+ */
+MINISKETCH_API void minisketch_destroy(minisketch* sketch);
+
+/** Compute the size in bytes for serializing a given sketch. */
+MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch);
+
+/** Serialize a sketch to bytes. */
+MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output);
+
+/** Deserialize a sketch from bytes. */
+MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input);
+
+/** Add an element to a sketch.
+ *
+ * If the element to be added is too large for the sketch, the most significant
+ * bits of the element are dropped. More precisely, if the element size of
+ * `sketch` is b bits, then this function adds the unsigned integer represented
+ * by the b least significant bits of `element` to `sketch`.
+ *
+ * If the element to be added is 0 (after potentially dropping the most significant
+ * bits), then this function is a no-op. Sketches cannot contain an element with
+ * the value 0.
+ *
+ * Note that adding the same element a second time removes it again.
+ */
+MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element);
+
+/** Merge the elements of another sketch into this sketch.
+ *
+ * After merging, `sketch` will contain every element that existed in one but not
+ * both of the input sketches. It can be seen as an exclusive or operation on
+ * the set elements. If the capacity of `other_sketch` is lower than `sketch`'s,
+ * merging reduces the capacity of `sketch` to that of `other_sketch`.
+ *
+ * This function returns the capacity of `sketch` after merging has been performed
+ * (where this capacity is at least 1), or 0 to indicate that merging has failed because
+ * the two input sketches differ in their element size or implementation. If 0 is
+ * returned, `sketch` (and its capacity) have not been modified.
+ *
+ * It is also possible to perform this operation directly on the serializations
+ * of two sketches with the same element size and capacity by performing a bitwise XOR
+ * of the serializations.
+ */
+MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch);
+
+/** Decode a sketch.
+ *
+ * `output` is a pointer to an array of `max_element` uint64_t's, which will be
+ * filled with the elements in this sketch.
+ *
+ * The return value is the number of decoded elements, or -1 if decoding failed.
+ */
+MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output);
+
+/** Compute the capacity needed to achieve a certain rate of false positives.
+ *
+ * A sketch with capacity c and no more than c elements can always be decoded
+ * correctly. However, if it has more than c elements, or contains just random
+ * bytes, it is possible that it will still decode, but the result will be
+ * nonsense. This can be counteracted by increasing the capacity slightly.
+ *
+ * Given a field size bits, an intended number of elements that can be decoded
+ * max_elements, and a false positive probability of 1 in 2**fpbits, this
+ * function computes the necessary capacity. It is only guaranteed to be
+ * accurate up to fpbits=256.
+ */
+MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits);
+
+/** Compute what max_elements can be decoded for a certain rate of false positives.
+ *
+ * This is the inverse operation of minisketch_compute_capacity. It determines,
+ * given a field size bits, a capacity of a sketch, and an acceptable false
+ * positive probability of 1 in 2**fpbits, what the maximum allowed
+ * max_elements value is. If no value of max_elements would give the desired
+ * false positive probability, 0 is returned.
+ *
+ * Note that this is not an exact inverse of minisketch_compute_capacity. For
+ * example, with bits=32, fpbits=16, and max_elements=8,
+ * minisketch_compute_capacity will return 9, as capacity 8 would only have a
+ * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however
+ * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp
+ * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with
+ * capacity=9 will return 9.
+ */
+MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits);
+
+#ifdef __cplusplus
+}
+
+#if __cplusplus >= 201103L
+/** Simple RAII C++11 wrapper around the minisketch API. */
+class Minisketch
+{
+ struct Deleter
+ {
+ void operator()(minisketch* ptr) const
+ {
+ minisketch_destroy(ptr);
+ }
+ };
+
+ std::unique_ptr<minisketch, Deleter> m_minisketch;
+
+public:
+ /** Check whether the library supports fields of the given size. */
+ static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); }
+
+ /** Get the highest supported implementation number. */
+ static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); }
+
+ /** Check whether the library supports fields with a given size and implementation number.
+ * If a particular field size `bits` is supported, implementation 0 is always supported for it.
+ * Higher implementation numbers may or may not be available as well, up to MaxImplementation().
+ */
+ static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); }
+
+ /** Given field size and a maximum number of decodable elements n, compute what capacity c to
+ * use so that sketches with more elements than n have a chance no higher than 2^-fpbits of
+ * being decoded incorrectly (and will instead fail when decoding for up to n elements).
+ *
+ * See minisketch_compute_capacity for more details. */
+ static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); }
+
+ /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */
+ static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); }
+
+ /** Construct a clone of the specified sketch. */
+ Minisketch(const Minisketch& sketch) noexcept
+ {
+ if (sketch.m_minisketch) {
+ m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
+ }
+ }
+
+ /** Make this Minisketch a clone of the specified one. */
+ Minisketch& operator=(const Minisketch& sketch) noexcept
+ {
+ if (sketch.m_minisketch) {
+ m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
+ }
+ return *this;
+ }
+
+ /** Check whether this Minisketch object is valid. */
+ explicit operator bool() const noexcept { return bool{m_minisketch}; }
+
+ /** Construct an (invalid) Minisketch object. */
+ Minisketch() noexcept = default;
+
+ /** Move constructor. */
+ Minisketch(Minisketch&&) noexcept = default;
+
+ /** Move assignment. */
+ Minisketch& operator=(Minisketch&&) noexcept = default;
+
+ /** Construct a Minisketch object with the specified parameters.
+ *
+ * If bits is not BitsSupported(), or the combination of bits and capacity is not
+ * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch
+ * object will be constructed. Use operator bool() to check that this isn't the
+ * case before performing any other operations. */
+ Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept
+ {
+ m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity));
+ }
+
+ /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits.
+ * It may construct an invalid object, which you may need to check for. */
+ static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept
+ {
+ return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits));
+ }
+
+ /** Return the field size for a (valid) Minisketch object. */
+ uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); }
+
+ /** Return the capacity for a (valid) Minisketch object. */
+ size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); }
+
+ /** Return the implementation number for a (valid) Minisketch object. */
+ uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); }
+
+ /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */
+ Minisketch& SetSeed(uint64_t seed) noexcept
+ {
+ minisketch_set_seed(m_minisketch.get(), seed);
+ return *this;
+ }
+
+ /** Add (or remove, if already present) an element to a (valid) Minisketch object.
+ * See minisketch_add_uint64(). */
+ Minisketch& Add(uint64_t element) noexcept
+ {
+ minisketch_add_uint64(m_minisketch.get(), element);
+ return *this;
+ }
+
+ /** Merge sketch into *this; both have to be valid Minisketch objects.
+ * See minisketch_merge for details. */
+ Minisketch& Merge(const Minisketch& sketch) noexcept
+ {
+ minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get());
+ return *this;
+ }
+
+ /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the
+ * vector's size permits. */
+ bool Decode(std::vector<uint64_t>& result) const
+ {
+ ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data());
+ if (ret == -1) return false;
+ result.resize(ret);
+ return true;
+ }
+
+ /** Get the serialized size in bytes for this (valid) Minisketch object.. */
+ size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); }
+
+ /** Serialize this (valid) Minisketch object as a byte vector. */
+ std::vector<unsigned char> Serialize() const
+ {
+ std::vector<unsigned char> result(GetSerializedSize());
+ minisketch_serialize(m_minisketch.get(), result.data());
+ return result;
+ }
+
+ /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data()
+ * and size() members). */
+ template<typename T>
+ Minisketch& Deserialize(
+ const T& obj,
+ typename std::enable_if<
+ std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value &&
+ std::is_convertible<decltype(obj.size()), std::size_t>::value,
+ std::nullptr_t
+ >::type = nullptr) noexcept
+ {
+ assert(GetSerializedSize() == obj.size());
+ minisketch_deserialize(m_minisketch.get(), obj.data());
+ return *this;
+ }
+
+#if __cplusplus >= 201703L
+ /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */
+ std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const
+ {
+ std::vector<uint64_t> result(max_elements);
+ ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data());
+ if (ret == -1) return {};
+ result.resize(ret);
+ return result;
+ }
+
+ /** C++17 only: similar to Decode(), but with specified false positive probability. */
+ std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const
+ {
+ return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits));
+ }
+#endif // __cplusplus >= 201703L
+};
+#endif // __cplusplus >= 201103L
+#endif // __cplusplus
+
+#endif // _MINISKETCH_H_