aboutsummaryrefslogtreecommitdiff
path: root/src/minisketch/include/minisketch.h
blob: 0b5d8372e866a1967ff845a4dcd0ad2b67ac1574 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
#ifndef _MINISKETCH_H_
#define _MINISKETCH_H_ 1

#include <stdint.h>
#include <stdlib.h>

#ifdef _MSC_VER
#  include <compat.h>
#else
#  include <unistd.h>
#endif

#ifndef MINISKETCH_API
# if defined(_WIN32)
#  ifdef MINISKETCH_BUILD
#   define MINISKETCH_API __declspec(dllexport)
#  else
#   define MINISKETCH_API
#  endif
# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD)
#  define MINISKETCH_API __attribute__ ((visibility ("default")))
# else
#  define MINISKETCH_API
# endif
#endif

#ifdef __cplusplus
#  if __cplusplus >= 201103L
#    include <memory>
#    include <vector>
#    include <cassert>
#    if __cplusplus >= 201703L
#      include <optional>
#    endif // __cplusplus >= 201703L
#  endif // __cplusplus >= 201103L
extern "C" {
#endif // __cplusplus

/** Opaque type for decoded sketches. */
typedef struct minisketch minisketch;

/** Determine whether support for elements of `bits` bits was compiled in. */
MINISKETCH_API int minisketch_bits_supported(uint32_t bits);

/** Determine the maximum number of implementations available.
 *
 * Multiple implementations may be available for a given element size, with
 * different performance characteristics on different hardware.
 *
 * Each implementation is identified by a number from 0 to the output of this
 * function call, inclusive. Note that not every combination of implementation
 * and element size may exist (see further).
*/
MINISKETCH_API uint32_t minisketch_implementation_max(void);

/** Determine if the a combination of bits and implementation number is available.
 *
 * Returns 1 if it is, 0 otherwise.
 */
MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation);

/** Construct a sketch for a given element size, implementation and capacity.
 *
 * If the combination of `bits` and `implementation` is unavailable, or when
 * OOM occurs, NULL is returned. If minisketch_implementation_supported
 * returns 1 for the specified bits and implementation, this will always succeed
 * (except when allocation fails).
 *
 * If the result is not NULL, it must be destroyed using minisketch_destroy.
 */
MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity);

/** Get the element size of a sketch in bits. */
MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch);

/** Get the capacity of a sketch. */
MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch);

/** Get the implementation of a sketch. */
MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch);

/** Set the seed for randomizing algorithm choices to a fixed value.
 *
 * By default, sketches are initialized with a random seed. This is important
 * to avoid scenarios where an attacker could force worst-case behavior.
 *
 * This function initializes the seed to a user-provided value (any 64-bit
 * integer is acceptable, regardless of field size).
 *
 * When seed is -1, a fixed internal value with predictable behavior is
 * used. It is only intended for testing.
 */
MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed);

/** Clone a sketch.
 *
 * The result must be destroyed using minisketch_destroy.
 */
MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch);

/** Destroy a sketch.
 *
 * The pointer that was passed in may not be used anymore afterwards.
 */
MINISKETCH_API void minisketch_destroy(minisketch* sketch);

/** Compute the size in bytes for serializing a given sketch. */
MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch);

/** Serialize a sketch to bytes. */
MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output);

/** Deserialize a sketch from bytes. */
MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input);

/** Add an element to a sketch.
 * 
 * If the element to be added is too large for the sketch, the most significant
 * bits of the element are dropped. More precisely, if the element size of
 * `sketch` is b bits, then this function adds the unsigned integer represented
 * by the b least significant bits of `element` to `sketch`.
 * 
 * If the element to be added is 0 (after potentially dropping the most significant
 * bits), then this function is a no-op. Sketches cannot contain an element with
 * the value 0.
 *
 * Note that adding the same element a second time removes it again.
 */
MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element);

/** Merge the elements of another sketch into this sketch.
 *
 * After merging, `sketch` will contain every element that existed in one but not
 * both of the input sketches. It can be seen as an exclusive or operation on
 * the set elements.  If the capacity of `other_sketch` is lower than `sketch`'s,
 * merging reduces the capacity of `sketch` to that of `other_sketch`.
 *
 * This function returns the capacity of `sketch` after merging has been performed
 * (where this capacity is at least 1), or 0 to indicate that merging has failed because
 * the two input sketches differ in their element size or implementation. If 0 is
 * returned, `sketch` (and its capacity) have not been modified.
 *
 * It is also possible to perform this operation directly on the serializations
 * of two sketches with the same element size and capacity by performing a bitwise XOR
 * of the serializations.
 */
MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch);

/** Decode a sketch.
 *
 * `output` is a pointer to an array of `max_element` uint64_t's, which will be
 * filled with the elements in this sketch.
 *
 * The return value is the number of decoded elements, or -1 if decoding failed.
 */
MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output);

/** Compute the capacity needed to achieve a certain rate of false positives.
 *
 * A sketch with capacity c and no more than c elements can always be decoded
 * correctly. However, if it has more than c elements, or contains just random
 * bytes, it is possible that it will still decode, but the result will be
 * nonsense. This can be counteracted by increasing the capacity slightly.
 *
 * Given a field size bits, an intended number of elements that can be decoded
 * max_elements, and a false positive probability of 1 in 2**fpbits, this
 * function computes the necessary capacity. It is only guaranteed to be
 * accurate up to fpbits=256.
 */
MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits);

/** Compute what max_elements can be decoded for a certain rate of false positives.
 *
 * This is the inverse operation of minisketch_compute_capacity. It determines,
 * given a field size bits, a capacity of a sketch, and an acceptable false
 * positive probability of 1 in 2**fpbits, what the maximum allowed
 * max_elements value is. If no value of max_elements would give the desired
 * false positive probability, 0 is returned.
 *
 * Note that this is not an exact inverse of minisketch_compute_capacity. For
 * example, with bits=32, fpbits=16, and max_elements=8,
 * minisketch_compute_capacity will return 9, as capacity 8 would only have a
 * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however
 * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp
 * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with
 * capacity=9 will return 9.
 */
MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits);

#ifdef __cplusplus
}

#if __cplusplus >= 201103L
/** Simple RAII C++11 wrapper around the minisketch API. */
class Minisketch
{
    struct Deleter
    {
        void operator()(minisketch* ptr) const
        {
            minisketch_destroy(ptr);
        }
    };

    std::unique_ptr<minisketch, Deleter> m_minisketch;

public:
    /** Check whether the library supports fields of the given size. */
    static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); }

    /** Get the highest supported implementation number. */
    static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); }

    /** Check whether the library supports fields with a given size and implementation number.
     *  If a particular field size `bits` is supported, implementation 0 is always supported for it.
     *  Higher implementation numbers may or may not be available as well, up to MaxImplementation().
     */
    static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); }

    /** Given field size and a maximum number of decodable elements n, compute what capacity c to
     *  use so that sketches with more elements than n have a chance no higher than 2^-fpbits of
     *  being decoded incorrectly (and will instead fail when decoding for up to n elements).
     *
     *  See minisketch_compute_capacity for more details. */
    static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); }

    /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */
    static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); }

    /** Construct a clone of the specified sketch. */
    Minisketch(const Minisketch& sketch) noexcept
    {
        if (sketch.m_minisketch) {
            m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
        }
    }

    /** Make this Minisketch a clone of the specified one. */
    Minisketch& operator=(const Minisketch& sketch) noexcept
    {
        if (sketch.m_minisketch) {
            m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
        }
        return *this;
    }

    /** Check whether this Minisketch object is valid. */
    explicit operator bool() const noexcept { return bool{m_minisketch}; }

    /** Construct an (invalid) Minisketch object. */
    Minisketch() noexcept = default;

    /** Move constructor. */
    Minisketch(Minisketch&&) noexcept = default;

    /** Move assignment. */
    Minisketch& operator=(Minisketch&&) noexcept = default;

    /** Construct a Minisketch object with the specified parameters.
     *
     * If bits is not BitsSupported(), or the combination of bits and capacity is not
     * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch
     * object will be constructed. Use operator bool() to check that this isn't the
     * case before performing any other operations. */
    Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept
    {
        m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity));
    }

    /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits.
     *  It may construct an invalid object, which you may need to check for. */
    static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept
    {
        return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits));
    }

    /** Return the field size for a (valid) Minisketch object. */
    uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); }

    /** Return the capacity for a (valid) Minisketch object. */
    size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); }

    /** Return the implementation number for a (valid) Minisketch object. */
    uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); }

    /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */
    Minisketch& SetSeed(uint64_t seed) noexcept
    {
        minisketch_set_seed(m_minisketch.get(), seed);
        return *this;
    }

    /** Add (or remove, if already present) an element to a (valid) Minisketch object.
     *  See minisketch_add_uint64(). */
    Minisketch& Add(uint64_t element) noexcept
    {
        minisketch_add_uint64(m_minisketch.get(), element);
        return *this;
    }

    /** Merge sketch into *this; both have to be valid Minisketch objects.
     *  See minisketch_merge for details. */
    Minisketch& Merge(const Minisketch& sketch) noexcept
    {
        minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get());
        return *this;
    }

    /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the
     *  vector's size permits. */
    bool Decode(std::vector<uint64_t>& result) const
    {
        ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data());
        if (ret == -1) return false;
        result.resize(ret);
        return true;
    }

    /** Get the serialized size in bytes for this (valid) Minisketch object.. */
    size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); }

    /** Serialize this (valid) Minisketch object as a byte vector. */
    std::vector<unsigned char> Serialize() const
    {
        std::vector<unsigned char> result(GetSerializedSize());
        minisketch_serialize(m_minisketch.get(), result.data());
        return result;
    }

    /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data()
     *  and size() members). */
    template<typename T>
    Minisketch& Deserialize(
        const T& obj,
        typename std::enable_if<
            std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value &&
            std::is_convertible<decltype(obj.size()), std::size_t>::value,
            std::nullptr_t
        >::type = nullptr) noexcept
    {
        assert(GetSerializedSize() == obj.size());
        minisketch_deserialize(m_minisketch.get(), obj.data());
        return *this;
    }

#if __cplusplus >= 201703L
    /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */
    std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const
    {
        std::vector<uint64_t> result(max_elements);
        ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data());
        if (ret == -1) return {};
        result.resize(ret);
        return result;
    }

    /** C++17 only: similar to Decode(), but with specified false positive probability. */
    std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const
    {
        return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits));
    }
#endif // __cplusplus >= 201703L
};
#endif // __cplusplus >= 201103L
#endif // __cplusplus

#endif  // _MINISKETCH_H_