author    fanquake <fanquake@gmail.com>    2022-07-07 17:55:15 +0100
committer fanquake <fanquake@gmail.com>    2022-07-07 17:59:02 +0100
commit    5abbc9afec1874efe139ccdc988b4ae122aa472c (patch)
tree      04f904dd41e81721e30f8ab554c117bf38c7712b /src/bench
parent    d571cf2d2421c6f8efb2b61ca844034eaf230945 (diff)
parent    e734228d8585c0870c71ce8ba8c037f8cf8b249a (diff)
download  bitcoin-5abbc9afec1874efe139ccdc988b4ae122aa472c.tar.xz
Merge bitcoin/bitcoin#24832: index: Verify the block filter hash when reading the filter from disk.
e734228d8585c0870c71ce8ba8c037f8cf8b249a Update GCSFilter benchmarks (Calvin Kim)
aee9a8140b3a58b744766f9e89572f1d953a808b Add GCSFilterDecodeSkipCheck benchmark (Patrick Strateman)
299023c1d9962628d158fac0306f8531506a0123 Add GCSFilterDecode and GCSBlockFilterGetHash benchmarks. (Patrick Strateman)
b0a53d50d9142bed51a8372eeb848816bfa94da8 Make sanity check in GCSFilter constructor optional (Patrick Strateman)

Pull request description:

This PR picks up the abandoned #19280.

BlockFilterIndex relied on `GolombRiceDecode()` during filter decode to sanity-check that the filter wasn't corrupt. However, corruption can be detected instead by checking that the hash of the encoded block filter matches the hash stored in the index database.

Benchmarks added in #19280 showed that checking the hash is much faster. The benchmarks were ported to nanobench; the relevant results below show a clear win for the hash-check method.

```
|            ns/elem |             elem/s |    err% |        ins/elem |       bra/elem |   miss% |     total | benchmark
|-------------------:|-------------------:|--------:|----------------:|---------------:|--------:|----------:|:----------
|             531.40 |       1,881,819.43 |    0.3% |        3,527.01 |         411.00 |    0.2% |      0.01 | `DecodeCheckedGCSFilter`
|         258,220.50 |           3,872.66 |    0.1% |    2,990,092.00 |     586,706.00 |    1.7% |      0.01 | `DecodeGCSFilter`
|          13,036.77 |          76,706.09 |    0.3% |       64,238.24 |         513.04 |    0.2% |      0.01 | `BlockFilterGetHash`
```

ACKs for top commit:
  mzumsande: Code Review ACK e734228d8585c0870c71ce8ba8c037f8cf8b249a
  theStack: Code-review ACK e734228d8585c0870c71ce8ba8c037f8cf8b249a
  stickies-v: ACK e734228d8585c0870c71ce8ba8c037f8cf8b249a
  ryanofsky: Code review ACK e734228d8585c0870c71ce8ba8c037f8cf8b249a, with the caveat that I mostly paid attention to the main code, not the changes to the benchmark. The only changes since the last review were to the benchmark code.

Tree-SHA512: 02b86eab7b554e1a57a15b17a4d6d71faa91b556c637b0da29f0c9ee76597a110be8e3b4d0c158d4cab04af0623de18b764837be0ec2a72afcfe1ad9c78a83c6
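The idea of the fix can be sketched as follows: rather than paying for a full Golomb-Rice decode just to detect corruption, rebuild the BlockFilter from the raw bytes read from disk with the decode check skipped, then compare its hash against the hash already stored in the index database. The snippet below is a minimal illustration using only the constructor with `skip_decode_check` and `GetHash()` as they appear in this diff; the function name `LoadCheckedFilter` and the way `stored_hash` is obtained are assumptions for the sketch, not the PR's actual BlockFilterIndex code.

```
// Minimal sketch of the hash-check approach; LoadCheckedFilter and its
// parameters are hypothetical, not the PR's BlockFilterIndex implementation.
#include <blockfilter.h>
#include <uint256.h>

#include <optional>
#include <vector>

std::optional<BlockFilter> LoadCheckedFilter(const uint256& block_hash,
                                             std::vector<unsigned char> encoded,
                                             const uint256& stored_hash)
{
    // Skip the expensive Golomb-Rice decode; corruption is caught by the hash check.
    BlockFilter filter{BlockFilterType::BASIC, block_hash, std::move(encoded),
                       /*skip_decode_check=*/true};
    if (filter.GetHash() != stored_hash) {
        return std::nullopt; // bytes on disk do not match the indexed hash
    }
    return filter;
}
```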
Diffstat (limited to 'src/bench')
-rw-r--r-- src/bench/gcs_filter.cpp | 79
1 file changed, 62 insertions(+), 17 deletions(-)
diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp
index 607e4392b7..80babb213b 100644
--- a/src/bench/gcs_filter.cpp
+++ b/src/bench/gcs_filter.cpp
@@ -5,39 +5,84 @@
#include <bench/bench.h>
#include <blockfilter.h>
-static void ConstructGCSFilter(benchmark::Bench& bench)
+static const GCSFilter::ElementSet GenerateGCSTestElements()
{
GCSFilter::ElementSet elements;
- for (int i = 0; i < 10000; ++i) {
+
+ // Testing the benchmarks with different number of elements show that a filter
+ // with at least 100,000 elements results in benchmarks that have the same
+ // ns/op. This makes it easy to reason about how long (in nanoseconds) a single
+ // filter element takes to process.
+ for (int i = 0; i < 100000; ++i) {
GCSFilter::Element element(32);
element[0] = static_cast<unsigned char>(i);
element[1] = static_cast<unsigned char>(i >> 8);
elements.insert(std::move(element));
}
+ return elements;
+}
+
+static void GCSBlockFilterGetHash(benchmark::Bench& bench)
+{
+ auto elements = GenerateGCSTestElements();
+
+ GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
+ BlockFilter block_filter(BlockFilterType::BASIC, {}, filter.GetEncoded(), /*skip_decode_check=*/false);
+
+ bench.run([&] {
+ block_filter.GetHash();
+ });
+}
+
+static void GCSFilterConstruct(benchmark::Bench& bench)
+{
+ auto elements = GenerateGCSTestElements();
+
uint64_t siphash_k0 = 0;
- bench.batch(elements.size()).unit("elem").run([&] {
- GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements);
+ bench.run([&]{
+ GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
siphash_k0++;
});
}
-static void MatchGCSFilter(benchmark::Bench& bench)
+static void GCSFilterDecode(benchmark::Bench& bench)
{
- GCSFilter::ElementSet elements;
- for (int i = 0; i < 10000; ++i) {
- GCSFilter::Element element(32);
- element[0] = static_cast<unsigned char>(i);
- element[1] = static_cast<unsigned char>(i >> 8);
- elements.insert(std::move(element));
- }
- GCSFilter filter({0, 0, 20, 1 << 20}, elements);
+ auto elements = GenerateGCSTestElements();
- bench.unit("elem").run([&] {
- filter.Match(GCSFilter::Element());
+ GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
+ auto encoded = filter.GetEncoded();
+
+ bench.run([&] {
+ GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/false);
});
}
-BENCHMARK(ConstructGCSFilter);
-BENCHMARK(MatchGCSFilter);
+static void GCSFilterDecodeSkipCheck(benchmark::Bench& bench)
+{
+ auto elements = GenerateGCSTestElements();
+
+ GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
+ auto encoded = filter.GetEncoded();
+
+ bench.run([&] {
+ GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/true);
+ });
+}
+
+static void GCSFilterMatch(benchmark::Bench& bench)
+{
+ auto elements = GenerateGCSTestElements();
+
+ GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
+
+ bench.run([&] {
+ filter.Match(GCSFilter::Element());
+ });
+}
+BENCHMARK(GCSBlockFilterGetHash);
+BENCHMARK(GCSFilterConstruct);
+BENCHMARK(GCSFilterDecode);
+BENCHMARK(GCSFilterDecodeSkipCheck);
+BENCHMARK(GCSFilterMatch);
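The new benchmarks all share the same shape: build the element set once via GenerateGCSTestElements(), construct the object under test outside the timed lambda, and measure only the operation of interest inside bench.run(). As an illustration of that pattern only (not part of this commit), a further benchmark could exercise GCSFilter::MatchAny() with a small query set; the name GCSFilterMatchAny and the query-set contents are assumptions for the sketch.

```
// Illustrative sketch only, not part of this commit: a MatchAny benchmark in
// the same style as the functions above. The 16-element query set is arbitrary.
static void GCSFilterMatchAny(benchmark::Bench& bench)
{
    auto elements = GenerateGCSTestElements();

    GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);

    // A small set of 32-byte elements that are unlikely to be in the filter.
    GCSFilter::ElementSet queries;
    for (int i = 0; i < 16; ++i) {
        GCSFilter::Element element(32, 0xff);
        element[0] = static_cast<unsigned char>(i);
        queries.insert(std::move(element));
    }

    bench.run([&] {
        filter.MatchAny(queries);
    });
}
BENCHMARK(GCSFilterMatchAny);
```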