From 5e650d6d2dbfc284c300668e71188e663d8f0a45 Mon Sep 17 00:00:00 2001
From: Mike Hearn <hearn@google.com>
Date: Mon, 25 Jun 2012 11:17:22 +0200
Subject: Import LevelDB 1.5, it will be used for the transaction database.

---
 src/leveldb/util/bloom.cc | 95 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 src/leveldb/util/bloom.cc

(limited to 'src/leveldb/util/bloom.cc')
diff --git a/src/leveldb/util/bloom.cc b/src/leveldb/util/bloom.cc
new file mode 100644
index 0000000000..d7941cd21f
--- /dev/null
+++ b/src/leveldb/util/bloom.cc
@@ -0,0 +1,95 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "leveldb/filter_policy.h"
+
+#include "leveldb/slice.h"
+#include "util/hash.h"
+
+namespace leveldb {
+
+namespace {
+static uint32_t BloomHash(const Slice& key) {
+  return Hash(key.data(), key.size(), 0xbc9f1d34);
+}
+
+class BloomFilterPolicy : public FilterPolicy {
+ private:
+  size_t bits_per_key_;
+  size_t k_;
+
+ public:
+  explicit BloomFilterPolicy(int bits_per_key)
+      : bits_per_key_(bits_per_key) {
+    // We intentionally round down to reduce probing cost a little bit
+    k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
+    if (k_ < 1) k_ = 1;
+    if (k_ > 30) k_ = 30;
+  }
+
+  virtual const char* Name() const {
+    return "leveldb.BuiltinBloomFilter";
+  }
+
+  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
+    // Compute bloom filter size (in both bits and bytes)
+    size_t bits = n * bits_per_key_;
+
+    // For small n, we can see a very high false positive rate.  Fix it
+    // by enforcing a minimum bloom filter length.
+    if (bits < 64) bits = 64;
+
+    size_t bytes = (bits + 7) / 8;
+    bits = bytes * 8;
+
+    const size_t init_size = dst->size();
+    dst->resize(init_size + bytes, 0);
+    dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
+    char* array = &(*dst)[init_size];
+    for (size_t i = 0; i < n; i++) {
+      // Use double-hashing to generate a sequence of hash values.
+      // See analysis in [Kirsch,Mitzenmacher 2006].
+      uint32_t h = BloomHash(keys[i]);
+      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
+      for (size_t j = 0; j < k_; j++) {
+        const uint32_t bitpos = h % bits;
+        array[bitpos/8] |= (1 << (bitpos % 8));
+        h += delta;
+      }
+    }
+  }
+
+  virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
+    const size_t len = bloom_filter.size();
+    if (len < 2) return false;
+
+    const char* array = bloom_filter.data();
+    const size_t bits = (len - 1) * 8;
+
+    // Use the encoded k so that we can read filters generated by
+    // bloom filters created using different parameters.
+    const size_t k = array[len-1];
+    if (k > 30) {
+      // Reserved for potentially new encodings for short bloom filters.
+      // Consider it a match.
+      return true;
+    }
+
+    uint32_t h = BloomHash(key);
+    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
+    for (size_t j = 0; j < k; j++) {
+      const uint32_t bitpos = h % bits;
+      if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
+      h += delta;
+    }
+    return true;
+  }
+};
+}
+
+const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
+  return new BloomFilterPolicy(bits_per_key);
+}
+
+}  // namespace leveldb
-- 
cgit v1.2.3