Diffstat (limited to 'src/leveldb/db')
-rw-r--r--  src/leveldb/db/builder.cc                  88
-rw-r--r--  src/leveldb/db/builder.h                   34
-rw-r--r--  src/leveldb/db/c.cc                       595
-rw-r--r--  src/leveldb/db/c_test.c                   390
-rw-r--r--  src/leveldb/db/corruption_test.cc         359
-rw-r--r--  src/leveldb/db/db_bench.cc                979
-rw-r--r--  src/leveldb/db/db_impl.cc                1467
-rw-r--r--  src/leveldb/db/db_impl.h                  202
-rw-r--r--  src/leveldb/db/db_iter.cc                 299
-rw-r--r--  src/leveldb/db/db_iter.h                   26
-rw-r--r--  src/leveldb/db/db_test.cc                2027
-rw-r--r--  src/leveldb/db/dbformat.cc                140
-rw-r--r--  src/leveldb/db/dbformat.h                 227
-rw-r--r--  src/leveldb/db/dbformat_test.cc           112
-rw-r--r--  src/leveldb/db/filename.cc                139
-rw-r--r--  src/leveldb/db/filename.h                  80
-rw-r--r--  src/leveldb/db/filename_test.cc           122
-rw-r--r--  src/leveldb/db/leveldb_main.cc            238
-rw-r--r--  src/leveldb/db/log_format.h                35
-rw-r--r--  src/leveldb/db/log_reader.cc              259
-rw-r--r--  src/leveldb/db/log_reader.h               108
-rw-r--r--  src/leveldb/db/log_test.cc                500
-rw-r--r--  src/leveldb/db/log_writer.cc              103
-rw-r--r--  src/leveldb/db/log_writer.h                48
-rw-r--r--  src/leveldb/db/memtable.cc                145
-rw-r--r--  src/leveldb/db/memtable.h                  91
-rw-r--r--  src/leveldb/db/repair.cc                  389
-rw-r--r--  src/leveldb/db/skiplist.h                 379
-rw-r--r--  src/leveldb/db/skiplist_test.cc           378
-rw-r--r--  src/leveldb/db/snapshot.h                  66
-rw-r--r--  src/leveldb/db/table_cache.cc             121
-rw-r--r--  src/leveldb/db/table_cache.h               61
-rw-r--r--  src/leveldb/db/version_edit.cc            266
-rw-r--r--  src/leveldb/db/version_edit.h             107
-rw-r--r--  src/leveldb/db/version_edit_test.cc        46
-rw-r--r--  src/leveldb/db/version_set.cc            1438
-rw-r--r--  src/leveldb/db/version_set.h              383
-rw-r--r--  src/leveldb/db/version_set_test.cc        179
-rw-r--r--  src/leveldb/db/write_batch.cc             147
-rw-r--r--  src/leveldb/db/write_batch_internal.h      49
-rw-r--r--  src/leveldb/db/write_batch_test.cc        120
41 files changed, 0 insertions, 12942 deletions
diff --git a/src/leveldb/db/builder.cc b/src/leveldb/db/builder.cc
deleted file mode 100644
index f419882197..0000000000
--- a/src/leveldb/db/builder.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/builder.h"
-
-#include "db/filename.h"
-#include "db/dbformat.h"
-#include "db/table_cache.h"
-#include "db/version_edit.h"
-#include "leveldb/db.h"
-#include "leveldb/env.h"
-#include "leveldb/iterator.h"
-
-namespace leveldb {
-
-Status BuildTable(const std::string& dbname,
- Env* env,
- const Options& options,
- TableCache* table_cache,
- Iterator* iter,
- FileMetaData* meta) {
- Status s;
- meta->file_size = 0;
- iter->SeekToFirst();
-
- std::string fname = TableFileName(dbname, meta->number);
- if (iter->Valid()) {
- WritableFile* file;
- s = env->NewWritableFile(fname, &file);
- if (!s.ok()) {
- return s;
- }
-
- TableBuilder* builder = new TableBuilder(options, file);
- meta->smallest.DecodeFrom(iter->key());
- for (; iter->Valid(); iter->Next()) {
- Slice key = iter->key();
- meta->largest.DecodeFrom(key);
- builder->Add(key, iter->value());
- }
-
- // Finish and check for builder errors
- if (s.ok()) {
- s = builder->Finish();
- if (s.ok()) {
- meta->file_size = builder->FileSize();
- assert(meta->file_size > 0);
- }
- } else {
- builder->Abandon();
- }
- delete builder;
-
- // Finish and check for file errors
- if (s.ok()) {
- s = file->Sync();
- }
- if (s.ok()) {
- s = file->Close();
- }
- delete file;
- file = NULL;
-
- if (s.ok()) {
- // Verify that the table is usable
- Iterator* it = table_cache->NewIterator(ReadOptions(),
- meta->number,
- meta->file_size);
- s = it->status();
- delete it;
- }
- }
-
- // Check for input iterator errors
- if (!iter->status().ok()) {
- s = iter->status();
- }
-
- if (s.ok() && meta->file_size > 0) {
- // Keep it
- } else {
- env->DeleteFile(fname);
- }
- return s;
-}
-
-} // namespace leveldb
diff --git a/src/leveldb/db/builder.h b/src/leveldb/db/builder.h
deleted file mode 100644
index 62431fcf44..0000000000
--- a/src/leveldb/db/builder.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_BUILDER_H_
-#define STORAGE_LEVELDB_DB_BUILDER_H_
-
-#include "leveldb/status.h"
-
-namespace leveldb {
-
-struct Options;
-struct FileMetaData;
-
-class Env;
-class Iterator;
-class TableCache;
-class VersionEdit;
-
-// Build a Table file from the contents of *iter. The generated file
-// will be named according to meta->number. On success, the rest of
-// *meta will be filled with metadata about the generated table.
-// If no data is present in *iter, meta->file_size will be set to
-// zero, and no Table file will be produced.
-extern Status BuildTable(const std::string& dbname,
- Env* env,
- const Options& options,
- TableCache* table_cache,
- Iterator* iter,
- FileMetaData* meta);
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_BUILDER_H_
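
For orientation, BuildTable is the minor-compaction entry point: the WriteLevel0Table path in db_impl.cc drives it roughly as in the sketch below. The sketch is illustrative, not part of the deleted sources; FlushMemTableSketch is a hypothetical helper, while VersionSet::NewFileNumber(), MemTable::NewIterator(), VersionEdit::AddFile() and the FileMetaData fields are the interfaces declared elsewhere in this tree.

// Sketch only: how a caller is expected to drive BuildTable.
#include "db/builder.h"
#include "db/memtable.h"
#include "db/version_edit.h"
#include "db/version_set.h"

namespace leveldb {

Status FlushMemTableSketch(const std::string& dbname, Env* env,
                           const Options& options, TableCache* table_cache,
                           VersionSet* versions, MemTable* mem,
                           VersionEdit* edit) {
  FileMetaData meta;
  meta.number = versions->NewFileNumber();  // output file is named from this
  Iterator* iter = mem->NewIterator();
  Status s = BuildTable(dbname, env, options, table_cache, iter, &meta);
  delete iter;
  // An empty input iterator leaves meta.file_size == 0 and produces no
  // table file, so only record the file in the version edit when non-empty.
  if (s.ok() && meta.file_size > 0) {
    edit->AddFile(0, meta.number, meta.file_size,
                  meta.smallest, meta.largest);
  }
  return s;
}

}  // namespace leveldb
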
diff --git a/src/leveldb/db/c.cc b/src/leveldb/db/c.cc
deleted file mode 100644
index 08ff0ad90a..0000000000
--- a/src/leveldb/db/c.cc
+++ /dev/null
@@ -1,595 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "leveldb/c.h"
-
-#include <stdlib.h>
-#include <unistd.h>
-#include "leveldb/cache.h"
-#include "leveldb/comparator.h"
-#include "leveldb/db.h"
-#include "leveldb/env.h"
-#include "leveldb/filter_policy.h"
-#include "leveldb/iterator.h"
-#include "leveldb/options.h"
-#include "leveldb/status.h"
-#include "leveldb/write_batch.h"
-
-using leveldb::Cache;
-using leveldb::Comparator;
-using leveldb::CompressionType;
-using leveldb::DB;
-using leveldb::Env;
-using leveldb::FileLock;
-using leveldb::FilterPolicy;
-using leveldb::Iterator;
-using leveldb::kMajorVersion;
-using leveldb::kMinorVersion;
-using leveldb::Logger;
-using leveldb::NewBloomFilterPolicy;
-using leveldb::NewLRUCache;
-using leveldb::Options;
-using leveldb::RandomAccessFile;
-using leveldb::Range;
-using leveldb::ReadOptions;
-using leveldb::SequentialFile;
-using leveldb::Slice;
-using leveldb::Snapshot;
-using leveldb::Status;
-using leveldb::WritableFile;
-using leveldb::WriteBatch;
-using leveldb::WriteOptions;
-
-extern "C" {
-
-struct leveldb_t { DB* rep; };
-struct leveldb_iterator_t { Iterator* rep; };
-struct leveldb_writebatch_t { WriteBatch rep; };
-struct leveldb_snapshot_t { const Snapshot* rep; };
-struct leveldb_readoptions_t { ReadOptions rep; };
-struct leveldb_writeoptions_t { WriteOptions rep; };
-struct leveldb_options_t { Options rep; };
-struct leveldb_cache_t { Cache* rep; };
-struct leveldb_seqfile_t { SequentialFile* rep; };
-struct leveldb_randomfile_t { RandomAccessFile* rep; };
-struct leveldb_writablefile_t { WritableFile* rep; };
-struct leveldb_logger_t { Logger* rep; };
-struct leveldb_filelock_t { FileLock* rep; };
-
-struct leveldb_comparator_t : public Comparator {
- void* state_;
- void (*destructor_)(void*);
- int (*compare_)(
- void*,
- const char* a, size_t alen,
- const char* b, size_t blen);
- const char* (*name_)(void*);
-
- virtual ~leveldb_comparator_t() {
- (*destructor_)(state_);
- }
-
- virtual int Compare(const Slice& a, const Slice& b) const {
- return (*compare_)(state_, a.data(), a.size(), b.data(), b.size());
- }
-
- virtual const char* Name() const {
- return (*name_)(state_);
- }
-
- // No-ops since the C binding does not support key shortening methods.
- virtual void FindShortestSeparator(std::string*, const Slice&) const { }
- virtual void FindShortSuccessor(std::string* key) const { }
-};
-
-struct leveldb_filterpolicy_t : public FilterPolicy {
- void* state_;
- void (*destructor_)(void*);
- const char* (*name_)(void*);
- char* (*create_)(
- void*,
- const char* const* key_array, const size_t* key_length_array,
- int num_keys,
- size_t* filter_length);
- unsigned char (*key_match_)(
- void*,
- const char* key, size_t length,
- const char* filter, size_t filter_length);
-
- virtual ~leveldb_filterpolicy_t() {
- (*destructor_)(state_);
- }
-
- virtual const char* Name() const {
- return (*name_)(state_);
- }
-
- virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
- std::vector<const char*> key_pointers(n);
- std::vector<size_t> key_sizes(n);
- for (int i = 0; i < n; i++) {
- key_pointers[i] = keys[i].data();
- key_sizes[i] = keys[i].size();
- }
- size_t len;
- char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
- dst->append(filter, len);
- free(filter);
- }
-
- virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
- return (*key_match_)(state_, key.data(), key.size(),
- filter.data(), filter.size());
- }
-};
-
-struct leveldb_env_t {
- Env* rep;
- bool is_default;
-};
-
-static bool SaveError(char** errptr, const Status& s) {
- assert(errptr != NULL);
- if (s.ok()) {
- return false;
- } else if (*errptr == NULL) {
- *errptr = strdup(s.ToString().c_str());
- } else {
- // TODO(sanjay): Merge with existing error?
- free(*errptr);
- *errptr = strdup(s.ToString().c_str());
- }
- return true;
-}
-
-static char* CopyString(const std::string& str) {
- char* result = reinterpret_cast<char*>(malloc(sizeof(char) * str.size()));
- memcpy(result, str.data(), sizeof(char) * str.size());
- return result;
-}
-
-leveldb_t* leveldb_open(
- const leveldb_options_t* options,
- const char* name,
- char** errptr) {
- DB* db;
- if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) {
- return NULL;
- }
- leveldb_t* result = new leveldb_t;
- result->rep = db;
- return result;
-}
-
-void leveldb_close(leveldb_t* db) {
- delete db->rep;
- delete db;
-}
-
-void leveldb_put(
- leveldb_t* db,
- const leveldb_writeoptions_t* options,
- const char* key, size_t keylen,
- const char* val, size_t vallen,
- char** errptr) {
- SaveError(errptr,
- db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen)));
-}
-
-void leveldb_delete(
- leveldb_t* db,
- const leveldb_writeoptions_t* options,
- const char* key, size_t keylen,
- char** errptr) {
- SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen)));
-}
-
-
-void leveldb_write(
- leveldb_t* db,
- const leveldb_writeoptions_t* options,
- leveldb_writebatch_t* batch,
- char** errptr) {
- SaveError(errptr, db->rep->Write(options->rep, &batch->rep));
-}
-
-char* leveldb_get(
- leveldb_t* db,
- const leveldb_readoptions_t* options,
- const char* key, size_t keylen,
- size_t* vallen,
- char** errptr) {
- char* result = NULL;
- std::string tmp;
- Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp);
- if (s.ok()) {
- *vallen = tmp.size();
- result = CopyString(tmp);
- } else {
- *vallen = 0;
- if (!s.IsNotFound()) {
- SaveError(errptr, s);
- }
- }
- return result;
-}
-
-leveldb_iterator_t* leveldb_create_iterator(
- leveldb_t* db,
- const leveldb_readoptions_t* options) {
- leveldb_iterator_t* result = new leveldb_iterator_t;
- result->rep = db->rep->NewIterator(options->rep);
- return result;
-}
-
-const leveldb_snapshot_t* leveldb_create_snapshot(
- leveldb_t* db) {
- leveldb_snapshot_t* result = new leveldb_snapshot_t;
- result->rep = db->rep->GetSnapshot();
- return result;
-}
-
-void leveldb_release_snapshot(
- leveldb_t* db,
- const leveldb_snapshot_t* snapshot) {
- db->rep->ReleaseSnapshot(snapshot->rep);
- delete snapshot;
-}
-
-char* leveldb_property_value(
- leveldb_t* db,
- const char* propname) {
- std::string tmp;
- if (db->rep->GetProperty(Slice(propname), &tmp)) {
- // We use strdup() since we expect human readable output.
- return strdup(tmp.c_str());
- } else {
- return NULL;
- }
-}
-
-void leveldb_approximate_sizes(
- leveldb_t* db,
- int num_ranges,
- const char* const* range_start_key, const size_t* range_start_key_len,
- const char* const* range_limit_key, const size_t* range_limit_key_len,
- uint64_t* sizes) {
- Range* ranges = new Range[num_ranges];
- for (int i = 0; i < num_ranges; i++) {
- ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]);
- ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]);
- }
- db->rep->GetApproximateSizes(ranges, num_ranges, sizes);
- delete[] ranges;
-}
-
-void leveldb_compact_range(
- leveldb_t* db,
- const char* start_key, size_t start_key_len,
- const char* limit_key, size_t limit_key_len) {
- Slice a, b;
- db->rep->CompactRange(
- // Pass NULL Slice if corresponding "const char*" is NULL
- (start_key ? (a = Slice(start_key, start_key_len), &a) : NULL),
- (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL));
-}
-
-void leveldb_destroy_db(
- const leveldb_options_t* options,
- const char* name,
- char** errptr) {
- SaveError(errptr, DestroyDB(name, options->rep));
-}
-
-void leveldb_repair_db(
- const leveldb_options_t* options,
- const char* name,
- char** errptr) {
- SaveError(errptr, RepairDB(name, options->rep));
-}
-
-void leveldb_iter_destroy(leveldb_iterator_t* iter) {
- delete iter->rep;
- delete iter;
-}
-
-unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) {
- return iter->rep->Valid();
-}
-
-void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) {
- iter->rep->SeekToFirst();
-}
-
-void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) {
- iter->rep->SeekToLast();
-}
-
-void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) {
- iter->rep->Seek(Slice(k, klen));
-}
-
-void leveldb_iter_next(leveldb_iterator_t* iter) {
- iter->rep->Next();
-}
-
-void leveldb_iter_prev(leveldb_iterator_t* iter) {
- iter->rep->Prev();
-}
-
-const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) {
- Slice s = iter->rep->key();
- *klen = s.size();
- return s.data();
-}
-
-const char* leveldb_iter_value(const leveldb_iterator_t* iter, size_t* vlen) {
- Slice s = iter->rep->value();
- *vlen = s.size();
- return s.data();
-}
-
-void leveldb_iter_get_error(const leveldb_iterator_t* iter, char** errptr) {
- SaveError(errptr, iter->rep->status());
-}
-
-leveldb_writebatch_t* leveldb_writebatch_create() {
- return new leveldb_writebatch_t;
-}
-
-void leveldb_writebatch_destroy(leveldb_writebatch_t* b) {
- delete b;
-}
-
-void leveldb_writebatch_clear(leveldb_writebatch_t* b) {
- b->rep.Clear();
-}
-
-void leveldb_writebatch_put(
- leveldb_writebatch_t* b,
- const char* key, size_t klen,
- const char* val, size_t vlen) {
- b->rep.Put(Slice(key, klen), Slice(val, vlen));
-}
-
-void leveldb_writebatch_delete(
- leveldb_writebatch_t* b,
- const char* key, size_t klen) {
- b->rep.Delete(Slice(key, klen));
-}
-
-void leveldb_writebatch_iterate(
- leveldb_writebatch_t* b,
- void* state,
- void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
- void (*deleted)(void*, const char* k, size_t klen)) {
- class H : public WriteBatch::Handler {
- public:
- void* state_;
- void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen);
- void (*deleted_)(void*, const char* k, size_t klen);
- virtual void Put(const Slice& key, const Slice& value) {
- (*put_)(state_, key.data(), key.size(), value.data(), value.size());
- }
- virtual void Delete(const Slice& key) {
- (*deleted_)(state_, key.data(), key.size());
- }
- };
- H handler;
- handler.state_ = state;
- handler.put_ = put;
- handler.deleted_ = deleted;
- b->rep.Iterate(&handler);
-}
-
-leveldb_options_t* leveldb_options_create() {
- return new leveldb_options_t;
-}
-
-void leveldb_options_destroy(leveldb_options_t* options) {
- delete options;
-}
-
-void leveldb_options_set_comparator(
- leveldb_options_t* opt,
- leveldb_comparator_t* cmp) {
- opt->rep.comparator = cmp;
-}
-
-void leveldb_options_set_filter_policy(
- leveldb_options_t* opt,
- leveldb_filterpolicy_t* policy) {
- opt->rep.filter_policy = policy;
-}
-
-void leveldb_options_set_create_if_missing(
- leveldb_options_t* opt, unsigned char v) {
- opt->rep.create_if_missing = v;
-}
-
-void leveldb_options_set_error_if_exists(
- leveldb_options_t* opt, unsigned char v) {
- opt->rep.error_if_exists = v;
-}
-
-void leveldb_options_set_paranoid_checks(
- leveldb_options_t* opt, unsigned char v) {
- opt->rep.paranoid_checks = v;
-}
-
-void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) {
- opt->rep.env = (env ? env->rep : NULL);
-}
-
-void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) {
- opt->rep.info_log = (l ? l->rep : NULL);
-}
-
-void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) {
- opt->rep.write_buffer_size = s;
-}
-
-void leveldb_options_set_max_open_files(leveldb_options_t* opt, int n) {
- opt->rep.max_open_files = n;
-}
-
-void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) {
- opt->rep.block_cache = c->rep;
-}
-
-void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) {
- opt->rep.block_size = s;
-}
-
-void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) {
- opt->rep.block_restart_interval = n;
-}
-
-void leveldb_options_set_compression(leveldb_options_t* opt, int t) {
- opt->rep.compression = static_cast<CompressionType>(t);
-}
-
-leveldb_comparator_t* leveldb_comparator_create(
- void* state,
- void (*destructor)(void*),
- int (*compare)(
- void*,
- const char* a, size_t alen,
- const char* b, size_t blen),
- const char* (*name)(void*)) {
- leveldb_comparator_t* result = new leveldb_comparator_t;
- result->state_ = state;
- result->destructor_ = destructor;
- result->compare_ = compare;
- result->name_ = name;
- return result;
-}
-
-void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
- delete cmp;
-}
-
-leveldb_filterpolicy_t* leveldb_filterpolicy_create(
- void* state,
- void (*destructor)(void*),
- char* (*create_filter)(
- void*,
- const char* const* key_array, const size_t* key_length_array,
- int num_keys,
- size_t* filter_length),
- unsigned char (*key_may_match)(
- void*,
- const char* key, size_t length,
- const char* filter, size_t filter_length),
- const char* (*name)(void*)) {
- leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
- result->state_ = state;
- result->destructor_ = destructor;
- result->create_ = create_filter;
- result->key_match_ = key_may_match;
- result->name_ = name;
- return result;
-}
-
-void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
- delete filter;
-}
-
-leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
- // Make a leveldb_filterpolicy_t, but override all of its methods so
- // they delegate to a NewBloomFilterPolicy() instead of user
- // supplied C functions.
- struct Wrapper : public leveldb_filterpolicy_t {
- const FilterPolicy* rep_;
- ~Wrapper() { delete rep_; }
- const char* Name() const { return rep_->Name(); }
- void CreateFilter(const Slice* keys, int n, std::string* dst) const {
- return rep_->CreateFilter(keys, n, dst);
- }
- bool KeyMayMatch(const Slice& key, const Slice& filter) const {
- return rep_->KeyMayMatch(key, filter);
- }
- static void DoNothing(void*) { }
- };
- Wrapper* wrapper = new Wrapper;
- wrapper->rep_ = NewBloomFilterPolicy(bits_per_key);
- wrapper->state_ = NULL;
- wrapper->destructor_ = &Wrapper::DoNothing;
- return wrapper;
-}
-
-leveldb_readoptions_t* leveldb_readoptions_create() {
- return new leveldb_readoptions_t;
-}
-
-void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) {
- delete opt;
-}
-
-void leveldb_readoptions_set_verify_checksums(
- leveldb_readoptions_t* opt,
- unsigned char v) {
- opt->rep.verify_checksums = v;
-}
-
-void leveldb_readoptions_set_fill_cache(
- leveldb_readoptions_t* opt, unsigned char v) {
- opt->rep.fill_cache = v;
-}
-
-void leveldb_readoptions_set_snapshot(
- leveldb_readoptions_t* opt,
- const leveldb_snapshot_t* snap) {
- opt->rep.snapshot = (snap ? snap->rep : NULL);
-}
-
-leveldb_writeoptions_t* leveldb_writeoptions_create() {
- return new leveldb_writeoptions_t;
-}
-
-void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) {
- delete opt;
-}
-
-void leveldb_writeoptions_set_sync(
- leveldb_writeoptions_t* opt, unsigned char v) {
- opt->rep.sync = v;
-}
-
-leveldb_cache_t* leveldb_cache_create_lru(size_t capacity) {
- leveldb_cache_t* c = new leveldb_cache_t;
- c->rep = NewLRUCache(capacity);
- return c;
-}
-
-void leveldb_cache_destroy(leveldb_cache_t* cache) {
- delete cache->rep;
- delete cache;
-}
-
-leveldb_env_t* leveldb_create_default_env() {
- leveldb_env_t* result = new leveldb_env_t;
- result->rep = Env::Default();
- result->is_default = true;
- return result;
-}
-
-void leveldb_env_destroy(leveldb_env_t* env) {
- if (!env->is_default) delete env->rep;
- delete env;
-}
-
-void leveldb_free(void* ptr) {
- free(ptr);
-}
-
-int leveldb_major_version() {
- return kMajorVersion;
-}
-
-int leveldb_minor_version() {
- return kMinorVersion;
-}
-
-} // end extern "C"
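
The binding above is exercised end to end by c_test.c below; as a quick orientation, a minimal round trip through it looks roughly like the following sketch (written as C++, the same language as c.cc; the database path is arbitrary and the snippet is illustrative rather than part of the deleted sources).

// Sketch: open a database through the C API, write and read one key.
#include <assert.h>
#include <string.h>
#include "leveldb/c.h"

int main() {
  char* err = NULL;  // the API reports errors via a malloc'd *errptr string
  leveldb_options_t* opts = leveldb_options_create();
  leveldb_options_set_create_if_missing(opts, 1);
  leveldb_t* db = leveldb_open(opts, "/tmp/c_binding_demo", &err);
  assert(err == NULL);

  leveldb_writeoptions_t* wo = leveldb_writeoptions_create();
  leveldb_put(db, wo, "key", 3, "value", 5, &err);
  assert(err == NULL);

  leveldb_readoptions_t* ro = leveldb_readoptions_create();
  size_t vlen;
  char* val = leveldb_get(db, ro, "key", 3, &vlen, &err);
  assert(err == NULL && vlen == 5 && memcmp(val, "value", 5) == 0);
  leveldb_free(val);  // leveldb_get returns malloc'd memory

  leveldb_readoptions_destroy(ro);
  leveldb_writeoptions_destroy(wo);
  leveldb_close(db);
  leveldb_options_destroy(opts);
  return 0;
}
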
diff --git a/src/leveldb/db/c_test.c b/src/leveldb/db/c_test.c
deleted file mode 100644
index 7cd5ee0207..0000000000
--- a/src/leveldb/db/c_test.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/* Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- Use of this source code is governed by a BSD-style license that can be
- found in the LICENSE file. See the AUTHORS file for names of contributors. */
-
-#include "leveldb/c.h"
-
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-const char* phase = "";
-static char dbname[200];
-
-static void StartPhase(const char* name) {
- fprintf(stderr, "=== Test %s\n", name);
- phase = name;
-}
-
-static const char* GetTempDir(void) {
- const char* ret = getenv("TEST_TMPDIR");
- if (ret == NULL || ret[0] == '\0')
- ret = "/tmp";
- return ret;
-}
-
-#define CheckNoError(err) \
- if ((err) != NULL) { \
- fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \
- abort(); \
- }
-
-#define CheckCondition(cond) \
- if (!(cond)) { \
- fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, #cond); \
- abort(); \
- }
-
-static void CheckEqual(const char* expected, const char* v, size_t n) {
- if (expected == NULL && v == NULL) {
- // ok
- } else if (expected != NULL && v != NULL && n == strlen(expected) &&
- memcmp(expected, v, n) == 0) {
- // ok
- return;
- } else {
- fprintf(stderr, "%s: expected '%s', got '%s'\n",
- phase,
- (expected ? expected : "(null)"),
- (v ? v : "(null)"));
- abort();
- }
-}
-
-static void Free(char** ptr) {
- if (*ptr) {
- free(*ptr);
- *ptr = NULL;
- }
-}
-
-static void CheckGet(
- leveldb_t* db,
- const leveldb_readoptions_t* options,
- const char* key,
- const char* expected) {
- char* err = NULL;
- size_t val_len;
- char* val;
- val = leveldb_get(db, options, key, strlen(key), &val_len, &err);
- CheckNoError(err);
- CheckEqual(expected, val, val_len);
- Free(&val);
-}
-
-static void CheckIter(leveldb_iterator_t* iter,
- const char* key, const char* val) {
- size_t len;
- const char* str;
- str = leveldb_iter_key(iter, &len);
- CheckEqual(key, str, len);
- str = leveldb_iter_value(iter, &len);
- CheckEqual(val, str, len);
-}
-
-// Callback from leveldb_writebatch_iterate()
-static void CheckPut(void* ptr,
- const char* k, size_t klen,
- const char* v, size_t vlen) {
- int* state = (int*) ptr;
- CheckCondition(*state < 2);
- switch (*state) {
- case 0:
- CheckEqual("bar", k, klen);
- CheckEqual("b", v, vlen);
- break;
- case 1:
- CheckEqual("box", k, klen);
- CheckEqual("c", v, vlen);
- break;
- }
- (*state)++;
-}
-
-// Callback from leveldb_writebatch_iterate()
-static void CheckDel(void* ptr, const char* k, size_t klen) {
- int* state = (int*) ptr;
- CheckCondition(*state == 2);
- CheckEqual("bar", k, klen);
- (*state)++;
-}
-
-static void CmpDestroy(void* arg) { }
-
-static int CmpCompare(void* arg, const char* a, size_t alen,
- const char* b, size_t blen) {
- int n = (alen < blen) ? alen : blen;
- int r = memcmp(a, b, n);
- if (r == 0) {
- if (alen < blen) r = -1;
- else if (alen > blen) r = +1;
- }
- return r;
-}
-
-static const char* CmpName(void* arg) {
- return "foo";
-}
-
-// Custom filter policy
-static unsigned char fake_filter_result = 1;
-static void FilterDestroy(void* arg) { }
-static const char* FilterName(void* arg) {
- return "TestFilter";
-}
-static char* FilterCreate(
- void* arg,
- const char* const* key_array, const size_t* key_length_array,
- int num_keys,
- size_t* filter_length) {
- *filter_length = 4;
- char* result = malloc(4);
- memcpy(result, "fake", 4);
- return result;
-}
-static unsigned char FilterKeyMatch(
- void* arg,
- const char* key, size_t length,
- const char* filter, size_t filter_length) {
- CheckCondition(filter_length == 4);
- CheckCondition(memcmp(filter, "fake", 4) == 0);
- return fake_filter_result;
-}
-
-int main(int argc, char** argv) {
- leveldb_t* db;
- leveldb_comparator_t* cmp;
- leveldb_cache_t* cache;
- leveldb_env_t* env;
- leveldb_options_t* options;
- leveldb_readoptions_t* roptions;
- leveldb_writeoptions_t* woptions;
- char* err = NULL;
- int run = -1;
-
- CheckCondition(leveldb_major_version() >= 1);
- CheckCondition(leveldb_minor_version() >= 1);
-
- snprintf(dbname, sizeof(dbname),
- "%s/leveldb_c_test-%d",
- GetTempDir(),
- ((int) geteuid()));
-
- StartPhase("create_objects");
- cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
- env = leveldb_create_default_env();
- cache = leveldb_cache_create_lru(100000);
-
- options = leveldb_options_create();
- leveldb_options_set_comparator(options, cmp);
- leveldb_options_set_error_if_exists(options, 1);
- leveldb_options_set_cache(options, cache);
- leveldb_options_set_env(options, env);
- leveldb_options_set_info_log(options, NULL);
- leveldb_options_set_write_buffer_size(options, 100000);
- leveldb_options_set_paranoid_checks(options, 1);
- leveldb_options_set_max_open_files(options, 10);
- leveldb_options_set_block_size(options, 1024);
- leveldb_options_set_block_restart_interval(options, 8);
- leveldb_options_set_compression(options, leveldb_no_compression);
-
- roptions = leveldb_readoptions_create();
- leveldb_readoptions_set_verify_checksums(roptions, 1);
- leveldb_readoptions_set_fill_cache(roptions, 0);
-
- woptions = leveldb_writeoptions_create();
- leveldb_writeoptions_set_sync(woptions, 1);
-
- StartPhase("destroy");
- leveldb_destroy_db(options, dbname, &err);
- Free(&err);
-
- StartPhase("open_error");
- db = leveldb_open(options, dbname, &err);
- CheckCondition(err != NULL);
- Free(&err);
-
- StartPhase("leveldb_free");
- db = leveldb_open(options, dbname, &err);
- CheckCondition(err != NULL);
- leveldb_free(err);
- err = NULL;
-
- StartPhase("open");
- leveldb_options_set_create_if_missing(options, 1);
- db = leveldb_open(options, dbname, &err);
- CheckNoError(err);
- CheckGet(db, roptions, "foo", NULL);
-
- StartPhase("put");
- leveldb_put(db, woptions, "foo", 3, "hello", 5, &err);
- CheckNoError(err);
- CheckGet(db, roptions, "foo", "hello");
-
- StartPhase("compactall");
- leveldb_compact_range(db, NULL, 0, NULL, 0);
- CheckGet(db, roptions, "foo", "hello");
-
- StartPhase("compactrange");
- leveldb_compact_range(db, "a", 1, "z", 1);
- CheckGet(db, roptions, "foo", "hello");
-
- StartPhase("writebatch");
- {
- leveldb_writebatch_t* wb = leveldb_writebatch_create();
- leveldb_writebatch_put(wb, "foo", 3, "a", 1);
- leveldb_writebatch_clear(wb);
- leveldb_writebatch_put(wb, "bar", 3, "b", 1);
- leveldb_writebatch_put(wb, "box", 3, "c", 1);
- leveldb_writebatch_delete(wb, "bar", 3);
- leveldb_write(db, woptions, wb, &err);
- CheckNoError(err);
- CheckGet(db, roptions, "foo", "hello");
- CheckGet(db, roptions, "bar", NULL);
- CheckGet(db, roptions, "box", "c");
- int pos = 0;
- leveldb_writebatch_iterate(wb, &pos, CheckPut, CheckDel);
- CheckCondition(pos == 3);
- leveldb_writebatch_destroy(wb);
- }
-
- StartPhase("iter");
- {
- leveldb_iterator_t* iter = leveldb_create_iterator(db, roptions);
- CheckCondition(!leveldb_iter_valid(iter));
- leveldb_iter_seek_to_first(iter);
- CheckCondition(leveldb_iter_valid(iter));
- CheckIter(iter, "box", "c");
- leveldb_iter_next(iter);
- CheckIter(iter, "foo", "hello");
- leveldb_iter_prev(iter);
- CheckIter(iter, "box", "c");
- leveldb_iter_prev(iter);
- CheckCondition(!leveldb_iter_valid(iter));
- leveldb_iter_seek_to_last(iter);
- CheckIter(iter, "foo", "hello");
- leveldb_iter_seek(iter, "b", 1);
- CheckIter(iter, "box", "c");
- leveldb_iter_get_error(iter, &err);
- CheckNoError(err);
- leveldb_iter_destroy(iter);
- }
-
- StartPhase("approximate_sizes");
- {
- int i;
- int n = 20000;
- char keybuf[100];
- char valbuf[100];
- uint64_t sizes[2];
- const char* start[2] = { "a", "k00000000000000010000" };
- size_t start_len[2] = { 1, 21 };
- const char* limit[2] = { "k00000000000000010000", "z" };
- size_t limit_len[2] = { 21, 1 };
- leveldb_writeoptions_set_sync(woptions, 0);
- for (i = 0; i < n; i++) {
- snprintf(keybuf, sizeof(keybuf), "k%020d", i);
- snprintf(valbuf, sizeof(valbuf), "v%020d", i);
- leveldb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf),
- &err);
- CheckNoError(err);
- }
- leveldb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes);
- CheckCondition(sizes[0] > 0);
- CheckCondition(sizes[1] > 0);
- }
-
- StartPhase("property");
- {
- char* prop = leveldb_property_value(db, "nosuchprop");
- CheckCondition(prop == NULL);
- prop = leveldb_property_value(db, "leveldb.stats");
- CheckCondition(prop != NULL);
- Free(&prop);
- }
-
- StartPhase("snapshot");
- {
- const leveldb_snapshot_t* snap;
- snap = leveldb_create_snapshot(db);
- leveldb_delete(db, woptions, "foo", 3, &err);
- CheckNoError(err);
- leveldb_readoptions_set_snapshot(roptions, snap);
- CheckGet(db, roptions, "foo", "hello");
- leveldb_readoptions_set_snapshot(roptions, NULL);
- CheckGet(db, roptions, "foo", NULL);
- leveldb_release_snapshot(db, snap);
- }
-
- StartPhase("repair");
- {
- leveldb_close(db);
- leveldb_options_set_create_if_missing(options, 0);
- leveldb_options_set_error_if_exists(options, 0);
- leveldb_repair_db(options, dbname, &err);
- CheckNoError(err);
- db = leveldb_open(options, dbname, &err);
- CheckNoError(err);
- CheckGet(db, roptions, "foo", NULL);
- CheckGet(db, roptions, "bar", NULL);
- CheckGet(db, roptions, "box", "c");
- leveldb_options_set_create_if_missing(options, 1);
- leveldb_options_set_error_if_exists(options, 1);
- }
-
- StartPhase("filter");
- for (run = 0; run < 2; run++) {
- // First run uses custom filter, second run uses bloom filter
- CheckNoError(err);
- leveldb_filterpolicy_t* policy;
- if (run == 0) {
- policy = leveldb_filterpolicy_create(
- NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
- } else {
- policy = leveldb_filterpolicy_create_bloom(10);
- }
-
- // Create new database
- leveldb_close(db);
- leveldb_destroy_db(options, dbname, &err);
- leveldb_options_set_filter_policy(options, policy);
- db = leveldb_open(options, dbname, &err);
- CheckNoError(err);
- leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
- CheckNoError(err);
- leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
- CheckNoError(err);
- leveldb_compact_range(db, NULL, 0, NULL, 0);
-
- fake_filter_result = 1;
- CheckGet(db, roptions, "foo", "foovalue");
- CheckGet(db, roptions, "bar", "barvalue");
- if (run == 0) {
- // Must not find value when custom filter returns false
- fake_filter_result = 0;
- CheckGet(db, roptions, "foo", NULL);
- CheckGet(db, roptions, "bar", NULL);
- fake_filter_result = 1;
-
- CheckGet(db, roptions, "foo", "foovalue");
- CheckGet(db, roptions, "bar", "barvalue");
- }
- leveldb_options_set_filter_policy(options, NULL);
- leveldb_filterpolicy_destroy(policy);
- }
-
- StartPhase("cleanup");
- leveldb_close(db);
- leveldb_options_destroy(options);
- leveldb_readoptions_destroy(roptions);
- leveldb_writeoptions_destroy(woptions);
- leveldb_cache_destroy(cache);
- leveldb_comparator_destroy(cmp);
- leveldb_env_destroy(env);
-
- fprintf(stderr, "PASS\n");
- return 0;
-}
diff --git a/src/leveldb/db/corruption_test.cc b/src/leveldb/db/corruption_test.cc
deleted file mode 100644
index 31b2d5f416..0000000000
--- a/src/leveldb/db/corruption_test.cc
+++ /dev/null
@@ -1,359 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "leveldb/db.h"
-
-#include <errno.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include "leveldb/cache.h"
-#include "leveldb/env.h"
-#include "leveldb/table.h"
-#include "leveldb/write_batch.h"
-#include "db/db_impl.h"
-#include "db/filename.h"
-#include "db/log_format.h"
-#include "db/version_set.h"
-#include "util/logging.h"
-#include "util/testharness.h"
-#include "util/testutil.h"
-
-namespace leveldb {
-
-static const int kValueSize = 1000;
-
-class CorruptionTest {
- public:
- test::ErrorEnv env_;
- std::string dbname_;
- Cache* tiny_cache_;
- Options options_;
- DB* db_;
-
- CorruptionTest() {
- tiny_cache_ = NewLRUCache(100);
- options_.env = &env_;
- dbname_ = test::TmpDir() + "/db_test";
- DestroyDB(dbname_, options_);
-
- db_ = NULL;
- options_.create_if_missing = true;
- Reopen();
- options_.create_if_missing = false;
- }
-
- ~CorruptionTest() {
- delete db_;
- DestroyDB(dbname_, Options());
- delete tiny_cache_;
- }
-
- Status TryReopen(Options* options = NULL) {
- delete db_;
- db_ = NULL;
- Options opt = (options ? *options : options_);
- opt.env = &env_;
- opt.block_cache = tiny_cache_;
- return DB::Open(opt, dbname_, &db_);
- }
-
- void Reopen(Options* options = NULL) {
- ASSERT_OK(TryReopen(options));
- }
-
- void RepairDB() {
- delete db_;
- db_ = NULL;
- ASSERT_OK(::leveldb::RepairDB(dbname_, options_));
- }
-
- void Build(int n) {
- std::string key_space, value_space;
- WriteBatch batch;
- for (int i = 0; i < n; i++) {
- //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
- Slice key = Key(i, &key_space);
- batch.Clear();
- batch.Put(key, Value(i, &value_space));
- ASSERT_OK(db_->Write(WriteOptions(), &batch));
- }
- }
-
- void Check(int min_expected, int max_expected) {
- int next_expected = 0;
- int missed = 0;
- int bad_keys = 0;
- int bad_values = 0;
- int correct = 0;
- std::string value_space;
- Iterator* iter = db_->NewIterator(ReadOptions());
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- uint64_t key;
- Slice in(iter->key());
- if (!ConsumeDecimalNumber(&in, &key) ||
- !in.empty() ||
- key < next_expected) {
- bad_keys++;
- continue;
- }
- missed += (key - next_expected);
- next_expected = key + 1;
- if (iter->value() != Value(key, &value_space)) {
- bad_values++;
- } else {
- correct++;
- }
- }
- delete iter;
-
- fprintf(stderr,
- "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
- min_expected, max_expected, correct, bad_keys, bad_values, missed);
- ASSERT_LE(min_expected, correct);
- ASSERT_GE(max_expected, correct);
- }
-
- void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
- // Pick file to corrupt
- std::vector<std::string> filenames;
- ASSERT_OK(env_.GetChildren(dbname_, &filenames));
- uint64_t number;
- FileType type;
- std::string fname;
- int picked_number = -1;
- for (int i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type) &&
- type == filetype &&
- int(number) > picked_number) { // Pick latest file
- fname = dbname_ + "/" + filenames[i];
- picked_number = number;
- }
- }
- ASSERT_TRUE(!fname.empty()) << filetype;
-
- struct stat sbuf;
- if (stat(fname.c_str(), &sbuf) != 0) {
- const char* msg = strerror(errno);
- ASSERT_TRUE(false) << fname << ": " << msg;
- }
-
- if (offset < 0) {
- // Relative to end of file; make it absolute
- if (-offset > sbuf.st_size) {
- offset = 0;
- } else {
- offset = sbuf.st_size + offset;
- }
- }
- if (offset > sbuf.st_size) {
- offset = sbuf.st_size;
- }
- if (offset + bytes_to_corrupt > sbuf.st_size) {
- bytes_to_corrupt = sbuf.st_size - offset;
- }
-
- // Do it
- std::string contents;
- Status s = ReadFileToString(Env::Default(), fname, &contents);
- ASSERT_TRUE(s.ok()) << s.ToString();
- for (int i = 0; i < bytes_to_corrupt; i++) {
- contents[i + offset] ^= 0x80;
- }
- s = WriteStringToFile(Env::Default(), contents, fname);
- ASSERT_TRUE(s.ok()) << s.ToString();
- }
-
- int Property(const std::string& name) {
- std::string property;
- int result;
- if (db_->GetProperty(name, &property) &&
- sscanf(property.c_str(), "%d", &result) == 1) {
- return result;
- } else {
- return -1;
- }
- }
-
- // Return the ith key
- Slice Key(int i, std::string* storage) {
- char buf[100];
- snprintf(buf, sizeof(buf), "%016d", i);
- storage->assign(buf, strlen(buf));
- return Slice(*storage);
- }
-
- // Return the value to associate with the specified key
- Slice Value(int k, std::string* storage) {
- Random r(k);
- return test::RandomString(&r, kValueSize, storage);
- }
-};
-
-TEST(CorruptionTest, Recovery) {
- Build(100);
- Check(100, 100);
- Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record
- Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block
- Reopen();
-
- // The 64 records in the first two log blocks are completely lost.
- Check(36, 36);
-}
-
-TEST(CorruptionTest, RecoverWriteError) {
- env_.writable_file_error_ = true;
- Status s = TryReopen();
- ASSERT_TRUE(!s.ok());
-}
-
-TEST(CorruptionTest, NewFileErrorDuringWrite) {
- // Do enough writing to force minor compaction
- env_.writable_file_error_ = true;
- const int num = 3 + (Options().write_buffer_size / kValueSize);
- std::string value_storage;
- Status s;
- for (int i = 0; s.ok() && i < num; i++) {
- WriteBatch batch;
- batch.Put("a", Value(100, &value_storage));
- s = db_->Write(WriteOptions(), &batch);
- }
- ASSERT_TRUE(!s.ok());
- ASSERT_GE(env_.num_writable_file_errors_, 1);
- env_.writable_file_error_ = false;
- Reopen();
-}
-
-TEST(CorruptionTest, TableFile) {
- Build(100);
- DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
- dbi->TEST_CompactMemTable();
- dbi->TEST_CompactRange(0, NULL, NULL);
- dbi->TEST_CompactRange(1, NULL, NULL);
-
- Corrupt(kTableFile, 100, 1);
- Check(99, 99);
-}
-
-TEST(CorruptionTest, TableFileIndexData) {
- Build(10000); // Enough to build multiple Tables
- DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
- dbi->TEST_CompactMemTable();
-
- Corrupt(kTableFile, -2000, 500);
- Reopen();
- Check(5000, 9999);
-}
-
-TEST(CorruptionTest, MissingDescriptor) {
- Build(1000);
- RepairDB();
- Reopen();
- Check(1000, 1000);
-}
-
-TEST(CorruptionTest, SequenceNumberRecovery) {
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3"));
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4"));
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5"));
- RepairDB();
- Reopen();
- std::string v;
- ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
- ASSERT_EQ("v5", v);
- // Write something. If sequence number was not recovered properly,
- // it will be hidden by an earlier write.
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6"));
- ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
- ASSERT_EQ("v6", v);
- Reopen();
- ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
- ASSERT_EQ("v6", v);
-}
-
-TEST(CorruptionTest, CorruptedDescriptor) {
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
- DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
- dbi->TEST_CompactMemTable();
- dbi->TEST_CompactRange(0, NULL, NULL);
-
- Corrupt(kDescriptorFile, 0, 1000);
- Status s = TryReopen();
- ASSERT_TRUE(!s.ok());
-
- RepairDB();
- Reopen();
- std::string v;
- ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
- ASSERT_EQ("hello", v);
-}
-
-TEST(CorruptionTest, CompactionInputError) {
- Build(10);
- DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
- dbi->TEST_CompactMemTable();
- const int last = config::kMaxMemCompactLevel;
- ASSERT_EQ(1, Property("leveldb.num-files-at-level" + NumberToString(last)));
-
- Corrupt(kTableFile, 100, 1);
- Check(9, 9);
-
- // Force compactions by writing lots of values
- Build(10000);
- Check(10000, 10000);
-}
-
-TEST(CorruptionTest, CompactionInputErrorParanoid) {
- Options options;
- options.paranoid_checks = true;
- options.write_buffer_size = 1048576;
- Reopen(&options);
- DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
-
- // Fill levels >= 1 so memtable compaction outputs to level 1
- for (int level = 1; level < config::kNumLevels; level++) {
- dbi->Put(WriteOptions(), "", "begin");
- dbi->Put(WriteOptions(), "~", "end");
- dbi->TEST_CompactMemTable();
- }
-
- Build(10);
- dbi->TEST_CompactMemTable();
- ASSERT_EQ(1, Property("leveldb.num-files-at-level0"));
-
- Corrupt(kTableFile, 100, 1);
- Check(9, 9);
-
- // Write must eventually fail because of corrupted table
- Status s;
- std::string tmp1, tmp2;
- for (int i = 0; i < 10000 && s.ok(); i++) {
- s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2));
- }
- ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
-}
-
-TEST(CorruptionTest, UnrelatedKeys) {
- Build(10);
- DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
- dbi->TEST_CompactMemTable();
- Corrupt(kTableFile, 100, 1);
-
- std::string tmp1, tmp2;
- ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2)));
- std::string v;
- ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
- ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
- dbi->TEST_CompactMemTable();
- ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
- ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
diff --git a/src/leveldb/db/db_bench.cc b/src/leveldb/db/db_bench.cc
deleted file mode 100644
index 7abdf87587..0000000000
--- a/src/leveldb/db/db_bench.cc
+++ /dev/null
@@ -1,979 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "db/db_impl.h"
-#include "db/version_set.h"
-#include "leveldb/cache.h"
-#include "leveldb/db.h"
-#include "leveldb/env.h"
-#include "leveldb/write_batch.h"
-#include "port/port.h"
-#include "util/crc32c.h"
-#include "util/histogram.h"
-#include "util/mutexlock.h"
-#include "util/random.h"
-#include "util/testutil.h"
-
-// Comma-separated list of operations to run in the specified order
-// Actual benchmarks:
-// fillseq -- write N values in sequential key order in async mode
-// fillrandom -- write N values in random key order in async mode
-// overwrite -- overwrite N values in random key order in async mode
-// fillsync -- write N/100 values in random key order in sync mode
-// fill100K -- write N/1000 100K values in random order in async mode
-// deleteseq -- delete N keys in sequential order
-// deleterandom -- delete N keys in random order
-// readseq -- read N times sequentially
-// readreverse -- read N times in reverse order
-// readrandom -- read N times in random order
-// readmissing -- read N missing keys in random order
-// readhot -- read N times in random order from 1% section of DB
-// seekrandom -- N random seeks
-// crc32c -- repeated crc32c of 4K of data
-// acquireload -- load N*1000 times
-// Meta operations:
-// compact -- Compact the entire DB
-// stats -- Print DB stats
-// sstables -- Print sstable info
-// heapprofile -- Dump a heap profile (if supported by this port)
-static const char* FLAGS_benchmarks =
- "fillseq,"
- "fillsync,"
- "fillrandom,"
- "overwrite,"
- "readrandom,"
- "readrandom," // Extra run to allow previous compactions to quiesce
- "readseq,"
- "readreverse,"
- "compact,"
- "readrandom,"
- "readseq,"
- "readreverse,"
- "fill100K,"
- "crc32c,"
- "snappycomp,"
- "snappyuncomp,"
- "acquireload,"
- ;
-
-// Number of key/values to place in database
-static int FLAGS_num = 1000000;
-
-// Number of read operations to do. If negative, do FLAGS_num reads.
-static int FLAGS_reads = -1;
-
-// Number of concurrent threads to run.
-static int FLAGS_threads = 1;
-
-// Size of each value
-static int FLAGS_value_size = 100;
-
-// Arrange to generate values that shrink to this fraction of
-// their original size after compression
-static double FLAGS_compression_ratio = 0.5;
-
-// Print histogram of operation timings
-static bool FLAGS_histogram = false;
-
-// Number of bytes to buffer in memtable before compacting
-// (initialized to default value by "main")
-static int FLAGS_write_buffer_size = 0;
-
-// Number of bytes to use as a cache of uncompressed data.
-// Negative means use default settings.
-static int FLAGS_cache_size = -1;
-
-// Maximum number of files to keep open at the same time (use default if == 0)
-static int FLAGS_open_files = 0;
-
-// Bloom filter bits per key.
-// Negative means use default settings.
-static int FLAGS_bloom_bits = -1;
-
-// If true, do not destroy the existing database. If you set this
-// flag and also specify a benchmark that wants a fresh database, that
-// benchmark will fail.
-static bool FLAGS_use_existing_db = false;
-
-// Use the db with the following name.
-static const char* FLAGS_db = NULL;
-
-namespace leveldb {
-
-namespace {
-
-// Helper for quickly generating random data.
-class RandomGenerator {
- private:
- std::string data_;
- int pos_;
-
- public:
- RandomGenerator() {
- // We use a limited amount of data over and over again and ensure
- // that it is larger than the compression window (32KB), and also
- // large enough to serve all typical value sizes we want to write.
- Random rnd(301);
- std::string piece;
- while (data_.size() < 1048576) {
- // Add a short fragment that is as compressible as specified
- // by FLAGS_compression_ratio.
- test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
- data_.append(piece);
- }
- pos_ = 0;
- }
-
- Slice Generate(int len) {
- if (pos_ + len > data_.size()) {
- pos_ = 0;
- assert(len < data_.size());
- }
- pos_ += len;
- return Slice(data_.data() + pos_ - len, len);
- }
-};
-
-static Slice TrimSpace(Slice s) {
- int start = 0;
- while (start < s.size() && isspace(s[start])) {
- start++;
- }
- int limit = s.size();
- while (limit > start && isspace(s[limit-1])) {
- limit--;
- }
- return Slice(s.data() + start, limit - start);
-}
-
-static void AppendWithSpace(std::string* str, Slice msg) {
- if (msg.empty()) return;
- if (!str->empty()) {
- str->push_back(' ');
- }
- str->append(msg.data(), msg.size());
-}
-
-class Stats {
- private:
- double start_;
- double finish_;
- double seconds_;
- int done_;
- int next_report_;
- int64_t bytes_;
- double last_op_finish_;
- Histogram hist_;
- std::string message_;
-
- public:
- Stats() { Start(); }
-
- void Start() {
- next_report_ = 100;
- last_op_finish_ = start_;
- hist_.Clear();
- done_ = 0;
- bytes_ = 0;
- seconds_ = 0;
- start_ = Env::Default()->NowMicros();
- finish_ = start_;
- message_.clear();
- }
-
- void Merge(const Stats& other) {
- hist_.Merge(other.hist_);
- done_ += other.done_;
- bytes_ += other.bytes_;
- seconds_ += other.seconds_;
- if (other.start_ < start_) start_ = other.start_;
- if (other.finish_ > finish_) finish_ = other.finish_;
-
- // Just keep the messages from one thread
- if (message_.empty()) message_ = other.message_;
- }
-
- void Stop() {
- finish_ = Env::Default()->NowMicros();
- seconds_ = (finish_ - start_) * 1e-6;
- }
-
- void AddMessage(Slice msg) {
- AppendWithSpace(&message_, msg);
- }
-
- void FinishedSingleOp() {
- if (FLAGS_histogram) {
- double now = Env::Default()->NowMicros();
- double micros = now - last_op_finish_;
- hist_.Add(micros);
- if (micros > 20000) {
- fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
- fflush(stderr);
- }
- last_op_finish_ = now;
- }
-
- done_++;
- if (done_ >= next_report_) {
- if (next_report_ < 1000) next_report_ += 100;
- else if (next_report_ < 5000) next_report_ += 500;
- else if (next_report_ < 10000) next_report_ += 1000;
- else if (next_report_ < 50000) next_report_ += 5000;
- else if (next_report_ < 100000) next_report_ += 10000;
- else if (next_report_ < 500000) next_report_ += 50000;
- else next_report_ += 100000;
- fprintf(stderr, "... finished %d ops%30s\r", done_, "");
- fflush(stderr);
- }
- }
-
- void AddBytes(int64_t n) {
- bytes_ += n;
- }
-
- void Report(const Slice& name) {
- // Pretend at least one op was done in case we are running a benchmark
- // that does not call FinishedSingleOp().
- if (done_ < 1) done_ = 1;
-
- std::string extra;
- if (bytes_ > 0) {
- // Rate is computed on actual elapsed time, not the sum of per-thread
- // elapsed times.
- double elapsed = (finish_ - start_) * 1e-6;
- char rate[100];
- snprintf(rate, sizeof(rate), "%6.1f MB/s",
- (bytes_ / 1048576.0) / elapsed);
- extra = rate;
- }
- AppendWithSpace(&extra, message_);
-
- fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
- name.ToString().c_str(),
- seconds_ * 1e6 / done_,
- (extra.empty() ? "" : " "),
- extra.c_str());
- if (FLAGS_histogram) {
- fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
- }
- fflush(stdout);
- }
-};
-
-// State shared by all concurrent executions of the same benchmark.
-struct SharedState {
- port::Mutex mu;
- port::CondVar cv;
- int total;
-
- // Each thread goes through the following states:
- // (1) initializing
- // (2) waiting for others to be initialized
- // (3) running
- // (4) done
-
- int num_initialized;
- int num_done;
- bool start;
-
- SharedState() : cv(&mu) { }
-};
-
-// Per-thread state for concurrent executions of the same benchmark.
-struct ThreadState {
- int tid; // 0..n-1 when running in n threads
- Random rand; // Has different seeds for different threads
- Stats stats;
- SharedState* shared;
-
- ThreadState(int index)
- : tid(index),
- rand(1000 + index) {
- }
-};
-
-} // namespace
-
-class Benchmark {
- private:
- Cache* cache_;
- const FilterPolicy* filter_policy_;
- DB* db_;
- int num_;
- int value_size_;
- int entries_per_batch_;
- WriteOptions write_options_;
- int reads_;
- int heap_counter_;
-
- void PrintHeader() {
- const int kKeySize = 16;
- PrintEnvironment();
- fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
- fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n",
- FLAGS_value_size,
- static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
- fprintf(stdout, "Entries: %d\n", num_);
- fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
- ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
- / 1048576.0));
- fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
- (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
- / 1048576.0));
- PrintWarnings();
- fprintf(stdout, "------------------------------------------------\n");
- }
-
- void PrintWarnings() {
-#if defined(__GNUC__) && !defined(__OPTIMIZE__)
- fprintf(stdout,
- "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
- );
-#endif
-#ifndef NDEBUG
- fprintf(stdout,
- "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
-#endif
-
- // See if snappy is working by attempting to compress a compressible string
- const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy";
- std::string compressed;
- if (!port::Snappy_Compress(text, sizeof(text), &compressed)) {
- fprintf(stdout, "WARNING: Snappy compression is not enabled\n");
- } else if (compressed.size() >= sizeof(text)) {
- fprintf(stdout, "WARNING: Snappy compression is not effective\n");
- }
- }
-
- void PrintEnvironment() {
- fprintf(stderr, "LevelDB: version %d.%d\n",
- kMajorVersion, kMinorVersion);
-
-#if defined(__linux)
- time_t now = time(NULL);
- fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
-
- FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
- if (cpuinfo != NULL) {
- char line[1000];
- int num_cpus = 0;
- std::string cpu_type;
- std::string cache_size;
- while (fgets(line, sizeof(line), cpuinfo) != NULL) {
- const char* sep = strchr(line, ':');
- if (sep == NULL) {
- continue;
- }
- Slice key = TrimSpace(Slice(line, sep - 1 - line));
- Slice val = TrimSpace(Slice(sep + 1));
- if (key == "model name") {
- ++num_cpus;
- cpu_type = val.ToString();
- } else if (key == "cache size") {
- cache_size = val.ToString();
- }
- }
- fclose(cpuinfo);
- fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
- fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
- }
-#endif
- }
-
- public:
- Benchmark()
- : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
- filter_policy_(FLAGS_bloom_bits >= 0
- ? NewBloomFilterPolicy(FLAGS_bloom_bits)
- : NULL),
- db_(NULL),
- num_(FLAGS_num),
- value_size_(FLAGS_value_size),
- entries_per_batch_(1),
- reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
- heap_counter_(0) {
- std::vector<std::string> files;
- Env::Default()->GetChildren(FLAGS_db, &files);
- for (int i = 0; i < files.size(); i++) {
- if (Slice(files[i]).starts_with("heap-")) {
- Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
- }
- }
- if (!FLAGS_use_existing_db) {
- DestroyDB(FLAGS_db, Options());
- }
- }
-
- ~Benchmark() {
- delete db_;
- delete cache_;
- delete filter_policy_;
- }
-
- void Run() {
- PrintHeader();
- Open();
-
- const char* benchmarks = FLAGS_benchmarks;
- while (benchmarks != NULL) {
- const char* sep = strchr(benchmarks, ',');
- Slice name;
- if (sep == NULL) {
- name = benchmarks;
- benchmarks = NULL;
- } else {
- name = Slice(benchmarks, sep - benchmarks);
- benchmarks = sep + 1;
- }
-
- // Reset parameters that may be overridden below
- num_ = FLAGS_num;
- reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads);
- value_size_ = FLAGS_value_size;
- entries_per_batch_ = 1;
- write_options_ = WriteOptions();
-
- void (Benchmark::*method)(ThreadState*) = NULL;
- bool fresh_db = false;
- int num_threads = FLAGS_threads;
-
- if (name == Slice("fillseq")) {
- fresh_db = true;
- method = &Benchmark::WriteSeq;
- } else if (name == Slice("fillbatch")) {
- fresh_db = true;
- entries_per_batch_ = 1000;
- method = &Benchmark::WriteSeq;
- } else if (name == Slice("fillrandom")) {
- fresh_db = true;
- method = &Benchmark::WriteRandom;
- } else if (name == Slice("overwrite")) {
- fresh_db = false;
- method = &Benchmark::WriteRandom;
- } else if (name == Slice("fillsync")) {
- fresh_db = true;
- num_ /= 1000;
- write_options_.sync = true;
- method = &Benchmark::WriteRandom;
- } else if (name == Slice("fill100K")) {
- fresh_db = true;
- num_ /= 1000;
- value_size_ = 100 * 1000;
- method = &Benchmark::WriteRandom;
- } else if (name == Slice("readseq")) {
- method = &Benchmark::ReadSequential;
- } else if (name == Slice("readreverse")) {
- method = &Benchmark::ReadReverse;
- } else if (name == Slice("readrandom")) {
- method = &Benchmark::ReadRandom;
- } else if (name == Slice("readmissing")) {
- method = &Benchmark::ReadMissing;
- } else if (name == Slice("seekrandom")) {
- method = &Benchmark::SeekRandom;
- } else if (name == Slice("readhot")) {
- method = &Benchmark::ReadHot;
- } else if (name == Slice("readrandomsmall")) {
- reads_ /= 1000;
- method = &Benchmark::ReadRandom;
- } else if (name == Slice("deleteseq")) {
- method = &Benchmark::DeleteSeq;
- } else if (name == Slice("deleterandom")) {
- method = &Benchmark::DeleteRandom;
- } else if (name == Slice("readwhilewriting")) {
- num_threads++; // Add extra thread for writing
- method = &Benchmark::ReadWhileWriting;
- } else if (name == Slice("compact")) {
- method = &Benchmark::Compact;
- } else if (name == Slice("crc32c")) {
- method = &Benchmark::Crc32c;
- } else if (name == Slice("acquireload")) {
- method = &Benchmark::AcquireLoad;
- } else if (name == Slice("snappycomp")) {
- method = &Benchmark::SnappyCompress;
- } else if (name == Slice("snappyuncomp")) {
- method = &Benchmark::SnappyUncompress;
- } else if (name == Slice("heapprofile")) {
- HeapProfile();
- } else if (name == Slice("stats")) {
- PrintStats("leveldb.stats");
- } else if (name == Slice("sstables")) {
- PrintStats("leveldb.sstables");
- } else {
- if (name != Slice()) { // No error message for empty name
- fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
- }
- }
-
- if (fresh_db) {
- if (FLAGS_use_existing_db) {
- fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n",
- name.ToString().c_str());
- method = NULL;
- } else {
- delete db_;
- db_ = NULL;
- DestroyDB(FLAGS_db, Options());
- Open();
- }
- }
-
- if (method != NULL) {
- RunBenchmark(num_threads, name, method);
- }
- }
- }
-
- private:
- struct ThreadArg {
- Benchmark* bm;
- SharedState* shared;
- ThreadState* thread;
- void (Benchmark::*method)(ThreadState*);
- };
-
- static void ThreadBody(void* v) {
- ThreadArg* arg = reinterpret_cast<ThreadArg*>(v);
- SharedState* shared = arg->shared;
- ThreadState* thread = arg->thread;
- {
- MutexLock l(&shared->mu);
- shared->num_initialized++;
- if (shared->num_initialized >= shared->total) {
- shared->cv.SignalAll();
- }
- while (!shared->start) {
- shared->cv.Wait();
- }
- }
-
- thread->stats.Start();
- (arg->bm->*(arg->method))(thread);
- thread->stats.Stop();
-
- {
- MutexLock l(&shared->mu);
- shared->num_done++;
- if (shared->num_done >= shared->total) {
- shared->cv.SignalAll();
- }
- }
- }
-
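- // Spawn n threads, wait until every thread has checked in
- // (num_initialized == total), release them all at once via shared.start,
- // then wait for num_done == total. Per-thread stats are merged into
- // thread 0 and reported once for the whole run.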
- void RunBenchmark(int n, Slice name,
- void (Benchmark::*method)(ThreadState*)) {
- SharedState shared;
- shared.total = n;
- shared.num_initialized = 0;
- shared.num_done = 0;
- shared.start = false;
-
- ThreadArg* arg = new ThreadArg[n];
- for (int i = 0; i < n; i++) {
- arg[i].bm = this;
- arg[i].method = method;
- arg[i].shared = &shared;
- arg[i].thread = new ThreadState(i);
- arg[i].thread->shared = &shared;
- Env::Default()->StartThread(ThreadBody, &arg[i]);
- }
-
- shared.mu.Lock();
- while (shared.num_initialized < n) {
- shared.cv.Wait();
- }
-
- shared.start = true;
- shared.cv.SignalAll();
- while (shared.num_done < n) {
- shared.cv.Wait();
- }
- shared.mu.Unlock();
-
- for (int i = 1; i < n; i++) {
- arg[0].thread->stats.Merge(arg[i].thread->stats);
- }
- arg[0].thread->stats.Report(name);
-
- for (int i = 0; i < n; i++) {
- delete arg[i].thread;
- }
- delete[] arg;
- }
-
- void Crc32c(ThreadState* thread) {
- // Checksum about 500MB of data total
- const int size = 4096;
- const char* label = "(4K per op)";
- std::string data(size, 'x');
- int64_t bytes = 0;
- uint32_t crc = 0;
- while (bytes < 500 * 1048576) {
- crc = crc32c::Value(data.data(), size);
- thread->stats.FinishedSingleOp();
- bytes += size;
- }
- // Print the result so the computation is not eliminated as dead code
- fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));
-
- thread->stats.AddBytes(bytes);
- thread->stats.AddMessage(label);
- }
-
- void AcquireLoad(ThreadState* thread) {
- int dummy;
- port::AtomicPointer ap(&dummy);
- int count = 0;
- void *ptr = NULL;
- thread->stats.AddMessage("(each op is 1000 loads)");
- while (count < 100000) {
- for (int i = 0; i < 1000; i++) {
- ptr = ap.Acquire_Load();
- }
- count++;
- thread->stats.FinishedSingleOp();
- }
- if (ptr == NULL) exit(1); // Disable unused variable warning.
- }
-
- void SnappyCompress(ThreadState* thread) {
- RandomGenerator gen;
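- // Options().block_size defaults to 4KB in stock leveldb, so each op
- // compresses roughly one block's worth of data.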
- Slice input = gen.Generate(Options().block_size);
- int64_t bytes = 0;
- int64_t produced = 0;
- bool ok = true;
- std::string compressed;
- while (ok && bytes < 1024 * 1048576) { // Compress 1G
- ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
- produced += compressed.size();
- bytes += input.size();
- thread->stats.FinishedSingleOp();
- }
-
- if (!ok) {
- thread->stats.AddMessage("(snappy failure)");
- } else {
- char buf[100];
- snprintf(buf, sizeof(buf), "(output: %.1f%%)",
- (produced * 100.0) / bytes);
- thread->stats.AddMessage(buf);
- thread->stats.AddBytes(bytes);
- }
- }
-
- void SnappyUncompress(ThreadState* thread) {
- RandomGenerator gen;
- Slice input = gen.Generate(Options().block_size);
- std::string compressed;
- bool ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
- int64_t bytes = 0;
- char* uncompressed = new char[input.size()];
- while (ok && bytes < 1024 * 1048576) { // Uncompress 1G
- ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
- uncompressed);
- bytes += input.size();
- thread->stats.FinishedSingleOp();
- }
- delete[] uncompressed;
-
- if (!ok) {
- thread->stats.AddMessage("(snappy failure)");
- } else {
- thread->stats.AddBytes(bytes);
- }
- }
-
- void Open() {
- assert(db_ == NULL);
- Options options;
- options.create_if_missing = !FLAGS_use_existing_db;
- options.block_cache = cache_;
- options.write_buffer_size = FLAGS_write_buffer_size;
- options.max_open_files = FLAGS_open_files;
- options.filter_policy = filter_policy_;
- Status s = DB::Open(options, FLAGS_db, &db_);
- if (!s.ok()) {
- fprintf(stderr, "open error: %s\n", s.ToString().c_str());
- exit(1);
- }
- }
-
- void WriteSeq(ThreadState* thread) {
- DoWrite(thread, true);
- }
-
- void WriteRandom(ThreadState* thread) {
- DoWrite(thread, false);
- }
-
- void DoWrite(ThreadState* thread, bool seq) {
- if (num_ != FLAGS_num) {
- char msg[100];
- snprintf(msg, sizeof(msg), "(%d ops)", num_);
- thread->stats.AddMessage(msg);
- }
-
- RandomGenerator gen;
- WriteBatch batch;
- Status s;
- int64_t bytes = 0;
- for (int i = 0; i < num_; i += entries_per_batch_) {
- batch.Clear();
- for (int j = 0; j < entries_per_batch_; j++) {
- const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
- char key[100];
- snprintf(key, sizeof(key), "%016d", k);
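- // e.g. k=123 yields the key "0000000000000123" (16-digit, zero-padded).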
- batch.Put(key, gen.Generate(value_size_));
- bytes += value_size_ + strlen(key);
- thread->stats.FinishedSingleOp();
- }
- s = db_->Write(write_options_, &batch);
- if (!s.ok()) {
- fprintf(stderr, "put error: %s\n", s.ToString().c_str());
- exit(1);
- }
- }
- thread->stats.AddBytes(bytes);
- }
-
- void ReadSequential(ThreadState* thread) {
- Iterator* iter = db_->NewIterator(ReadOptions());
- int i = 0;
- int64_t bytes = 0;
- for (iter->SeekToFirst(); i < reads_ && iter->Valid(); iter->Next()) {
- bytes += iter->key().size() + iter->value().size();
- thread->stats.FinishedSingleOp();
- ++i;
- }
- delete iter;
- thread->stats.AddBytes(bytes);
- }
-
- void ReadReverse(ThreadState* thread) {
- Iterator* iter = db_->NewIterator(ReadOptions());
- int i = 0;
- int64_t bytes = 0;
- for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) {
- bytes += iter->key().size() + iter->value().size();
- thread->stats.FinishedSingleOp();
- ++i;
- }
- delete iter;
- thread->stats.AddBytes(bytes);
- }
-
- void ReadRandom(ThreadState* thread) {
- ReadOptions options;
- std::string value;
- int found = 0;
- for (int i = 0; i < reads_; i++) {
- char key[100];
- const int k = thread->rand.Next() % FLAGS_num;
- snprintf(key, sizeof(key), "%016d", k);
- if (db_->Get(options, key, &value).ok()) {
- found++;
- }
- thread->stats.FinishedSingleOp();
- }
- char msg[100];
- snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
- thread->stats.AddMessage(msg);
- }
-
- void ReadMissing(ThreadState* thread) {
- ReadOptions options;
- std::string value;
- for (int i = 0; i < reads_; i++) {
- char key[100];
- const int k = thread->rand.Next() % FLAGS_num;
- snprintf(key, sizeof(key), "%016d.", k);
- db_->Get(options, key, &value);
- thread->stats.FinishedSingleOp();
- }
- }
-
- void ReadHot(ThreadState* thread) {
- ReadOptions options;
- std::string value;
- const int range = (FLAGS_num + 99) / 100;
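- // Restrict reads to the first 1% of the key space (ceiling of num/100).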
- for (int i = 0; i < reads_; i++) {
- char key[100];
- const int k = thread->rand.Next() % range;
- snprintf(key, sizeof(key), "%016d", k);
- db_->Get(options, key, &value);
- thread->stats.FinishedSingleOp();
- }
- }
-
- void SeekRandom(ThreadState* thread) {
- ReadOptions options;
- std::string value;
- int found = 0;
- for (int i = 0; i < reads_; i++) {
- Iterator* iter = db_->NewIterator(options);
- char key[100];
- const int k = thread->rand.Next() % FLAGS_num;
- snprintf(key, sizeof(key), "%016d", k);
- iter->Seek(key);
- if (iter->Valid() && iter->key() == key) found++;
- delete iter;
- thread->stats.FinishedSingleOp();
- }
- char msg[100];
- snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
- thread->stats.AddMessage(msg);
- }
-
- void DoDelete(ThreadState* thread, bool seq) {
- RandomGenerator gen;
- WriteBatch batch;
- Status s;
- for (int i = 0; i < num_; i += entries_per_batch_) {
- batch.Clear();
- for (int j = 0; j < entries_per_batch_; j++) {
- const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
- char key[100];
- snprintf(key, sizeof(key), "%016d", k);
- batch.Delete(key);
- thread->stats.FinishedSingleOp();
- }
- s = db_->Write(write_options_, &batch);
- if (!s.ok()) {
- fprintf(stderr, "del error: %s\n", s.ToString().c_str());
- exit(1);
- }
- }
- }
-
- void DeleteSeq(ThreadState* thread) {
- DoDelete(thread, true);
- }
-
- void DeleteRandom(ThreadState* thread) {
- DoDelete(thread, false);
- }
-
- void ReadWhileWriting(ThreadState* thread) {
- if (thread->tid > 0) {
- ReadRandom(thread);
- } else {
- // Special thread that keeps writing until other threads are done.
- RandomGenerator gen;
- while (true) {
- {
- MutexLock l(&thread->shared->mu);
- if (thread->shared->num_done + 1 >= thread->shared->num_initialized) {
- // Other threads have finished
- break;
- }
- }
-
- const int k = thread->rand.Next() % FLAGS_num;
- char key[100];
- snprintf(key, sizeof(key), "%016d", k);
- Status s = db_->Put(write_options_, key, gen.Generate(value_size_));
- if (!s.ok()) {
- fprintf(stderr, "put error: %s\n", s.ToString().c_str());
- exit(1);
- }
- }
-
- // Do not count any of the preceding work/delay in stats.
- thread->stats.Start();
- }
- }
-
- void Compact(ThreadState* thread) {
- db_->CompactRange(NULL, NULL);
- }
-
- void PrintStats(const char* key) {
- std::string stats;
- if (!db_->GetProperty(key, &stats)) {
- stats = "(failed)";
- }
- fprintf(stdout, "\n%s\n", stats.c_str());
- }
-
- static void WriteToFile(void* arg, const char* buf, int n) {
- reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n));
- }
-
- void HeapProfile() {
- char fname[100];
- snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_);
- WritableFile* file;
- Status s = Env::Default()->NewWritableFile(fname, &file);
- if (!s.ok()) {
- fprintf(stderr, "%s\n", s.ToString().c_str());
- return;
- }
- bool ok = port::GetHeapProfile(WriteToFile, file);
- delete file;
- if (!ok) {
- fprintf(stderr, "heap profiling not supported\n");
- Env::Default()->DeleteFile(fname);
- }
- }
-};
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
- FLAGS_open_files = leveldb::Options().max_open_files;
- std::string default_db_path;
-
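- // A typical invocation, using the flags parsed below, might be:
- // ./db_bench --num=1000000 --value_size=100 --benchmarks=fillseq,readrandom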
- for (int i = 1; i < argc; i++) {
- double d;
- int n;
- char junk;
- if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
- FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
- } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
- FLAGS_compression_ratio = d;
- } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
- (n == 0 || n == 1)) {
- FLAGS_histogram = n;
- } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
- (n == 0 || n == 1)) {
- FLAGS_use_existing_db = n;
- } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
- FLAGS_num = n;
- } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
- FLAGS_reads = n;
- } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) {
- FLAGS_threads = n;
- } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
- FLAGS_value_size = n;
- } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
- FLAGS_write_buffer_size = n;
- } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
- FLAGS_cache_size = n;
- } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
- FLAGS_bloom_bits = n;
- } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
- FLAGS_open_files = n;
- } else if (strncmp(argv[i], "--db=", 5) == 0) {
- FLAGS_db = argv[i] + 5;
- } else {
- fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
- exit(1);
- }
- }
-
- // Choose a location for the test database if none given with --db=<path>
- if (FLAGS_db == NULL) {
- leveldb::Env::Default()->GetTestDirectory(&default_db_path);
- default_db_path += "/dbbench";
- FLAGS_db = default_db_path.c_str();
- }
-
- leveldb::Benchmark benchmark;
- benchmark.Run();
- return 0;
-}
diff --git a/src/leveldb/db/db_impl.cc b/src/leveldb/db/db_impl.cc
deleted file mode 100644
index c9de169f29..0000000000
--- a/src/leveldb/db/db_impl.cc
+++ /dev/null
@@ -1,1467 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/db_impl.h"
-
-#include <algorithm>
-#include <set>
-#include <string>
-#include <stdint.h>
-#include <stdio.h>
-#include <vector>
-#include "db/builder.h"
-#include "db/db_iter.h"
-#include "db/dbformat.h"
-#include "db/filename.h"
-#include "db/log_reader.h"
-#include "db/log_writer.h"
-#include "db/memtable.h"
-#include "db/table_cache.h"
-#include "db/version_set.h"
-#include "db/write_batch_internal.h"
-#include "leveldb/db.h"
-#include "leveldb/env.h"
-#include "leveldb/status.h"
-#include "leveldb/table.h"
-#include "leveldb/table_builder.h"
-#include "port/port.h"
-#include "table/block.h"
-#include "table/merger.h"
-#include "table/two_level_iterator.h"
-#include "util/coding.h"
-#include "util/logging.h"
-#include "util/mutexlock.h"
-
-namespace leveldb {
-
-// Information kept for every waiting writer
-struct DBImpl::Writer {
- Status status;
- WriteBatch* batch;
- bool sync;
- bool done;
- port::CondVar cv;
-
- explicit Writer(port::Mutex* mu) : cv(mu) { }
-};
-
-struct DBImpl::CompactionState {
- Compaction* const compaction;
-
- // Sequence numbers < smallest_snapshot are not significant since we
- // will never have to service a snapshot below smallest_snapshot.
- // Therefore if we have seen a sequence number S <= smallest_snapshot,
- // we can drop all entries for the same key with sequence numbers < S.
- SequenceNumber smallest_snapshot;
-
- // Files produced by compaction
- struct Output {
- uint64_t number;
- uint64_t file_size;
- InternalKey smallest, largest;
- };
- std::vector<Output> outputs;
-
- // State kept for output being generated
- WritableFile* outfile;
- TableBuilder* builder;
-
- uint64_t total_bytes;
-
- Output* current_output() { return &outputs[outputs.size()-1]; }
-
- explicit CompactionState(Compaction* c)
- : compaction(c),
- outfile(NULL),
- builder(NULL),
- total_bytes(0) {
- }
-};
-
-// Fix user-supplied options to be reasonable
-template <class T, class V>
-static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
- if (static_cast<V>(*ptr) > maxvalue) *ptr = maxvalue;
- if (static_cast<V>(*ptr) < minvalue) *ptr = minvalue;
-}
-Options SanitizeOptions(const std::string& dbname,
- const InternalKeyComparator* icmp,
- const InternalFilterPolicy* ipolicy,
- const Options& src) {
- Options result = src;
- result.comparator = icmp;
- result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
- ClipToRange(&result.max_open_files, 20, 50000);
- ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
- ClipToRange(&result.block_size, 1<<10, 4<<20);
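- // i.e. max_open_files in [20, 50000], write_buffer_size in [64KB, 1GB],
- // block_size in [1KB, 4MB].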
- if (result.info_log == NULL) {
- // Open a log file in the same directory as the db
- src.env->CreateDir(dbname); // In case it does not exist
- src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
- Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log);
- if (!s.ok()) {
- // No place suitable for logging
- result.info_log = NULL;
- }
- }
- if (result.block_cache == NULL) {
- result.block_cache = NewLRUCache(8 << 20);
- }
- return result;
-}
-
-DBImpl::DBImpl(const Options& options, const std::string& dbname)
- : env_(options.env),
- internal_comparator_(options.comparator),
- internal_filter_policy_(options.filter_policy),
- options_(SanitizeOptions(
- dbname, &internal_comparator_, &internal_filter_policy_, options)),
- owns_info_log_(options_.info_log != options.info_log),
- owns_cache_(options_.block_cache != options.block_cache),
- dbname_(dbname),
- db_lock_(NULL),
- shutting_down_(NULL),
- bg_cv_(&mutex_),
- mem_(new MemTable(internal_comparator_)),
- imm_(NULL),
- logfile_(NULL),
- logfile_number_(0),
- log_(NULL),
- tmp_batch_(new WriteBatch),
- bg_compaction_scheduled_(false),
- manual_compaction_(NULL) {
- mem_->Ref();
- has_imm_.Release_Store(NULL);
-
- // Reserve ten files or so for other uses and give the rest to TableCache.
- const int table_cache_size = options.max_open_files - 10;
- table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
-
- versions_ = new VersionSet(dbname_, &options_, table_cache_,
- &internal_comparator_);
-}
-
-DBImpl::~DBImpl() {
- // Wait for background work to finish
- mutex_.Lock();
- shutting_down_.Release_Store(this); // Any non-NULL value is ok
- while (bg_compaction_scheduled_) {
- bg_cv_.Wait();
- }
- mutex_.Unlock();
-
- if (db_lock_ != NULL) {
- env_->UnlockFile(db_lock_);
- }
-
- delete versions_;
- if (mem_ != NULL) mem_->Unref();
- if (imm_ != NULL) imm_->Unref();
- delete tmp_batch_;
- delete log_;
- delete logfile_;
- delete table_cache_;
-
- if (owns_info_log_) {
- delete options_.info_log;
- }
- if (owns_cache_) {
- delete options_.block_cache;
- }
-}
-
-Status DBImpl::NewDB() {
- VersionEdit new_db;
- new_db.SetComparatorName(user_comparator()->Name());
- new_db.SetLogNumber(0);
- new_db.SetNextFile(2);
- new_db.SetLastSequence(0);
-
- const std::string manifest = DescriptorFileName(dbname_, 1);
- WritableFile* file;
- Status s = env_->NewWritableFile(manifest, &file);
- if (!s.ok()) {
- return s;
- }
- {
- log::Writer log(file);
- std::string record;
- new_db.EncodeTo(&record);
- s = log.AddRecord(record);
- if (s.ok()) {
- s = file->Close();
- }
- }
- delete file;
- if (s.ok()) {
- // Make "CURRENT" file that points to the new manifest file.
- s = SetCurrentFile(env_, dbname_, 1);
- } else {
- env_->DeleteFile(manifest);
- }
- return s;
-}
-
-void DBImpl::MaybeIgnoreError(Status* s) const {
- if (s->ok() || options_.paranoid_checks) {
- // No change needed
- } else {
- Log(options_.info_log, "Ignoring error %s", s->ToString().c_str());
- *s = Status::OK();
- }
-}
-
-void DBImpl::DeleteObsoleteFiles() {
- // Make a set of all of the live files
- std::set<uint64_t> live = pending_outputs_;
- versions_->AddLiveFiles(&live);
-
- std::vector<std::string> filenames;
- env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
- uint64_t number;
- FileType type;
- for (size_t i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type)) {
- bool keep = true;
- switch (type) {
- case kLogFile:
- keep = ((number >= versions_->LogNumber()) ||
- (number == versions_->PrevLogNumber()));
- break;
- case kDescriptorFile:
- // Keep my manifest file, and any newer incarnations'
- // (in case there is a race that allows other incarnations)
- keep = (number >= versions_->ManifestFileNumber());
- break;
- case kTableFile:
- keep = (live.find(number) != live.end());
- break;
- case kTempFile:
- // Any temp files that are currently being written to must
- // be recorded in pending_outputs_, which is inserted into "live"
- keep = (live.find(number) != live.end());
- break;
- case kCurrentFile:
- case kDBLockFile:
- case kInfoLogFile:
- keep = true;
- break;
- }
-
- if (!keep) {
- if (type == kTableFile) {
- table_cache_->Evict(number);
- }
- Log(options_.info_log, "Delete type=%d #%lld\n",
- int(type),
- static_cast<unsigned long long>(number));
- env_->DeleteFile(dbname_ + "/" + filenames[i]);
- }
- }
- }
-}
-
-Status DBImpl::Recover(VersionEdit* edit) {
- mutex_.AssertHeld();
-
- // Ignore error from CreateDir since the creation of the DB is
- // committed only when the descriptor is created, and this directory
- // may already exist from a previous failed creation attempt.
- env_->CreateDir(dbname_);
- assert(db_lock_ == NULL);
- Status s = env_->LockFile(LockFileName(dbname_), &db_lock_);
- if (!s.ok()) {
- return s;
- }
-
- if (!env_->FileExists(CurrentFileName(dbname_))) {
- if (options_.create_if_missing) {
- s = NewDB();
- if (!s.ok()) {
- return s;
- }
- } else {
- return Status::InvalidArgument(
- dbname_, "does not exist (create_if_missing is false)");
- }
- } else {
- if (options_.error_if_exists) {
- return Status::InvalidArgument(
- dbname_, "exists (error_if_exists is true)");
- }
- }
-
- s = versions_->Recover();
- if (s.ok()) {
- SequenceNumber max_sequence(0);
-
- // Recover from all newer log files than the ones named in the
- // descriptor (new log files may have been added by the previous
- // incarnation without registering them in the descriptor).
- //
- // Note that PrevLogNumber() is no longer used, but we pay
- // attention to it in case we are recovering a database
- // produced by an older version of leveldb.
- const uint64_t min_log = versions_->LogNumber();
- const uint64_t prev_log = versions_->PrevLogNumber();
- std::vector<std::string> filenames;
- s = env_->GetChildren(dbname_, &filenames);
- if (!s.ok()) {
- return s;
- }
- uint64_t number;
- FileType type;
- std::vector<uint64_t> logs;
- for (size_t i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type)
- && type == kLogFile
- && ((number >= min_log) || (number == prev_log))) {
- logs.push_back(number);
- }
- }
-
- // Recover in the order in which the logs were generated
- std::sort(logs.begin(), logs.end());
- for (size_t i = 0; i < logs.size(); i++) {
- s = RecoverLogFile(logs[i], edit, &max_sequence);
-
- // The previous incarnation may not have written any MANIFEST
- // records after allocating this log number. So we manually
- // update the file number allocation counter in VersionSet.
- versions_->MarkFileNumberUsed(logs[i]);
- }
-
- if (s.ok()) {
- if (versions_->LastSequence() < max_sequence) {
- versions_->SetLastSequence(max_sequence);
- }
- }
- }
-
- return s;
-}
-
-Status DBImpl::RecoverLogFile(uint64_t log_number,
- VersionEdit* edit,
- SequenceNumber* max_sequence) {
- struct LogReporter : public log::Reader::Reporter {
- Env* env;
- Logger* info_log;
- const char* fname;
- Status* status; // NULL if options_.paranoid_checks==false
- virtual void Corruption(size_t bytes, const Status& s) {
- Log(info_log, "%s%s: dropping %d bytes; %s",
- (this->status == NULL ? "(ignoring error) " : ""),
- fname, static_cast<int>(bytes), s.ToString().c_str());
- if (this->status != NULL && this->status->ok()) *this->status = s;
- }
- };
-
- mutex_.AssertHeld();
-
- // Open the log file
- std::string fname = LogFileName(dbname_, log_number);
- SequentialFile* file;
- Status status = env_->NewSequentialFile(fname, &file);
- if (!status.ok()) {
- MaybeIgnoreError(&status);
- return status;
- }
-
- // Create the log reader.
- LogReporter reporter;
- reporter.env = env_;
- reporter.info_log = options_.info_log;
- reporter.fname = fname.c_str();
- reporter.status = (options_.paranoid_checks ? &status : NULL);
- // We intentionally make log::Reader do checksumming even if
- // paranoid_checks==false so that corruptions cause entire commits
- // to be skipped instead of propagating bad information (like overly
- // large sequence numbers).
- log::Reader reader(file, &reporter, true/*checksum*/,
- 0/*initial_offset*/);
- Log(options_.info_log, "Recovering log #%llu",
- (unsigned long long) log_number);
-
- // Read all the records and add to a memtable
- std::string scratch;
- Slice record;
- WriteBatch batch;
- MemTable* mem = NULL;
- while (reader.ReadRecord(&record, &scratch) &&
- status.ok()) {
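- // Every batch starts with a 12-byte header (8-byte sequence number
- // followed by a 4-byte count), so shorter records are corrupt.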
- if (record.size() < 12) {
- reporter.Corruption(
- record.size(), Status::Corruption("log record too small"));
- continue;
- }
- WriteBatchInternal::SetContents(&batch, record);
-
- if (mem == NULL) {
- mem = new MemTable(internal_comparator_);
- mem->Ref();
- }
- status = WriteBatchInternal::InsertInto(&batch, mem);
- MaybeIgnoreError(&status);
- if (!status.ok()) {
- break;
- }
- const SequenceNumber last_seq =
- WriteBatchInternal::Sequence(&batch) +
- WriteBatchInternal::Count(&batch) - 1;
- if (last_seq > *max_sequence) {
- *max_sequence = last_seq;
- }
-
- if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
- status = WriteLevel0Table(mem, edit, NULL);
- if (!status.ok()) {
- // Reflect errors immediately so that conditions like full
- // file-systems cause the DB::Open() to fail.
- break;
- }
- mem->Unref();
- mem = NULL;
- }
- }
-
- if (status.ok() && mem != NULL) {
- status = WriteLevel0Table(mem, edit, NULL);
- // Reflect errors immediately so that conditions like full
- // file-systems cause the DB::Open() to fail.
- }
-
- if (mem != NULL) mem->Unref();
- delete file;
- return status;
-}
-
-Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
- Version* base) {
- mutex_.AssertHeld();
- const uint64_t start_micros = env_->NowMicros();
- FileMetaData meta;
- meta.number = versions_->NewFileNumber();
- pending_outputs_.insert(meta.number);
- Iterator* iter = mem->NewIterator();
- Log(options_.info_log, "Level-0 table #%llu: started",
- (unsigned long long) meta.number);
-
- Status s;
- {
- mutex_.Unlock();
- s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
- mutex_.Lock();
- }
-
- Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s",
- (unsigned long long) meta.number,
- (unsigned long long) meta.file_size,
- s.ToString().c_str());
- delete iter;
- pending_outputs_.erase(meta.number);
-
- // Note that if file_size is zero, the file has been deleted and
- // should not be added to the manifest.
- int level = 0;
- if (s.ok() && meta.file_size > 0) {
- const Slice min_user_key = meta.smallest.user_key();
- const Slice max_user_key = meta.largest.user_key();
- if (base != NULL) {
- level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
- }
- edit->AddFile(level, meta.number, meta.file_size,
- meta.smallest, meta.largest);
- }
-
- CompactionStats stats;
- stats.micros = env_->NowMicros() - start_micros;
- stats.bytes_written = meta.file_size;
- stats_[level].Add(stats);
- return s;
-}
-
-Status DBImpl::CompactMemTable() {
- mutex_.AssertHeld();
- assert(imm_ != NULL);
-
- // Save the contents of the memtable as a new Table
- VersionEdit edit;
- Version* base = versions_->current();
- base->Ref();
- Status s = WriteLevel0Table(imm_, &edit, base);
- base->Unref();
-
- if (s.ok() && shutting_down_.Acquire_Load()) {
- s = Status::IOError("Deleting DB during memtable compaction");
- }
-
- // Replace immutable memtable with the generated Table
- if (s.ok()) {
- edit.SetPrevLogNumber(0);
- edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed
- s = versions_->LogAndApply(&edit, &mutex_);
- }
-
- if (s.ok()) {
- // Commit to the new state
- imm_->Unref();
- imm_ = NULL;
- has_imm_.Release_Store(NULL);
- DeleteObsoleteFiles();
- }
-
- return s;
-}
-
-void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
- int max_level_with_files = 1;
- {
- MutexLock l(&mutex_);
- Version* base = versions_->current();
- for (int level = 1; level < config::kNumLevels; level++) {
- if (base->OverlapInLevel(level, begin, end)) {
- max_level_with_files = level;
- }
- }
- }
- TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
- for (int level = 0; level < max_level_with_files; level++) {
- TEST_CompactRange(level, begin, end);
- }
-}
-
-void DBImpl::TEST_CompactRange(int level, const Slice* begin, const Slice* end) {
- assert(level >= 0);
- assert(level + 1 < config::kNumLevels);
-
- InternalKey begin_storage, end_storage;
-
- ManualCompaction manual;
- manual.level = level;
- manual.done = false;
- if (begin == NULL) {
- manual.begin = NULL;
- } else {
- begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
- manual.begin = &begin_storage;
- }
- if (end == NULL) {
- manual.end = NULL;
- } else {
- end_storage = InternalKey(*end, 0, static_cast<ValueType>(0));
- manual.end = &end_storage;
- }
-
- MutexLock l(&mutex_);
- while (!manual.done) {
- while (manual_compaction_ != NULL) {
- bg_cv_.Wait();
- }
- manual_compaction_ = &manual;
- MaybeScheduleCompaction();
- while (manual_compaction_ == &manual) {
- bg_cv_.Wait();
- }
- }
-}
-
-Status DBImpl::TEST_CompactMemTable() {
- // NULL batch means just wait for earlier writes to be done
- Status s = Write(WriteOptions(), NULL);
- if (s.ok()) {
- // Wait until the compaction completes
- MutexLock l(&mutex_);
- while (imm_ != NULL && bg_error_.ok()) {
- bg_cv_.Wait();
- }
- if (imm_ != NULL) {
- s = bg_error_;
- }
- }
- return s;
-}
-
-void DBImpl::MaybeScheduleCompaction() {
- mutex_.AssertHeld();
- if (bg_compaction_scheduled_) {
- // Already scheduled
- } else if (shutting_down_.Acquire_Load()) {
- // DB is being deleted; no more background compactions
- } else if (imm_ == NULL &&
- manual_compaction_ == NULL &&
- !versions_->NeedsCompaction()) {
- // No work to be done
- } else {
- bg_compaction_scheduled_ = true;
- env_->Schedule(&DBImpl::BGWork, this);
- }
-}
-
-void DBImpl::BGWork(void* db) {
- reinterpret_cast<DBImpl*>(db)->BackgroundCall();
-}
-
-void DBImpl::BackgroundCall() {
- MutexLock l(&mutex_);
- assert(bg_compaction_scheduled_);
- if (!shutting_down_.Acquire_Load()) {
- Status s = BackgroundCompaction();
- if (s.ok()) {
- // Success
- } else if (shutting_down_.Acquire_Load()) {
- // Error most likely due to shutdown; do not wait
- } else {
- // Wait a little bit before retrying background compaction in
- // case this is an environmental problem: we do not want to
- // chew up resources on failed compactions for the duration of
- // the problem.
- bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
- Log(options_.info_log, "Waiting after background compaction error: %s",
- s.ToString().c_str());
- mutex_.Unlock();
- env_->SleepForMicroseconds(1000000);
- mutex_.Lock();
- }
- }
-
- bg_compaction_scheduled_ = false;
-
- // Previous compaction may have produced too many files in a level,
- // so reschedule another compaction if needed.
- MaybeScheduleCompaction();
- bg_cv_.SignalAll();
-}
-
-Status DBImpl::BackgroundCompaction() {
- mutex_.AssertHeld();
-
- if (imm_ != NULL) {
- return CompactMemTable();
- }
-
- Compaction* c;
- bool is_manual = (manual_compaction_ != NULL);
- InternalKey manual_end;
- if (is_manual) {
- ManualCompaction* m = manual_compaction_;
- c = versions_->CompactRange(m->level, m->begin, m->end);
- m->done = (c == NULL);
- if (c != NULL) {
- manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
- }
- Log(options_.info_log,
- "Manual compaction at level-%d from %s .. %s; will stop at %s\n",
- m->level,
- (m->begin ? m->begin->DebugString().c_str() : "(begin)"),
- (m->end ? m->end->DebugString().c_str() : "(end)"),
- (m->done ? "(end)" : manual_end.DebugString().c_str()));
- } else {
- c = versions_->PickCompaction();
- }
-
- Status status;
- if (c == NULL) {
- // Nothing to do
- } else if (!is_manual && c->IsTrivialMove()) {
- // Move file to next level
- assert(c->num_input_files(0) == 1);
- FileMetaData* f = c->input(0, 0);
- c->edit()->DeleteFile(c->level(), f->number);
- c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
- f->smallest, f->largest);
- status = versions_->LogAndApply(c->edit(), &mutex_);
- VersionSet::LevelSummaryStorage tmp;
- Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
- static_cast<unsigned long long>(f->number),
- c->level() + 1,
- static_cast<unsigned long long>(f->file_size),
- status.ToString().c_str(),
- versions_->LevelSummary(&tmp));
- } else {
- CompactionState* compact = new CompactionState(c);
- status = DoCompactionWork(compact);
- CleanupCompaction(compact);
- c->ReleaseInputs();
- DeleteObsoleteFiles();
- }
- delete c;
-
- if (status.ok()) {
- // Done
- } else if (shutting_down_.Acquire_Load()) {
- // Ignore compaction errors found during shutting down
- } else {
- Log(options_.info_log,
- "Compaction error: %s", status.ToString().c_str());
- if (options_.paranoid_checks && bg_error_.ok()) {
- bg_error_ = status;
- }
- }
-
- if (is_manual) {
- ManualCompaction* m = manual_compaction_;
- if (!status.ok()) {
- m->done = true;
- }
- if (!m->done) {
- // We only compacted part of the requested range. Update *m
- // to the range that is left to be compacted.
- m->tmp_storage = manual_end;
- m->begin = &m->tmp_storage;
- }
- manual_compaction_ = NULL;
- }
- return status;
-}
-
-void DBImpl::CleanupCompaction(CompactionState* compact) {
- mutex_.AssertHeld();
- if (compact->builder != NULL) {
- // May happen if we get a shutdown call in the middle of compaction
- compact->builder->Abandon();
- delete compact->builder;
- } else {
- assert(compact->outfile == NULL);
- }
- delete compact->outfile;
- for (size_t i = 0; i < compact->outputs.size(); i++) {
- const CompactionState::Output& out = compact->outputs[i];
- pending_outputs_.erase(out.number);
- }
- delete compact;
-}
-
-Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
- assert(compact != NULL);
- assert(compact->builder == NULL);
- uint64_t file_number;
- {
- mutex_.Lock();
- file_number = versions_->NewFileNumber();
- pending_outputs_.insert(file_number);
- CompactionState::Output out;
- out.number = file_number;
- out.smallest.Clear();
- out.largest.Clear();
- compact->outputs.push_back(out);
- mutex_.Unlock();
- }
-
- // Make the output file
- std::string fname = TableFileName(dbname_, file_number);
- Status s = env_->NewWritableFile(fname, &compact->outfile);
- if (s.ok()) {
- compact->builder = new TableBuilder(options_, compact->outfile);
- }
- return s;
-}
-
-Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
- Iterator* input) {
- assert(compact != NULL);
- assert(compact->outfile != NULL);
- assert(compact->builder != NULL);
-
- const uint64_t output_number = compact->current_output()->number;
- assert(output_number != 0);
-
- // Check for iterator errors
- Status s = input->status();
- const uint64_t current_entries = compact->builder->NumEntries();
- if (s.ok()) {
- s = compact->builder->Finish();
- } else {
- compact->builder->Abandon();
- }
- const uint64_t current_bytes = compact->builder->FileSize();
- compact->current_output()->file_size = current_bytes;
- compact->total_bytes += current_bytes;
- delete compact->builder;
- compact->builder = NULL;
-
- // Finish and check for file errors
- if (s.ok()) {
- s = compact->outfile->Sync();
- }
- if (s.ok()) {
- s = compact->outfile->Close();
- }
- delete compact->outfile;
- compact->outfile = NULL;
-
- if (s.ok() && current_entries > 0) {
- // Verify that the table is usable
- Iterator* iter = table_cache_->NewIterator(ReadOptions(),
- output_number,
- current_bytes);
- s = iter->status();
- delete iter;
- if (s.ok()) {
- Log(options_.info_log,
- "Generated table #%llu: %lld keys, %lld bytes",
- (unsigned long long) output_number,
- (unsigned long long) current_entries,
- (unsigned long long) current_bytes);
- }
- }
- return s;
-}
-
-Status DBImpl::InstallCompactionResults(CompactionState* compact) {
- mutex_.AssertHeld();
- Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
- compact->compaction->num_input_files(0),
- compact->compaction->level(),
- compact->compaction->num_input_files(1),
- compact->compaction->level() + 1,
- static_cast<long long>(compact->total_bytes));
-
- // Add compaction outputs
- compact->compaction->AddInputDeletions(compact->compaction->edit());
- const int level = compact->compaction->level();
- for (size_t i = 0; i < compact->outputs.size(); i++) {
- const CompactionState::Output& out = compact->outputs[i];
- compact->compaction->edit()->AddFile(
- level + 1,
- out.number, out.file_size, out.smallest, out.largest);
- }
- return versions_->LogAndApply(compact->compaction->edit(), &mutex_);
-}
-
-Status DBImpl::DoCompactionWork(CompactionState* compact) {
- const uint64_t start_micros = env_->NowMicros();
- int64_t imm_micros = 0; // Micros spent doing imm_ compactions
-
- Log(options_.info_log, "Compacting %d@%d + %d@%d files",
- compact->compaction->num_input_files(0),
- compact->compaction->level(),
- compact->compaction->num_input_files(1),
- compact->compaction->level() + 1);
-
- assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
- assert(compact->builder == NULL);
- assert(compact->outfile == NULL);
- if (snapshots_.empty()) {
- compact->smallest_snapshot = versions_->LastSequence();
- } else {
- compact->smallest_snapshot = snapshots_.oldest()->number_;
- }
-
- // Release mutex while we're actually doing the compaction work
- mutex_.Unlock();
-
- Iterator* input = versions_->MakeInputIterator(compact->compaction);
- input->SeekToFirst();
- Status status;
- ParsedInternalKey ikey;
- std::string current_user_key;
- bool has_current_user_key = false;
- SequenceNumber last_sequence_for_key = kMaxSequenceNumber;
- for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
- // Prioritize immutable compaction work
- if (has_imm_.NoBarrier_Load() != NULL) {
- const uint64_t imm_start = env_->NowMicros();
- mutex_.Lock();
- if (imm_ != NULL) {
- CompactMemTable();
- bg_cv_.SignalAll(); // Wake up MakeRoomForWrite() if necessary
- }
- mutex_.Unlock();
- imm_micros += (env_->NowMicros() - imm_start);
- }
-
- Slice key = input->key();
- if (compact->compaction->ShouldStopBefore(key) &&
- compact->builder != NULL) {
- status = FinishCompactionOutputFile(compact, input);
- if (!status.ok()) {
- break;
- }
- }
-
- // Handle key/value, add to state, etc.
- bool drop = false;
- if (!ParseInternalKey(key, &ikey)) {
- // Do not hide error keys
- current_user_key.clear();
- has_current_user_key = false;
- last_sequence_for_key = kMaxSequenceNumber;
- } else {
- if (!has_current_user_key ||
- user_comparator()->Compare(ikey.user_key,
- Slice(current_user_key)) != 0) {
- // First occurrence of this user key
- current_user_key.assign(ikey.user_key.data(), ikey.user_key.size());
- has_current_user_key = true;
- last_sequence_for_key = kMaxSequenceNumber;
- }
-
- if (last_sequence_for_key <= compact->smallest_snapshot) {
- // Hidden by a newer entry for the same user key
- drop = true; // (A)
- } else if (ikey.type == kTypeDeletion &&
- ikey.sequence <= compact->smallest_snapshot &&
- compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
- // For this user key:
- // (1) there is no data in higher levels
- // (2) data in lower levels will have larger sequence numbers
- // (3) data in layers that are being compacted here and have
- // smaller sequence numbers will be dropped in the next
- // few iterations of this loop (by rule (A) above).
- // Therefore this deletion marker is obsolete and can be dropped.
- drop = true;
- }
-
- last_sequence_for_key = ikey.sequence;
- }
-#if 0
- Log(options_.info_log,
- " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
- "%d smallest_snapshot: %d",
- ikey.user_key.ToString().c_str(),
- (int)ikey.sequence, ikey.type, kTypeValue, drop,
- compact->compaction->IsBaseLevelForKey(ikey.user_key),
- (int)last_sequence_for_key, (int)compact->smallest_snapshot);
-#endif
-
- if (!drop) {
- // Open output file if necessary
- if (compact->builder == NULL) {
- status = OpenCompactionOutputFile(compact);
- if (!status.ok()) {
- break;
- }
- }
- if (compact->builder->NumEntries() == 0) {
- compact->current_output()->smallest.DecodeFrom(key);
- }
- compact->current_output()->largest.DecodeFrom(key);
- compact->builder->Add(key, input->value());
-
- // Close output file if it is big enough
- if (compact->builder->FileSize() >=
- compact->compaction->MaxOutputFileSize()) {
- status = FinishCompactionOutputFile(compact, input);
- if (!status.ok()) {
- break;
- }
- }
- }
-
- input->Next();
- }
-
- if (status.ok() && shutting_down_.Acquire_Load()) {
- status = Status::IOError("Deleting DB during compaction");
- }
- if (status.ok() && compact->builder != NULL) {
- status = FinishCompactionOutputFile(compact, input);
- }
- if (status.ok()) {
- status = input->status();
- }
- delete input;
- input = NULL;
-
- CompactionStats stats;
- stats.micros = env_->NowMicros() - start_micros - imm_micros;
- for (int which = 0; which < 2; which++) {
- for (int i = 0; i < compact->compaction->num_input_files(which); i++) {
- stats.bytes_read += compact->compaction->input(which, i)->file_size;
- }
- }
- for (size_t i = 0; i < compact->outputs.size(); i++) {
- stats.bytes_written += compact->outputs[i].file_size;
- }
-
- mutex_.Lock();
- stats_[compact->compaction->level() + 1].Add(stats);
-
- if (status.ok()) {
- status = InstallCompactionResults(compact);
- }
- VersionSet::LevelSummaryStorage tmp;
- Log(options_.info_log,
- "compacted to: %s", versions_->LevelSummary(&tmp));
- return status;
-}
-
-namespace {
-struct IterState {
- port::Mutex* mu;
- Version* version;
- MemTable* mem;
- MemTable* imm;
-};
-
-static void CleanupIteratorState(void* arg1, void* arg2) {
- IterState* state = reinterpret_cast<IterState*>(arg1);
- state->mu->Lock();
- state->mem->Unref();
- if (state->imm != NULL) state->imm->Unref();
- state->version->Unref();
- state->mu->Unlock();
- delete state;
-}
-} // namespace
-
-Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
- SequenceNumber* latest_snapshot) {
- IterState* cleanup = new IterState;
- mutex_.Lock();
- *latest_snapshot = versions_->LastSequence();
-
- // Collect together all needed child iterators
- std::vector<Iterator*> list;
- list.push_back(mem_->NewIterator());
- mem_->Ref();
- if (imm_ != NULL) {
- list.push_back(imm_->NewIterator());
- imm_->Ref();
- }
- versions_->current()->AddIterators(options, &list);
- Iterator* internal_iter =
- NewMergingIterator(&internal_comparator_, &list[0], list.size());
- versions_->current()->Ref();
-
- cleanup->mu = &mutex_;
- cleanup->mem = mem_;
- cleanup->imm = imm_;
- cleanup->version = versions_->current();
- internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL);
-
- mutex_.Unlock();
- return internal_iter;
-}
-
-Iterator* DBImpl::TEST_NewInternalIterator() {
- SequenceNumber ignored;
- return NewInternalIterator(ReadOptions(), &ignored);
-}
-
-int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
- MutexLock l(&mutex_);
- return versions_->MaxNextLevelOverlappingBytes();
-}
-
-Status DBImpl::Get(const ReadOptions& options,
- const Slice& key,
- std::string* value) {
- Status s;
- MutexLock l(&mutex_);
- SequenceNumber snapshot;
- if (options.snapshot != NULL) {
- snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
- } else {
- snapshot = versions_->LastSequence();
- }
-
- MemTable* mem = mem_;
- MemTable* imm = imm_;
- Version* current = versions_->current();
- mem->Ref();
- if (imm != NULL) imm->Ref();
- current->Ref();
-
- bool have_stat_update = false;
- Version::GetStats stats;
-
- // Unlock while reading from files and memtables
- {
- mutex_.Unlock();
- // First look in the memtable, then in the immutable memtable (if any).
- LookupKey lkey(key, snapshot);
- if (mem->Get(lkey, value, &s)) {
- // Done
- } else if (imm != NULL && imm->Get(lkey, value, &s)) {
- // Done
- } else {
- s = current->Get(options, lkey, value, &stats);
- have_stat_update = true;
- }
- mutex_.Lock();
- }
-
- if (have_stat_update && current->UpdateStats(stats)) {
- MaybeScheduleCompaction();
- }
- mem->Unref();
- if (imm != NULL) imm->Unref();
- current->Unref();
- return s;
-}
-
-Iterator* DBImpl::NewIterator(const ReadOptions& options) {
- SequenceNumber latest_snapshot;
- Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
- return NewDBIterator(
- &dbname_, env_, user_comparator(), internal_iter,
- (options.snapshot != NULL
- ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
- : latest_snapshot));
-}
-
-const Snapshot* DBImpl::GetSnapshot() {
- MutexLock l(&mutex_);
- return snapshots_.New(versions_->LastSequence());
-}
-
-void DBImpl::ReleaseSnapshot(const Snapshot* s) {
- MutexLock l(&mutex_);
- snapshots_.Delete(reinterpret_cast<const SnapshotImpl*>(s));
-}
-
-// Convenience methods
-Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) {
- return DB::Put(o, key, val);
-}
-
-Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
- return DB::Delete(options, key);
-}
-
-Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
- Writer w(&mutex_);
- w.batch = my_batch;
- w.sync = options.sync;
- w.done = false;
-
- MutexLock l(&mutex_);
- writers_.push_back(&w);
- while (!w.done && &w != writers_.front()) {
- w.cv.Wait();
- }
- if (w.done) {
- return w.status;
- }
-
- // May temporarily unlock and wait.
- Status status = MakeRoomForWrite(my_batch == NULL);
- uint64_t last_sequence = versions_->LastSequence();
- Writer* last_writer = &w;
- if (status.ok() && my_batch != NULL) { // NULL batch is for compactions
- WriteBatch* updates = BuildBatchGroup(&last_writer);
- WriteBatchInternal::SetSequence(updates, last_sequence + 1);
- last_sequence += WriteBatchInternal::Count(updates);
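- // (each operation in the group consumes one sequence number)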
-
- // Add to log and apply to memtable. We can release the lock
- // during this phase since &w is currently responsible for logging
- // and protects against concurrent loggers and concurrent writes
- // into mem_.
- {
- mutex_.Unlock();
- status = log_->AddRecord(WriteBatchInternal::Contents(updates));
- if (status.ok() && options.sync) {
- status = logfile_->Sync();
- }
- if (status.ok()) {
- status = WriteBatchInternal::InsertInto(updates, mem_);
- }
- mutex_.Lock();
- }
- if (updates == tmp_batch_) tmp_batch_->Clear();
-
- versions_->SetLastSequence(last_sequence);
- }
-
- while (true) {
- Writer* ready = writers_.front();
- writers_.pop_front();
- if (ready != &w) {
- ready->status = status;
- ready->done = true;
- ready->cv.Signal();
- }
- if (ready == last_writer) break;
- }
-
- // Notify new head of write queue
- if (!writers_.empty()) {
- writers_.front()->cv.Signal();
- }
-
- return status;
-}
-
-// REQUIRES: Writer list must be non-empty
-// REQUIRES: First writer must have a non-NULL batch
-WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) {
- assert(!writers_.empty());
- Writer* first = writers_.front();
- WriteBatch* result = first->batch;
- assert(result != NULL);
-
- size_t size = WriteBatchInternal::ByteSize(first->batch);
-
- // Allow the group to grow up to a maximum size, but if the
- // original write is small, limit the growth so we do not slow
- // down the small write too much.
- size_t max_size = 1 << 20;
- if (size <= (128<<10)) {
- max_size = size + (128<<10);
- }
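- // e.g. a 1KB leading write caps its group at ~129KB, while a leading
- // write larger than 128KB may be grouped up to the full 1MB.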
-
- *last_writer = first;
- std::deque<Writer*>::iterator iter = writers_.begin();
- ++iter; // Advance past "first"
- for (; iter != writers_.end(); ++iter) {
- Writer* w = *iter;
- if (w->sync && !first->sync) {
- // Do not include a sync write into a batch handled by a non-sync write.
- break;
- }
-
- if (w->batch != NULL) {
- size += WriteBatchInternal::ByteSize(w->batch);
- if (size > max_size) {
- // Do not make batch too big
- break;
- }
-
- // Append to *result
- if (result == first->batch) {
- // Switch to temporary batch instead of disturbing caller's batch
- result = tmp_batch_;
- assert(WriteBatchInternal::Count(result) == 0);
- WriteBatchInternal::Append(result, first->batch);
- }
- WriteBatchInternal::Append(result, w->batch);
- }
- *last_writer = w;
- }
- return result;
-}
-
-// REQUIRES: mutex_ is held
-// REQUIRES: this thread is currently at the front of the writer queue
-Status DBImpl::MakeRoomForWrite(bool force) {
- mutex_.AssertHeld();
- assert(!writers_.empty());
- bool allow_delay = !force;
- Status s;
- while (true) {
- if (!bg_error_.ok()) {
- // Yield previous error
- s = bg_error_;
- break;
- } else if (
- allow_delay &&
- versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) {
- // We are getting close to hitting a hard limit on the number of
- // L0 files. Rather than delaying a single write by several
- // seconds when we hit the hard limit, start delaying each
- // individual write by 1ms to reduce latency variance. Also,
- // this delay hands over some CPU to the compaction thread in
- // case it is sharing the same core as the writer.
- mutex_.Unlock();
- env_->SleepForMicroseconds(1000);
- allow_delay = false; // Do not delay a single write more than once
- mutex_.Lock();
- } else if (!force &&
- (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) {
- // There is room in current memtable
- break;
- } else if (imm_ != NULL) {
- // We have filled up the current memtable, but the previous
- // one is still being compacted, so we wait.
- bg_cv_.Wait();
- } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
- // There are too many level-0 files.
- Log(options_.info_log, "waiting...\n");
- bg_cv_.Wait();
- } else {
- // Attempt to switch to a new memtable and trigger compaction of old
- assert(versions_->PrevLogNumber() == 0);
- uint64_t new_log_number = versions_->NewFileNumber();
- WritableFile* lfile = NULL;
- s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile);
- if (!s.ok()) {
- // Avoid chewing through file number space in a tight loop.
- versions_->ReuseFileNumber(new_log_number);
- break;
- }
- delete log_;
- delete logfile_;
- logfile_ = lfile;
- logfile_number_ = new_log_number;
- log_ = new log::Writer(lfile);
- imm_ = mem_;
- has_imm_.Release_Store(imm_);
- mem_ = new MemTable(internal_comparator_);
- mem_->Ref();
- force = false; // Do not force another compaction if there is room
- MaybeScheduleCompaction();
- }
- }
- return s;
-}
-
-bool DBImpl::GetProperty(const Slice& property, std::string* value) {
- value->clear();
-
- MutexLock l(&mutex_);
- Slice in = property;
- Slice prefix("leveldb.");
- if (!in.starts_with(prefix)) return false;
- in.remove_prefix(prefix.size());
-
- if (in.starts_with("num-files-at-level")) {
- in.remove_prefix(strlen("num-files-at-level"));
- uint64_t level;
- bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
- if (!ok || level >= config::kNumLevels) {
- return false;
- } else {
- char buf[100];
- snprintf(buf, sizeof(buf), "%d",
- versions_->NumLevelFiles(static_cast<int>(level)));
- *value = buf;
- return true;
- }
- } else if (in == "stats") {
- char buf[200];
- snprintf(buf, sizeof(buf),
- " Compactions\n"
- "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n"
- "--------------------------------------------------\n"
- );
- value->append(buf);
- for (int level = 0; level < config::kNumLevels; level++) {
- int files = versions_->NumLevelFiles(level);
- if (stats_[level].micros > 0 || files > 0) {
- snprintf(
- buf, sizeof(buf),
- "%3d %8d %8.0f %9.0f %8.0f %9.0f\n",
- level,
- files,
- versions_->NumLevelBytes(level) / 1048576.0,
- stats_[level].micros / 1e6,
- stats_[level].bytes_read / 1048576.0,
- stats_[level].bytes_written / 1048576.0);
- value->append(buf);
- }
- }
- return true;
- } else if (in == "sstables") {
- *value = versions_->current()->DebugString();
- return true;
- }
-
- return false;
-}
-
-void DBImpl::GetApproximateSizes(
- const Range* range, int n,
- uint64_t* sizes) {
- // TODO(opt): better implementation
- Version* v;
- {
- MutexLock l(&mutex_);
- versions_->current()->Ref();
- v = versions_->current();
- }
-
- for (int i = 0; i < n; i++) {
- // Convert user_key into a corresponding internal key.
- InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
- InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
- uint64_t start = versions_->ApproximateOffsetOf(v, k1);
- uint64_t limit = versions_->ApproximateOffsetOf(v, k2);
- sizes[i] = (limit >= start ? limit - start : 0);
- }
-
- {
- MutexLock l(&mutex_);
- v->Unref();
- }
-}
-
-// Default implementations of convenience methods that subclasses of DB
-// can call if they wish
-Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {
- WriteBatch batch;
- batch.Put(key, value);
- return Write(opt, &batch);
-}
-
-Status DB::Delete(const WriteOptions& opt, const Slice& key) {
- WriteBatch batch;
- batch.Delete(key);
- return Write(opt, &batch);
-}
-
-DB::~DB() { }
-
-Status DB::Open(const Options& options, const std::string& dbname,
- DB** dbptr) {
- *dbptr = NULL;
-
- DBImpl* impl = new DBImpl(options, dbname);
- impl->mutex_.Lock();
- VersionEdit edit;
- Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists
- if (s.ok()) {
- uint64_t new_log_number = impl->versions_->NewFileNumber();
- WritableFile* lfile;
- s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
- &lfile);
- if (s.ok()) {
- edit.SetLogNumber(new_log_number);
- impl->logfile_ = lfile;
- impl->logfile_number_ = new_log_number;
- impl->log_ = new log::Writer(lfile);
- s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
- }
- if (s.ok()) {
- impl->DeleteObsoleteFiles();
- impl->MaybeScheduleCompaction();
- }
- }
- impl->mutex_.Unlock();
- if (s.ok()) {
- *dbptr = impl;
- } else {
- delete impl;
- }
- return s;
-}
-
-Snapshot::~Snapshot() {
-}
-
-Status DestroyDB(const std::string& dbname, const Options& options) {
- Env* env = options.env;
- std::vector<std::string> filenames;
- // Ignore error in case directory does not exist
- env->GetChildren(dbname, &filenames);
- if (filenames.empty()) {
- return Status::OK();
- }
-
- FileLock* lock;
- const std::string lockname = LockFileName(dbname);
- Status result = env->LockFile(lockname, &lock);
- if (result.ok()) {
- uint64_t number;
- FileType type;
- for (size_t i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type) &&
- type != kDBLockFile) { // Lock file will be deleted at end
- Status del = env->DeleteFile(dbname + "/" + filenames[i]);
- if (result.ok() && !del.ok()) {
- result = del;
- }
- }
- }
- env->UnlockFile(lock); // Ignore error since state is already gone
- env->DeleteFile(lockname);
- env->DeleteDir(dbname); // Ignore error in case dir contains other files
- }
- return result;
-}
-
-} // namespace leveldb
diff --git a/src/leveldb/db/db_impl.h b/src/leveldb/db/db_impl.h
deleted file mode 100644
index bd29dd8055..0000000000
--- a/src/leveldb/db/db_impl.h
+++ /dev/null
@@ -1,202 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
-#define STORAGE_LEVELDB_DB_DB_IMPL_H_
-
-#include <deque>
-#include <set>
-#include "db/dbformat.h"
-#include "db/log_writer.h"
-#include "db/snapshot.h"
-#include "leveldb/db.h"
-#include "leveldb/env.h"
-#include "port/port.h"
-#include "port/thread_annotations.h"
-
-namespace leveldb {
-
-class MemTable;
-class TableCache;
-class Version;
-class VersionEdit;
-class VersionSet;
-
-class DBImpl : public DB {
- public:
- DBImpl(const Options& options, const std::string& dbname);
- virtual ~DBImpl();
-
- // Implementations of the DB interface
- virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
- virtual Status Delete(const WriteOptions&, const Slice& key);
- virtual Status Write(const WriteOptions& options, WriteBatch* updates);
- virtual Status Get(const ReadOptions& options,
- const Slice& key,
- std::string* value);
- virtual Iterator* NewIterator(const ReadOptions&);
- virtual const Snapshot* GetSnapshot();
- virtual void ReleaseSnapshot(const Snapshot* snapshot);
- virtual bool GetProperty(const Slice& property, std::string* value);
- virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
- virtual void CompactRange(const Slice* begin, const Slice* end);
-
- // Extra methods (for testing) that are not in the public DB interface
-
- // Compact any files in the named level that overlap [*begin,*end]
- void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
-
- // Force current memtable contents to be compacted.
- Status TEST_CompactMemTable();
-
- // Return an internal iterator over the current state of the database.
- // The keys of this iterator are internal keys (see format.h).
- // The returned iterator should be deleted when no longer needed.
- Iterator* TEST_NewInternalIterator();
-
- // Return the maximum overlapping data (in bytes) at next level for any
- // file at a level >= 1.
- int64_t TEST_MaxNextLevelOverlappingBytes();
-
- private:
- friend class DB;
- struct CompactionState;
- struct Writer;
-
- Iterator* NewInternalIterator(const ReadOptions&,
- SequenceNumber* latest_snapshot);
-
- Status NewDB();
-
- // Recover the descriptor from persistent storage. May do a significant
- // amount of work to recover recently logged updates. Any changes to
- // be made to the descriptor are added to *edit.
- Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- void MaybeIgnoreError(Status* s) const;
-
- // Delete any unneeded files and stale in-memory entries.
- void DeleteObsoleteFiles();
-
- // Compact the in-memory write buffer to disk. Switches to a new
- // log-file/memtable and writes a new descriptor iff successful.
- Status CompactMemTable()
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- Status RecoverLogFile(uint64_t log_number,
- VersionEdit* edit,
- SequenceNumber* max_sequence)
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- Status MakeRoomForWrite(bool force /* compact even if there is room? */)
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
-  // Merge the batches of the queued writers into a single write batch.
-  // REQUIRES: the writer list must be non-empty.
-  WriteBatch* BuildBatchGroup(Writer** last_writer);
-
- void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- static void BGWork(void* db);
- void BackgroundCall();
- Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- void CleanupCompaction(CompactionState* compact)
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- Status DoCompactionWork(CompactionState* compact)
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- Status OpenCompactionOutputFile(CompactionState* compact);
- Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
- Status InstallCompactionResults(CompactionState* compact)
- EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Constant after construction
- Env* const env_;
- const InternalKeyComparator internal_comparator_;
- const InternalFilterPolicy internal_filter_policy_;
- const Options options_; // options_.comparator == &internal_comparator_
- bool owns_info_log_;
- bool owns_cache_;
- const std::string dbname_;
-
- // table_cache_ provides its own synchronization
- TableCache* table_cache_;
-
- // Lock over the persistent DB state. Non-NULL iff successfully acquired.
- FileLock* db_lock_;
-
- // State below is protected by mutex_
- port::Mutex mutex_;
- port::AtomicPointer shutting_down_;
- port::CondVar bg_cv_; // Signalled when background work finishes
- MemTable* mem_;
- MemTable* imm_; // Memtable being compacted
- port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
- WritableFile* logfile_;
- uint64_t logfile_number_;
- log::Writer* log_;
-
- // Queue of writers.
- std::deque<Writer*> writers_;
- WriteBatch* tmp_batch_;
-
- SnapshotList snapshots_;
-
- // Set of table files to protect from deletion because they are
- // part of ongoing compactions.
- std::set<uint64_t> pending_outputs_;
-
- // Has a background compaction been scheduled or is running?
- bool bg_compaction_scheduled_;
-
- // Information for a manual compaction
- struct ManualCompaction {
- int level;
- bool done;
- const InternalKey* begin; // NULL means beginning of key range
- const InternalKey* end; // NULL means end of key range
- InternalKey tmp_storage; // Used to keep track of compaction progress
- };
- ManualCompaction* manual_compaction_;
-
- VersionSet* versions_;
-
- // Have we encountered a background error in paranoid mode?
- Status bg_error_;
-
- // Per level compaction stats. stats_[level] stores the stats for
- // compactions that produced data for the specified "level".
- struct CompactionStats {
- int64_t micros;
- int64_t bytes_read;
- int64_t bytes_written;
-
- CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { }
-
- void Add(const CompactionStats& c) {
- this->micros += c.micros;
- this->bytes_read += c.bytes_read;
- this->bytes_written += c.bytes_written;
- }
- };
- CompactionStats stats_[config::kNumLevels];
-
- // No copying allowed
- DBImpl(const DBImpl&);
- void operator=(const DBImpl&);
-
- const Comparator* user_comparator() const {
- return internal_comparator_.user_comparator();
- }
-};
-
-// Sanitize db options. The caller should delete result.info_log if
-// it is not equal to src.info_log.
-extern Options SanitizeOptions(const std::string& db,
- const InternalKeyComparator* icmp,
- const InternalFilterPolicy* ipolicy,
- const Options& src);
-
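-// A minimal sketch of that ownership rule at a call site (illustrative
-// only, not code from this tree):
-//
-//   Options sanitized = SanitizeOptions(dbname, &icmp, &ipolicy, src);
-//   ...
-//   if (sanitized.info_log != src.info_log) delete sanitized.info_log;
-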
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_DB_IMPL_H_
diff --git a/src/leveldb/db/db_iter.cc b/src/leveldb/db/db_iter.cc
deleted file mode 100644
index 87dca2ded4..0000000000
--- a/src/leveldb/db/db_iter.cc
+++ /dev/null
@@ -1,299 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/db_iter.h"
-
-#include "db/filename.h"
-#include "db/dbformat.h"
-#include "leveldb/env.h"
-#include "leveldb/iterator.h"
-#include "port/port.h"
-#include "util/logging.h"
-#include "util/mutexlock.h"
-
-namespace leveldb {
-
-#if 0
-static void DumpInternalIter(Iterator* iter) {
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- ParsedInternalKey k;
- if (!ParseInternalKey(iter->key(), &k)) {
- fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
- } else {
- fprintf(stderr, "@ '%s'\n", k.DebugString().c_str());
- }
- }
-}
-#endif
-
-namespace {
-
-// Memtables and sstables that make the DB representation contain
-// (userkey,seq,type) => uservalue entries. DBIter
-// combines multiple entries for the same userkey found in the DB
-// representation into a single entry while accounting for sequence
-// numbers, deletion markers, overwrites, etc.
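-//
-// A minimal illustration (hypothetical data, not taken from any test):
-// given the internal entries
-//   ("foo", seq=5, kTypeValue)    => "v2"
-//   ("foo", seq=3, kTypeDeletion)
-//   ("foo", seq=1, kTypeValue)    => "v1"
-// a DBIter created at sequence >= 5 yields the single entry foo => "v2",
-// while one created at sequence 3 or 4 yields nothing for "foo" because
-// the deletion marker hides the older value.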
-class DBIter: public Iterator {
- public:
- // Which direction is the iterator currently moving?
- // (1) When moving forward, the internal iterator is positioned at
- // the exact entry that yields this->key(), this->value()
- // (2) When moving backwards, the internal iterator is positioned
- // just before all entries whose user key == this->key().
- enum Direction {
- kForward,
- kReverse
- };
-
- DBIter(const std::string* dbname, Env* env,
- const Comparator* cmp, Iterator* iter, SequenceNumber s)
- : dbname_(dbname),
- env_(env),
- user_comparator_(cmp),
- iter_(iter),
- sequence_(s),
- direction_(kForward),
- valid_(false) {
- }
- virtual ~DBIter() {
- delete iter_;
- }
- virtual bool Valid() const { return valid_; }
- virtual Slice key() const {
- assert(valid_);
- return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_;
- }
- virtual Slice value() const {
- assert(valid_);
- return (direction_ == kForward) ? iter_->value() : saved_value_;
- }
- virtual Status status() const {
- if (status_.ok()) {
- return iter_->status();
- } else {
- return status_;
- }
- }
-
- virtual void Next();
- virtual void Prev();
- virtual void Seek(const Slice& target);
- virtual void SeekToFirst();
- virtual void SeekToLast();
-
- private:
- void FindNextUserEntry(bool skipping, std::string* skip);
- void FindPrevUserEntry();
- bool ParseKey(ParsedInternalKey* key);
-
- inline void SaveKey(const Slice& k, std::string* dst) {
- dst->assign(k.data(), k.size());
- }
-
- inline void ClearSavedValue() {
- if (saved_value_.capacity() > 1048576) {
- std::string empty;
- swap(empty, saved_value_);
- } else {
- saved_value_.clear();
- }
- }
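-  // Note: std::string::clear() keeps the allocated capacity, so once
-  // saved_value_ has grown past 1MB, ClearSavedValue() swaps it with an
-  // empty string to actually release the memory.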
-
- const std::string* const dbname_;
- Env* const env_;
- const Comparator* const user_comparator_;
- Iterator* const iter_;
- SequenceNumber const sequence_;
-
- Status status_;
- std::string saved_key_; // == current key when direction_==kReverse
- std::string saved_value_; // == current raw value when direction_==kReverse
- Direction direction_;
- bool valid_;
-
- // No copying allowed
- DBIter(const DBIter&);
- void operator=(const DBIter&);
-};
-
-inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
- if (!ParseInternalKey(iter_->key(), ikey)) {
- status_ = Status::Corruption("corrupted internal key in DBIter");
- return false;
- } else {
- return true;
- }
-}
-
-void DBIter::Next() {
- assert(valid_);
-
- if (direction_ == kReverse) { // Switch directions?
- direction_ = kForward;
- // iter_ is pointing just before the entries for this->key(),
- // so advance into the range of entries for this->key() and then
- // use the normal skipping code below.
- if (!iter_->Valid()) {
- iter_->SeekToFirst();
- } else {
- iter_->Next();
- }
- if (!iter_->Valid()) {
- valid_ = false;
- saved_key_.clear();
- return;
- }
- }
-
- // Temporarily use saved_key_ as storage for key to skip.
- std::string* skip = &saved_key_;
- SaveKey(ExtractUserKey(iter_->key()), skip);
- FindNextUserEntry(true, skip);
-}
-
-void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
- // Loop until we hit an acceptable entry to yield
- assert(iter_->Valid());
- assert(direction_ == kForward);
- do {
- ParsedInternalKey ikey;
- if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
- switch (ikey.type) {
- case kTypeDeletion:
- // Arrange to skip all upcoming entries for this key since
- // they are hidden by this deletion.
- SaveKey(ikey.user_key, skip);
- skipping = true;
- break;
- case kTypeValue:
- if (skipping &&
- user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
- // Entry hidden
- } else {
- valid_ = true;
- saved_key_.clear();
- return;
- }
- break;
- }
- }
- iter_->Next();
- } while (iter_->Valid());
- saved_key_.clear();
- valid_ = false;
-}
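-
-// Worked example of FindNextUserEntry's skipping (hypothetical entries):
-// with sequence_ == 10, scanning ("a",7,kTypeDeletion), ("a",5,kTypeValue),
-// ("b",6,kTypeValue) saves "a" into *skip at the deletion marker, hides
-// the older "a" value, and returns positioned at the "b" entry.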
-
-void DBIter::Prev() {
- assert(valid_);
-
- if (direction_ == kForward) { // Switch directions?
- // iter_ is pointing at the current entry. Scan backwards until
- // the key changes so we can use the normal reverse scanning code.
- assert(iter_->Valid()); // Otherwise valid_ would have been false
- SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
- while (true) {
- iter_->Prev();
- if (!iter_->Valid()) {
- valid_ = false;
- saved_key_.clear();
- ClearSavedValue();
- return;
- }
- if (user_comparator_->Compare(ExtractUserKey(iter_->key()),
- saved_key_) < 0) {
- break;
- }
- }
- direction_ = kReverse;
- }
-
- FindPrevUserEntry();
-}
-
-void DBIter::FindPrevUserEntry() {
- assert(direction_ == kReverse);
-
- ValueType value_type = kTypeDeletion;
- if (iter_->Valid()) {
- do {
- ParsedInternalKey ikey;
- if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
- if ((value_type != kTypeDeletion) &&
- user_comparator_->Compare(ikey.user_key, saved_key_) < 0) {
-          // We encountered a non-deleted value in entries for previous
-          // keys, so stop scanning here.
- break;
- }
- value_type = ikey.type;
- if (value_type == kTypeDeletion) {
- saved_key_.clear();
- ClearSavedValue();
- } else {
- Slice raw_value = iter_->value();
- if (saved_value_.capacity() > raw_value.size() + 1048576) {
- std::string empty;
- swap(empty, saved_value_);
- }
- SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
- saved_value_.assign(raw_value.data(), raw_value.size());
- }
- }
- iter_->Prev();
- } while (iter_->Valid());
- }
-
- if (value_type == kTypeDeletion) {
- // End
- valid_ = false;
- saved_key_.clear();
- ClearSavedValue();
- direction_ = kForward;
- } else {
- valid_ = true;
- }
-}
-
-void DBIter::Seek(const Slice& target) {
- direction_ = kForward;
- ClearSavedValue();
- saved_key_.clear();
- AppendInternalKey(
- &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
- iter_->Seek(saved_key_);
- if (iter_->Valid()) {
- FindNextUserEntry(false, &saved_key_ /* temporary storage */);
- } else {
- valid_ = false;
- }
-}
-
-void DBIter::SeekToFirst() {
- direction_ = kForward;
- ClearSavedValue();
- iter_->SeekToFirst();
- if (iter_->Valid()) {
- FindNextUserEntry(false, &saved_key_ /* temporary storage */);
- } else {
- valid_ = false;
- }
-}
-
-void DBIter::SeekToLast() {
- direction_ = kReverse;
- ClearSavedValue();
- iter_->SeekToLast();
- FindPrevUserEntry();
-}
-
-} // anonymous namespace
-
-Iterator* NewDBIterator(
- const std::string* dbname,
- Env* env,
- const Comparator* user_key_comparator,
- Iterator* internal_iter,
- const SequenceNumber& sequence) {
- return new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
-}
-
-} // namespace leveldb
diff --git a/src/leveldb/db/db_iter.h b/src/leveldb/db/db_iter.h
deleted file mode 100644
index d9e1b174ab..0000000000
--- a/src/leveldb/db/db_iter.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_DB_ITER_H_
-#define STORAGE_LEVELDB_DB_DB_ITER_H_
-
-#include <stdint.h>
-#include "leveldb/db.h"
-#include "db/dbformat.h"
-
-namespace leveldb {
-
-// Return a new iterator that converts internal keys (yielded by
-// "*internal_iter") that were live at the specified "sequence" number
-// into appropriate user keys.
-extern Iterator* NewDBIterator(
- const std::string* dbname,
- Env* env,
- const Comparator* user_key_comparator,
- Iterator* internal_iter,
- const SequenceNumber& sequence);
-
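-// A minimal usage sketch (illustrative; in-tree, the caller is
-// DBImpl::NewIterator in db_impl.cc):
-//
-//   SequenceNumber latest_snapshot;
-//   Iterator* internal = NewInternalIterator(options, &latest_snapshot);
-//   Iterator* it = NewDBIterator(&dbname_, env_, user_comparator(),
-//                                internal, latest_snapshot);
-//   ...
-//   delete it;  // also deletes the wrapped internal iterator
-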
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_DB_ITER_H_
diff --git a/src/leveldb/db/db_test.cc b/src/leveldb/db/db_test.cc
deleted file mode 100644
index 684ea3bdbc..0000000000
--- a/src/leveldb/db/db_test.cc
+++ /dev/null
@@ -1,2027 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "leveldb/db.h"
-#include "leveldb/filter_policy.h"
-#include "db/db_impl.h"
-#include "db/filename.h"
-#include "db/version_set.h"
-#include "db/write_batch_internal.h"
-#include "leveldb/cache.h"
-#include "leveldb/env.h"
-#include "leveldb/table.h"
-#include "util/hash.h"
-#include "util/logging.h"
-#include "util/mutexlock.h"
-#include "util/testharness.h"
-#include "util/testutil.h"
-
-namespace leveldb {
-
-static std::string RandomString(Random* rnd, int len) {
- std::string r;
- test::RandomString(rnd, len, &r);
- return r;
-}
-
-namespace {
-class AtomicCounter {
- private:
- port::Mutex mu_;
- int count_;
- public:
- AtomicCounter() : count_(0) { }
- void Increment() {
- MutexLock l(&mu_);
- count_++;
- }
- int Read() {
- MutexLock l(&mu_);
- return count_;
- }
- void Reset() {
- MutexLock l(&mu_);
- count_ = 0;
- }
-};
-}
-
-// Special Env used to delay background operations
-class SpecialEnv : public EnvWrapper {
- public:
- // sstable Sync() calls are blocked while this pointer is non-NULL.
- port::AtomicPointer delay_sstable_sync_;
-
- // Simulate no-space errors while this pointer is non-NULL.
- port::AtomicPointer no_space_;
-
-  // Simulate non-writable file system while this pointer is non-NULL.
-  port::AtomicPointer non_writable_;
-
-  // Force sync of manifest files to fail while this pointer is non-NULL.
-  port::AtomicPointer manifest_sync_error_;
-
-  // Force write to manifest files to fail while this pointer is non-NULL.
- port::AtomicPointer manifest_write_error_;
-
- bool count_random_reads_;
- AtomicCounter random_read_counter_;
-
- AtomicCounter sleep_counter_;
-
- explicit SpecialEnv(Env* base) : EnvWrapper(base) {
- delay_sstable_sync_.Release_Store(NULL);
- no_space_.Release_Store(NULL);
- non_writable_.Release_Store(NULL);
- count_random_reads_ = false;
- manifest_sync_error_.Release_Store(NULL);
- manifest_write_error_.Release_Store(NULL);
- }
-
- Status NewWritableFile(const std::string& f, WritableFile** r) {
- class SSTableFile : public WritableFile {
- private:
- SpecialEnv* env_;
- WritableFile* base_;
-
- public:
- SSTableFile(SpecialEnv* env, WritableFile* base)
- : env_(env),
- base_(base) {
- }
- ~SSTableFile() { delete base_; }
- Status Append(const Slice& data) {
- if (env_->no_space_.Acquire_Load() != NULL) {
- // Drop writes on the floor
- return Status::OK();
- } else {
- return base_->Append(data);
- }
- }
- Status Close() { return base_->Close(); }
- Status Flush() { return base_->Flush(); }
- Status Sync() {
- while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
- env_->SleepForMicroseconds(100000);
- }
- return base_->Sync();
- }
- };
- class ManifestFile : public WritableFile {
- private:
- SpecialEnv* env_;
- WritableFile* base_;
- public:
- ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) { }
- ~ManifestFile() { delete base_; }
- Status Append(const Slice& data) {
- if (env_->manifest_write_error_.Acquire_Load() != NULL) {
- return Status::IOError("simulated writer error");
- } else {
- return base_->Append(data);
- }
- }
- Status Close() { return base_->Close(); }
- Status Flush() { return base_->Flush(); }
- Status Sync() {
- if (env_->manifest_sync_error_.Acquire_Load() != NULL) {
- return Status::IOError("simulated sync error");
- } else {
- return base_->Sync();
- }
- }
- };
-
- if (non_writable_.Acquire_Load() != NULL) {
- return Status::IOError("simulated write error");
- }
-
- Status s = target()->NewWritableFile(f, r);
- if (s.ok()) {
- if (strstr(f.c_str(), ".sst") != NULL) {
- *r = new SSTableFile(this, *r);
- } else if (strstr(f.c_str(), "MANIFEST") != NULL) {
- *r = new ManifestFile(this, *r);
- }
- }
- return s;
- }
-
- Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
- class CountingFile : public RandomAccessFile {
- private:
- RandomAccessFile* target_;
- AtomicCounter* counter_;
- public:
- CountingFile(RandomAccessFile* target, AtomicCounter* counter)
- : target_(target), counter_(counter) {
- }
- virtual ~CountingFile() { delete target_; }
- virtual Status Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
- counter_->Increment();
- return target_->Read(offset, n, result, scratch);
- }
- };
-
- Status s = target()->NewRandomAccessFile(f, r);
- if (s.ok() && count_random_reads_) {
- *r = new CountingFile(*r, &random_read_counter_);
- }
- return s;
- }
-
- virtual void SleepForMicroseconds(int micros) {
- sleep_counter_.Increment();
- target()->SleepForMicroseconds(micros);
- }
-};
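-
-// The tests below drive SpecialEnv by flipping its AtomicPointer "flags"
-// around the operation under test, e.g. (illustrative):
-//
-//   env_->no_space_.Release_Store(env_);   // non-NULL => simulate ENOSPC
-//   ... trigger compactions ...
-//   env_->no_space_.Release_Store(NULL);   // restore normal behavior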
-
-class DBTest {
- private:
- const FilterPolicy* filter_policy_;
-
- // Sequence of option configurations to try
- enum OptionConfig {
- kDefault,
- kFilter,
- kUncompressed,
- kEnd
- };
- int option_config_;
-
- public:
- std::string dbname_;
- SpecialEnv* env_;
- DB* db_;
-
- Options last_options_;
-
- DBTest() : option_config_(kDefault),
- env_(new SpecialEnv(Env::Default())) {
- filter_policy_ = NewBloomFilterPolicy(10);
- dbname_ = test::TmpDir() + "/db_test";
- DestroyDB(dbname_, Options());
- db_ = NULL;
- Reopen();
- }
-
- ~DBTest() {
- delete db_;
- DestroyDB(dbname_, Options());
- delete env_;
- delete filter_policy_;
- }
-
- // Switch to a fresh database with the next option configuration to
- // test. Return false if there are no more configurations to test.
- bool ChangeOptions() {
- option_config_++;
- if (option_config_ >= kEnd) {
- return false;
- } else {
- DestroyAndReopen();
- return true;
- }
- }
-
- // Return the current option configuration.
- Options CurrentOptions() {
- Options options;
- switch (option_config_) {
- case kFilter:
- options.filter_policy = filter_policy_;
- break;
- case kUncompressed:
- options.compression = kNoCompression;
- break;
- default:
- break;
- }
- return options;
- }
-
- DBImpl* dbfull() {
- return reinterpret_cast<DBImpl*>(db_);
- }
-
- void Reopen(Options* options = NULL) {
- ASSERT_OK(TryReopen(options));
- }
-
- void Close() {
- delete db_;
- db_ = NULL;
- }
-
- void DestroyAndReopen(Options* options = NULL) {
- delete db_;
- db_ = NULL;
- DestroyDB(dbname_, Options());
- ASSERT_OK(TryReopen(options));
- }
-
- Status TryReopen(Options* options) {
- delete db_;
- db_ = NULL;
- Options opts;
- if (options != NULL) {
- opts = *options;
- } else {
- opts = CurrentOptions();
- opts.create_if_missing = true;
- }
- last_options_ = opts;
-
- return DB::Open(opts, dbname_, &db_);
- }
-
- Status Put(const std::string& k, const std::string& v) {
- return db_->Put(WriteOptions(), k, v);
- }
-
- Status Delete(const std::string& k) {
- return db_->Delete(WriteOptions(), k);
- }
-
- std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
- ReadOptions options;
- options.snapshot = snapshot;
- std::string result;
- Status s = db_->Get(options, k, &result);
- if (s.IsNotFound()) {
- result = "NOT_FOUND";
- } else if (!s.ok()) {
- result = s.ToString();
- }
- return result;
- }
-
- // Return a string that contains all key,value pairs in order,
- // formatted like "(k1->v1)(k2->v2)".
- std::string Contents() {
- std::vector<std::string> forward;
- std::string result;
- Iterator* iter = db_->NewIterator(ReadOptions());
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- std::string s = IterStatus(iter);
- result.push_back('(');
- result.append(s);
- result.push_back(')');
- forward.push_back(s);
- }
-
- // Check reverse iteration results are the reverse of forward results
- int matched = 0;
- for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
- ASSERT_LT(matched, forward.size());
- ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
- matched++;
- }
- ASSERT_EQ(matched, forward.size());
-
- delete iter;
- return result;
- }
-
- std::string AllEntriesFor(const Slice& user_key) {
- Iterator* iter = dbfull()->TEST_NewInternalIterator();
- InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
- iter->Seek(target.Encode());
- std::string result;
- if (!iter->status().ok()) {
- result = iter->status().ToString();
- } else {
- result = "[ ";
- bool first = true;
- while (iter->Valid()) {
- ParsedInternalKey ikey;
- if (!ParseInternalKey(iter->key(), &ikey)) {
- result += "CORRUPTED";
- } else {
- if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
- break;
- }
- if (!first) {
- result += ", ";
- }
- first = false;
- switch (ikey.type) {
- case kTypeValue:
- result += iter->value().ToString();
- break;
- case kTypeDeletion:
- result += "DEL";
- break;
- }
- }
- iter->Next();
- }
- if (!first) {
- result += " ";
- }
- result += "]";
- }
- delete iter;
- return result;
- }
-
- int NumTableFilesAtLevel(int level) {
- std::string property;
- ASSERT_TRUE(
- db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
- &property));
- return atoi(property.c_str());
- }
-
- int TotalTableFiles() {
- int result = 0;
- for (int level = 0; level < config::kNumLevels; level++) {
- result += NumTableFilesAtLevel(level);
- }
- return result;
- }
-
-  // Return the spread of files per level as a comma-separated string,
-  // e.g. "2,1,1" means two files at level-0 and one each at levels 1
-  // and 2. Trailing levels with zero files are omitted.
- std::string FilesPerLevel() {
- std::string result;
- int last_non_zero_offset = 0;
- for (int level = 0; level < config::kNumLevels; level++) {
- int f = NumTableFilesAtLevel(level);
- char buf[100];
- snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
- result += buf;
- if (f > 0) {
- last_non_zero_offset = result.size();
- }
- }
- result.resize(last_non_zero_offset);
- return result;
- }
-
- int CountFiles() {
- std::vector<std::string> files;
- env_->GetChildren(dbname_, &files);
- return static_cast<int>(files.size());
- }
-
- uint64_t Size(const Slice& start, const Slice& limit) {
- Range r(start, limit);
- uint64_t size;
- db_->GetApproximateSizes(&r, 1, &size);
- return size;
- }
-
- void Compact(const Slice& start, const Slice& limit) {
- db_->CompactRange(&start, &limit);
- }
-
- // Do n memtable compactions, each of which produces an sstable
- // covering the range [small,large].
- void MakeTables(int n, const std::string& small, const std::string& large) {
- for (int i = 0; i < n; i++) {
- Put(small, "begin");
- Put(large, "end");
- dbfull()->TEST_CompactMemTable();
- }
- }
-
- // Prevent pushing of new sstables into deeper levels by adding
- // tables that cover a specified range to all levels.
- void FillLevels(const std::string& smallest, const std::string& largest) {
- MakeTables(config::kNumLevels, smallest, largest);
- }
-
- void DumpFileCounts(const char* label) {
- fprintf(stderr, "---\n%s:\n", label);
- fprintf(stderr, "maxoverlap: %lld\n",
- static_cast<long long>(
- dbfull()->TEST_MaxNextLevelOverlappingBytes()));
- for (int level = 0; level < config::kNumLevels; level++) {
- int num = NumTableFilesAtLevel(level);
- if (num > 0) {
- fprintf(stderr, " level %3d : %d files\n", level, num);
- }
- }
- }
-
- std::string DumpSSTableList() {
- std::string property;
- db_->GetProperty("leveldb.sstables", &property);
- return property;
- }
-
- std::string IterStatus(Iterator* iter) {
- std::string result;
- if (iter->Valid()) {
- result = iter->key().ToString() + "->" + iter->value().ToString();
- } else {
- result = "(invalid)";
- }
- return result;
- }
-};
-
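-// Most tests below wrap their body in "do { ... } while (ChangeOptions())"
-// so that each test runs once per option configuration: kDefault, kFilter
-// (bloom filters enabled), and kUncompressed.
-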
-TEST(DBTest, Empty) {
- do {
- ASSERT_TRUE(db_ != NULL);
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, ReadWrite) {
- do {
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_OK(Put("bar", "v2"));
- ASSERT_OK(Put("foo", "v3"));
- ASSERT_EQ("v3", Get("foo"));
- ASSERT_EQ("v2", Get("bar"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, PutDeleteGet) {
- do {
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
- ASSERT_EQ("v2", Get("foo"));
- ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetFromImmutableLayer) {
- do {
- Options options = CurrentOptions();
- options.env = env_;
- options.write_buffer_size = 100000; // Small write buffer
- Reopen(&options);
-
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_EQ("v1", Get("foo"));
-
- env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
- Put("k1", std::string(100000, 'x')); // Fill memtable
- Put("k2", std::string(100000, 'y')); // Trigger compaction
- ASSERT_EQ("v1", Get("foo"));
- env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetFromVersions) {
- do {
- ASSERT_OK(Put("foo", "v1"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("v1", Get("foo"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetSnapshot) {
- do {
- // Try with both a short key and a long key
- for (int i = 0; i < 2; i++) {
- std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
- ASSERT_OK(Put(key, "v1"));
- const Snapshot* s1 = db_->GetSnapshot();
- ASSERT_OK(Put(key, "v2"));
- ASSERT_EQ("v2", Get(key));
- ASSERT_EQ("v1", Get(key, s1));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("v2", Get(key));
- ASSERT_EQ("v1", Get(key, s1));
- db_->ReleaseSnapshot(s1);
- }
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetLevel0Ordering) {
- do {
- // Check that we process level-0 files in correct order. The code
- // below generates two level-0 files where the earlier one comes
- // before the later one in the level-0 file list since the earlier
- // one has a smaller "smallest" key.
- ASSERT_OK(Put("bar", "b"));
- ASSERT_OK(Put("foo", "v1"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_OK(Put("foo", "v2"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("v2", Get("foo"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetOrderedByLevels) {
- do {
- ASSERT_OK(Put("foo", "v1"));
- Compact("a", "z");
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_EQ("v2", Get("foo"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("v2", Get("foo"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetPicksCorrectFile) {
- do {
- // Arrange to have multiple files in a non-level-0 level.
- ASSERT_OK(Put("a", "va"));
- Compact("a", "b");
- ASSERT_OK(Put("x", "vx"));
- Compact("x", "y");
- ASSERT_OK(Put("f", "vf"));
- Compact("f", "g");
- ASSERT_EQ("va", Get("a"));
- ASSERT_EQ("vf", Get("f"));
- ASSERT_EQ("vx", Get("x"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, GetEncountersEmptyLevel) {
- do {
- // Arrange for the following to happen:
- // * sstable A in level 0
- // * nothing in level 1
- // * sstable B in level 2
- // Then do enough Get() calls to arrange for an automatic compaction
- // of sstable A. A bug would cause the compaction to be marked as
-    // occurring at level 1 (instead of the correct level 0).
-
- // Step 1: First place sstables in levels 0 and 2
- int compaction_count = 0;
- while (NumTableFilesAtLevel(0) == 0 ||
- NumTableFilesAtLevel(2) == 0) {
- ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
- compaction_count++;
- Put("a", "begin");
- Put("z", "end");
- dbfull()->TEST_CompactMemTable();
- }
-
- // Step 2: clear level 1 if necessary.
- dbfull()->TEST_CompactRange(1, NULL, NULL);
- ASSERT_EQ(NumTableFilesAtLevel(0), 1);
- ASSERT_EQ(NumTableFilesAtLevel(1), 0);
- ASSERT_EQ(NumTableFilesAtLevel(2), 1);
-
- // Step 3: read a bunch of times
- for (int i = 0; i < 1000; i++) {
- ASSERT_EQ("NOT_FOUND", Get("missing"));
- }
-
- // Step 4: Wait for compaction to finish
- env_->SleepForMicroseconds(1000000);
-
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- } while (ChangeOptions());
-}
-
-TEST(DBTest, IterEmpty) {
- Iterator* iter = db_->NewIterator(ReadOptions());
-
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->Seek("foo");
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- delete iter;
-}
-
-TEST(DBTest, IterSingle) {
- ASSERT_OK(Put("a", "va"));
- Iterator* iter = db_->NewIterator(ReadOptions());
-
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->Seek("");
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->Seek("a");
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->Seek("b");
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- delete iter;
-}
-
-TEST(DBTest, IterMulti) {
- ASSERT_OK(Put("a", "va"));
- ASSERT_OK(Put("b", "vb"));
- ASSERT_OK(Put("c", "vc"));
- Iterator* iter = db_->NewIterator(ReadOptions());
-
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "b->vb");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "b->vb");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->Seek("");
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Seek("a");
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Seek("ax");
- ASSERT_EQ(IterStatus(iter), "b->vb");
- iter->Seek("b");
- ASSERT_EQ(IterStatus(iter), "b->vb");
- iter->Seek("z");
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- // Switch from reverse to forward
- iter->SeekToLast();
- iter->Prev();
- iter->Prev();
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "b->vb");
-
- // Switch from forward to reverse
- iter->SeekToFirst();
- iter->Next();
- iter->Next();
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "b->vb");
-
- // Make sure iter stays at snapshot
- ASSERT_OK(Put("a", "va2"));
- ASSERT_OK(Put("a2", "va3"));
- ASSERT_OK(Put("b", "vb2"));
- ASSERT_OK(Put("c", "vc2"));
- ASSERT_OK(Delete("b"));
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "b->vb");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "b->vb");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- delete iter;
-}
-
-TEST(DBTest, IterSmallAndLargeMix) {
- ASSERT_OK(Put("a", "va"));
- ASSERT_OK(Put("b", std::string(100000, 'b')));
- ASSERT_OK(Put("c", "vc"));
- ASSERT_OK(Put("d", std::string(100000, 'd')));
- ASSERT_OK(Put("e", std::string(100000, 'e')));
-
- Iterator* iter = db_->NewIterator(ReadOptions());
-
- iter->SeekToFirst();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
- iter->Next();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- iter->SeekToLast();
- ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "a->va");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "(invalid)");
-
- delete iter;
-}
-
-TEST(DBTest, IterMultiWithDelete) {
- do {
- ASSERT_OK(Put("a", "va"));
- ASSERT_OK(Put("b", "vb"));
- ASSERT_OK(Put("c", "vc"));
- ASSERT_OK(Delete("b"));
- ASSERT_EQ("NOT_FOUND", Get("b"));
-
- Iterator* iter = db_->NewIterator(ReadOptions());
- iter->Seek("c");
- ASSERT_EQ(IterStatus(iter), "c->vc");
- iter->Prev();
- ASSERT_EQ(IterStatus(iter), "a->va");
- delete iter;
- } while (ChangeOptions());
-}
-
-TEST(DBTest, Recover) {
- do {
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_OK(Put("baz", "v5"));
-
- Reopen();
- ASSERT_EQ("v1", Get("foo"));
-
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_EQ("v5", Get("baz"));
- ASSERT_OK(Put("bar", "v2"));
- ASSERT_OK(Put("foo", "v3"));
-
- Reopen();
- ASSERT_EQ("v3", Get("foo"));
- ASSERT_OK(Put("foo", "v4"));
- ASSERT_EQ("v4", Get("foo"));
- ASSERT_EQ("v2", Get("bar"));
- ASSERT_EQ("v5", Get("baz"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, RecoveryWithEmptyLog) {
- do {
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_OK(Put("foo", "v2"));
- Reopen();
- Reopen();
- ASSERT_OK(Put("foo", "v3"));
- Reopen();
- ASSERT_EQ("v3", Get("foo"));
- } while (ChangeOptions());
-}
-
-// Check that writes done during a memtable compaction are recovered
-// if the database is shut down during the memtable compaction.
-TEST(DBTest, RecoverDuringMemtableCompaction) {
- do {
- Options options = CurrentOptions();
- options.env = env_;
- options.write_buffer_size = 1000000;
- Reopen(&options);
-
- // Trigger a long memtable compaction and reopen the database during it
- ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
- ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
- ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
- ASSERT_OK(Put("bar", "v2")); // Goes to new log file
-
- Reopen(&options);
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_EQ("v2", Get("bar"));
- ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
- ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
- } while (ChangeOptions());
-}
-
-static std::string Key(int i) {
- char buf[100];
- snprintf(buf, sizeof(buf), "key%06d", i);
- return std::string(buf);
-}
-
-TEST(DBTest, MinorCompactionsHappen) {
- Options options = CurrentOptions();
- options.write_buffer_size = 10000;
- Reopen(&options);
-
- const int N = 500;
-
- int starting_num_tables = TotalTableFiles();
- for (int i = 0; i < N; i++) {
- ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
- }
- int ending_num_tables = TotalTableFiles();
- ASSERT_GT(ending_num_tables, starting_num_tables);
-
- for (int i = 0; i < N; i++) {
- ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
- }
-
- Reopen();
-
- for (int i = 0; i < N; i++) {
- ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
- }
-}
-
-TEST(DBTest, RecoverWithLargeLog) {
- {
- Options options = CurrentOptions();
- Reopen(&options);
- ASSERT_OK(Put("big1", std::string(200000, '1')));
- ASSERT_OK(Put("big2", std::string(200000, '2')));
- ASSERT_OK(Put("small3", std::string(10, '3')));
- ASSERT_OK(Put("small4", std::string(10, '4')));
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- }
-
-  // Make sure that if we re-open with a small write buffer size,
-  // we flush table files in the middle of a large log file.
- Options options = CurrentOptions();
- options.write_buffer_size = 100000;
- Reopen(&options);
- ASSERT_EQ(NumTableFilesAtLevel(0), 3);
- ASSERT_EQ(std::string(200000, '1'), Get("big1"));
- ASSERT_EQ(std::string(200000, '2'), Get("big2"));
- ASSERT_EQ(std::string(10, '3'), Get("small3"));
- ASSERT_EQ(std::string(10, '4'), Get("small4"));
- ASSERT_GT(NumTableFilesAtLevel(0), 1);
-}
-
-TEST(DBTest, CompactionsGenerateMultipleFiles) {
- Options options = CurrentOptions();
- options.write_buffer_size = 100000000; // Large write buffer
- Reopen(&options);
-
- Random rnd(301);
-
- // Write 8MB (80 values, each 100K)
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- std::vector<std::string> values;
- for (int i = 0; i < 80; i++) {
- values.push_back(RandomString(&rnd, 100000));
- ASSERT_OK(Put(Key(i), values[i]));
- }
-
- // Reopening moves updates to level-0
- Reopen(&options);
- dbfull()->TEST_CompactRange(0, NULL, NULL);
-
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- ASSERT_GT(NumTableFilesAtLevel(1), 1);
- for (int i = 0; i < 80; i++) {
- ASSERT_EQ(Get(Key(i)), values[i]);
- }
-}
-
-TEST(DBTest, RepeatedWritesToSameKey) {
- Options options = CurrentOptions();
- options.env = env_;
- options.write_buffer_size = 100000; // Small write buffer
- Reopen(&options);
-
- // We must have at most one file per level except for level-0,
- // which may have up to kL0_StopWritesTrigger files.
- const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
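-  // (With the default config::kNumLevels == 7 and
-  // config::kL0_StopWritesTrigger == 12, kMaxFiles is 19.)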
-
- Random rnd(301);
- std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
- for (int i = 0; i < 5 * kMaxFiles; i++) {
- Put("key", value);
- ASSERT_LE(TotalTableFiles(), kMaxFiles);
- fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
- }
-}
-
-TEST(DBTest, SparseMerge) {
- Options options = CurrentOptions();
- options.compression = kNoCompression;
- Reopen(&options);
-
- FillLevels("A", "Z");
-
- // Suppose there is:
- // small amount of data with prefix A
- // large amount of data with prefix B
- // small amount of data with prefix C
- // and that recent updates have made small changes to all three prefixes.
- // Check that we do not do a compaction that merges all of B in one shot.
- const std::string value(1000, 'x');
- Put("A", "va");
- // Write approximately 100MB of "B" values
- for (int i = 0; i < 100000; i++) {
- char key[100];
- snprintf(key, sizeof(key), "B%010d", i);
- Put(key, value);
- }
- Put("C", "vc");
- dbfull()->TEST_CompactMemTable();
- dbfull()->TEST_CompactRange(0, NULL, NULL);
-
- // Make sparse update
- Put("A", "va2");
- Put("B100", "bvalue2");
- Put("C", "vc2");
- dbfull()->TEST_CompactMemTable();
-
- // Compactions should not cause us to create a situation where
- // a file overlaps too much data at the next level.
- ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
- dbfull()->TEST_CompactRange(0, NULL, NULL);
- ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
- dbfull()->TEST_CompactRange(1, NULL, NULL);
- ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
-}
-
-static bool Between(uint64_t val, uint64_t low, uint64_t high) {
- bool result = (val >= low) && (val <= high);
- if (!result) {
- fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
- (unsigned long long)(val),
- (unsigned long long)(low),
- (unsigned long long)(high));
- }
- return result;
-}
-
-TEST(DBTest, ApproximateSizes) {
- do {
- Options options = CurrentOptions();
- options.write_buffer_size = 100000000; // Large write buffer
- options.compression = kNoCompression;
- DestroyAndReopen();
-
- ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
- Reopen(&options);
- ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
-
- // Write 8MB (80 values, each 100K)
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- const int N = 80;
- static const int S1 = 100000;
- static const int S2 = 105000; // Allow some expansion from metadata
- Random rnd(301);
- for (int i = 0; i < N; i++) {
- ASSERT_OK(Put(Key(i), RandomString(&rnd, S1)));
- }
-
- // 0 because GetApproximateSizes() does not account for memtable space
- ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
-
- // Check sizes across recovery by reopening a few times
- for (int run = 0; run < 3; run++) {
- Reopen(&options);
-
- for (int compact_start = 0; compact_start < N; compact_start += 10) {
- for (int i = 0; i < N; i += 10) {
- ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i));
- ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1)));
- ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10));
- }
- ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50));
- ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50));
-
- std::string cstart_str = Key(compact_start);
- std::string cend_str = Key(compact_start + 9);
- Slice cstart = cstart_str;
- Slice cend = cend_str;
- dbfull()->TEST_CompactRange(0, &cstart, &cend);
- }
-
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- ASSERT_GT(NumTableFilesAtLevel(1), 0);
- }
- } while (ChangeOptions());
-}
-
-TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
- do {
- Options options = CurrentOptions();
- options.compression = kNoCompression;
- Reopen();
-
- Random rnd(301);
- std::string big1 = RandomString(&rnd, 100000);
- ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
- ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
- ASSERT_OK(Put(Key(2), big1));
- ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
- ASSERT_OK(Put(Key(4), big1));
- ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
- ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
- ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
-
- // Check sizes across recovery by reopening a few times
- for (int run = 0; run < 3; run++) {
- Reopen(&options);
-
- ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
- ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
- ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
- ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
- ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
- ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
- ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
- ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
- ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));
-
- ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
-
- dbfull()->TEST_CompactRange(0, NULL, NULL);
- }
- } while (ChangeOptions());
-}
-
-TEST(DBTest, IteratorPinsRef) {
- Put("foo", "hello");
-
- // Get iterator that will yield the current contents of the DB.
- Iterator* iter = db_->NewIterator(ReadOptions());
-
- // Write to force compactions
- Put("foo", "newvalue1");
- for (int i = 0; i < 100; i++) {
- ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
- }
- Put("foo", "newvalue2");
-
- iter->SeekToFirst();
- ASSERT_TRUE(iter->Valid());
- ASSERT_EQ("foo", iter->key().ToString());
- ASSERT_EQ("hello", iter->value().ToString());
- iter->Next();
- ASSERT_TRUE(!iter->Valid());
- delete iter;
-}
-
-TEST(DBTest, Snapshot) {
- do {
- Put("foo", "v1");
- const Snapshot* s1 = db_->GetSnapshot();
- Put("foo", "v2");
- const Snapshot* s2 = db_->GetSnapshot();
- Put("foo", "v3");
- const Snapshot* s3 = db_->GetSnapshot();
-
- Put("foo", "v4");
- ASSERT_EQ("v1", Get("foo", s1));
- ASSERT_EQ("v2", Get("foo", s2));
- ASSERT_EQ("v3", Get("foo", s3));
- ASSERT_EQ("v4", Get("foo"));
-
- db_->ReleaseSnapshot(s3);
- ASSERT_EQ("v1", Get("foo", s1));
- ASSERT_EQ("v2", Get("foo", s2));
- ASSERT_EQ("v4", Get("foo"));
-
- db_->ReleaseSnapshot(s1);
- ASSERT_EQ("v2", Get("foo", s2));
- ASSERT_EQ("v4", Get("foo"));
-
- db_->ReleaseSnapshot(s2);
- ASSERT_EQ("v4", Get("foo"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, HiddenValuesAreRemoved) {
- do {
- Random rnd(301);
- FillLevels("a", "z");
-
- std::string big = RandomString(&rnd, 50000);
- Put("foo", big);
- Put("pastfoo", "v");
- const Snapshot* snapshot = db_->GetSnapshot();
- Put("foo", "tiny");
- Put("pastfoo2", "v2"); // Advance sequence number one more
-
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
- ASSERT_GT(NumTableFilesAtLevel(0), 0);
-
- ASSERT_EQ(big, Get("foo", snapshot));
- ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
- db_->ReleaseSnapshot(snapshot);
- ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
- Slice x("x");
- dbfull()->TEST_CompactRange(0, NULL, &x);
- ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
- ASSERT_EQ(NumTableFilesAtLevel(0), 0);
- ASSERT_GE(NumTableFilesAtLevel(1), 1);
- dbfull()->TEST_CompactRange(1, NULL, &x);
- ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
-
- ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, DeletionMarkers1) {
- Put("foo", "v1");
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
- const int last = config::kMaxMemCompactLevel;
- ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
-
- // Place a table at level last-1 to prevent merging with preceding mutation
- Put("a", "begin");
- Put("z", "end");
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ(NumTableFilesAtLevel(last), 1);
- ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
-
- Delete("foo");
- Put("foo", "v2");
- ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
- ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
- ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
- Slice z("z");
- dbfull()->TEST_CompactRange(last-2, NULL, &z);
- // DEL eliminated, but v1 remains because we aren't compacting that level
- // (DEL can be eliminated because v2 hides v1).
- ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
- dbfull()->TEST_CompactRange(last-1, NULL, NULL);
-  // Merging last-1 with last makes this the base level for "foo",
-  // so the DEL marker is removed (as is v1).
- ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
-}
-
-TEST(DBTest, DeletionMarkers2) {
- Put("foo", "v1");
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
- const int last = config::kMaxMemCompactLevel;
- ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
-
- // Place a table at level last-1 to prevent merging with preceding mutation
- Put("a", "begin");
- Put("z", "end");
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ(NumTableFilesAtLevel(last), 1);
- ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
-
- Delete("foo");
- ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
- ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
- ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
- dbfull()->TEST_CompactRange(last-2, NULL, NULL);
- // DEL kept: "last" file overlaps
- ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
- dbfull()->TEST_CompactRange(last-1, NULL, NULL);
-  // Merging last-1 with last makes this the base level for "foo",
-  // so the DEL marker is removed (as is v1).
- ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
-}
-
-TEST(DBTest, OverlapInLevel0) {
- do {
- ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
-
- // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
- ASSERT_OK(Put("100", "v100"));
- ASSERT_OK(Put("999", "v999"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_OK(Delete("100"));
- ASSERT_OK(Delete("999"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("0,1,1", FilesPerLevel());
-
- // Make files spanning the following ranges in level-0:
- // files[0] 200 .. 900
- // files[1] 300 .. 500
- // Note that files are sorted by smallest key.
- ASSERT_OK(Put("300", "v300"));
- ASSERT_OK(Put("500", "v500"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_OK(Put("200", "v200"));
- ASSERT_OK(Put("600", "v600"));
- ASSERT_OK(Put("900", "v900"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("2,1,1", FilesPerLevel());
-
- // Compact away the placeholder files we created initially
- dbfull()->TEST_CompactRange(1, NULL, NULL);
- dbfull()->TEST_CompactRange(2, NULL, NULL);
- ASSERT_EQ("2", FilesPerLevel());
-
- // Do a memtable compaction. Before bug-fix, the compaction would
- // not detect the overlap with level-0 files and would incorrectly place
- // the deletion in a deeper level.
- ASSERT_OK(Delete("600"));
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("3", FilesPerLevel());
- ASSERT_EQ("NOT_FOUND", Get("600"));
- } while (ChangeOptions());
-}
-
-TEST(DBTest, L0_CompactionBug_Issue44_a) {
- Reopen();
- ASSERT_OK(Put("b", "v"));
- Reopen();
- ASSERT_OK(Delete("b"));
- ASSERT_OK(Delete("a"));
- Reopen();
- ASSERT_OK(Delete("a"));
- Reopen();
- ASSERT_OK(Put("a", "v"));
- Reopen();
- Reopen();
- ASSERT_EQ("(a->v)", Contents());
- env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
- ASSERT_EQ("(a->v)", Contents());
-}
-
-TEST(DBTest, L0_CompactionBug_Issue44_b) {
- Reopen();
- Put("","");
- Reopen();
- Delete("e");
- Put("","");
- Reopen();
- Put("c", "cv");
- Reopen();
- Put("","");
- Reopen();
- Put("","");
- env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
- Reopen();
- Put("d","dv");
- Reopen();
- Put("","");
- Reopen();
- Delete("d");
- Delete("b");
- Reopen();
- ASSERT_EQ("(->)(c->cv)", Contents());
- env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
- ASSERT_EQ("(->)(c->cv)", Contents());
-}
-
-TEST(DBTest, ComparatorCheck) {
- class NewComparator : public Comparator {
- public:
- virtual const char* Name() const { return "leveldb.NewComparator"; }
- virtual int Compare(const Slice& a, const Slice& b) const {
- return BytewiseComparator()->Compare(a, b);
- }
- virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
- BytewiseComparator()->FindShortestSeparator(s, l);
- }
- virtual void FindShortSuccessor(std::string* key) const {
- BytewiseComparator()->FindShortSuccessor(key);
- }
- };
- NewComparator cmp;
- Options new_options = CurrentOptions();
- new_options.comparator = &cmp;
- Status s = TryReopen(&new_options);
- ASSERT_TRUE(!s.ok());
- ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
- << s.ToString();
-}
-
-TEST(DBTest, CustomComparator) {
- class NumberComparator : public Comparator {
- public:
- virtual const char* Name() const { return "test.NumberComparator"; }
- virtual int Compare(const Slice& a, const Slice& b) const {
- return ToNumber(a) - ToNumber(b);
- }
- virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
- ToNumber(*s); // Check format
- ToNumber(l); // Check format
- }
- virtual void FindShortSuccessor(std::string* key) const {
- ToNumber(*key); // Check format
- }
- private:
- static int ToNumber(const Slice& x) {
- // Check that there are no extra characters.
- ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']')
- << EscapeString(x);
- int val;
- char ignored;
- ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
- << EscapeString(x);
- return val;
- }
- };
- NumberComparator cmp;
- Options new_options = CurrentOptions();
- new_options.create_if_missing = true;
- new_options.comparator = &cmp;
-  new_options.filter_policy = NULL; // Cannot use bloom filters: they hash
-                                    // raw key bytes, but equal numbers can
-                                    // have distinct encodings ("[10]", "[0xa]").
- new_options.write_buffer_size = 1000; // Compact more often
- DestroyAndReopen(&new_options);
- ASSERT_OK(Put("[10]", "ten"));
- ASSERT_OK(Put("[0x14]", "twenty"));
- for (int i = 0; i < 2; i++) {
- ASSERT_EQ("ten", Get("[10]"));
- ASSERT_EQ("ten", Get("[0xa]"));
- ASSERT_EQ("twenty", Get("[20]"));
- ASSERT_EQ("twenty", Get("[0x14]"));
- ASSERT_EQ("NOT_FOUND", Get("[15]"));
- ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
- Compact("[0]", "[9999]");
- }
-
- for (int run = 0; run < 2; run++) {
- for (int i = 0; i < 1000; i++) {
- char buf[100];
- snprintf(buf, sizeof(buf), "[%d]", i*10);
- ASSERT_OK(Put(buf, buf));
- }
- Compact("[0]", "[1000000]");
- }
-}
-
-TEST(DBTest, ManualCompaction) {
- ASSERT_EQ(config::kMaxMemCompactLevel, 2)
- << "Need to update this test to match kMaxMemCompactLevel";
-
- MakeTables(3, "p", "q");
- ASSERT_EQ("1,1,1", FilesPerLevel());
-
- // Compaction range falls before files
- Compact("", "c");
- ASSERT_EQ("1,1,1", FilesPerLevel());
-
- // Compaction range falls after files
- Compact("r", "z");
- ASSERT_EQ("1,1,1", FilesPerLevel());
-
- // Compaction range overlaps files
- Compact("p1", "p9");
- ASSERT_EQ("0,0,1", FilesPerLevel());
-
- // Populate a different range
- MakeTables(3, "c", "e");
- ASSERT_EQ("1,1,2", FilesPerLevel());
-
- // Compact just the new range
- Compact("b", "f");
- ASSERT_EQ("0,0,2", FilesPerLevel());
-
- // Compact all
- MakeTables(1, "a", "z");
- ASSERT_EQ("0,1,2", FilesPerLevel());
- db_->CompactRange(NULL, NULL);
- ASSERT_EQ("0,0,1", FilesPerLevel());
-}
-
-TEST(DBTest, DBOpen_Options) {
- std::string dbname = test::TmpDir() + "/db_options_test";
- DestroyDB(dbname, Options());
-
- // Does not exist, and create_if_missing == false: error
- DB* db = NULL;
- Options opts;
- opts.create_if_missing = false;
- Status s = DB::Open(opts, dbname, &db);
- ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
- ASSERT_TRUE(db == NULL);
-
- // Does not exist, and create_if_missing == true: OK
- opts.create_if_missing = true;
- s = DB::Open(opts, dbname, &db);
- ASSERT_OK(s);
- ASSERT_TRUE(db != NULL);
-
- delete db;
- db = NULL;
-
- // Does exist, and error_if_exists == true: error
- opts.create_if_missing = false;
- opts.error_if_exists = true;
- s = DB::Open(opts, dbname, &db);
- ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
- ASSERT_TRUE(db == NULL);
-
- // Does exist, and error_if_exists == false: OK
- opts.create_if_missing = true;
- opts.error_if_exists = false;
- s = DB::Open(opts, dbname, &db);
- ASSERT_OK(s);
- ASSERT_TRUE(db != NULL);
-
- delete db;
- db = NULL;
-}
-
-TEST(DBTest, Locking) {
- DB* db2 = NULL;
- Status s = DB::Open(CurrentOptions(), dbname_, &db2);
- ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
-}
-
-// Check that number of files does not grow when we are out of space
-TEST(DBTest, NoSpace) {
- Options options = CurrentOptions();
- options.env = env_;
- Reopen(&options);
-
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_EQ("v1", Get("foo"));
- Compact("a", "z");
- const int num_files = CountFiles();
- env_->no_space_.Release_Store(env_); // Force out-of-space errors
- env_->sleep_counter_.Reset();
- for (int i = 0; i < 5; i++) {
- for (int level = 0; level < config::kNumLevels-1; level++) {
- dbfull()->TEST_CompactRange(level, NULL, NULL);
- }
- }
- env_->no_space_.Release_Store(NULL);
- ASSERT_LT(CountFiles(), num_files + 3);
-
- // Check that compaction attempts slept after errors
- ASSERT_GE(env_->sleep_counter_.Read(), 5);
-}
-
-TEST(DBTest, NonWritableFileSystem) {
- Options options = CurrentOptions();
- options.write_buffer_size = 1000;
- options.env = env_;
- Reopen(&options);
- ASSERT_OK(Put("foo", "v1"));
- env_->non_writable_.Release_Store(env_); // Force errors for new files
- std::string big(100000, 'x');
- int errors = 0;
- for (int i = 0; i < 20; i++) {
- fprintf(stderr, "iter %d; errors %d\n", i, errors);
- if (!Put("foo", big).ok()) {
- errors++;
- env_->SleepForMicroseconds(100000);
- }
- }
- ASSERT_GT(errors, 0);
- env_->non_writable_.Release_Store(NULL);
-}
-
-TEST(DBTest, ManifestWriteError) {
- // Test for the following problem:
- // (a) Compaction produces file F
- // (b) Log record containing F is written to MANIFEST file, but Sync() fails
- // (c) GC deletes F
- // (d) After reopening DB, reads fail since deleted F is named in log record
-
- // We iterate twice. In the second iteration, everything is the
- // same except the log record never makes it to the MANIFEST file.
- for (int iter = 0; iter < 2; iter++) {
- port::AtomicPointer* error_type = (iter == 0)
- ? &env_->manifest_sync_error_
- : &env_->manifest_write_error_;
-
- // Insert foo=>bar mapping
- Options options = CurrentOptions();
- options.env = env_;
- options.create_if_missing = true;
- options.error_if_exists = false;
- DestroyAndReopen(&options);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_EQ("bar", Get("foo"));
-
- // Memtable compaction (will succeed)
- dbfull()->TEST_CompactMemTable();
- ASSERT_EQ("bar", Get("foo"));
- const int last = config::kMaxMemCompactLevel;
- ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level
-
- // Merging compaction (will fail)
- error_type->Release_Store(env_);
- dbfull()->TEST_CompactRange(last, NULL, NULL); // Should fail
- ASSERT_EQ("bar", Get("foo"));
-
- // Recovery: should not lose data
- error_type->Release_Store(NULL);
- Reopen(&options);
- ASSERT_EQ("bar", Get("foo"));
- }
-}
-
-TEST(DBTest, FilesDeletedAfterCompaction) {
- ASSERT_OK(Put("foo", "v2"));
- Compact("a", "z");
- const int num_files = CountFiles();
- for (int i = 0; i < 10; i++) {
- ASSERT_OK(Put("foo", "v2"));
- Compact("a", "z");
- }
- ASSERT_EQ(CountFiles(), num_files);
-}
-
-TEST(DBTest, BloomFilter) {
- env_->count_random_reads_ = true;
- Options options = CurrentOptions();
- options.env = env_;
- options.block_cache = NewLRUCache(0); // Prevent cache hits
- options.filter_policy = NewBloomFilterPolicy(10);
- Reopen(&options);
-
- // Populate multiple layers
- const int N = 10000;
- for (int i = 0; i < N; i++) {
- ASSERT_OK(Put(Key(i), Key(i)));
- }
- Compact("a", "z");
- for (int i = 0; i < N; i += 100) {
- ASSERT_OK(Put(Key(i), Key(i)));
- }
- dbfull()->TEST_CompactMemTable();
-
- // Prevent auto compactions triggered by seeks
- env_->delay_sstable_sync_.Release_Store(env_);
-
- // Lookup present keys. Should rarely read from small sstable.
- env_->random_read_counter_.Reset();
- for (int i = 0; i < N; i++) {
- ASSERT_EQ(Key(i), Get(Key(i)));
- }
- int reads = env_->random_read_counter_.Read();
- fprintf(stderr, "%d present => %d reads\n", N, reads);
- ASSERT_GE(reads, N);
- ASSERT_LE(reads, N + 2*N/100);
-
-  // Lookup missing keys. Should rarely read from either sstable.
- env_->random_read_counter_.Reset();
- for (int i = 0; i < N; i++) {
- ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
- }
- reads = env_->random_read_counter_.Read();
- fprintf(stderr, "%d missing => %d reads\n", N, reads);
- ASSERT_LE(reads, 3*N/100);
-
- env_->delay_sstable_sync_.Release_Store(NULL);
- Close();
- delete options.block_cache;
- delete options.filter_policy;
-}
-
-// Multi-threaded test:
-namespace {
-
-static const int kNumThreads = 4;
-static const int kTestSeconds = 10;
-static const int kNumKeys = 1000;
-
-struct MTState {
- DBTest* test;
- port::AtomicPointer stop;
- port::AtomicPointer counter[kNumThreads];
- port::AtomicPointer thread_done[kNumThreads];
-};
-
-struct MTThread {
- MTState* state;
- int id;
-};
-
-static void MTThreadBody(void* arg) {
- MTThread* t = reinterpret_cast<MTThread*>(arg);
- int id = t->id;
- DB* db = t->state->test->db_;
- uintptr_t counter = 0;
- fprintf(stderr, "... starting thread %d\n", id);
- Random rnd(1000 + id);
- std::string value;
- char valbuf[1500];
- while (t->state->stop.Acquire_Load() == NULL) {
- t->state->counter[id].Release_Store(reinterpret_cast<void*>(counter));
-
- int key = rnd.Uniform(kNumKeys);
- char keybuf[20];
- snprintf(keybuf, sizeof(keybuf), "%016d", key);
-
- if (rnd.OneIn(2)) {
- // Write values of the form <key, my id, counter>.
-      // We add some padding to force compactions.
- snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d",
- key, id, static_cast<int>(counter));
- ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
- } else {
- // Read a value and verify that it matches the pattern written above.
- Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
- if (s.IsNotFound()) {
- // Key has not yet been written
- } else {
- // Check that the writer thread counter is >= the counter in the value
- ASSERT_OK(s);
- int k, w, c;
- ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
- ASSERT_EQ(k, key);
- ASSERT_GE(w, 0);
- ASSERT_LT(w, kNumThreads);
- ASSERT_LE(c, reinterpret_cast<uintptr_t>(
- t->state->counter[w].Acquire_Load()));
- }
- }
- counter++;
- }
- t->state->thread_done[id].Release_Store(t);
- fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
-}
-
-} // namespace
-
-TEST(DBTest, MultiThreaded) {
- do {
- // Initialize state
- MTState mt;
- mt.test = this;
- mt.stop.Release_Store(0);
- for (int id = 0; id < kNumThreads; id++) {
- mt.counter[id].Release_Store(0);
- mt.thread_done[id].Release_Store(0);
- }
-
- // Start threads
- MTThread thread[kNumThreads];
- for (int id = 0; id < kNumThreads; id++) {
- thread[id].state = &mt;
- thread[id].id = id;
- env_->StartThread(MTThreadBody, &thread[id]);
- }
-
- // Let them run for a while
- env_->SleepForMicroseconds(kTestSeconds * 1000000);
-
- // Stop the threads and wait for them to finish
- mt.stop.Release_Store(&mt);
- for (int id = 0; id < kNumThreads; id++) {
- while (mt.thread_done[id].Acquire_Load() == NULL) {
- env_->SleepForMicroseconds(100000);
- }
- }
- } while (ChangeOptions());
-}
-
-namespace {
-typedef std::map<std::string, std::string> KVMap;
-}
-
-class ModelDB: public DB {
- public:
- class ModelSnapshot : public Snapshot {
- public:
- KVMap map_;
- };
-
- explicit ModelDB(const Options& options): options_(options) { }
- ~ModelDB() { }
- virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
- return DB::Put(o, k, v);
- }
- virtual Status Delete(const WriteOptions& o, const Slice& key) {
- return DB::Delete(o, key);
- }
- virtual Status Get(const ReadOptions& options,
- const Slice& key, std::string* value) {
- assert(false); // Not implemented
- return Status::NotFound(key);
- }
- virtual Iterator* NewIterator(const ReadOptions& options) {
- if (options.snapshot == NULL) {
- KVMap* saved = new KVMap;
- *saved = map_;
- return new ModelIter(saved, true);
- } else {
- const KVMap* snapshot_state =
- &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
- return new ModelIter(snapshot_state, false);
- }
- }
- virtual const Snapshot* GetSnapshot() {
- ModelSnapshot* snapshot = new ModelSnapshot;
- snapshot->map_ = map_;
- return snapshot;
- }
-
- virtual void ReleaseSnapshot(const Snapshot* snapshot) {
- delete reinterpret_cast<const ModelSnapshot*>(snapshot);
- }
- virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
- class Handler : public WriteBatch::Handler {
- public:
- KVMap* map_;
- virtual void Put(const Slice& key, const Slice& value) {
- (*map_)[key.ToString()] = value.ToString();
- }
- virtual void Delete(const Slice& key) {
- map_->erase(key.ToString());
- }
- };
- Handler handler;
- handler.map_ = &map_;
- return batch->Iterate(&handler);
- }
-
- virtual bool GetProperty(const Slice& property, std::string* value) {
- return false;
- }
- virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
- for (int i = 0; i < n; i++) {
- sizes[i] = 0;
- }
- }
- virtual void CompactRange(const Slice* start, const Slice* end) {
- }
-
- private:
- class ModelIter: public Iterator {
- public:
- ModelIter(const KVMap* map, bool owned)
- : map_(map), owned_(owned), iter_(map_->end()) {
- }
- ~ModelIter() {
- if (owned_) delete map_;
- }
- virtual bool Valid() const { return iter_ != map_->end(); }
- virtual void SeekToFirst() { iter_ = map_->begin(); }
- virtual void SeekToLast() {
- if (map_->empty()) {
- iter_ = map_->end();
- } else {
- iter_ = map_->find(map_->rbegin()->first);
- }
- }
- virtual void Seek(const Slice& k) {
- iter_ = map_->lower_bound(k.ToString());
- }
- virtual void Next() { ++iter_; }
- virtual void Prev() { --iter_; }
- virtual Slice key() const { return iter_->first; }
- virtual Slice value() const { return iter_->second; }
- virtual Status status() const { return Status::OK(); }
- private:
- const KVMap* const map_;
- const bool owned_; // Do we own map_
- KVMap::const_iterator iter_;
- };
- const Options options_;
- KVMap map_;
-};
-
-static std::string RandomKey(Random* rnd) {
- int len = (rnd->OneIn(3)
- ? 1 // Short sometimes to encourage collisions
- : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
- return test::RandomKey(rnd, len);
-}
-
-static bool CompareIterators(int step,
- DB* model,
- DB* db,
- const Snapshot* model_snap,
- const Snapshot* db_snap) {
- ReadOptions options;
- options.snapshot = model_snap;
- Iterator* miter = model->NewIterator(options);
- options.snapshot = db_snap;
- Iterator* dbiter = db->NewIterator(options);
- bool ok = true;
- int count = 0;
- for (miter->SeekToFirst(), dbiter->SeekToFirst();
- ok && miter->Valid() && dbiter->Valid();
- miter->Next(), dbiter->Next()) {
- count++;
- if (miter->key().compare(dbiter->key()) != 0) {
- fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
- step,
- EscapeString(miter->key()).c_str(),
- EscapeString(dbiter->key()).c_str());
- ok = false;
- break;
- }
-
- if (miter->value().compare(dbiter->value()) != 0) {
- fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
- step,
- EscapeString(miter->key()).c_str(),
- EscapeString(miter->value()).c_str(),
-              EscapeString(dbiter->value()).c_str());
- ok = false;
- }
- }
-
- if (ok) {
- if (miter->Valid() != dbiter->Valid()) {
- fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
- step, miter->Valid(), dbiter->Valid());
- ok = false;
- }
- }
- fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
- delete miter;
- delete dbiter;
- return ok;
-}
-
-TEST(DBTest, Randomized) {
- Random rnd(test::RandomSeed());
- do {
- ModelDB model(CurrentOptions());
- const int N = 10000;
- const Snapshot* model_snap = NULL;
- const Snapshot* db_snap = NULL;
- std::string k, v;
- for (int step = 0; step < N; step++) {
- if (step % 100 == 0) {
- fprintf(stderr, "Step %d of %d\n", step, N);
- }
- // TODO(sanjay): Test Get() works
- int p = rnd.Uniform(100);
- if (p < 45) { // Put
- k = RandomKey(&rnd);
- v = RandomString(&rnd,
- rnd.OneIn(20)
- ? 100 + rnd.Uniform(100)
- : rnd.Uniform(8));
- ASSERT_OK(model.Put(WriteOptions(), k, v));
- ASSERT_OK(db_->Put(WriteOptions(), k, v));
-
- } else if (p < 90) { // Delete
- k = RandomKey(&rnd);
- ASSERT_OK(model.Delete(WriteOptions(), k));
- ASSERT_OK(db_->Delete(WriteOptions(), k));
-
-
- } else { // Multi-element batch
- WriteBatch b;
- const int num = rnd.Uniform(8);
- for (int i = 0; i < num; i++) {
- if (i == 0 || !rnd.OneIn(10)) {
- k = RandomKey(&rnd);
- } else {
- // Periodically re-use the same key from the previous iter, so
- // we have multiple entries in the write batch for the same key
- }
- if (rnd.OneIn(2)) {
- v = RandomString(&rnd, rnd.Uniform(10));
- b.Put(k, v);
- } else {
- b.Delete(k);
- }
- }
- ASSERT_OK(model.Write(WriteOptions(), &b));
- ASSERT_OK(db_->Write(WriteOptions(), &b));
- }
-
- if ((step % 100) == 0) {
- ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
- ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
-        // Save a snapshot from each DB; we'll use it the next time we
-        // compare, to make sure the snapshot preserves the current state
- if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
- if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
-
- Reopen();
- ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
-
- model_snap = model.GetSnapshot();
- db_snap = db_->GetSnapshot();
- }
- }
- if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
- if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
- } while (ChangeOptions());
-}
-
-std::string MakeKey(unsigned int num) {
- char buf[30];
- snprintf(buf, sizeof(buf), "%016u", num);
- return std::string(buf);
-}
-
-void BM_LogAndApply(int iters, int num_base_files) {
- std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
- DestroyDB(dbname, Options());
-
- DB* db = NULL;
- Options opts;
- opts.create_if_missing = true;
- Status s = DB::Open(opts, dbname, &db);
- ASSERT_OK(s);
- ASSERT_TRUE(db != NULL);
-
- delete db;
- db = NULL;
-
- Env* env = Env::Default();
-
- port::Mutex mu;
- MutexLock l(&mu);
-
- InternalKeyComparator cmp(BytewiseComparator());
- Options options;
- VersionSet vset(dbname, &options, NULL, &cmp);
- ASSERT_OK(vset.Recover());
- VersionEdit vbase;
- uint64_t fnum = 1;
- for (int i = 0; i < num_base_files; i++) {
- InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
- InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
- vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
- }
- ASSERT_OK(vset.LogAndApply(&vbase, &mu));
-
- uint64_t start_micros = env->NowMicros();
-
- for (int i = 0; i < iters; i++) {
- VersionEdit vedit;
- vedit.DeleteFile(2, fnum);
- InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
- InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
- vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
- vset.LogAndApply(&vedit, &mu);
- }
- uint64_t stop_micros = env->NowMicros();
- unsigned int us = stop_micros - start_micros;
- char buf[16];
- snprintf(buf, sizeof(buf), "%d", num_base_files);
- fprintf(stderr,
- "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n",
- buf, iters, us, ((float)us) / iters);
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- if (argc > 1 && std::string(argv[1]) == "--benchmark") {
- leveldb::BM_LogAndApply(1000, 1);
- leveldb::BM_LogAndApply(1000, 100);
- leveldb::BM_LogAndApply(1000, 10000);
- leveldb::BM_LogAndApply(100, 100000);
- return 0;
- }
-
- return leveldb::test::RunAllTests();
-}
diff --git a/src/leveldb/db/dbformat.cc b/src/leveldb/db/dbformat.cc
deleted file mode 100644
index 28e11b398d..0000000000
--- a/src/leveldb/db/dbformat.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include <stdio.h>
-#include "db/dbformat.h"
-#include "port/port.h"
-#include "util/coding.h"
-
-namespace leveldb {
-
-static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
- assert(seq <= kMaxSequenceNumber);
- assert(t <= kValueTypeForSeek);
- return (seq << 8) | t;
-}
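-// Worked example (annotation for illustration, not original source): packing
-// sequence 5 with kTypeValue (0x1) yields (5 << 8) | 0x1 == 0x501; the type
-// occupies the low 8 bits and the sequence the high 56 bits, which is why
-// kMaxSequenceNumber is (1 << 56) - 1.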
-
-void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
- result->append(key.user_key.data(), key.user_key.size());
- PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
-}
-
-std::string ParsedInternalKey::DebugString() const {
- char buf[50];
- snprintf(buf, sizeof(buf), "' @ %llu : %d",
- (unsigned long long) sequence,
- int(type));
- std::string result = "'";
- result += user_key.ToString();
- result += buf;
- return result;
-}
-
-std::string InternalKey::DebugString() const {
- std::string result;
- ParsedInternalKey parsed;
- if (ParseInternalKey(rep_, &parsed)) {
- result = parsed.DebugString();
- } else {
- result = "(bad)";
- result.append(EscapeString(rep_));
- }
- return result;
-}
-
-const char* InternalKeyComparator::Name() const {
- return "leveldb.InternalKeyComparator";
-}
-
-int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
- // Order by:
- // increasing user key (according to user-supplied comparator)
- // decreasing sequence number
- // decreasing type (though sequence# should be enough to disambiguate)
- int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
- if (r == 0) {
- const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
- const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
- if (anum > bnum) {
- r = -1;
- } else if (anum < bnum) {
- r = +1;
- }
- }
- return r;
-}
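-// Example (illustrative annotation): with equal user keys, the packed tag of
-// ("foo", seq=100) is numerically greater than that of ("foo", seq=99), so
-// Compare() returns -1 and the newer entry sorts first.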
-
-void InternalKeyComparator::FindShortestSeparator(
- std::string* start,
- const Slice& limit) const {
- // Attempt to shorten the user portion of the key
- Slice user_start = ExtractUserKey(*start);
- Slice user_limit = ExtractUserKey(limit);
- std::string tmp(user_start.data(), user_start.size());
- user_comparator_->FindShortestSeparator(&tmp, user_limit);
- if (tmp.size() < user_start.size() &&
- user_comparator_->Compare(user_start, tmp) < 0) {
- // User key has become shorter physically, but larger logically.
- // Tack on the earliest possible number to the shortened user key.
- PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
- assert(this->Compare(*start, tmp) < 0);
- assert(this->Compare(tmp, limit) < 0);
- start->swap(tmp);
- }
-}
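-// Example (illustrative annotation, mirroring dbformat_test.cc): separating
-// "foo"@100 from "hello"@200 shortens the user key to "g" and tags it with
-// (kMaxSequenceNumber, kValueTypeForSeek), so the result sorts after every
-// "foo" entry and before every "hello" entry.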
-
-void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
- Slice user_key = ExtractUserKey(*key);
- std::string tmp(user_key.data(), user_key.size());
- user_comparator_->FindShortSuccessor(&tmp);
- if (tmp.size() < user_key.size() &&
- user_comparator_->Compare(user_key, tmp) < 0) {
- // User key has become shorter physically, but larger logically.
- // Tack on the earliest possible number to the shortened user key.
- PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
- assert(this->Compare(*key, tmp) < 0);
- key->swap(tmp);
- }
-}
-
-const char* InternalFilterPolicy::Name() const {
- return user_policy_->Name();
-}
-
-void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
- std::string* dst) const {
- // We rely on the fact that the code in table.cc does not mind us
- // adjusting keys[].
- Slice* mkey = const_cast<Slice*>(keys);
- for (int i = 0; i < n; i++) {
- mkey[i] = ExtractUserKey(keys[i]);
- // TODO(sanjay): Suppress dups?
- }
- user_policy_->CreateFilter(keys, n, dst);
-}
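-// Note (illustrative annotation): stripping the 8-byte tag above means the
-// filter is built over user keys, which is consistent with KeyMayMatch below
-// probing with ExtractUserKey(key).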
-
-bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const {
- return user_policy_->KeyMayMatch(ExtractUserKey(key), f);
-}
-
-LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
- size_t usize = user_key.size();
- size_t needed = usize + 13; // A conservative estimate
- char* dst;
- if (needed <= sizeof(space_)) {
- dst = space_;
- } else {
- dst = new char[needed];
- }
- start_ = dst;
- dst = EncodeVarint32(dst, usize + 8);
- kstart_ = dst;
- memcpy(dst, user_key.data(), usize);
- dst += usize;
- EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
- dst += 8;
- end_ = dst;
-}
-
-} // namespace leveldb
diff --git a/src/leveldb/db/dbformat.h b/src/leveldb/db/dbformat.h
deleted file mode 100644
index f7f64dafb6..0000000000
--- a/src/leveldb/db/dbformat.h
+++ /dev/null
@@ -1,227 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_FORMAT_H_
-#define STORAGE_LEVELDB_DB_FORMAT_H_
-
-#include <stdio.h>
-#include "leveldb/comparator.h"
-#include "leveldb/db.h"
-#include "leveldb/filter_policy.h"
-#include "leveldb/slice.h"
-#include "leveldb/table_builder.h"
-#include "util/coding.h"
-#include "util/logging.h"
-
-namespace leveldb {
-
-// Grouping of constants. We may want to make some of these
-// parameters set via options.
-namespace config {
-static const int kNumLevels = 7;
-
-// Level-0 compaction is started when we hit this many files.
-static const int kL0_CompactionTrigger = 4;
-
-// Soft limit on number of level-0 files. We slow down writes at this point.
-static const int kL0_SlowdownWritesTrigger = 8;
-
-// Maximum number of level-0 files. We stop writes at this point.
-static const int kL0_StopWritesTrigger = 12;
-
-// Maximum level to which a new compacted memtable is pushed if it
-// does not create overlap. We try to push to level 2 to avoid the
-// relatively expensive level 0=>1 compactions and to avoid some
-// expensive manifest file operations. We do not push all the way to
-// the largest level since that can generate a lot of wasted disk
-// space if the same key space is being repeatedly overwritten.
-static const int kMaxMemCompactLevel = 2;
-
-} // namespace config
-
-class InternalKey;
-
-// Value types encoded as the last component of internal keys.
-// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
-// data structures.
-enum ValueType {
- kTypeDeletion = 0x0,
- kTypeValue = 0x1
-};
-// kValueTypeForSeek defines the ValueType that should be passed when
-// constructing a ParsedInternalKey object for seeking to a particular
-// sequence number (since we sort sequence numbers in decreasing order
-// and the value type is embedded as the low 8 bits in the sequence
-// number in internal keys, we need to use the highest-numbered
-// ValueType, not the lowest).
-static const ValueType kValueTypeForSeek = kTypeValue;
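-// Example (illustrative annotation): a lookup at snapshot 100 seeks with the
-// tag (100 << 8) | kValueTypeForSeek. Ties on sequence break by decreasing
-// type, so using the highest type value lands the seek on the first entry
-// with sequence <= 100 instead of skipping one tagged (100, kTypeDeletion).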
-
-typedef uint64_t SequenceNumber;
-
-// We leave eight bits empty at the bottom so a type and sequence#
-// can be packed together into 64-bits.
-static const SequenceNumber kMaxSequenceNumber =
- ((0x1ull << 56) - 1);
-
-struct ParsedInternalKey {
- Slice user_key;
- SequenceNumber sequence;
- ValueType type;
-
- ParsedInternalKey() { } // Intentionally left uninitialized (for speed)
- ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
- : user_key(u), sequence(seq), type(t) { }
- std::string DebugString() const;
-};
-
-// Return the length of the encoding of "key".
-inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
- return key.user_key.size() + 8;
-}
-
-// Append the serialization of "key" to *result.
-extern void AppendInternalKey(std::string* result,
- const ParsedInternalKey& key);
-
-// Attempt to parse an internal key from "internal_key". On success,
-// stores the parsed data in "*result", and returns true.
-//
-// On error, returns false, leaves "*result" in an undefined state.
-extern bool ParseInternalKey(const Slice& internal_key,
- ParsedInternalKey* result);
-
-// Returns the user key portion of an internal key.
-inline Slice ExtractUserKey(const Slice& internal_key) {
- assert(internal_key.size() >= 8);
- return Slice(internal_key.data(), internal_key.size() - 8);
-}
-
-inline ValueType ExtractValueType(const Slice& internal_key) {
- assert(internal_key.size() >= 8);
- const size_t n = internal_key.size();
- uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
- unsigned char c = num & 0xff;
- return static_cast<ValueType>(c);
-}
-
-// A comparator for internal keys that uses a specified comparator for
-// the user key portion and breaks ties by decreasing sequence number.
-class InternalKeyComparator : public Comparator {
- private:
- const Comparator* user_comparator_;
- public:
- explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
- virtual const char* Name() const;
- virtual int Compare(const Slice& a, const Slice& b) const;
- virtual void FindShortestSeparator(
- std::string* start,
- const Slice& limit) const;
- virtual void FindShortSuccessor(std::string* key) const;
-
- const Comparator* user_comparator() const { return user_comparator_; }
-
- int Compare(const InternalKey& a, const InternalKey& b) const;
-};
-
-// Filter policy wrapper that converts from internal keys to user keys
-class InternalFilterPolicy : public FilterPolicy {
- private:
- const FilterPolicy* const user_policy_;
- public:
- explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
- virtual const char* Name() const;
- virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
- virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
-};
-
-// Modules in this directory should keep internal keys wrapped inside
-// the following class instead of plain strings so that we do not
-// incorrectly use string comparisons instead of an InternalKeyComparator.
-class InternalKey {
- private:
- std::string rep_;
- public:
- InternalKey() { } // Leave rep_ as empty to indicate it is invalid
- InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
- AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
- }
-
- void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
- Slice Encode() const {
- assert(!rep_.empty());
- return rep_;
- }
-
- Slice user_key() const { return ExtractUserKey(rep_); }
-
- void SetFrom(const ParsedInternalKey& p) {
- rep_.clear();
- AppendInternalKey(&rep_, p);
- }
-
- void Clear() { rep_.clear(); }
-
- std::string DebugString() const;
-};
-
-inline int InternalKeyComparator::Compare(
- const InternalKey& a, const InternalKey& b) const {
- return Compare(a.Encode(), b.Encode());
-}
-
-inline bool ParseInternalKey(const Slice& internal_key,
- ParsedInternalKey* result) {
- const size_t n = internal_key.size();
- if (n < 8) return false;
- uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
- unsigned char c = num & 0xff;
- result->sequence = num >> 8;
- result->type = static_cast<ValueType>(c);
- result->user_key = Slice(internal_key.data(), n - 8);
- return (c <= static_cast<unsigned char>(kTypeValue));
-}
-
-// A helper class useful for DBImpl::Get()
-class LookupKey {
- public:
- // Initialize *this for looking up user_key at a snapshot with
- // the specified sequence number.
- LookupKey(const Slice& user_key, SequenceNumber sequence);
-
- ~LookupKey();
-
- // Return a key suitable for lookup in a MemTable.
- Slice memtable_key() const { return Slice(start_, end_ - start_); }
-
- // Return an internal key (suitable for passing to an internal iterator)
- Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
-
- // Return the user key
- Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
-
- private:
- // We construct a char array of the form:
- // klength varint32 <-- start_
- // userkey char[klength] <-- kstart_
- // tag uint64
- // <-- end_
- // The array is a suitable MemTable key.
- // The suffix starting with "userkey" can be used as an InternalKey.
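-  // Worked example (illustrative): for user_key "foo", klength is
-  // usize + 8 = 11 and its varint32 encoding is one byte, so memtable_key()
-  // spans 12 bytes, internal_key() the trailing 11, and user_key() just the
-  // 3-byte "foo".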
- const char* start_;
- const char* kstart_;
- const char* end_;
- char space_[200]; // Avoid allocation for short keys
-
- // No copying allowed
- LookupKey(const LookupKey&);
- void operator=(const LookupKey&);
-};
-
-inline LookupKey::~LookupKey() {
- if (start_ != space_) delete[] start_;
-}
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_FORMAT_H_
diff --git a/src/leveldb/db/dbformat_test.cc b/src/leveldb/db/dbformat_test.cc
deleted file mode 100644
index 5d82f5d313..0000000000
--- a/src/leveldb/db/dbformat_test.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/dbformat.h"
-#include "util/logging.h"
-#include "util/testharness.h"
-
-namespace leveldb {
-
-static std::string IKey(const std::string& user_key,
- uint64_t seq,
- ValueType vt) {
- std::string encoded;
- AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt));
- return encoded;
-}
-
-static std::string Shorten(const std::string& s, const std::string& l) {
- std::string result = s;
- InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l);
- return result;
-}
-
-static std::string ShortSuccessor(const std::string& s) {
- std::string result = s;
- InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result);
- return result;
-}
-
-static void TestKey(const std::string& key,
- uint64_t seq,
- ValueType vt) {
- std::string encoded = IKey(key, seq, vt);
-
- Slice in(encoded);
- ParsedInternalKey decoded("", 0, kTypeValue);
-
- ASSERT_TRUE(ParseInternalKey(in, &decoded));
- ASSERT_EQ(key, decoded.user_key.ToString());
- ASSERT_EQ(seq, decoded.sequence);
- ASSERT_EQ(vt, decoded.type);
-
- ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded));
-}
-
-class FormatTest { };
-
-TEST(FormatTest, InternalKey_EncodeDecode) {
- const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" };
- const uint64_t seq[] = {
- 1, 2, 3,
- (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1,
- (1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1,
- (1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1
- };
- for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) {
- for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) {
- TestKey(keys[k], seq[s], kTypeValue);
- TestKey("hello", 1, kTypeDeletion);
- }
- }
-}
-
-TEST(FormatTest, InternalKeyShortSeparator) {
- // When user keys are same
- ASSERT_EQ(IKey("foo", 100, kTypeValue),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("foo", 99, kTypeValue)));
- ASSERT_EQ(IKey("foo", 100, kTypeValue),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("foo", 101, kTypeValue)));
- ASSERT_EQ(IKey("foo", 100, kTypeValue),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("foo", 100, kTypeValue)));
- ASSERT_EQ(IKey("foo", 100, kTypeValue),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("foo", 100, kTypeDeletion)));
-
- // When user keys are misordered
- ASSERT_EQ(IKey("foo", 100, kTypeValue),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("bar", 99, kTypeValue)));
-
- // When user keys are different, but correctly ordered
- ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("hello", 200, kTypeValue)));
-
- // When start user key is prefix of limit user key
- ASSERT_EQ(IKey("foo", 100, kTypeValue),
- Shorten(IKey("foo", 100, kTypeValue),
- IKey("foobar", 200, kTypeValue)));
-
- // When limit user key is prefix of start user key
- ASSERT_EQ(IKey("foobar", 100, kTypeValue),
- Shorten(IKey("foobar", 100, kTypeValue),
- IKey("foo", 200, kTypeValue)));
-}
-
-TEST(FormatTest, InternalKeyShortestSuccessor) {
- ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
- ShortSuccessor(IKey("foo", 100, kTypeValue)));
- ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue),
- ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
diff --git a/src/leveldb/db/filename.cc b/src/leveldb/db/filename.cc
deleted file mode 100644
index 3c4d49f64e..0000000000
--- a/src/leveldb/db/filename.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include <ctype.h>
-#include <stdio.h>
-#include "db/filename.h"
-#include "db/dbformat.h"
-#include "leveldb/env.h"
-#include "util/logging.h"
-
-namespace leveldb {
-
-// A utility routine: write "data" to the named file and Sync() it.
-extern Status WriteStringToFileSync(Env* env, const Slice& data,
- const std::string& fname);
-
-static std::string MakeFileName(const std::string& name, uint64_t number,
- const char* suffix) {
- char buf[100];
- snprintf(buf, sizeof(buf), "/%06llu.%s",
- static_cast<unsigned long long>(number),
- suffix);
- return name + buf;
-}
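-// Example (illustrative annotation): MakeFileName("/db", 7, "log") returns
-// "/db/000007.log"; zero-padding to six digits keeps file names a uniform
-// width and lexicographically sortable.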
-
-std::string LogFileName(const std::string& name, uint64_t number) {
- assert(number > 0);
- return MakeFileName(name, number, "log");
-}
-
-std::string TableFileName(const std::string& name, uint64_t number) {
- assert(number > 0);
- return MakeFileName(name, number, "sst");
-}
-
-std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
- assert(number > 0);
- char buf[100];
- snprintf(buf, sizeof(buf), "/MANIFEST-%06llu",
- static_cast<unsigned long long>(number));
- return dbname + buf;
-}
-
-std::string CurrentFileName(const std::string& dbname) {
- return dbname + "/CURRENT";
-}
-
-std::string LockFileName(const std::string& dbname) {
- return dbname + "/LOCK";
-}
-
-std::string TempFileName(const std::string& dbname, uint64_t number) {
- assert(number > 0);
- return MakeFileName(dbname, number, "dbtmp");
-}
-
-std::string InfoLogFileName(const std::string& dbname) {
- return dbname + "/LOG";
-}
-
-// Return the name of the old info log file for "dbname".
-std::string OldInfoLogFileName(const std::string& dbname) {
- return dbname + "/LOG.old";
-}
-
-
-// Owned filenames have the form:
-// dbname/CURRENT
-// dbname/LOCK
-// dbname/LOG
-// dbname/LOG.old
-// dbname/MANIFEST-[0-9]+
-// dbname/[0-9]+.(log|sst)
-bool ParseFileName(const std::string& fname,
- uint64_t* number,
- FileType* type) {
- Slice rest(fname);
- if (rest == "CURRENT") {
- *number = 0;
- *type = kCurrentFile;
- } else if (rest == "LOCK") {
- *number = 0;
- *type = kDBLockFile;
- } else if (rest == "LOG" || rest == "LOG.old") {
- *number = 0;
- *type = kInfoLogFile;
- } else if (rest.starts_with("MANIFEST-")) {
- rest.remove_prefix(strlen("MANIFEST-"));
- uint64_t num;
- if (!ConsumeDecimalNumber(&rest, &num)) {
- return false;
- }
- if (!rest.empty()) {
- return false;
- }
- *type = kDescriptorFile;
- *number = num;
- } else {
- // Avoid strtoull() to keep filename format independent of the
- // current locale
- uint64_t num;
- if (!ConsumeDecimalNumber(&rest, &num)) {
- return false;
- }
- Slice suffix = rest;
- if (suffix == Slice(".log")) {
- *type = kLogFile;
- } else if (suffix == Slice(".sst")) {
- *type = kTableFile;
- } else if (suffix == Slice(".dbtmp")) {
- *type = kTempFile;
- } else {
- return false;
- }
- *number = num;
- }
- return true;
-}
-
-Status SetCurrentFile(Env* env, const std::string& dbname,
- uint64_t descriptor_number) {
- // Remove leading "dbname/" and add newline to manifest file name
- std::string manifest = DescriptorFileName(dbname, descriptor_number);
- Slice contents = manifest;
- assert(contents.starts_with(dbname + "/"));
- contents.remove_prefix(dbname.size() + 1);
- std::string tmp = TempFileName(dbname, descriptor_number);
- Status s = WriteStringToFileSync(env, contents.ToString() + "\n", tmp);
- if (s.ok()) {
- s = env->RenameFile(tmp, CurrentFileName(dbname));
- }
- if (!s.ok()) {
- env->DeleteFile(tmp);
- }
- return s;
-}
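-// Note (illustrative annotation): for dbname "/db" and descriptor number 5,
-// this writes "MANIFEST-000005\n" to "/db/000005.dbtmp" and renames it over
-// "/db/CURRENT"; on typical POSIX filesystems the rename is atomic, so
-// CURRENT always holds either the old or the new contents, never a partial
-// write.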
-
-} // namespace leveldb
diff --git a/src/leveldb/db/filename.h b/src/leveldb/db/filename.h
deleted file mode 100644
index d5d09b1146..0000000000
--- a/src/leveldb/db/filename.h
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// File names used by DB code
-
-#ifndef STORAGE_LEVELDB_DB_FILENAME_H_
-#define STORAGE_LEVELDB_DB_FILENAME_H_
-
-#include <stdint.h>
-#include <string>
-#include "leveldb/slice.h"
-#include "leveldb/status.h"
-#include "port/port.h"
-
-namespace leveldb {
-
-class Env;
-
-enum FileType {
- kLogFile,
- kDBLockFile,
- kTableFile,
- kDescriptorFile,
- kCurrentFile,
- kTempFile,
- kInfoLogFile // Either the current one, or an old one
-};
-
-// Return the name of the log file with the specified number
-// in the db named by "dbname". The result will be prefixed with
-// "dbname".
-extern std::string LogFileName(const std::string& dbname, uint64_t number);
-
-// Return the name of the sstable with the specified number
-// in the db named by "dbname". The result will be prefixed with
-// "dbname".
-extern std::string TableFileName(const std::string& dbname, uint64_t number);
-
-// Return the name of the descriptor file for the db named by
-// "dbname" and the specified incarnation number. The result will be
-// prefixed with "dbname".
-extern std::string DescriptorFileName(const std::string& dbname,
- uint64_t number);
-
-// Return the name of the current file. This file contains the name
-// of the current manifest file. The result will be prefixed with
-// "dbname".
-extern std::string CurrentFileName(const std::string& dbname);
-
-// Return the name of the lock file for the db named by
-// "dbname". The result will be prefixed with "dbname".
-extern std::string LockFileName(const std::string& dbname);
-
-// Return the name of a temporary file owned by the db named "dbname".
-// The result will be prefixed with "dbname".
-extern std::string TempFileName(const std::string& dbname, uint64_t number);
-
-// Return the name of the info log file for "dbname".
-extern std::string InfoLogFileName(const std::string& dbname);
-
-// Return the name of the old info log file for "dbname".
-extern std::string OldInfoLogFileName(const std::string& dbname);
-
-// If filename is a leveldb file, store the type of the file in *type.
-// The number encoded in the filename is stored in *number. If the
-// filename was successfully parsed, returns true. Else return false.
-extern bool ParseFileName(const std::string& filename,
- uint64_t* number,
- FileType* type);
-
-// Make the CURRENT file point to the descriptor file with the
-// specified number.
-extern Status SetCurrentFile(Env* env, const std::string& dbname,
- uint64_t descriptor_number);
-
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_FILENAME_H_
diff --git a/src/leveldb/db/filename_test.cc b/src/leveldb/db/filename_test.cc
deleted file mode 100644
index 47353d6c9a..0000000000
--- a/src/leveldb/db/filename_test.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/filename.h"
-
-#include "db/dbformat.h"
-#include "port/port.h"
-#include "util/logging.h"
-#include "util/testharness.h"
-
-namespace leveldb {
-
-class FileNameTest { };
-
-TEST(FileNameTest, Parse) {
- Slice db;
- FileType type;
- uint64_t number;
-
- // Successful parses
- static struct {
- const char* fname;
- uint64_t number;
- FileType type;
- } cases[] = {
- { "100.log", 100, kLogFile },
- { "0.log", 0, kLogFile },
- { "0.sst", 0, kTableFile },
- { "CURRENT", 0, kCurrentFile },
- { "LOCK", 0, kDBLockFile },
- { "MANIFEST-2", 2, kDescriptorFile },
- { "MANIFEST-7", 7, kDescriptorFile },
- { "LOG", 0, kInfoLogFile },
- { "LOG.old", 0, kInfoLogFile },
- { "18446744073709551615.log", 18446744073709551615ull, kLogFile },
- };
- for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
- std::string f = cases[i].fname;
- ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
- ASSERT_EQ(cases[i].type, type) << f;
- ASSERT_EQ(cases[i].number, number) << f;
- }
-
- // Errors
- static const char* errors[] = {
- "",
- "foo",
- "foo-dx-100.log",
- ".log",
- "",
- "manifest",
- "CURREN",
- "CURRENTX",
- "MANIFES",
- "MANIFEST",
- "MANIFEST-",
- "XMANIFEST-3",
- "MANIFEST-3x",
- "LOC",
- "LOCKx",
- "LO",
- "LOGx",
- "18446744073709551616.log",
- "184467440737095516150.log",
- "100",
- "100.",
- "100.lop"
- };
- for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
- std::string f = errors[i];
- ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
- };
-}
-
-TEST(FileNameTest, Construction) {
- uint64_t number;
- FileType type;
- std::string fname;
-
- fname = CurrentFileName("foo");
- ASSERT_EQ("foo/", std::string(fname.data(), 4));
- ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
- ASSERT_EQ(0, number);
- ASSERT_EQ(kCurrentFile, type);
-
- fname = LockFileName("foo");
- ASSERT_EQ("foo/", std::string(fname.data(), 4));
- ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
- ASSERT_EQ(0, number);
- ASSERT_EQ(kDBLockFile, type);
-
- fname = LogFileName("foo", 192);
- ASSERT_EQ("foo/", std::string(fname.data(), 4));
- ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
- ASSERT_EQ(192, number);
- ASSERT_EQ(kLogFile, type);
-
- fname = TableFileName("bar", 200);
- ASSERT_EQ("bar/", std::string(fname.data(), 4));
- ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
- ASSERT_EQ(200, number);
- ASSERT_EQ(kTableFile, type);
-
- fname = DescriptorFileName("bar", 100);
- ASSERT_EQ("bar/", std::string(fname.data(), 4));
- ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
- ASSERT_EQ(100, number);
- ASSERT_EQ(kDescriptorFile, type);
-
- fname = TempFileName("tmp", 999);
- ASSERT_EQ("tmp/", std::string(fname.data(), 4));
- ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
- ASSERT_EQ(999, number);
- ASSERT_EQ(kTempFile, type);
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
diff --git a/src/leveldb/db/leveldb_main.cc b/src/leveldb/db/leveldb_main.cc
deleted file mode 100644
index 995d76107a..0000000000
--- a/src/leveldb/db/leveldb_main.cc
+++ /dev/null
@@ -1,238 +0,0 @@
-// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include <stdio.h>
-#include "db/dbformat.h"
-#include "db/filename.h"
-#include "db/log_reader.h"
-#include "db/version_edit.h"
-#include "db/write_batch_internal.h"
-#include "leveldb/env.h"
-#include "leveldb/iterator.h"
-#include "leveldb/options.h"
-#include "leveldb/status.h"
-#include "leveldb/table.h"
-#include "leveldb/write_batch.h"
-#include "util/logging.h"
-
-namespace leveldb {
-
-namespace {
-
-bool GuessType(const std::string& fname, FileType* type) {
- size_t pos = fname.rfind('/');
- std::string basename;
- if (pos == std::string::npos) {
- basename = fname;
- } else {
- basename = std::string(fname.data() + pos + 1, fname.size() - pos - 1);
- }
- uint64_t ignored;
- return ParseFileName(basename, &ignored, type);
-}
-
-// Notified when log reader encounters corruption.
-class CorruptionReporter : public log::Reader::Reporter {
- public:
- virtual void Corruption(size_t bytes, const Status& status) {
- printf("corruption: %d bytes; %s\n",
- static_cast<int>(bytes),
- status.ToString().c_str());
- }
-};
-
-// Print contents of a log file. (*func)() is called on every record.
-bool PrintLogContents(Env* env, const std::string& fname,
- void (*func)(Slice)) {
- SequentialFile* file;
- Status s = env->NewSequentialFile(fname, &file);
- if (!s.ok()) {
- fprintf(stderr, "%s\n", s.ToString().c_str());
- return false;
- }
- CorruptionReporter reporter;
- log::Reader reader(file, &reporter, true, 0);
- Slice record;
- std::string scratch;
- while (reader.ReadRecord(&record, &scratch)) {
- printf("--- offset %llu; ",
- static_cast<unsigned long long>(reader.LastRecordOffset()));
- (*func)(record);
- }
- delete file;
- return true;
-}
-
-// Called on every item found in a WriteBatch.
-class WriteBatchItemPrinter : public WriteBatch::Handler {
- public:
- uint64_t offset_;
- uint64_t sequence_;
-
- virtual void Put(const Slice& key, const Slice& value) {
- printf(" put '%s' '%s'\n",
- EscapeString(key).c_str(),
- EscapeString(value).c_str());
- }
- virtual void Delete(const Slice& key) {
- printf(" del '%s'\n",
- EscapeString(key).c_str());
- }
-};
-
-
-// Called on every log record (each one of which is a WriteBatch)
-// found in a kLogFile.
-static void WriteBatchPrinter(Slice record) {
- if (record.size() < 12) {
- printf("log record length %d is too small\n",
- static_cast<int>(record.size()));
- return;
- }
- WriteBatch batch;
- WriteBatchInternal::SetContents(&batch, record);
- printf("sequence %llu\n",
- static_cast<unsigned long long>(WriteBatchInternal::Sequence(&batch)));
- WriteBatchItemPrinter batch_item_printer;
- Status s = batch.Iterate(&batch_item_printer);
- if (!s.ok()) {
- printf(" error: %s\n", s.ToString().c_str());
- }
-}
-
-bool DumpLog(Env* env, const std::string& fname) {
- return PrintLogContents(env, fname, WriteBatchPrinter);
-}
-
-// Called on every log record (each one of which is a WriteBatch)
-// found in a kDescriptorFile.
-static void VersionEditPrinter(Slice record) {
- VersionEdit edit;
- Status s = edit.DecodeFrom(record);
- if (!s.ok()) {
- printf("%s\n", s.ToString().c_str());
- return;
- }
- printf("%s", edit.DebugString().c_str());
-}
-
-bool DumpDescriptor(Env* env, const std::string& fname) {
- return PrintLogContents(env, fname, VersionEditPrinter);
-}
-
-bool DumpTable(Env* env, const std::string& fname) {
- uint64_t file_size;
- RandomAccessFile* file = NULL;
- Table* table = NULL;
- Status s = env->GetFileSize(fname, &file_size);
- if (s.ok()) {
- s = env->NewRandomAccessFile(fname, &file);
- }
- if (s.ok()) {
- // We use the default comparator, which may or may not match the
- // comparator used in this database. However this should not cause
- // problems since we only use Table operations that do not require
- // any comparisons. In particular, we do not call Seek or Prev.
- s = Table::Open(Options(), file, file_size, &table);
- }
- if (!s.ok()) {
- fprintf(stderr, "%s\n", s.ToString().c_str());
- delete table;
- delete file;
- return false;
- }
-
- ReadOptions ro;
- ro.fill_cache = false;
- Iterator* iter = table->NewIterator(ro);
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- ParsedInternalKey key;
- if (!ParseInternalKey(iter->key(), &key)) {
- printf("badkey '%s' => '%s'\n",
- EscapeString(iter->key()).c_str(),
- EscapeString(iter->value()).c_str());
- } else {
- char kbuf[20];
- const char* type;
- if (key.type == kTypeDeletion) {
- type = "del";
- } else if (key.type == kTypeValue) {
- type = "val";
- } else {
- snprintf(kbuf, sizeof(kbuf), "%d", static_cast<int>(key.type));
- type = kbuf;
- }
- printf("'%s' @ %8llu : %s => '%s'\n",
- EscapeString(key.user_key).c_str(),
- static_cast<unsigned long long>(key.sequence),
- type,
- EscapeString(iter->value()).c_str());
- }
- }
- s = iter->status();
- if (!s.ok()) {
- printf("iterator error: %s\n", s.ToString().c_str());
- }
-
- delete iter;
- delete table;
- delete file;
- return true;
-}
-
-bool DumpFile(Env* env, const std::string& fname) {
- FileType ftype;
- if (!GuessType(fname, &ftype)) {
- fprintf(stderr, "%s: unknown file type\n", fname.c_str());
- return false;
- }
- switch (ftype) {
- case kLogFile: return DumpLog(env, fname);
- case kDescriptorFile: return DumpDescriptor(env, fname);
- case kTableFile: return DumpTable(env, fname);
-
- default: {
- fprintf(stderr, "%s: not a dump-able file type\n", fname.c_str());
- break;
- }
- }
- return false;
-}
-
-bool HandleDumpCommand(Env* env, char** files, int num) {
- bool ok = true;
- for (int i = 0; i < num; i++) {
- ok &= DumpFile(env, files[i]);
- }
- return ok;
-}
-
-}
-} // namespace leveldb
-
-static void Usage() {
- fprintf(
- stderr,
- "Usage: leveldbutil command...\n"
- " dump files... -- dump contents of specified files\n"
- );
-}
-
-int main(int argc, char** argv) {
- leveldb::Env* env = leveldb::Env::Default();
- bool ok = true;
- if (argc < 2) {
- Usage();
- ok = false;
- } else {
- std::string command = argv[1];
- if (command == "dump") {
- ok = leveldb::HandleDumpCommand(env, argv+2, argc-2);
- } else {
- Usage();
- ok = false;
- }
- }
- return (ok ? 0 : 1);
-}
diff --git a/src/leveldb/db/log_format.h b/src/leveldb/db/log_format.h
deleted file mode 100644
index 2690cb9789..0000000000
--- a/src/leveldb/db/log_format.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// Log format information shared by reader and writer.
-// See ../doc/log_format.txt for more detail.
-
-#ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_
-#define STORAGE_LEVELDB_DB_LOG_FORMAT_H_
-
-namespace leveldb {
-namespace log {
-
-enum RecordType {
- // Zero is reserved for preallocated files
- kZeroType = 0,
-
- kFullType = 1,
-
- // For fragments
- kFirstType = 2,
- kMiddleType = 3,
- kLastType = 4
-};
-static const int kMaxRecordType = kLastType;
-
-static const int kBlockSize = 32768;
-
-// Header is checksum (4 bytes), type (1 byte), length (2 bytes).
-static const int kHeaderSize = 4 + 1 + 2;
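-
-// Worked example (illustrative annotation): a record larger than one block's
-// 32768 - 7 bytes of payload is emitted as a kFirstType fragment, zero or
-// more kMiddleType fragments, and a kLastType fragment, each fragment
-// carrying its own 7-byte header.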
-
-} // namespace log
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_LOG_FORMAT_H_
diff --git a/src/leveldb/db/log_reader.cc b/src/leveldb/db/log_reader.cc
deleted file mode 100644
index b35f115aad..0000000000
--- a/src/leveldb/db/log_reader.cc
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/log_reader.h"
-
-#include <stdio.h>
-#include "leveldb/env.h"
-#include "util/coding.h"
-#include "util/crc32c.h"
-
-namespace leveldb {
-namespace log {
-
-Reader::Reporter::~Reporter() {
-}
-
-Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
- uint64_t initial_offset)
- : file_(file),
- reporter_(reporter),
- checksum_(checksum),
- backing_store_(new char[kBlockSize]),
- buffer_(),
- eof_(false),
- last_record_offset_(0),
- end_of_buffer_offset_(0),
- initial_offset_(initial_offset) {
-}
-
-Reader::~Reader() {
- delete[] backing_store_;
-}
-
-bool Reader::SkipToInitialBlock() {
- size_t offset_in_block = initial_offset_ % kBlockSize;
- uint64_t block_start_location = initial_offset_ - offset_in_block;
-
- // Don't search a block if we'd be in the trailer
- if (offset_in_block > kBlockSize - 6) {
- offset_in_block = 0;
- block_start_location += kBlockSize;
- }
-
- end_of_buffer_offset_ = block_start_location;
-
- // Skip to start of first block that can contain the initial record
- if (block_start_location > 0) {
- Status skip_status = file_->Skip(block_start_location);
- if (!skip_status.ok()) {
- ReportDrop(block_start_location, skip_status);
- return false;
- }
- }
-
- return true;
-}
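-// Example (illustrative annotation): with initial_offset 40000 and kBlockSize
-// 32768, offset_in_block is 7232, so reading begins at block offset 32768;
-// physical records that started before offset 40000 are then skipped as
-// kBadRecord in ReadPhysicalRecord.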
-
-bool Reader::ReadRecord(Slice* record, std::string* scratch) {
- if (last_record_offset_ < initial_offset_) {
- if (!SkipToInitialBlock()) {
- return false;
- }
- }
-
- scratch->clear();
- record->clear();
- bool in_fragmented_record = false;
- // Record offset of the logical record that we're reading
- // 0 is a dummy value to make compilers happy
- uint64_t prospective_record_offset = 0;
-
- Slice fragment;
- while (true) {
- uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
- const unsigned int record_type = ReadPhysicalRecord(&fragment);
- switch (record_type) {
- case kFullType:
- if (in_fragmented_record) {
- // Handle bug in earlier versions of log::Writer where
- // it could emit an empty kFirstType record at the tail end
- // of a block followed by a kFullType or kFirstType record
- // at the beginning of the next block.
- if (scratch->empty()) {
- in_fragmented_record = false;
- } else {
- ReportCorruption(scratch->size(), "partial record without end(1)");
- }
- }
- prospective_record_offset = physical_record_offset;
- scratch->clear();
- *record = fragment;
- last_record_offset_ = prospective_record_offset;
- return true;
-
- case kFirstType:
- if (in_fragmented_record) {
- // Handle bug in earlier versions of log::Writer where
- // it could emit an empty kFirstType record at the tail end
- // of a block followed by a kFullType or kFirstType record
- // at the beginning of the next block.
- if (scratch->empty()) {
- in_fragmented_record = false;
- } else {
- ReportCorruption(scratch->size(), "partial record without end(2)");
- }
- }
- prospective_record_offset = physical_record_offset;
- scratch->assign(fragment.data(), fragment.size());
- in_fragmented_record = true;
- break;
-
- case kMiddleType:
- if (!in_fragmented_record) {
- ReportCorruption(fragment.size(),
- "missing start of fragmented record(1)");
- } else {
- scratch->append(fragment.data(), fragment.size());
- }
- break;
-
- case kLastType:
- if (!in_fragmented_record) {
- ReportCorruption(fragment.size(),
- "missing start of fragmented record(2)");
- } else {
- scratch->append(fragment.data(), fragment.size());
- *record = Slice(*scratch);
- last_record_offset_ = prospective_record_offset;
- return true;
- }
- break;
-
- case kEof:
- if (in_fragmented_record) {
- ReportCorruption(scratch->size(), "partial record without end(3)");
- scratch->clear();
- }
- return false;
-
- case kBadRecord:
- if (in_fragmented_record) {
- ReportCorruption(scratch->size(), "error in middle of record");
- in_fragmented_record = false;
- scratch->clear();
- }
- break;
-
- default: {
- char buf[40];
- snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
- ReportCorruption(
- (fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
- buf);
- in_fragmented_record = false;
- scratch->clear();
- break;
- }
- }
- }
- return false;
-}
-
-uint64_t Reader::LastRecordOffset() {
- return last_record_offset_;
-}
-
-void Reader::ReportCorruption(size_t bytes, const char* reason) {
- ReportDrop(bytes, Status::Corruption(reason));
-}
-
-void Reader::ReportDrop(size_t bytes, const Status& reason) {
- if (reporter_ != NULL &&
- end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) {
- reporter_->Corruption(bytes, reason);
- }
-}
-
-unsigned int Reader::ReadPhysicalRecord(Slice* result) {
- while (true) {
- if (buffer_.size() < kHeaderSize) {
- if (!eof_) {
- // Last read was a full read, so this is a trailer to skip
- buffer_.clear();
- Status status = file_->Read(kBlockSize, &buffer_, backing_store_);
- end_of_buffer_offset_ += buffer_.size();
- if (!status.ok()) {
- buffer_.clear();
- ReportDrop(kBlockSize, status);
- eof_ = true;
- return kEof;
- } else if (buffer_.size() < kBlockSize) {
- eof_ = true;
- }
- continue;
- } else if (buffer_.size() == 0) {
- // End of file
- return kEof;
- } else {
- size_t drop_size = buffer_.size();
- buffer_.clear();
- ReportCorruption(drop_size, "truncated record at end of file");
- return kEof;
- }
- }
-
- // Parse the header
- const char* header = buffer_.data();
- const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;
- const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;
- const unsigned int type = header[6];
- const uint32_t length = a | (b << 8);
- if (kHeaderSize + length > buffer_.size()) {
- size_t drop_size = buffer_.size();
- buffer_.clear();
- ReportCorruption(drop_size, "bad record length");
- return kBadRecord;
- }
-
- if (type == kZeroType && length == 0) {
- // Skip zero length record without reporting any drops since
- // such records are produced by the mmap based writing code in
- // env_posix.cc that preallocates file regions.
- buffer_.clear();
- return kBadRecord;
- }
-
- // Check crc
- if (checksum_) {
- uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
- uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
- if (actual_crc != expected_crc) {
- // Drop the rest of the buffer since "length" itself may have
- // been corrupted and if we trust it, we could find some
- // fragment of a real log record that just happens to look
- // like a valid log record.
- size_t drop_size = buffer_.size();
- buffer_.clear();
- ReportCorruption(drop_size, "checksum mismatch");
- return kBadRecord;
- }
- }
-
- buffer_.remove_prefix(kHeaderSize + length);
-
- // Skip physical record that started before initial_offset_
- if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length <
- initial_offset_) {
- result->clear();
- return kBadRecord;
- }
-
- *result = Slice(header + kHeaderSize, length);
- return type;
- }
-}
-
-} // namespace log
-} // namespace leveldb
diff --git a/src/leveldb/db/log_reader.h b/src/leveldb/db/log_reader.h
deleted file mode 100644
index 82d4bee68d..0000000000
--- a/src/leveldb/db/log_reader.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_LOG_READER_H_
-#define STORAGE_LEVELDB_DB_LOG_READER_H_
-
-#include <stdint.h>
-
-#include "db/log_format.h"
-#include "leveldb/slice.h"
-#include "leveldb/status.h"
-
-namespace leveldb {
-
-class SequentialFile;
-
-namespace log {
-
-class Reader {
- public:
- // Interface for reporting errors.
- class Reporter {
- public:
- virtual ~Reporter();
-
- // Some corruption was detected. "size" is the approximate number
- // of bytes dropped due to the corruption.
- virtual void Corruption(size_t bytes, const Status& status) = 0;
- };
-
- // Create a reader that will return log records from "*file".
- // "*file" must remain live while this Reader is in use.
- //
- // If "reporter" is non-NULL, it is notified whenever some data is
- // dropped due to a detected corruption. "*reporter" must remain
- // live while this Reader is in use.
- //
- // If "checksum" is true, verify checksums if available.
- //
- // The Reader will start reading at the first record located at physical
- // position >= initial_offset within the file.
- Reader(SequentialFile* file, Reporter* reporter, bool checksum,
- uint64_t initial_offset);
-
- ~Reader();
-
- // Read the next record into *record. Returns true if read
- // successfully, false if we hit end of the input. May use
- // "*scratch" as temporary storage. The contents filled in *record
- // will only be valid until the next mutating operation on this
- // reader or the next mutation to *scratch.
- bool ReadRecord(Slice* record, std::string* scratch);
-
- // Returns the physical offset of the last record returned by ReadRecord.
- //
- // Undefined before the first call to ReadRecord.
- uint64_t LastRecordOffset();
-
- private:
- SequentialFile* const file_;
- Reporter* const reporter_;
- bool const checksum_;
- char* const backing_store_;
- Slice buffer_;
- bool eof_; // Last Read() indicated EOF by returning < kBlockSize
-
- // Offset of the last record returned by ReadRecord.
- uint64_t last_record_offset_;
- // Offset of the first location past the end of buffer_.
- uint64_t end_of_buffer_offset_;
-
- // Offset at which to start looking for the first record to return
- uint64_t const initial_offset_;
-
- // Extend record types with the following special values
- enum {
- kEof = kMaxRecordType + 1,
- // Returned whenever we find an invalid physical record.
- // Currently there are three situations in which this happens:
- // * The record has an invalid CRC (ReadPhysicalRecord reports a drop)
- // * The record is a 0-length record (No drop is reported)
- // * The record is below constructor's initial_offset (No drop is reported)
- kBadRecord = kMaxRecordType + 2
- };
-
- // Skips all blocks that are completely before "initial_offset_".
- //
- // Returns true on success. Handles reporting.
- bool SkipToInitialBlock();
-
- // Return type, or one of the preceding special values
- unsigned int ReadPhysicalRecord(Slice* result);
-
- // Reports dropped bytes to the reporter.
- // buffer_ must be updated to remove the dropped bytes prior to invocation.
- void ReportCorruption(size_t bytes, const char* reason);
- void ReportDrop(size_t bytes, const Status& reason);
-
- // No copying allowed
- Reader(const Reader&);
- void operator=(const Reader&);
-};
-
-} // namespace log
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_LOG_READER_H_
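
A minimal sketch of how this interface is typically driven, assuming a log
file name supplied by the caller (the helper below is illustrative, not part
of this tree):

    #include <string>
    #include "db/log_reader.h"
    #include "leveldb/env.h"

    // Count the records in a leveldb log file. A NULL reporter means corrupt
    // regions are skipped silently; checksum=true verifies each record's CRC.
    leveldb::Status CountLogRecords(leveldb::Env* env,
                                    const std::string& fname,
                                    int* count) {
      leveldb::SequentialFile* file;
      leveldb::Status s = env->NewSequentialFile(fname, &file);
      if (!s.ok()) return s;
      leveldb::log::Reader reader(file, NULL, true /*checksum*/,
                                  0 /*initial_offset*/);
      std::string scratch;
      leveldb::Slice record;
      *count = 0;
      // "record" stays valid only until the next ReadRecord() call.
      while (reader.ReadRecord(&record, &scratch)) {
        ++*count;
      }
      delete file;  // the Reader does not own the file
      return leveldb::Status::OK();
    }
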
diff --git a/src/leveldb/db/log_test.cc b/src/leveldb/db/log_test.cc
deleted file mode 100644
index 4c5cf87573..0000000000
--- a/src/leveldb/db/log_test.cc
+++ /dev/null
@@ -1,500 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/log_reader.h"
-#include "db/log_writer.h"
-#include "leveldb/env.h"
-#include "util/coding.h"
-#include "util/crc32c.h"
-#include "util/random.h"
-#include "util/testharness.h"
-
-namespace leveldb {
-namespace log {
-
-// Construct a string of the specified length made out of the supplied
-// partial string.
-static std::string BigString(const std::string& partial_string, size_t n) {
- std::string result;
- while (result.size() < n) {
- result.append(partial_string);
- }
- result.resize(n);
- return result;
-}
-
-// Construct a string from a number
-static std::string NumberString(int n) {
- char buf[50];
- snprintf(buf, sizeof(buf), "%d.", n);
- return std::string(buf);
-}
-
-// Return a skewed potentially long string
-static std::string RandomSkewedString(int i, Random* rnd) {
- return BigString(NumberString(i), rnd->Skewed(17));
-}
-
-class LogTest {
- private:
- class StringDest : public WritableFile {
- public:
- std::string contents_;
-
- virtual Status Close() { return Status::OK(); }
- virtual Status Flush() { return Status::OK(); }
- virtual Status Sync() { return Status::OK(); }
- virtual Status Append(const Slice& slice) {
- contents_.append(slice.data(), slice.size());
- return Status::OK();
- }
- };
-
- class StringSource : public SequentialFile {
- public:
- Slice contents_;
- bool force_error_;
- bool returned_partial_;
- StringSource() : force_error_(false), returned_partial_(false) { }
-
- virtual Status Read(size_t n, Slice* result, char* scratch) {
- ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error";
-
- if (force_error_) {
- force_error_ = false;
- returned_partial_ = true;
- return Status::Corruption("read error");
- }
-
- if (contents_.size() < n) {
- n = contents_.size();
- returned_partial_ = true;
- }
- *result = Slice(contents_.data(), n);
- contents_.remove_prefix(n);
- return Status::OK();
- }
-
- virtual Status Skip(uint64_t n) {
- if (n > contents_.size()) {
- contents_.clear();
-        return Status::NotFound("in-memory file skipped past end");
- }
-
- contents_.remove_prefix(n);
-
- return Status::OK();
- }
- };
-
- class ReportCollector : public Reader::Reporter {
- public:
- size_t dropped_bytes_;
- std::string message_;
-
- ReportCollector() : dropped_bytes_(0) { }
- virtual void Corruption(size_t bytes, const Status& status) {
- dropped_bytes_ += bytes;
- message_.append(status.ToString());
- }
- };
-
- StringDest dest_;
- StringSource source_;
- ReportCollector report_;
- bool reading_;
- Writer writer_;
- Reader reader_;
-
- // Record metadata for testing initial offset functionality
- static size_t initial_offset_record_sizes_[];
- static uint64_t initial_offset_last_record_offsets_[];
-
- public:
- LogTest() : reading_(false),
- writer_(&dest_),
- reader_(&source_, &report_, true/*checksum*/,
- 0/*initial_offset*/) {
- }
-
- void Write(const std::string& msg) {
- ASSERT_TRUE(!reading_) << "Write() after starting to read";
- writer_.AddRecord(Slice(msg));
- }
-
- size_t WrittenBytes() const {
- return dest_.contents_.size();
- }
-
- std::string Read() {
- if (!reading_) {
- reading_ = true;
- source_.contents_ = Slice(dest_.contents_);
- }
- std::string scratch;
- Slice record;
- if (reader_.ReadRecord(&record, &scratch)) {
- return record.ToString();
- } else {
- return "EOF";
- }
- }
-
- void IncrementByte(int offset, int delta) {
- dest_.contents_[offset] += delta;
- }
-
- void SetByte(int offset, char new_byte) {
- dest_.contents_[offset] = new_byte;
- }
-
- void ShrinkSize(int bytes) {
- dest_.contents_.resize(dest_.contents_.size() - bytes);
- }
-
- void FixChecksum(int header_offset, int len) {
- // Compute crc of type/len/data
- uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len);
- crc = crc32c::Mask(crc);
- EncodeFixed32(&dest_.contents_[header_offset], crc);
- }
-
- void ForceError() {
- source_.force_error_ = true;
- }
-
- size_t DroppedBytes() const {
- return report_.dropped_bytes_;
- }
-
- std::string ReportMessage() const {
- return report_.message_;
- }
-
-  // Returns "OK" iff the recorded error message contains "msg";
-  // otherwise returns the recorded message itself.
- std::string MatchError(const std::string& msg) const {
- if (report_.message_.find(msg) == std::string::npos) {
- return report_.message_;
- } else {
- return "OK";
- }
- }
-
- void WriteInitialOffsetLog() {
- for (int i = 0; i < 4; i++) {
- std::string record(initial_offset_record_sizes_[i],
- static_cast<char>('a' + i));
- Write(record);
- }
- }
-
- void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
- WriteInitialOffsetLog();
- reading_ = true;
- source_.contents_ = Slice(dest_.contents_);
- Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
- WrittenBytes() + offset_past_end);
- Slice record;
- std::string scratch;
- ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch));
- delete offset_reader;
- }
-
- void CheckInitialOffsetRecord(uint64_t initial_offset,
- int expected_record_offset) {
- WriteInitialOffsetLog();
- reading_ = true;
- source_.contents_ = Slice(dest_.contents_);
- Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
- initial_offset);
- Slice record;
- std::string scratch;
- ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
- ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
- record.size());
- ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
- offset_reader->LastRecordOffset());
- ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
- delete offset_reader;
- }
-
-};
-
-size_t LogTest::initial_offset_record_sizes_[] =
- {10000, // Two sizable records in first block
- 10000,
- 2 * log::kBlockSize - 1000, // Span three blocks
- 1};
-
-uint64_t LogTest::initial_offset_last_record_offsets_[] =
- {0,
- kHeaderSize + 10000,
- 2 * (kHeaderSize + 10000),
- 2 * (kHeaderSize + 10000) +
- (2 * log::kBlockSize - 1000) + 3 * kHeaderSize};
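-// With kHeaderSize == 7 and kBlockSize == 32768 (both from log_format.h),
-// the four records above start at offsets 0, 10007, 20014 and 84571: the
-// third record's 2*32768 - 1000 = 64536 payload bytes are split across
-// three blocks, so it pays for three headers before the fourth begins.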
-
-
-TEST(LogTest, Empty) {
- ASSERT_EQ("EOF", Read());
-}
-
-TEST(LogTest, ReadWrite) {
- Write("foo");
- Write("bar");
- Write("");
- Write("xxxx");
- ASSERT_EQ("foo", Read());
- ASSERT_EQ("bar", Read());
- ASSERT_EQ("", Read());
- ASSERT_EQ("xxxx", Read());
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
-}
-
-TEST(LogTest, ManyBlocks) {
- for (int i = 0; i < 100000; i++) {
- Write(NumberString(i));
- }
- for (int i = 0; i < 100000; i++) {
- ASSERT_EQ(NumberString(i), Read());
- }
- ASSERT_EQ("EOF", Read());
-}
-
-TEST(LogTest, Fragmentation) {
- Write("small");
- Write(BigString("medium", 50000));
- Write(BigString("large", 100000));
- ASSERT_EQ("small", Read());
- ASSERT_EQ(BigString("medium", 50000), Read());
- ASSERT_EQ(BigString("large", 100000), Read());
- ASSERT_EQ("EOF", Read());
-}
-
-TEST(LogTest, MarginalTrailer) {
- // Make a trailer that is exactly the same length as an empty record.
- const int n = kBlockSize - 2*kHeaderSize;
- Write(BigString("foo", n));
- ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
- Write("");
- Write("bar");
- ASSERT_EQ(BigString("foo", n), Read());
- ASSERT_EQ("", Read());
- ASSERT_EQ("bar", Read());
- ASSERT_EQ("EOF", Read());
-}
-
-TEST(LogTest, MarginalTrailer2) {
-  // Leave a trailer that is exactly the same length as an empty record,
-  // but write nothing into it; nothing should be reported as dropped.
- const int n = kBlockSize - 2*kHeaderSize;
- Write(BigString("foo", n));
- ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
- Write("bar");
- ASSERT_EQ(BigString("foo", n), Read());
- ASSERT_EQ("bar", Read());
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(0, DroppedBytes());
- ASSERT_EQ("", ReportMessage());
-}
-
-TEST(LogTest, ShortTrailer) {
- const int n = kBlockSize - 2*kHeaderSize + 4;
- Write(BigString("foo", n));
- ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
- Write("");
- Write("bar");
- ASSERT_EQ(BigString("foo", n), Read());
- ASSERT_EQ("", Read());
- ASSERT_EQ("bar", Read());
- ASSERT_EQ("EOF", Read());
-}
-
-TEST(LogTest, AlignedEof) {
- const int n = kBlockSize - 2*kHeaderSize + 4;
- Write(BigString("foo", n));
- ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
- ASSERT_EQ(BigString("foo", n), Read());
- ASSERT_EQ("EOF", Read());
-}
-
-TEST(LogTest, RandomRead) {
- const int N = 500;
- Random write_rnd(301);
- for (int i = 0; i < N; i++) {
- Write(RandomSkewedString(i, &write_rnd));
- }
- Random read_rnd(301);
- for (int i = 0; i < N; i++) {
- ASSERT_EQ(RandomSkewedString(i, &read_rnd), Read());
- }
- ASSERT_EQ("EOF", Read());
-}
-
-// Tests of all the error paths in log_reader.cc follow:
-
-TEST(LogTest, ReadError) {
- Write("foo");
- ForceError();
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(kBlockSize, DroppedBytes());
- ASSERT_EQ("OK", MatchError("read error"));
-}
-
-TEST(LogTest, BadRecordType) {
- Write("foo");
- // Type is stored in header[6]
- IncrementByte(6, 100);
- FixChecksum(0, 3);
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(3, DroppedBytes());
- ASSERT_EQ("OK", MatchError("unknown record type"));
-}
-
-TEST(LogTest, TruncatedTrailingRecord) {
- Write("foo");
- ShrinkSize(4); // Drop all payload as well as a header byte
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(kHeaderSize - 1, DroppedBytes());
- ASSERT_EQ("OK", MatchError("truncated record at end of file"));
-}
-
-TEST(LogTest, BadLength) {
- Write("foo");
- ShrinkSize(1);
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(kHeaderSize + 2, DroppedBytes());
- ASSERT_EQ("OK", MatchError("bad record length"));
-}
-
-TEST(LogTest, ChecksumMismatch) {
- Write("foo");
- IncrementByte(0, 10);
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(10, DroppedBytes());
- ASSERT_EQ("OK", MatchError("checksum mismatch"));
-}
-
-TEST(LogTest, UnexpectedMiddleType) {
- Write("foo");
- SetByte(6, kMiddleType);
- FixChecksum(0, 3);
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(3, DroppedBytes());
- ASSERT_EQ("OK", MatchError("missing start"));
-}
-
-TEST(LogTest, UnexpectedLastType) {
- Write("foo");
- SetByte(6, kLastType);
- FixChecksum(0, 3);
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(3, DroppedBytes());
- ASSERT_EQ("OK", MatchError("missing start"));
-}
-
-TEST(LogTest, UnexpectedFullType) {
- Write("foo");
- Write("bar");
- SetByte(6, kFirstType);
- FixChecksum(0, 3);
- ASSERT_EQ("bar", Read());
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(3, DroppedBytes());
- ASSERT_EQ("OK", MatchError("partial record without end"));
-}
-
-TEST(LogTest, UnexpectedFirstType) {
- Write("foo");
- Write(BigString("bar", 100000));
- SetByte(6, kFirstType);
- FixChecksum(0, 3);
- ASSERT_EQ(BigString("bar", 100000), Read());
- ASSERT_EQ("EOF", Read());
- ASSERT_EQ(3, DroppedBytes());
- ASSERT_EQ("OK", MatchError("partial record without end"));
-}
-
-TEST(LogTest, ErrorJoinsRecords) {
- // Consider two fragmented records:
- // first(R1) last(R1) first(R2) last(R2)
- // where the middle two fragments disappear. We do not want
- // first(R1),last(R2) to get joined and returned as a valid record.
-
- // Write records that span two blocks
- Write(BigString("foo", kBlockSize));
- Write(BigString("bar", kBlockSize));
- Write("correct");
-
- // Wipe the middle block
- for (int offset = kBlockSize; offset < 2*kBlockSize; offset++) {
- SetByte(offset, 'x');
- }
-
- ASSERT_EQ("correct", Read());
- ASSERT_EQ("EOF", Read());
- const int dropped = DroppedBytes();
- ASSERT_LE(dropped, 2*kBlockSize + 100);
- ASSERT_GE(dropped, 2*kBlockSize);
-}
-
-TEST(LogTest, ReadStart) {
- CheckInitialOffsetRecord(0, 0);
-}
-
-TEST(LogTest, ReadSecondOneOff) {
- CheckInitialOffsetRecord(1, 1);
-}
-
-TEST(LogTest, ReadSecondTenThousand) {
- CheckInitialOffsetRecord(10000, 1);
-}
-
-TEST(LogTest, ReadSecondStart) {
- CheckInitialOffsetRecord(10007, 1);
-}
-
-TEST(LogTest, ReadThirdOneOff) {
- CheckInitialOffsetRecord(10008, 2);
-}
-
-TEST(LogTest, ReadThirdStart) {
- CheckInitialOffsetRecord(20014, 2);
-}
-
-TEST(LogTest, ReadFourthOneOff) {
- CheckInitialOffsetRecord(20015, 3);
-}
-
-TEST(LogTest, ReadFourthFirstBlockTrailer) {
- CheckInitialOffsetRecord(log::kBlockSize - 4, 3);
-}
-
-TEST(LogTest, ReadFourthMiddleBlock) {
- CheckInitialOffsetRecord(log::kBlockSize + 1, 3);
-}
-
-TEST(LogTest, ReadFourthLastBlock) {
- CheckInitialOffsetRecord(2 * log::kBlockSize + 1, 3);
-}
-
-TEST(LogTest, ReadFourthStart) {
- CheckInitialOffsetRecord(
-      // Offset at which the fourth record begins (see
-      // initial_offset_last_record_offsets_[3] above).
-      2 * (kHeaderSize + 10000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
- 3);
-}
-
-TEST(LogTest, ReadEnd) {
- CheckOffsetPastEndReturnsNoRecords(0);
-}
-
-TEST(LogTest, ReadPastEnd) {
- CheckOffsetPastEndReturnsNoRecords(5);
-}
-
-} // namespace log
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
diff --git a/src/leveldb/db/log_writer.cc b/src/leveldb/db/log_writer.cc
deleted file mode 100644
index 2da99ac088..0000000000
--- a/src/leveldb/db/log_writer.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/log_writer.h"
-
-#include <stdint.h>
-#include "leveldb/env.h"
-#include "util/coding.h"
-#include "util/crc32c.h"
-
-namespace leveldb {
-namespace log {
-
-Writer::Writer(WritableFile* dest)
- : dest_(dest),
- block_offset_(0) {
- for (int i = 0; i <= kMaxRecordType; i++) {
- char t = static_cast<char>(i);
- type_crc_[i] = crc32c::Value(&t, 1);
- }
-}
-
-Writer::~Writer() {
-}
-
-Status Writer::AddRecord(const Slice& slice) {
- const char* ptr = slice.data();
- size_t left = slice.size();
-
- // Fragment the record if necessary and emit it. Note that if slice
- // is empty, we still want to iterate once to emit a single
- // zero-length record
- Status s;
- bool begin = true;
- do {
- const int leftover = kBlockSize - block_offset_;
- assert(leftover >= 0);
- if (leftover < kHeaderSize) {
- // Switch to a new block
- if (leftover > 0) {
- // Fill the trailer (literal below relies on kHeaderSize being 7)
- assert(kHeaderSize == 7);
- dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
- }
- block_offset_ = 0;
- }
-
- // Invariant: we never leave < kHeaderSize bytes in a block.
- assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
-
- const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
- const size_t fragment_length = (left < avail) ? left : avail;
-
- RecordType type;
- const bool end = (left == fragment_length);
- if (begin && end) {
- type = kFullType;
- } else if (begin) {
- type = kFirstType;
- } else if (end) {
- type = kLastType;
- } else {
- type = kMiddleType;
- }
-
- s = EmitPhysicalRecord(type, ptr, fragment_length);
- ptr += fragment_length;
- left -= fragment_length;
- begin = false;
- } while (s.ok() && left > 0);
- return s;
-}
-
-Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
- assert(n <= 0xffff); // Must fit in two bytes
- assert(block_offset_ + kHeaderSize + n <= kBlockSize);
-
- // Format the header
- char buf[kHeaderSize];
- buf[4] = static_cast<char>(n & 0xff);
- buf[5] = static_cast<char>(n >> 8);
- buf[6] = static_cast<char>(t);
-
- // Compute the crc of the record type and the payload.
- uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);
- crc = crc32c::Mask(crc); // Adjust for storage
- EncodeFixed32(buf, crc);
-
- // Write the header and the payload
- Status s = dest_->Append(Slice(buf, kHeaderSize));
- if (s.ok()) {
- s = dest_->Append(Slice(ptr, n));
- if (s.ok()) {
- s = dest_->Flush();
- }
- }
- block_offset_ += kHeaderSize + n;
- return s;
-}
-
-} // namespace log
-} // namespace leveldb
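
EmitPhysicalRecord pins down the 7-byte header layout: bytes 0-3 hold the
masked crc32c of the type byte plus payload, bytes 4-5 the payload length as
a little-endian uint16, and byte 6 the record type. A sketch of the inverse
operation, using DecodeFixed32 from util/coding.h (the struct and function
names are illustrative):

    #include <stdint.h>
    #include "util/coding.h"

    struct RecordHeader {
      uint32_t masked_crc;  // crc32c of type byte + payload, masked for storage
      uint16_t length;      // payload length
      uint8_t type;         // kFullType, kFirstType, kMiddleType or kLastType
    };

    // "header" must point at kHeaderSize (7) readable bytes; no validation.
    static RecordHeader DecodeHeader(const char* header) {
      RecordHeader h;
      h.masked_crc = leveldb::DecodeFixed32(header);
      h.length = static_cast<uint16_t>(
          static_cast<uint8_t>(header[4]) |
          (static_cast<uint8_t>(header[5]) << 8));
      h.type = static_cast<uint8_t>(header[6]);
      return h;
    }

For example, a 100000-byte record appended to an empty log (kBlockSize =
32768, kHeaderSize = 7) is emitted as four fragments: a kFirstType and two
kMiddleType fragments of 32761 payload bytes each, then a kLastType fragment
carrying the remaining 1717 bytes.
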
diff --git a/src/leveldb/db/log_writer.h b/src/leveldb/db/log_writer.h
deleted file mode 100644
index a3a954d967..0000000000
--- a/src/leveldb/db/log_writer.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_
-#define STORAGE_LEVELDB_DB_LOG_WRITER_H_
-
-#include <stdint.h>
-#include "db/log_format.h"
-#include "leveldb/slice.h"
-#include "leveldb/status.h"
-
-namespace leveldb {
-
-class WritableFile;
-
-namespace log {
-
-class Writer {
- public:
- // Create a writer that will append data to "*dest".
- // "*dest" must be initially empty.
- // "*dest" must remain live while this Writer is in use.
- explicit Writer(WritableFile* dest);
- ~Writer();
-
- Status AddRecord(const Slice& slice);
-
- private:
- WritableFile* dest_;
- int block_offset_; // Current offset in block
-
- // crc32c values for all supported record types. These are
- // pre-computed to reduce the overhead of computing the crc of the
- // record type stored in the header.
- uint32_t type_crc_[kMaxRecordType + 1];
-
- Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
-
- // No copying allowed
- Writer(const Writer&);
- void operator=(const Writer&);
-};
-
-} // namespace log
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_
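
For completeness, a sketch of the write side (helper name and file name are
illustrative; note the requirement above that "*dest" start out empty):

    #include <string>
    #include "db/log_writer.h"
    #include "leveldb/env.h"

    leveldb::Status AppendTwoRecords(leveldb::Env* env,
                                     const std::string& fname) {
      leveldb::WritableFile* file;
      leveldb::Status s = env->NewWritableFile(fname, &file);
      if (!s.ok()) return s;
      {
        leveldb::log::Writer writer(file);  // freshly created, hence empty
        s = writer.AddRecord(leveldb::Slice("first record"));
        if (s.ok()) s = writer.AddRecord(leveldb::Slice("second record"));
      }
      if (s.ok()) s = file->Sync();
      if (s.ok()) s = file->Close();
      delete file;
      return s;
    }
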
diff --git a/src/leveldb/db/memtable.cc b/src/leveldb/db/memtable.cc
deleted file mode 100644
index bfec0a7e7a..0000000000
--- a/src/leveldb/db/memtable.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/memtable.h"
-#include "db/dbformat.h"
-#include "leveldb/comparator.h"
-#include "leveldb/env.h"
-#include "leveldb/iterator.h"
-#include "util/coding.h"
-
-namespace leveldb {
-
-static Slice GetLengthPrefixedSlice(const char* data) {
- uint32_t len;
- const char* p = data;
- p = GetVarint32Ptr(p, p + 5, &len); // +5: we assume "p" is not corrupted
- return Slice(p, len);
-}
-
-MemTable::MemTable(const InternalKeyComparator& cmp)
- : comparator_(cmp),
- refs_(0),
- table_(comparator_, &arena_) {
-}
-
-MemTable::~MemTable() {
- assert(refs_ == 0);
-}
-
-size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); }
-
-int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
- const {
- // Internal keys are encoded as length-prefixed strings.
- Slice a = GetLengthPrefixedSlice(aptr);
- Slice b = GetLengthPrefixedSlice(bptr);
- return comparator.Compare(a, b);
-}
-
-// Encode a suitable internal key target for "target" and return it.
-// Uses *scratch as scratch space, and the returned pointer will point
-// into this scratch space.
-static const char* EncodeKey(std::string* scratch, const Slice& target) {
- scratch->clear();
- PutVarint32(scratch, target.size());
- scratch->append(target.data(), target.size());
- return scratch->data();
-}
-
-class MemTableIterator: public Iterator {
- public:
- explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
-
- virtual bool Valid() const { return iter_.Valid(); }
- virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
- virtual void SeekToFirst() { iter_.SeekToFirst(); }
- virtual void SeekToLast() { iter_.SeekToLast(); }
- virtual void Next() { iter_.Next(); }
- virtual void Prev() { iter_.Prev(); }
- virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
- virtual Slice value() const {
- Slice key_slice = GetLengthPrefixedSlice(iter_.key());
- return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
- }
-
- virtual Status status() const { return Status::OK(); }
-
- private:
- MemTable::Table::Iterator iter_;
- std::string tmp_; // For passing to EncodeKey
-
- // No copying allowed
- MemTableIterator(const MemTableIterator&);
- void operator=(const MemTableIterator&);
-};
-
-Iterator* MemTable::NewIterator() {
- return new MemTableIterator(&table_);
-}
-
-void MemTable::Add(SequenceNumber s, ValueType type,
- const Slice& key,
- const Slice& value) {
- // Format of an entry is concatenation of:
- // key_size : varint32 of internal_key.size()
- // key bytes : char[internal_key.size()]
- // value_size : varint32 of value.size()
- // value bytes : char[value.size()]
- size_t key_size = key.size();
- size_t val_size = value.size();
- size_t internal_key_size = key_size + 8;
- const size_t encoded_len =
- VarintLength(internal_key_size) + internal_key_size +
- VarintLength(val_size) + val_size;
- char* buf = arena_.Allocate(encoded_len);
- char* p = EncodeVarint32(buf, internal_key_size);
- memcpy(p, key.data(), key_size);
- p += key_size;
- EncodeFixed64(p, (s << 8) | type);
- p += 8;
- p = EncodeVarint32(p, val_size);
- memcpy(p, value.data(), val_size);
- assert((p + val_size) - buf == encoded_len);
- table_.Insert(buf);
-}
-
-bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
- Slice memkey = key.memtable_key();
- Table::Iterator iter(&table_);
- iter.Seek(memkey.data());
- if (iter.Valid()) {
- // entry format is:
- // klength varint32
- // userkey char[klength]
- // tag uint64
- // vlength varint32
- // value char[vlength]
- // Check that it belongs to same user key. We do not check the
- // sequence number since the Seek() call above should have skipped
- // all entries with overly large sequence numbers.
- const char* entry = iter.key();
- uint32_t key_length;
- const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
- if (comparator_.comparator.user_comparator()->Compare(
- Slice(key_ptr, key_length - 8),
- key.user_key()) == 0) {
- // Correct user key
- const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
- switch (static_cast<ValueType>(tag & 0xff)) {
- case kTypeValue: {
- Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
- value->assign(v.data(), v.size());
- return true;
- }
- case kTypeDeletion:
- *s = Status::NotFound(Slice());
- return true;
- }
- }
- }
- return false;
-}
-
-} // namespace leveldb
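
Concretely, MemTable::Add encodes ("foo", "bar") at sequence 5 with
kTypeValue (== 1 in db/dbformat.h) as the following 16 bytes:

    0x0b                                     varint32 internal_key_size (3 + 8)
    'f' 'o' 'o'                              user key bytes
    0x01 0x05 0x00 0x00 0x00 0x00 0x00 0x00  fixed64 tag: (5 << 8) | kTypeValue
    0x03                                     varint32 value_size
    'b' 'a' 'r'                              value bytes
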
diff --git a/src/leveldb/db/memtable.h b/src/leveldb/db/memtable.h
deleted file mode 100644
index 92e90bb099..0000000000
--- a/src/leveldb/db/memtable.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_
-#define STORAGE_LEVELDB_DB_MEMTABLE_H_
-
-#include <string>
-#include "leveldb/db.h"
-#include "db/dbformat.h"
-#include "db/skiplist.h"
-#include "util/arena.h"
-
-namespace leveldb {
-
-class InternalKeyComparator;
-class Mutex;
-class MemTableIterator;
-
-class MemTable {
- public:
- // MemTables are reference counted. The initial reference count
- // is zero and the caller must call Ref() at least once.
- explicit MemTable(const InternalKeyComparator& comparator);
-
- // Increase reference count.
- void Ref() { ++refs_; }
-
- // Drop reference count. Delete if no more references exist.
- void Unref() {
- --refs_;
- assert(refs_ >= 0);
- if (refs_ <= 0) {
- delete this;
- }
- }
-
- // Returns an estimate of the number of bytes of data in use by this
- // data structure.
- //
- // REQUIRES: external synchronization to prevent simultaneous
- // operations on the same MemTable.
- size_t ApproximateMemoryUsage();
-
- // Return an iterator that yields the contents of the memtable.
- //
- // The caller must ensure that the underlying MemTable remains live
- // while the returned iterator is live. The keys returned by this
- // iterator are internal keys encoded by AppendInternalKey in the
-  // db/dbformat.{h,cc} module.
- Iterator* NewIterator();
-
- // Add an entry into memtable that maps key to value at the
- // specified sequence number and with the specified type.
- // Typically value will be empty if type==kTypeDeletion.
- void Add(SequenceNumber seq, ValueType type,
- const Slice& key,
- const Slice& value);
-
- // If memtable contains a value for key, store it in *value and return true.
- // If memtable contains a deletion for key, store a NotFound() error
- // in *status and return true.
- // Else, return false.
- bool Get(const LookupKey& key, std::string* value, Status* s);
-
- private:
- ~MemTable(); // Private since only Unref() should be used to delete it
-
- struct KeyComparator {
- const InternalKeyComparator comparator;
- explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
- int operator()(const char* a, const char* b) const;
- };
- friend class MemTableIterator;
- friend class MemTableBackwardIterator;
-
- typedef SkipList<const char*, KeyComparator> Table;
-
- KeyComparator comparator_;
- int refs_;
- Arena arena_;
- Table table_;
-
- // No copying allowed
- MemTable(const MemTable&);
- void operator=(const MemTable&);
-};
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_MEMTABLE_H_
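
A minimal sketch of the lifecycle this header describes, mirroring how
repair.cc below uses the class (names in the body are illustrative):

    #include <string>
    #include "db/dbformat.h"
    #include "db/memtable.h"
    #include "leveldb/comparator.h"

    void MemTableDemo() {
      leveldb::InternalKeyComparator icmp(leveldb::BytewiseComparator());
      leveldb::MemTable* mem = new leveldb::MemTable(icmp);
      mem->Ref();  // initial count is zero; the creator must take a reference

      mem->Add(1 /*sequence*/, leveldb::kTypeValue,
               leveldb::Slice("key"), leveldb::Slice("value"));

      std::string value;
      leveldb::Status s;
      leveldb::LookupKey lkey(leveldb::Slice("key"), 1 /*snapshot sequence*/);
      if (mem->Get(lkey, &value, &s) && s.ok()) {
        // value now holds "value"
      }

      mem->Unref();  // drops the last reference, which deletes the MemTable
    }
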
diff --git a/src/leveldb/db/repair.cc b/src/leveldb/db/repair.cc
deleted file mode 100644
index 022d52f3de..0000000000
--- a/src/leveldb/db/repair.cc
+++ /dev/null
@@ -1,389 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// We recover the contents of the descriptor from the other files we find.
-// (1) Any log files are first converted to tables
-// (2) We scan every table to compute
-// (a) smallest/largest for the table
-// (b) largest sequence number in the table
-// (3) We generate descriptor contents:
-// - log number is set to zero
-// - next-file-number is set to 1 + largest file number we found
-// - last-sequence-number is set to largest sequence# found across
-// all tables (see 2c)
-// - compaction pointers are cleared
-// - every table file is added at level 0
-//
-// Possible optimization 1:
-// (a) Compute total size and use to pick appropriate max-level M
-// (b) Sort tables by largest sequence# in the table
-// (c) For each table: if it overlaps earlier table, place in level-0,
-// else place in level-M.
-// Possible optimization 2:
-// Store per-table metadata (smallest, largest, largest-seq#, ...)
-// in the table's meta section to speed up ScanTable.
-
-#include "db/builder.h"
-#include "db/db_impl.h"
-#include "db/dbformat.h"
-#include "db/filename.h"
-#include "db/log_reader.h"
-#include "db/log_writer.h"
-#include "db/memtable.h"
-#include "db/table_cache.h"
-#include "db/version_edit.h"
-#include "db/write_batch_internal.h"
-#include "leveldb/comparator.h"
-#include "leveldb/db.h"
-#include "leveldb/env.h"
-
-namespace leveldb {
-
-namespace {
-
-class Repairer {
- public:
- Repairer(const std::string& dbname, const Options& options)
- : dbname_(dbname),
- env_(options.env),
- icmp_(options.comparator),
- ipolicy_(options.filter_policy),
- options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
- owns_info_log_(options_.info_log != options.info_log),
- owns_cache_(options_.block_cache != options.block_cache),
- next_file_number_(1) {
- // TableCache can be small since we expect each table to be opened once.
- table_cache_ = new TableCache(dbname_, &options_, 10);
- }
-
- ~Repairer() {
- delete table_cache_;
- if (owns_info_log_) {
- delete options_.info_log;
- }
- if (owns_cache_) {
- delete options_.block_cache;
- }
- }
-
- Status Run() {
- Status status = FindFiles();
- if (status.ok()) {
- ConvertLogFilesToTables();
- ExtractMetaData();
- status = WriteDescriptor();
- }
- if (status.ok()) {
- unsigned long long bytes = 0;
- for (size_t i = 0; i < tables_.size(); i++) {
- bytes += tables_[i].meta.file_size;
- }
- Log(options_.info_log,
- "**** Repaired leveldb %s; "
- "recovered %d files; %llu bytes. "
- "Some data may have been lost. "
- "****",
- dbname_.c_str(),
- static_cast<int>(tables_.size()),
- bytes);
- }
- return status;
- }
-
- private:
- struct TableInfo {
- FileMetaData meta;
- SequenceNumber max_sequence;
- };
-
- std::string const dbname_;
- Env* const env_;
- InternalKeyComparator const icmp_;
- InternalFilterPolicy const ipolicy_;
- Options const options_;
- bool owns_info_log_;
- bool owns_cache_;
- TableCache* table_cache_;
- VersionEdit edit_;
-
- std::vector<std::string> manifests_;
- std::vector<uint64_t> table_numbers_;
- std::vector<uint64_t> logs_;
- std::vector<TableInfo> tables_;
- uint64_t next_file_number_;
-
- Status FindFiles() {
- std::vector<std::string> filenames;
- Status status = env_->GetChildren(dbname_, &filenames);
- if (!status.ok()) {
- return status;
- }
- if (filenames.empty()) {
- return Status::IOError(dbname_, "repair found no files");
- }
-
- uint64_t number;
- FileType type;
- for (size_t i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type)) {
- if (type == kDescriptorFile) {
- manifests_.push_back(filenames[i]);
- } else {
- if (number + 1 > next_file_number_) {
- next_file_number_ = number + 1;
- }
- if (type == kLogFile) {
- logs_.push_back(number);
- } else if (type == kTableFile) {
- table_numbers_.push_back(number);
- } else {
- // Ignore other files
- }
- }
- }
- }
- return status;
- }
-
- void ConvertLogFilesToTables() {
- for (size_t i = 0; i < logs_.size(); i++) {
- std::string logname = LogFileName(dbname_, logs_[i]);
- Status status = ConvertLogToTable(logs_[i]);
- if (!status.ok()) {
- Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
- (unsigned long long) logs_[i],
- status.ToString().c_str());
- }
- ArchiveFile(logname);
- }
- }
-
- Status ConvertLogToTable(uint64_t log) {
- struct LogReporter : public log::Reader::Reporter {
- Env* env;
- Logger* info_log;
- uint64_t lognum;
- virtual void Corruption(size_t bytes, const Status& s) {
- // We print error messages for corruption, but continue repairing.
- Log(info_log, "Log #%llu: dropping %d bytes; %s",
- (unsigned long long) lognum,
- static_cast<int>(bytes),
- s.ToString().c_str());
- }
- };
-
- // Open the log file
- std::string logname = LogFileName(dbname_, log);
- SequentialFile* lfile;
- Status status = env_->NewSequentialFile(logname, &lfile);
- if (!status.ok()) {
- return status;
- }
-
- // Create the log reader.
- LogReporter reporter;
- reporter.env = env_;
- reporter.info_log = options_.info_log;
- reporter.lognum = log;
-    // Checksum verification is intentionally disabled here so that repair
-    // salvages as many records as possible.  With checksumming enabled,
-    // corruptions would instead cause entire commits to be skipped, which
-    // avoids propagating bad information (like overly large sequence
-    // numbers) but loses data this tool is trying to recover.
- log::Reader reader(lfile, &reporter, false/*do not checksum*/,
- 0/*initial_offset*/);
-
- // Read all the records and add to a memtable
- std::string scratch;
- Slice record;
- WriteBatch batch;
- MemTable* mem = new MemTable(icmp_);
- mem->Ref();
- int counter = 0;
- while (reader.ReadRecord(&record, &scratch)) {
- if (record.size() < 12) {
- reporter.Corruption(
- record.size(), Status::Corruption("log record too small"));
- continue;
- }
- WriteBatchInternal::SetContents(&batch, record);
- status = WriteBatchInternal::InsertInto(&batch, mem);
- if (status.ok()) {
- counter += WriteBatchInternal::Count(&batch);
- } else {
- Log(options_.info_log, "Log #%llu: ignoring %s",
- (unsigned long long) log,
- status.ToString().c_str());
- status = Status::OK(); // Keep going with rest of file
- }
- }
- delete lfile;
-
- // Do not record a version edit for this conversion to a Table
- // since ExtractMetaData() will also generate edits.
- FileMetaData meta;
- meta.number = next_file_number_++;
- Iterator* iter = mem->NewIterator();
- status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
- delete iter;
- mem->Unref();
- mem = NULL;
- if (status.ok()) {
- if (meta.file_size > 0) {
- table_numbers_.push_back(meta.number);
- }
- }
- Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
- (unsigned long long) log,
- counter,
- (unsigned long long) meta.number,
- status.ToString().c_str());
- return status;
- }
-
- void ExtractMetaData() {
- std::vector<TableInfo> kept;
- for (size_t i = 0; i < table_numbers_.size(); i++) {
- TableInfo t;
- t.meta.number = table_numbers_[i];
- Status status = ScanTable(&t);
- if (!status.ok()) {
- std::string fname = TableFileName(dbname_, table_numbers_[i]);
- Log(options_.info_log, "Table #%llu: ignoring %s",
- (unsigned long long) table_numbers_[i],
- status.ToString().c_str());
- ArchiveFile(fname);
- } else {
- tables_.push_back(t);
- }
- }
- }
-
- Status ScanTable(TableInfo* t) {
- std::string fname = TableFileName(dbname_, t->meta.number);
- int counter = 0;
- Status status = env_->GetFileSize(fname, &t->meta.file_size);
- if (status.ok()) {
- Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), t->meta.number, t->meta.file_size);
- bool empty = true;
- ParsedInternalKey parsed;
- t->max_sequence = 0;
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- Slice key = iter->key();
- if (!ParseInternalKey(key, &parsed)) {
- Log(options_.info_log, "Table #%llu: unparsable key %s",
- (unsigned long long) t->meta.number,
- EscapeString(key).c_str());
- continue;
- }
-
- counter++;
- if (empty) {
- empty = false;
- t->meta.smallest.DecodeFrom(key);
- }
- t->meta.largest.DecodeFrom(key);
- if (parsed.sequence > t->max_sequence) {
- t->max_sequence = parsed.sequence;
- }
- }
- if (!iter->status().ok()) {
- status = iter->status();
- }
- delete iter;
- }
- Log(options_.info_log, "Table #%llu: %d entries %s",
- (unsigned long long) t->meta.number,
- counter,
- status.ToString().c_str());
- return status;
- }
-
- Status WriteDescriptor() {
- std::string tmp = TempFileName(dbname_, 1);
- WritableFile* file;
- Status status = env_->NewWritableFile(tmp, &file);
- if (!status.ok()) {
- return status;
- }
-
- SequenceNumber max_sequence = 0;
- for (size_t i = 0; i < tables_.size(); i++) {
- if (max_sequence < tables_[i].max_sequence) {
- max_sequence = tables_[i].max_sequence;
- }
- }
-
- edit_.SetComparatorName(icmp_.user_comparator()->Name());
- edit_.SetLogNumber(0);
- edit_.SetNextFile(next_file_number_);
- edit_.SetLastSequence(max_sequence);
-
- for (size_t i = 0; i < tables_.size(); i++) {
- // TODO(opt): separate out into multiple levels
- const TableInfo& t = tables_[i];
- edit_.AddFile(0, t.meta.number, t.meta.file_size,
- t.meta.smallest, t.meta.largest);
- }
-
- //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
- {
- log::Writer log(file);
- std::string record;
- edit_.EncodeTo(&record);
- status = log.AddRecord(record);
- }
- if (status.ok()) {
- status = file->Close();
- }
- delete file;
- file = NULL;
-
- if (!status.ok()) {
- env_->DeleteFile(tmp);
- } else {
- // Discard older manifests
- for (size_t i = 0; i < manifests_.size(); i++) {
- ArchiveFile(dbname_ + "/" + manifests_[i]);
- }
-
- // Install new manifest
- status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
- if (status.ok()) {
- status = SetCurrentFile(env_, dbname_, 1);
- } else {
- env_->DeleteFile(tmp);
- }
- }
- return status;
- }
-
- void ArchiveFile(const std::string& fname) {
- // Move into another directory. E.g., for
- // dir/foo
- // rename to
- // dir/lost/foo
- const char* slash = strrchr(fname.c_str(), '/');
- std::string new_dir;
- if (slash != NULL) {
- new_dir.assign(fname.data(), slash - fname.data());
- }
- new_dir.append("/lost");
- env_->CreateDir(new_dir); // Ignore error
- std::string new_file = new_dir;
- new_file.append("/");
- new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
- Status s = env_->RenameFile(fname, new_file);
- Log(options_.info_log, "Archiving %s: %s\n",
- fname.c_str(), s.ToString().c_str());
- }
-};
-} // namespace
-
-Status RepairDB(const std::string& dbname, const Options& options) {
- Repairer repairer(dbname, options);
- return repairer.Run();
-}
-
-} // namespace leveldb
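
Invoking the repairer is a one-liner through the public API declared in
leveldb/db.h (the path below is a placeholder):

    #include <stdio.h>
    #include "leveldb/db.h"

    int main() {
      leveldb::Options options;  // default env, comparator and filter policy
      leveldb::Status s = leveldb::RepairDB("/path/to/damaged/db", options);
      fprintf(stderr, "repair: %s\n", s.ToString().c_str());
      return s.ok() ? 0 : 1;
    }
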
diff --git a/src/leveldb/db/skiplist.h b/src/leveldb/db/skiplist.h
deleted file mode 100644
index af85be6d01..0000000000
--- a/src/leveldb/db/skiplist.h
+++ /dev/null
@@ -1,379 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// Thread safety
-// -------------
-//
-// Writes require external synchronization, most likely a mutex.
-// Reads require a guarantee that the SkipList will not be destroyed
-// while the read is in progress. Apart from that, reads progress
-// without any internal locking or synchronization.
-//
-// Invariants:
-//
-// (1) Allocated nodes are never deleted until the SkipList is
-// destroyed. This is trivially guaranteed by the code since we
-// never delete any skip list nodes.
-//
-// (2) The contents of a Node except for the next/prev pointers are
-// immutable after the Node has been linked into the SkipList.
-// Only Insert() modifies the list, and it is careful to initialize
-// a node and use release-stores to publish the nodes in one or
-// more lists.
-//
-// ... prev vs. next pointer ordering ...
-
-#include <assert.h>
-#include <stdlib.h>
-#include "port/port.h"
-#include "util/arena.h"
-#include "util/random.h"
-
-namespace leveldb {
-
-class Arena;
-
-template<typename Key, class Comparator>
-class SkipList {
- private:
- struct Node;
-
- public:
- // Create a new SkipList object that will use "cmp" for comparing keys,
- // and will allocate memory using "*arena". Objects allocated in the arena
- // must remain allocated for the lifetime of the skiplist object.
- explicit SkipList(Comparator cmp, Arena* arena);
-
- // Insert key into the list.
- // REQUIRES: nothing that compares equal to key is currently in the list.
- void Insert(const Key& key);
-
- // Returns true iff an entry that compares equal to key is in the list.
- bool Contains(const Key& key) const;
-
- // Iteration over the contents of a skip list
- class Iterator {
- public:
- // Initialize an iterator over the specified list.
- // The returned iterator is not valid.
- explicit Iterator(const SkipList* list);
-
- // Returns true iff the iterator is positioned at a valid node.
- bool Valid() const;
-
- // Returns the key at the current position.
- // REQUIRES: Valid()
- const Key& key() const;
-
- // Advances to the next position.
- // REQUIRES: Valid()
- void Next();
-
- // Advances to the previous position.
- // REQUIRES: Valid()
- void Prev();
-
- // Advance to the first entry with a key >= target
- void Seek(const Key& target);
-
- // Position at the first entry in list.
- // Final state of iterator is Valid() iff list is not empty.
- void SeekToFirst();
-
- // Position at the last entry in list.
- // Final state of iterator is Valid() iff list is not empty.
- void SeekToLast();
-
- private:
- const SkipList* list_;
- Node* node_;
- // Intentionally copyable
- };
-
- private:
- enum { kMaxHeight = 12 };
-
- // Immutable after construction
- Comparator const compare_;
- Arena* const arena_; // Arena used for allocations of nodes
-
- Node* const head_;
-
- // Modified only by Insert(). Read racily by readers, but stale
- // values are ok.
- port::AtomicPointer max_height_; // Height of the entire list
-
- inline int GetMaxHeight() const {
- return static_cast<int>(
- reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
- }
-
- // Read/written only by Insert().
- Random rnd_;
-
- Node* NewNode(const Key& key, int height);
- int RandomHeight();
- bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
-
- // Return true if key is greater than the data stored in "n"
- bool KeyIsAfterNode(const Key& key, Node* n) const;
-
- // Return the earliest node that comes at or after key.
- // Return NULL if there is no such node.
- //
- // If prev is non-NULL, fills prev[level] with pointer to previous
- // node at "level" for every level in [0..max_height_-1].
- Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
-
- // Return the latest node with a key < key.
- // Return head_ if there is no such node.
- Node* FindLessThan(const Key& key) const;
-
- // Return the last node in the list.
- // Return head_ if list is empty.
- Node* FindLast() const;
-
- // No copying allowed
- SkipList(const SkipList&);
- void operator=(const SkipList&);
-};
-
-// Implementation details follow
-template<typename Key, class Comparator>
-struct SkipList<Key,Comparator>::Node {
- explicit Node(const Key& k) : key(k) { }
-
- Key const key;
-
- // Accessors/mutators for links. Wrapped in methods so we can
- // add the appropriate barriers as necessary.
- Node* Next(int n) {
- assert(n >= 0);
- // Use an 'acquire load' so that we observe a fully initialized
- // version of the returned Node.
- return reinterpret_cast<Node*>(next_[n].Acquire_Load());
- }
- void SetNext(int n, Node* x) {
- assert(n >= 0);
- // Use a 'release store' so that anybody who reads through this
- // pointer observes a fully initialized version of the inserted node.
- next_[n].Release_Store(x);
- }
-
- // No-barrier variants that can be safely used in a few locations.
- Node* NoBarrier_Next(int n) {
- assert(n >= 0);
- return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
- }
- void NoBarrier_SetNext(int n, Node* x) {
- assert(n >= 0);
- next_[n].NoBarrier_Store(x);
- }
-
- private:
- // Array of length equal to the node height. next_[0] is lowest level link.
- port::AtomicPointer next_[1];
-};
-
-template<typename Key, class Comparator>
-typename SkipList<Key,Comparator>::Node*
-SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
- char* mem = arena_->AllocateAligned(
- sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
- return new (mem) Node(key);
-}
-
-template<typename Key, class Comparator>
-inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
- list_ = list;
- node_ = NULL;
-}
-
-template<typename Key, class Comparator>
-inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
- return node_ != NULL;
-}
-
-template<typename Key, class Comparator>
-inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
- assert(Valid());
- return node_->key;
-}
-
-template<typename Key, class Comparator>
-inline void SkipList<Key,Comparator>::Iterator::Next() {
- assert(Valid());
- node_ = node_->Next(0);
-}
-
-template<typename Key, class Comparator>
-inline void SkipList<Key,Comparator>::Iterator::Prev() {
- // Instead of using explicit "prev" links, we just search for the
- // last node that falls before key.
- assert(Valid());
- node_ = list_->FindLessThan(node_->key);
- if (node_ == list_->head_) {
- node_ = NULL;
- }
-}
-
-template<typename Key, class Comparator>
-inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
- node_ = list_->FindGreaterOrEqual(target, NULL);
-}
-
-template<typename Key, class Comparator>
-inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
- node_ = list_->head_->Next(0);
-}
-
-template<typename Key, class Comparator>
-inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
- node_ = list_->FindLast();
- if (node_ == list_->head_) {
- node_ = NULL;
- }
-}
-
-template<typename Key, class Comparator>
-int SkipList<Key,Comparator>::RandomHeight() {
- // Increase height with probability 1 in kBranching
- static const unsigned int kBranching = 4;
- int height = 1;
- while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
- height++;
- }
- assert(height > 0);
- assert(height <= kMaxHeight);
- return height;
-}
-
-template<typename Key, class Comparator>
-bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
- // NULL n is considered infinite
- return (n != NULL) && (compare_(n->key, key) < 0);
-}
-
-template<typename Key, class Comparator>
-typename SkipList<Key,Comparator>::Node*
-SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
-    const {
- Node* x = head_;
- int level = GetMaxHeight() - 1;
- while (true) {
- Node* next = x->Next(level);
- if (KeyIsAfterNode(key, next)) {
- // Keep searching in this list
- x = next;
- } else {
- if (prev != NULL) prev[level] = x;
- if (level == 0) {
- return next;
- } else {
- // Switch to next list
- level--;
- }
- }
- }
-}
-
-template<typename Key, class Comparator>
-typename SkipList<Key,Comparator>::Node*
-SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
- Node* x = head_;
- int level = GetMaxHeight() - 1;
- while (true) {
- assert(x == head_ || compare_(x->key, key) < 0);
- Node* next = x->Next(level);
- if (next == NULL || compare_(next->key, key) >= 0) {
- if (level == 0) {
- return x;
- } else {
- // Switch to next list
- level--;
- }
- } else {
- x = next;
- }
- }
-}
-
-template<typename Key, class Comparator>
-typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
- const {
- Node* x = head_;
- int level = GetMaxHeight() - 1;
- while (true) {
- Node* next = x->Next(level);
- if (next == NULL) {
- if (level == 0) {
- return x;
- } else {
- // Switch to next list
- level--;
- }
- } else {
- x = next;
- }
- }
-}
-
-template<typename Key, class Comparator>
-SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
- : compare_(cmp),
- arena_(arena),
- head_(NewNode(0 /* any key will do */, kMaxHeight)),
- max_height_(reinterpret_cast<void*>(1)),
- rnd_(0xdeadbeef) {
- for (int i = 0; i < kMaxHeight; i++) {
- head_->SetNext(i, NULL);
- }
-}
-
-template<typename Key, class Comparator>
-void SkipList<Key,Comparator>::Insert(const Key& key) {
- // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
- // here since Insert() is externally synchronized.
- Node* prev[kMaxHeight];
- Node* x = FindGreaterOrEqual(key, prev);
-
- // Our data structure does not allow duplicate insertion
- assert(x == NULL || !Equal(key, x->key));
-
- int height = RandomHeight();
- if (height > GetMaxHeight()) {
- for (int i = GetMaxHeight(); i < height; i++) {
- prev[i] = head_;
- }
- //fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
-
- // It is ok to mutate max_height_ without any synchronization
- // with concurrent readers. A concurrent reader that observes
- // the new value of max_height_ will see either the old value of
- // new level pointers from head_ (NULL), or a new value set in
- // the loop below. In the former case the reader will
- // immediately drop to the next level since NULL sorts after all
- // keys. In the latter case the reader will use the new node.
- max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
- }
-
- x = NewNode(key, height);
- for (int i = 0; i < height; i++) {
- // NoBarrier_SetNext() suffices since we will add a barrier when
- // we publish a pointer to "x" in prev[i].
- x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
- prev[i]->SetNext(i, x);
- }
-}
-
-template<typename Key, class Comparator>
-bool SkipList<Key,Comparator>::Contains(const Key& key) const {
- Node* x = FindGreaterOrEqual(key, NULL);
- if (x != NULL && Equal(key, x->key)) {
- return true;
- } else {
- return false;
- }
-}
-
-} // namespace leveldb
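
With kBranching == 4, RandomHeight() gives P(height >= h) = (1/4)^(h-1), so
the expected node height is 4/3 and only about one node in 4^11 reaches the
kMaxHeight cap of 12. A throwaway sketch that checks the distribution
empirically (not part of this tree):

    #include <stdio.h>
    #include "util/random.h"

    int main() {
      static const int kMaxHeight = 12;         // mirrors SkipList::kMaxHeight
      static const unsigned int kBranching = 4;
      leveldb::Random rnd(301);
      int counts[kMaxHeight + 1] = {0};
      const int kTrials = 1000000;
      for (int i = 0; i < kTrials; i++) {
        // Same loop as SkipList::RandomHeight() above.
        int height = 1;
        while (height < kMaxHeight && ((rnd.Next() % kBranching) == 0)) {
          height++;
        }
        counts[height]++;
      }
      for (int h = 1; h <= kMaxHeight; h++) {
        printf("height %2d: %7.4f%%\n", h, 100.0 * counts[h] / kTrials);
      }
      return 0;
    }
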
diff --git a/src/leveldb/db/skiplist_test.cc b/src/leveldb/db/skiplist_test.cc
deleted file mode 100644
index c78f4b4fb1..0000000000
--- a/src/leveldb/db/skiplist_test.cc
+++ /dev/null
@@ -1,378 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/skiplist.h"
-#include <set>
-#include "leveldb/env.h"
-#include "util/arena.h"
-#include "util/hash.h"
-#include "util/random.h"
-#include "util/testharness.h"
-
-namespace leveldb {
-
-typedef uint64_t Key;
-
-struct Comparator {
- int operator()(const Key& a, const Key& b) const {
- if (a < b) {
- return -1;
- } else if (a > b) {
- return +1;
- } else {
- return 0;
- }
- }
-};
-
-class SkipTest { };
-
-TEST(SkipTest, Empty) {
- Arena arena;
- Comparator cmp;
- SkipList<Key, Comparator> list(cmp, &arena);
- ASSERT_TRUE(!list.Contains(10));
-
- SkipList<Key, Comparator>::Iterator iter(&list);
- ASSERT_TRUE(!iter.Valid());
- iter.SeekToFirst();
- ASSERT_TRUE(!iter.Valid());
- iter.Seek(100);
- ASSERT_TRUE(!iter.Valid());
- iter.SeekToLast();
- ASSERT_TRUE(!iter.Valid());
-}
-
-TEST(SkipTest, InsertAndLookup) {
- const int N = 2000;
- const int R = 5000;
- Random rnd(1000);
- std::set<Key> keys;
- Arena arena;
- Comparator cmp;
- SkipList<Key, Comparator> list(cmp, &arena);
- for (int i = 0; i < N; i++) {
- Key key = rnd.Next() % R;
- if (keys.insert(key).second) {
- list.Insert(key);
- }
- }
-
- for (int i = 0; i < R; i++) {
- if (list.Contains(i)) {
- ASSERT_EQ(keys.count(i), 1);
- } else {
- ASSERT_EQ(keys.count(i), 0);
- }
- }
-
- // Simple iterator tests
- {
- SkipList<Key, Comparator>::Iterator iter(&list);
- ASSERT_TRUE(!iter.Valid());
-
- iter.Seek(0);
- ASSERT_TRUE(iter.Valid());
- ASSERT_EQ(*(keys.begin()), iter.key());
-
- iter.SeekToFirst();
- ASSERT_TRUE(iter.Valid());
- ASSERT_EQ(*(keys.begin()), iter.key());
-
- iter.SeekToLast();
- ASSERT_TRUE(iter.Valid());
- ASSERT_EQ(*(keys.rbegin()), iter.key());
- }
-
- // Forward iteration test
- for (int i = 0; i < R; i++) {
- SkipList<Key, Comparator>::Iterator iter(&list);
- iter.Seek(i);
-
- // Compare against model iterator
- std::set<Key>::iterator model_iter = keys.lower_bound(i);
- for (int j = 0; j < 3; j++) {
- if (model_iter == keys.end()) {
- ASSERT_TRUE(!iter.Valid());
- break;
- } else {
- ASSERT_TRUE(iter.Valid());
- ASSERT_EQ(*model_iter, iter.key());
- ++model_iter;
- iter.Next();
- }
- }
- }
-
- // Backward iteration test
- {
- SkipList<Key, Comparator>::Iterator iter(&list);
- iter.SeekToLast();
-
- // Compare against model iterator
- for (std::set<Key>::reverse_iterator model_iter = keys.rbegin();
- model_iter != keys.rend();
- ++model_iter) {
- ASSERT_TRUE(iter.Valid());
- ASSERT_EQ(*model_iter, iter.key());
- iter.Prev();
- }
- ASSERT_TRUE(!iter.Valid());
- }
-}
-
-// We want to make sure that with a single writer and multiple
-// concurrent readers (with no synchronization other than when a
-// reader's iterator is created), the reader always observes all the
-// data that was present in the skip list when the iterator was
-// constructed.  Because insertions are happening concurrently, we may
-// also observe new values that were inserted since the iterator was
-// constructed, but we should never miss any values that were present
-// at iterator construction time.
-//
-// We generate multi-part keys:
-// <key,gen,hash>
-// where:
-// key is in range [0..K-1]
-// gen is a generation number for key
-// hash is hash(key,gen)
-//
-// The insertion code picks a random key, sets gen to be 1 + the last
-// generation number inserted for that key, and sets hash to Hash(key,gen).
-//
-// At the beginning of a read, we snapshot the last inserted
-// generation number for each key. We then iterate, including random
-// calls to Next() and Seek(). For every key we encounter, we
-// check that it is either expected given the initial snapshot or has
-// been concurrently added since the iterator started.
-class ConcurrentTest {
- private:
- static const uint32_t K = 4;
-
- static uint64_t key(Key key) { return (key >> 40); }
- static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; }
- static uint64_t hash(Key key) { return key & 0xff; }
-
- static uint64_t HashNumbers(uint64_t k, uint64_t g) {
- uint64_t data[2] = { k, g };
- return Hash(reinterpret_cast<char*>(data), sizeof(data), 0);
- }
-
- static Key MakeKey(uint64_t k, uint64_t g) {
- assert(sizeof(Key) == sizeof(uint64_t));
- assert(k <= K); // We sometimes pass K to seek to the end of the skiplist
- assert(g <= 0xffffffffu);
- return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff));
- }
-
- static bool IsValidKey(Key k) {
- return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff);
- }
-
- static Key RandomTarget(Random* rnd) {
- switch (rnd->Next() % 10) {
- case 0:
- // Seek to beginning
- return MakeKey(0, 0);
- case 1:
- // Seek to end
- return MakeKey(K, 0);
- default:
- // Seek to middle
- return MakeKey(rnd->Next() % K, 0);
- }
- }
-
- // Per-key generation
- struct State {
- port::AtomicPointer generation[K];
- void Set(int k, intptr_t v) {
- generation[k].Release_Store(reinterpret_cast<void*>(v));
- }
- intptr_t Get(int k) {
- return reinterpret_cast<intptr_t>(generation[k].Acquire_Load());
- }
-
- State() {
- for (int k = 0; k < K; k++) {
- Set(k, 0);
- }
- }
- };
-
- // Current state of the test
- State current_;
-
- Arena arena_;
-
- // SkipList is not protected by mu_. We just use a single writer
- // thread to modify it.
- SkipList<Key, Comparator> list_;
-
- public:
- ConcurrentTest() : list_(Comparator(), &arena_) { }
-
- // REQUIRES: External synchronization
- void WriteStep(Random* rnd) {
- const uint32_t k = rnd->Next() % K;
- const intptr_t g = current_.Get(k) + 1;
- const Key key = MakeKey(k, g);
- list_.Insert(key);
- current_.Set(k, g);
- }
-
- void ReadStep(Random* rnd) {
- // Remember the initial committed state of the skiplist.
- State initial_state;
- for (int k = 0; k < K; k++) {
- initial_state.Set(k, current_.Get(k));
- }
-
- Key pos = RandomTarget(rnd);
- SkipList<Key, Comparator>::Iterator iter(&list_);
- iter.Seek(pos);
- while (true) {
- Key current;
- if (!iter.Valid()) {
- current = MakeKey(K, 0);
- } else {
- current = iter.key();
- ASSERT_TRUE(IsValidKey(current)) << current;
- }
- ASSERT_LE(pos, current) << "should not go backwards";
-
- // Verify that everything in [pos,current) was not present in
- // initial_state.
- while (pos < current) {
- ASSERT_LT(key(pos), K) << pos;
-
- // Note that generation 0 is never inserted, so it is ok if
- // <*,0,*> is missing.
- ASSERT_TRUE((gen(pos) == 0) ||
- (gen(pos) > initial_state.Get(key(pos)))
- ) << "key: " << key(pos)
- << "; gen: " << gen(pos)
- << "; initgen: "
- << initial_state.Get(key(pos));
-
- // Advance to next key in the valid key space
- if (key(pos) < key(current)) {
- pos = MakeKey(key(pos) + 1, 0);
- } else {
- pos = MakeKey(key(pos), gen(pos) + 1);
- }
- }
-
- if (!iter.Valid()) {
- break;
- }
-
- if (rnd->Next() % 2) {
- iter.Next();
- pos = MakeKey(key(pos), gen(pos) + 1);
- } else {
- Key new_target = RandomTarget(rnd);
- if (new_target > pos) {
- pos = new_target;
- iter.Seek(new_target);
- }
- }
- }
- }
-};
-const uint32_t ConcurrentTest::K;
-
-// Simple test that does single-threaded testing of the ConcurrentTest
-// scaffolding.
-TEST(SkipTest, ConcurrentWithoutThreads) {
- ConcurrentTest test;
- Random rnd(test::RandomSeed());
- for (int i = 0; i < 10000; i++) {
- test.ReadStep(&rnd);
- test.WriteStep(&rnd);
- }
-}
-
-class TestState {
- public:
- ConcurrentTest t_;
- int seed_;
- port::AtomicPointer quit_flag_;
-
- enum ReaderState {
- STARTING,
- RUNNING,
- DONE
- };
-
- explicit TestState(int s)
- : seed_(s),
- quit_flag_(NULL),
- state_(STARTING),
- state_cv_(&mu_) {}
-
- void Wait(ReaderState s) {
- mu_.Lock();
- while (state_ != s) {
- state_cv_.Wait();
- }
- mu_.Unlock();
- }
-
- void Change(ReaderState s) {
- mu_.Lock();
- state_ = s;
- state_cv_.Signal();
- mu_.Unlock();
- }
-
- private:
- port::Mutex mu_;
- ReaderState state_;
- port::CondVar state_cv_;
-};
-
-static void ConcurrentReader(void* arg) {
- TestState* state = reinterpret_cast<TestState*>(arg);
- Random rnd(state->seed_);
- int64_t reads = 0;
- state->Change(TestState::RUNNING);
- while (!state->quit_flag_.Acquire_Load()) {
- state->t_.ReadStep(&rnd);
- ++reads;
- }
- state->Change(TestState::DONE);
-}
-
-static void RunConcurrent(int run) {
- const int seed = test::RandomSeed() + (run * 100);
- Random rnd(seed);
- const int N = 1000;
- const int kSize = 1000;
- for (int i = 0; i < N; i++) {
- if ((i % 100) == 0) {
- fprintf(stderr, "Run %d of %d\n", i, N);
- }
- TestState state(seed + 1);
- Env::Default()->Schedule(ConcurrentReader, &state);
- state.Wait(TestState::RUNNING);
-    for (int j = 0; j < kSize; j++) {
- state.t_.WriteStep(&rnd);
- }
- state.quit_flag_.Release_Store(&state); // Any non-NULL arg will do
- state.Wait(TestState::DONE);
- }
-}
-
-TEST(SkipTest, Concurrent1) { RunConcurrent(1); }
-TEST(SkipTest, Concurrent2) { RunConcurrent(2); }
-TEST(SkipTest, Concurrent3) { RunConcurrent(3); }
-TEST(SkipTest, Concurrent4) { RunConcurrent(4); }
-TEST(SkipTest, Concurrent5) { RunConcurrent(5); }
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
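
For reference, each test key above packs three fields into a single uint64_t
(the layout comes straight from MakeKey):

    bits 40..63   key   - which of the K slots
    bits  8..39   gen   - generation number for that slot
    bits  0..7    hash  - low byte of Hash(key, gen), used by IsValidKey()
                          to detect torn or invented values during reads
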
diff --git a/src/leveldb/db/snapshot.h b/src/leveldb/db/snapshot.h
deleted file mode 100644
index e7f8fd2c37..0000000000
--- a/src/leveldb/db/snapshot.h
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_
-#define STORAGE_LEVELDB_DB_SNAPSHOT_H_
-
-#include "leveldb/db.h"
-
-namespace leveldb {
-
-class SnapshotList;
-
-// Snapshots are kept in a doubly-linked list in the DB.
-// Each SnapshotImpl corresponds to a particular sequence number.
-class SnapshotImpl : public Snapshot {
- public:
- SequenceNumber number_; // const after creation
-
- private:
- friend class SnapshotList;
-
- // SnapshotImpl is kept in a doubly-linked circular list
- SnapshotImpl* prev_;
- SnapshotImpl* next_;
-
- SnapshotList* list_; // just for sanity checks
-};
-
-class SnapshotList {
- public:
- SnapshotList() {
- list_.prev_ = &list_;
- list_.next_ = &list_;
- }
-
- bool empty() const { return list_.next_ == &list_; }
- SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }
- SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }
-
- const SnapshotImpl* New(SequenceNumber seq) {
- SnapshotImpl* s = new SnapshotImpl;
- s->number_ = seq;
- s->list_ = this;
- s->next_ = &list_;
- s->prev_ = list_.prev_;
- s->prev_->next_ = s;
- s->next_->prev_ = s;
- return s;
- }
-
- void Delete(const SnapshotImpl* s) {
- assert(s->list_ == this);
- s->prev_->next_ = s->next_;
- s->next_->prev_ = s->prev_;
- delete s;
- }
-
- private:
- // Dummy head of doubly-linked list of snapshots
- SnapshotImpl list_;
-};
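-
-// Illustrative sketch (added for exposition, not part of the original
-// file): a typical snapshot lifetime, assuming a SnapshotList that is
-// guarded by the DB mutex and a current sequence number last_sequence:
-//
-//   SnapshotList snapshots;
-//   const SnapshotImpl* s = snapshots.New(last_sequence);
-//   // ... reads at sequence number s->number_ see a frozen view ...
-//   snapshots.Delete(s);
-//
-// Because the list is circular with a dummy head, New() and Delete() need
-// no empty-list special cases, and oldest()/newest() are O(1).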
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_SNAPSHOT_H_
diff --git a/src/leveldb/db/table_cache.cc b/src/leveldb/db/table_cache.cc
deleted file mode 100644
index 497db27076..0000000000
--- a/src/leveldb/db/table_cache.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/table_cache.h"
-
-#include "db/filename.h"
-#include "leveldb/env.h"
-#include "leveldb/table.h"
-#include "util/coding.h"
-
-namespace leveldb {
-
-struct TableAndFile {
- RandomAccessFile* file;
- Table* table;
-};
-
-static void DeleteEntry(const Slice& key, void* value) {
- TableAndFile* tf = reinterpret_cast<TableAndFile*>(value);
- delete tf->table;
- delete tf->file;
- delete tf;
-}
-
-static void UnrefEntry(void* arg1, void* arg2) {
- Cache* cache = reinterpret_cast<Cache*>(arg1);
- Cache::Handle* h = reinterpret_cast<Cache::Handle*>(arg2);
- cache->Release(h);
-}
-
-TableCache::TableCache(const std::string& dbname,
- const Options* options,
- int entries)
- : env_(options->env),
- dbname_(dbname),
- options_(options),
- cache_(NewLRUCache(entries)) {
-}
-
-TableCache::~TableCache() {
- delete cache_;
-}
-
-Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
- Cache::Handle** handle) {
- Status s;
- char buf[sizeof(file_number)];
- EncodeFixed64(buf, file_number);
- Slice key(buf, sizeof(buf));
- *handle = cache_->Lookup(key);
- if (*handle == NULL) {
- std::string fname = TableFileName(dbname_, file_number);
- RandomAccessFile* file = NULL;
- Table* table = NULL;
- s = env_->NewRandomAccessFile(fname, &file);
- if (s.ok()) {
- s = Table::Open(*options_, file, file_size, &table);
- }
-
- if (!s.ok()) {
- assert(table == NULL);
- delete file;
- // We do not cache error results so that if the error is transient,
- // or somebody repairs the file, we recover automatically.
- } else {
- TableAndFile* tf = new TableAndFile;
- tf->file = file;
- tf->table = table;
- *handle = cache_->Insert(key, tf, 1, &DeleteEntry);
- }
- }
- return s;
-}
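-
-// Note (added for exposition): entries are inserted with a charge of 1,
-// so the "entries" argument to the constructor bounds the number of open
-// tables (and hence open file descriptors), not their byte size.  Evict()
-// below erases an entry using the same fixed64-encoded file-number key.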
-
-Iterator* TableCache::NewIterator(const ReadOptions& options,
- uint64_t file_number,
- uint64_t file_size,
- Table** tableptr) {
- if (tableptr != NULL) {
- *tableptr = NULL;
- }
-
- Cache::Handle* handle = NULL;
- Status s = FindTable(file_number, file_size, &handle);
- if (!s.ok()) {
- return NewErrorIterator(s);
- }
-
- Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
- Iterator* result = table->NewIterator(options);
- result->RegisterCleanup(&UnrefEntry, cache_, handle);
- if (tableptr != NULL) {
- *tableptr = table;
- }
- return result;
-}
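-
-// Note (added for exposition): the RegisterCleanup() call above pins the
-// cache handle for the iterator's entire lifetime, so even if the entry
-// is evicted from the LRU index, the Table and its file are not deleted
-// until the iterator is destroyed and UnrefEntry() releases the handle.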
-
-Status TableCache::Get(const ReadOptions& options,
- uint64_t file_number,
- uint64_t file_size,
- const Slice& k,
- void* arg,
- void (*saver)(void*, const Slice&, const Slice&)) {
- Cache::Handle* handle = NULL;
- Status s = FindTable(file_number, file_size, &handle);
- if (s.ok()) {
- Table* t = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
- s = t->InternalGet(options, k, arg, saver);
- cache_->Release(handle);
- }
- return s;
-}
-
-void TableCache::Evict(uint64_t file_number) {
- char buf[sizeof(file_number)];
- EncodeFixed64(buf, file_number);
- cache_->Erase(Slice(buf, sizeof(buf)));
-}
-
-} // namespace leveldb
diff --git a/src/leveldb/db/table_cache.h b/src/leveldb/db/table_cache.h
deleted file mode 100644
index 8cf4aaf12d..0000000000
--- a/src/leveldb/db/table_cache.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// Thread-safe (provides internal synchronization)
-
-#ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_
-#define STORAGE_LEVELDB_DB_TABLE_CACHE_H_
-
-#include <string>
-#include <stdint.h>
-#include "db/dbformat.h"
-#include "leveldb/cache.h"
-#include "leveldb/table.h"
-#include "port/port.h"
-
-namespace leveldb {
-
-class Env;
-
-class TableCache {
- public:
- TableCache(const std::string& dbname, const Options* options, int entries);
- ~TableCache();
-
- // Return an iterator for the specified file number (the corresponding
- // file length must be exactly "file_size" bytes). If "tableptr" is
- // non-NULL, also sets "*tableptr" to point to the Table object
- // underlying the returned iterator, or NULL if no Table object underlies
- // the returned iterator. The returned "*tableptr" object is owned by
- // the cache and should not be deleted, and is valid for as long as the
- // returned iterator is live.
- Iterator* NewIterator(const ReadOptions& options,
- uint64_t file_number,
- uint64_t file_size,
- Table** tableptr = NULL);
-
- // If a seek to internal key "k" in specified file finds an entry,
- // call (*handle_result)(arg, found_key, found_value).
- Status Get(const ReadOptions& options,
- uint64_t file_number,
- uint64_t file_size,
- const Slice& k,
- void* arg,
- void (*handle_result)(void*, const Slice&, const Slice&));
-
- // Evict any entry for the specified file number
- void Evict(uint64_t file_number);
-
- private:
- Env* const env_;
- const std::string dbname_;
- const Options* options_;
- Cache* cache_;
-
- Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
-};
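-
-// Illustrative usage (added for exposition; ValueSaver and SaveValue are
-// hypothetical names, not part of this header): a minimal handle_result
-// callback that copies the matching value out of Get():
-//
-//   struct ValueSaver { std::string result; };
-//   static void SaveValue(void* arg, const Slice& k, const Slice& v) {
-//     reinterpret_cast<ValueSaver*>(arg)->result.assign(v.data(), v.size());
-//   }
-//   ...
-//   ValueSaver saver;
-//   Status s = table_cache->Get(ReadOptions(), number, size, internal_key,
-//                               &saver, SaveValue);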
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_TABLE_CACHE_H_
diff --git a/src/leveldb/db/version_edit.cc b/src/leveldb/db/version_edit.cc
deleted file mode 100644
index f10a2d58b2..0000000000
--- a/src/leveldb/db/version_edit.cc
+++ /dev/null
@@ -1,266 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/version_edit.h"
-
-#include "db/version_set.h"
-#include "util/coding.h"
-
-namespace leveldb {
-
-// Tag numbers for serialized VersionEdit. These numbers are written to
-// disk and should not be changed.
-enum Tag {
- kComparator = 1,
- kLogNumber = 2,
- kNextFileNumber = 3,
- kLastSequence = 4,
- kCompactPointer = 5,
- kDeletedFile = 6,
- kNewFile = 7,
- // 8 was used for large value refs
- kPrevLogNumber = 9
-};
-
-void VersionEdit::Clear() {
- comparator_.clear();
- log_number_ = 0;
- prev_log_number_ = 0;
- last_sequence_ = 0;
- next_file_number_ = 0;
- has_comparator_ = false;
- has_log_number_ = false;
- has_prev_log_number_ = false;
- has_next_file_number_ = false;
- has_last_sequence_ = false;
- deleted_files_.clear();
- new_files_.clear();
-}
-
-void VersionEdit::EncodeTo(std::string* dst) const {
- if (has_comparator_) {
- PutVarint32(dst, kComparator);
- PutLengthPrefixedSlice(dst, comparator_);
- }
- if (has_log_number_) {
- PutVarint32(dst, kLogNumber);
- PutVarint64(dst, log_number_);
- }
- if (has_prev_log_number_) {
- PutVarint32(dst, kPrevLogNumber);
- PutVarint64(dst, prev_log_number_);
- }
- if (has_next_file_number_) {
- PutVarint32(dst, kNextFileNumber);
- PutVarint64(dst, next_file_number_);
- }
- if (has_last_sequence_) {
- PutVarint32(dst, kLastSequence);
- PutVarint64(dst, last_sequence_);
- }
-
- for (size_t i = 0; i < compact_pointers_.size(); i++) {
- PutVarint32(dst, kCompactPointer);
- PutVarint32(dst, compact_pointers_[i].first); // level
- PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
- }
-
- for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
- iter != deleted_files_.end();
- ++iter) {
- PutVarint32(dst, kDeletedFile);
- PutVarint32(dst, iter->first); // level
- PutVarint64(dst, iter->second); // file number
- }
-
- for (size_t i = 0; i < new_files_.size(); i++) {
- const FileMetaData& f = new_files_[i].second;
- PutVarint32(dst, kNewFile);
- PutVarint32(dst, new_files_[i].first); // level
- PutVarint64(dst, f.number);
- PutVarint64(dst, f.file_size);
- PutLengthPrefixedSlice(dst, f.smallest.Encode());
- PutLengthPrefixedSlice(dst, f.largest.Encode());
- }
-}
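-
-// Worked example (added for exposition): the encoded form is a flat
-// sequence of (varint32 tag, payload) pairs.  An edit holding only a log
-// number of 17 encodes as:
-//
-//   varint32(kLogNumber) varint64(17)  =>  bytes { 0x02, 0x11 }
-//
-// DecodeFrom() below fails on unknown tags, which is why the tag values
-// above must never be reused or renumbered.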
-
-static bool GetInternalKey(Slice* input, InternalKey* dst) {
- Slice str;
- if (GetLengthPrefixedSlice(input, &str)) {
- dst->DecodeFrom(str);
- return true;
- } else {
- return false;
- }
-}
-
-static bool GetLevel(Slice* input, int* level) {
- uint32_t v;
- if (GetVarint32(input, &v) &&
- v < config::kNumLevels) {
- *level = v;
- return true;
- } else {
- return false;
- }
-}
-
-Status VersionEdit::DecodeFrom(const Slice& src) {
- Clear();
- Slice input = src;
- const char* msg = NULL;
- uint32_t tag;
-
- // Temporary storage for parsing
- int level;
- uint64_t number;
- FileMetaData f;
- Slice str;
- InternalKey key;
-
- while (msg == NULL && GetVarint32(&input, &tag)) {
- switch (tag) {
- case kComparator:
- if (GetLengthPrefixedSlice(&input, &str)) {
- comparator_ = str.ToString();
- has_comparator_ = true;
- } else {
- msg = "comparator name";
- }
- break;
-
- case kLogNumber:
- if (GetVarint64(&input, &log_number_)) {
- has_log_number_ = true;
- } else {
- msg = "log number";
- }
- break;
-
- case kPrevLogNumber:
- if (GetVarint64(&input, &prev_log_number_)) {
- has_prev_log_number_ = true;
- } else {
- msg = "previous log number";
- }
- break;
-
- case kNextFileNumber:
- if (GetVarint64(&input, &next_file_number_)) {
- has_next_file_number_ = true;
- } else {
- msg = "next file number";
- }
- break;
-
- case kLastSequence:
- if (GetVarint64(&input, &last_sequence_)) {
- has_last_sequence_ = true;
- } else {
- msg = "last sequence number";
- }
- break;
-
- case kCompactPointer:
- if (GetLevel(&input, &level) &&
- GetInternalKey(&input, &key)) {
- compact_pointers_.push_back(std::make_pair(level, key));
- } else {
- msg = "compaction pointer";
- }
- break;
-
- case kDeletedFile:
- if (GetLevel(&input, &level) &&
- GetVarint64(&input, &number)) {
- deleted_files_.insert(std::make_pair(level, number));
- } else {
- msg = "deleted file";
- }
- break;
-
- case kNewFile:
- if (GetLevel(&input, &level) &&
- GetVarint64(&input, &f.number) &&
- GetVarint64(&input, &f.file_size) &&
- GetInternalKey(&input, &f.smallest) &&
- GetInternalKey(&input, &f.largest)) {
- new_files_.push_back(std::make_pair(level, f));
- } else {
- msg = "new-file entry";
- }
- break;
-
- default:
- msg = "unknown tag";
- break;
- }
- }
-
- if (msg == NULL && !input.empty()) {
- msg = "invalid tag";
- }
-
- Status result;
- if (msg != NULL) {
- result = Status::Corruption("VersionEdit", msg);
- }
- return result;
-}
-
-std::string VersionEdit::DebugString() const {
- std::string r;
- r.append("VersionEdit {");
- if (has_comparator_) {
- r.append("\n Comparator: ");
- r.append(comparator_);
- }
- if (has_log_number_) {
- r.append("\n LogNumber: ");
- AppendNumberTo(&r, log_number_);
- }
- if (has_prev_log_number_) {
- r.append("\n PrevLogNumber: ");
- AppendNumberTo(&r, prev_log_number_);
- }
- if (has_next_file_number_) {
- r.append("\n NextFile: ");
- AppendNumberTo(&r, next_file_number_);
- }
- if (has_last_sequence_) {
- r.append("\n LastSeq: ");
- AppendNumberTo(&r, last_sequence_);
- }
- for (size_t i = 0; i < compact_pointers_.size(); i++) {
- r.append("\n CompactPointer: ");
- AppendNumberTo(&r, compact_pointers_[i].first);
- r.append(" ");
- r.append(compact_pointers_[i].second.DebugString());
- }
- for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
- iter != deleted_files_.end();
- ++iter) {
- r.append("\n DeleteFile: ");
- AppendNumberTo(&r, iter->first);
- r.append(" ");
- AppendNumberTo(&r, iter->second);
- }
- for (size_t i = 0; i < new_files_.size(); i++) {
- const FileMetaData& f = new_files_[i].second;
- r.append("\n AddFile: ");
- AppendNumberTo(&r, new_files_[i].first);
- r.append(" ");
- AppendNumberTo(&r, f.number);
- r.append(" ");
- AppendNumberTo(&r, f.file_size);
- r.append(" ");
- r.append(f.smallest.DebugString());
- r.append(" .. ");
- r.append(f.largest.DebugString());
- }
- r.append("\n}\n");
- return r;
-}
-
-} // namespace leveldb
diff --git a/src/leveldb/db/version_edit.h b/src/leveldb/db/version_edit.h
deleted file mode 100644
index eaef77b327..0000000000
--- a/src/leveldb/db/version_edit.h
+++ /dev/null
@@ -1,107 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_VERSION_EDIT_H_
-#define STORAGE_LEVELDB_DB_VERSION_EDIT_H_
-
-#include <set>
-#include <utility>
-#include <vector>
-#include "db/dbformat.h"
-
-namespace leveldb {
-
-class VersionSet;
-
-struct FileMetaData {
- int refs;
- int allowed_seeks; // Seeks allowed until compaction
- uint64_t number;
- uint64_t file_size; // File size in bytes
- InternalKey smallest; // Smallest internal key served by table
- InternalKey largest; // Largest internal key served by table
-
- FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
-};
-
-class VersionEdit {
- public:
- VersionEdit() { Clear(); }
- ~VersionEdit() { }
-
- void Clear();
-
- void SetComparatorName(const Slice& name) {
- has_comparator_ = true;
- comparator_ = name.ToString();
- }
- void SetLogNumber(uint64_t num) {
- has_log_number_ = true;
- log_number_ = num;
- }
- void SetPrevLogNumber(uint64_t num) {
- has_prev_log_number_ = true;
- prev_log_number_ = num;
- }
- void SetNextFile(uint64_t num) {
- has_next_file_number_ = true;
- next_file_number_ = num;
- }
- void SetLastSequence(SequenceNumber seq) {
- has_last_sequence_ = true;
- last_sequence_ = seq;
- }
- void SetCompactPointer(int level, const InternalKey& key) {
- compact_pointers_.push_back(std::make_pair(level, key));
- }
-
- // Add the specified file at the specified number.
- // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
- // REQUIRES: "smallest" and "largest" are smallest and largest keys in file
- void AddFile(int level, uint64_t file,
- uint64_t file_size,
- const InternalKey& smallest,
- const InternalKey& largest) {
- FileMetaData f;
- f.number = file;
- f.file_size = file_size;
- f.smallest = smallest;
- f.largest = largest;
- new_files_.push_back(std::make_pair(level, f));
- }
-
- // Delete the specified "file" from the specified "level".
- void DeleteFile(int level, uint64_t file) {
- deleted_files_.insert(std::make_pair(level, file));
- }
-
- void EncodeTo(std::string* dst) const;
- Status DecodeFrom(const Slice& src);
-
- std::string DebugString() const;
-
- private:
- friend class VersionSet;
-
- typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;
-
- std::string comparator_;
- uint64_t log_number_;
- uint64_t prev_log_number_;
- uint64_t next_file_number_;
- SequenceNumber last_sequence_;
- bool has_comparator_;
- bool has_log_number_;
- bool has_prev_log_number_;
- bool has_next_file_number_;
- bool has_last_sequence_;
-
- std::vector< std::pair<int, InternalKey> > compact_pointers_;
- DeletedFileSet deleted_files_;
- std::vector< std::pair<int, FileMetaData> > new_files_;
-};
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_VERSION_EDIT_H_
diff --git a/src/leveldb/db/version_edit_test.cc b/src/leveldb/db/version_edit_test.cc
deleted file mode 100644
index 280310b49d..0000000000
--- a/src/leveldb/db/version_edit_test.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/version_edit.h"
-#include "util/testharness.h"
-
-namespace leveldb {
-
-static void TestEncodeDecode(const VersionEdit& edit) {
- std::string encoded, encoded2;
- edit.EncodeTo(&encoded);
- VersionEdit parsed;
- Status s = parsed.DecodeFrom(encoded);
- ASSERT_TRUE(s.ok()) << s.ToString();
- parsed.EncodeTo(&encoded2);
- ASSERT_EQ(encoded, encoded2);
-}
-
-class VersionEditTest { };
-
-TEST(VersionEditTest, EncodeDecode) {
- static const uint64_t kBig = 1ull << 50;
-
- VersionEdit edit;
- for (int i = 0; i < 4; i++) {
- TestEncodeDecode(edit);
- edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
- InternalKey("foo", kBig + 500 + i, kTypeValue),
- InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
- edit.DeleteFile(4, kBig + 700 + i);
- edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
- }
-
- edit.SetComparatorName("foo");
- edit.SetLogNumber(kBig + 100);
- edit.SetNextFile(kBig + 200);
- edit.SetLastSequence(kBig + 1000);
- TestEncodeDecode(edit);
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
diff --git a/src/leveldb/db/version_set.cc b/src/leveldb/db/version_set.cc
deleted file mode 100644
index 7d0a5de2b9..0000000000
--- a/src/leveldb/db/version_set.cc
+++ /dev/null
@@ -1,1438 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/version_set.h"
-
-#include <algorithm>
-#include <stdio.h>
-#include "db/filename.h"
-#include "db/log_reader.h"
-#include "db/log_writer.h"
-#include "db/memtable.h"
-#include "db/table_cache.h"
-#include "leveldb/env.h"
-#include "leveldb/table_builder.h"
-#include "table/merger.h"
-#include "table/two_level_iterator.h"
-#include "util/coding.h"
-#include "util/logging.h"
-
-namespace leveldb {
-
-static const int kTargetFileSize = 2 * 1048576;
-
-// Maximum bytes of overlaps in grandparent (i.e., level+2) before we
-// stop building a single file in a level->level+1 compaction.
-static const int64_t kMaxGrandParentOverlapBytes = 10 * kTargetFileSize;
-
-// Maximum number of bytes in all compacted files. We avoid expanding
-// the lower level file set of a compaction if it would make the
-// total compaction cover more than this many bytes.
-static const int64_t kExpandedCompactionByteSizeLimit = 25 * kTargetFileSize;
-
-static double MaxBytesForLevel(int level) {
- // Note: the result for level zero is not really used since we set
- // the level-0 compaction threshold based on number of files.
- double result = 10 * 1048576.0; // Result for both level-0 and level-1
- while (level > 1) {
- result *= 10;
- level--;
- }
- return result;
-}
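-
-// Worked example (added for clarity): with the loop above, the byte
-// budgets are 10MB for levels 0 and 1, 100MB for level 2, 1GB for level 3,
-// and so on, growing by 10x per level.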
-
-static uint64_t MaxFileSizeForLevel(int level) {
-  return kTargetFileSize;  // Could vary per level to reduce the file count.
-}
-
-static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
- int64_t sum = 0;
- for (size_t i = 0; i < files.size(); i++) {
- sum += files[i]->file_size;
- }
- return sum;
-}
-
-namespace {
-std::string IntSetToString(const std::set<uint64_t>& s) {
- std::string result = "{";
- for (std::set<uint64_t>::const_iterator it = s.begin();
- it != s.end();
- ++it) {
- result += (result.size() > 1) ? "," : "";
- result += NumberToString(*it);
- }
- result += "}";
- return result;
-}
-} // namespace
-
-Version::~Version() {
- assert(refs_ == 0);
-
- // Remove from linked list
- prev_->next_ = next_;
- next_->prev_ = prev_;
-
- // Drop references to files
- for (int level = 0; level < config::kNumLevels; level++) {
- for (size_t i = 0; i < files_[level].size(); i++) {
- FileMetaData* f = files_[level][i];
- assert(f->refs > 0);
- f->refs--;
- if (f->refs <= 0) {
- delete f;
- }
- }
- }
-}
-
-int FindFile(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>& files,
- const Slice& key) {
- uint32_t left = 0;
- uint32_t right = files.size();
- while (left < right) {
- uint32_t mid = (left + right) / 2;
- const FileMetaData* f = files[mid];
- if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) {
- // Key at "mid.largest" is < "target". Therefore all
- // files at or before "mid" are uninteresting.
- left = mid + 1;
- } else {
- // Key at "mid.largest" is >= "target". Therefore all files
- // after "mid" are uninteresting.
- right = mid;
- }
- }
- return right;
-}
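-
-// Example (added for exposition, ignoring sequence numbers for brevity):
-// if the files' largest keys are "d", "j", "p", then FindFile(..., "k")
-// returns 2 (the file ending at "p" is the first that could contain "k"),
-// and FindFile(..., "q") returns 3 == files.size(), i.e. no candidate.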
-
-static bool AfterFile(const Comparator* ucmp,
- const Slice* user_key, const FileMetaData* f) {
- // NULL user_key occurs before all keys and is therefore never after *f
- return (user_key != NULL &&
- ucmp->Compare(*user_key, f->largest.user_key()) > 0);
-}
-
-static bool BeforeFile(const Comparator* ucmp,
- const Slice* user_key, const FileMetaData* f) {
- // NULL user_key occurs after all keys and is therefore never before *f
- return (user_key != NULL &&
- ucmp->Compare(*user_key, f->smallest.user_key()) < 0);
-}
-
-bool SomeFileOverlapsRange(
- const InternalKeyComparator& icmp,
- bool disjoint_sorted_files,
- const std::vector<FileMetaData*>& files,
- const Slice* smallest_user_key,
- const Slice* largest_user_key) {
- const Comparator* ucmp = icmp.user_comparator();
- if (!disjoint_sorted_files) {
- // Need to check against all files
- for (size_t i = 0; i < files.size(); i++) {
- const FileMetaData* f = files[i];
- if (AfterFile(ucmp, smallest_user_key, f) ||
- BeforeFile(ucmp, largest_user_key, f)) {
- // No overlap
- } else {
- return true; // Overlap
- }
- }
- return false;
- }
-
- // Binary search over file list
- uint32_t index = 0;
- if (smallest_user_key != NULL) {
- // Find the earliest possible internal key for smallest_user_key
-    InternalKey small(*smallest_user_key,
-                      kMaxSequenceNumber, kValueTypeForSeek);
- index = FindFile(icmp, files, small.Encode());
- }
-
- if (index >= files.size()) {
- // beginning of range is after all files, so no overlap.
- return false;
- }
-
- return !BeforeFile(ucmp, largest_user_key, files[index]);
-}
-
-// An internal iterator. For a given version/level pair, yields
-// information about the files in the level. For a given entry, key()
-// is the largest key that occurs in the file, and value() is a
-// 16-byte value containing the file number and file size, both
-// encoded using EncodeFixed64.
-class Version::LevelFileNumIterator : public Iterator {
- public:
- LevelFileNumIterator(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>* flist)
- : icmp_(icmp),
- flist_(flist),
- index_(flist->size()) { // Marks as invalid
- }
- virtual bool Valid() const {
- return index_ < flist_->size();
- }
- virtual void Seek(const Slice& target) {
- index_ = FindFile(icmp_, *flist_, target);
- }
- virtual void SeekToFirst() { index_ = 0; }
- virtual void SeekToLast() {
- index_ = flist_->empty() ? 0 : flist_->size() - 1;
- }
- virtual void Next() {
- assert(Valid());
- index_++;
- }
- virtual void Prev() {
- assert(Valid());
- if (index_ == 0) {
- index_ = flist_->size(); // Marks as invalid
- } else {
- index_--;
- }
- }
-  virtual Slice key() const {
- assert(Valid());
- return (*flist_)[index_]->largest.Encode();
- }
-  virtual Slice value() const {
- assert(Valid());
- EncodeFixed64(value_buf_, (*flist_)[index_]->number);
- EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size);
- return Slice(value_buf_, sizeof(value_buf_));
- }
- virtual Status status() const { return Status::OK(); }
- private:
- const InternalKeyComparator icmp_;
- const std::vector<FileMetaData*>* const flist_;
- uint32_t index_;
-
- // Backing store for value(). Holds the file number and size.
- mutable char value_buf_[16];
-};
-
-static Iterator* GetFileIterator(void* arg,
- const ReadOptions& options,
- const Slice& file_value) {
- TableCache* cache = reinterpret_cast<TableCache*>(arg);
- if (file_value.size() != 16) {
- return NewErrorIterator(
- Status::Corruption("FileReader invoked with unexpected value"));
- } else {
- return cache->NewIterator(options,
- DecodeFixed64(file_value.data()),
- DecodeFixed64(file_value.data() + 8));
- }
-}
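-
-// Note (added for exposition): GetFileIterator() is the inverse of
-// LevelFileNumIterator::value() above -- bytes [0,8) hold the file number
-// and bytes [8,16) the file size, both fixed64-encoded.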
-
-Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
- int level) const {
- return NewTwoLevelIterator(
- new LevelFileNumIterator(vset_->icmp_, &files_[level]),
- &GetFileIterator, vset_->table_cache_, options);
-}
-
-void Version::AddIterators(const ReadOptions& options,
- std::vector<Iterator*>* iters) {
- // Merge all level zero files together since they may overlap
- for (size_t i = 0; i < files_[0].size(); i++) {
- iters->push_back(
- vset_->table_cache_->NewIterator(
- options, files_[0][i]->number, files_[0][i]->file_size));
- }
-
- // For levels > 0, we can use a concatenating iterator that sequentially
- // walks through the non-overlapping files in the level, opening them
- // lazily.
- for (int level = 1; level < config::kNumLevels; level++) {
- if (!files_[level].empty()) {
- iters->push_back(NewConcatenatingIterator(options, level));
- }
- }
-}
-
-// Callback from TableCache::Get()
-namespace {
-enum SaverState {
- kNotFound,
- kFound,
- kDeleted,
- kCorrupt,
-};
-struct Saver {
- SaverState state;
- const Comparator* ucmp;
- Slice user_key;
- std::string* value;
-};
-}
-static void SaveValue(void* arg, const Slice& ikey, const Slice& v) {
- Saver* s = reinterpret_cast<Saver*>(arg);
- ParsedInternalKey parsed_key;
- if (!ParseInternalKey(ikey, &parsed_key)) {
- s->state = kCorrupt;
- } else {
- if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) {
- s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
- if (s->state == kFound) {
- s->value->assign(v.data(), v.size());
- }
- }
- }
-}
-
-static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
- return a->number > b->number;
-}
-
-Status Version::Get(const ReadOptions& options,
- const LookupKey& k,
- std::string* value,
- GetStats* stats) {
- Slice ikey = k.internal_key();
- Slice user_key = k.user_key();
- const Comparator* ucmp = vset_->icmp_.user_comparator();
- Status s;
-
- stats->seek_file = NULL;
- stats->seek_file_level = -1;
- FileMetaData* last_file_read = NULL;
- int last_file_read_level = -1;
-
- // We can search level-by-level since entries never hop across
- // levels. Therefore we are guaranteed that if we find data
-  // in a smaller level, later levels are irrelevant.
- std::vector<FileMetaData*> tmp;
- FileMetaData* tmp2;
- for (int level = 0; level < config::kNumLevels; level++) {
- size_t num_files = files_[level].size();
- if (num_files == 0) continue;
-
- // Get the list of files to search in this level
- FileMetaData* const* files = &files_[level][0];
- if (level == 0) {
- // Level-0 files may overlap each other. Find all files that
- // overlap user_key and process them in order from newest to oldest.
- tmp.reserve(num_files);
- for (uint32_t i = 0; i < num_files; i++) {
- FileMetaData* f = files[i];
- if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
- ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
- tmp.push_back(f);
- }
- }
- if (tmp.empty()) continue;
-
- std::sort(tmp.begin(), tmp.end(), NewestFirst);
- files = &tmp[0];
- num_files = tmp.size();
- } else {
- // Binary search to find earliest index whose largest key >= ikey.
- uint32_t index = FindFile(vset_->icmp_, files_[level], ikey);
- if (index >= num_files) {
- files = NULL;
- num_files = 0;
- } else {
- tmp2 = files[index];
- if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) {
- // All of "tmp2" is past any data for user_key
- files = NULL;
- num_files = 0;
- } else {
- files = &tmp2;
- num_files = 1;
- }
- }
- }
-
- for (uint32_t i = 0; i < num_files; ++i) {
- if (last_file_read != NULL && stats->seek_file == NULL) {
-        // We have had more than one seek for this read. Charge the first file.
- stats->seek_file = last_file_read;
- stats->seek_file_level = last_file_read_level;
- }
-
- FileMetaData* f = files[i];
- last_file_read = f;
- last_file_read_level = level;
-
- Saver saver;
- saver.state = kNotFound;
- saver.ucmp = ucmp;
- saver.user_key = user_key;
- saver.value = value;
- s = vset_->table_cache_->Get(options, f->number, f->file_size,
- ikey, &saver, SaveValue);
- if (!s.ok()) {
- return s;
- }
- switch (saver.state) {
- case kNotFound:
- break; // Keep searching in other files
- case kFound:
- return s;
- case kDeleted:
-          s = Status::NotFound(Slice());  // Use an empty error message for speed
- return s;
- case kCorrupt:
- s = Status::Corruption("corrupted key for ", user_key);
- return s;
- }
- }
- }
-
- return Status::NotFound(Slice()); // Use an empty error message for speed
-}
-
-bool Version::UpdateStats(const GetStats& stats) {
- FileMetaData* f = stats.seek_file;
- if (f != NULL) {
- f->allowed_seeks--;
- if (f->allowed_seeks <= 0 && file_to_compact_ == NULL) {
- file_to_compact_ = f;
- file_to_compact_level_ = stats.seek_file_level;
- return true;
- }
- }
- return false;
-}
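-
-// Note (added for exposition): together with the allowed_seeks budget set
-// in VersionSet::Builder::Apply(), this implements seek-triggered
-// compaction: a Get() that consulted more than one file charges the first
-// file it touched, and once that file's budget is exhausted it becomes a
-// compaction candidate for PickCompaction().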
-
-void Version::Ref() {
- ++refs_;
-}
-
-void Version::Unref() {
- assert(this != &vset_->dummy_versions_);
- assert(refs_ >= 1);
- --refs_;
- if (refs_ == 0) {
- delete this;
- }
-}
-
-bool Version::OverlapInLevel(int level,
- const Slice* smallest_user_key,
- const Slice* largest_user_key) {
- return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level],
- smallest_user_key, largest_user_key);
-}
-
-int Version::PickLevelForMemTableOutput(
- const Slice& smallest_user_key,
- const Slice& largest_user_key) {
- int level = 0;
- if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) {
- // Push to next level if there is no overlap in next level,
- // and the #bytes overlapping in the level after that are limited.
- InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
- InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
- std::vector<FileMetaData*> overlaps;
- while (level < config::kMaxMemCompactLevel) {
- if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
- break;
- }
- GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
- const int64_t sum = TotalFileSize(overlaps);
- if (sum > kMaxGrandParentOverlapBytes) {
- break;
- }
- level++;
- }
- }
- return level;
-}
-
-// Store in "*inputs" all files in "level" that overlap [begin,end]
-void Version::GetOverlappingInputs(
- int level,
- const InternalKey* begin,
- const InternalKey* end,
- std::vector<FileMetaData*>* inputs) {
- inputs->clear();
- Slice user_begin, user_end;
- if (begin != NULL) {
- user_begin = begin->user_key();
- }
- if (end != NULL) {
- user_end = end->user_key();
- }
- const Comparator* user_cmp = vset_->icmp_.user_comparator();
- for (size_t i = 0; i < files_[level].size(); ) {
- FileMetaData* f = files_[level][i++];
- const Slice file_start = f->smallest.user_key();
- const Slice file_limit = f->largest.user_key();
- if (begin != NULL && user_cmp->Compare(file_limit, user_begin) < 0) {
- // "f" is completely before specified range; skip it
- } else if (end != NULL && user_cmp->Compare(file_start, user_end) > 0) {
- // "f" is completely after specified range; skip it
- } else {
- inputs->push_back(f);
- if (level == 0) {
- // Level-0 files may overlap each other. So check if the newly
- // added file has expanded the range. If so, restart search.
- if (begin != NULL && user_cmp->Compare(file_start, user_begin) < 0) {
- user_begin = file_start;
- inputs->clear();
- i = 0;
- } else if (end != NULL && user_cmp->Compare(file_limit, user_end) > 0) {
- user_end = file_limit;
- inputs->clear();
- i = 0;
- }
- }
- }
- }
-}
-
-std::string Version::DebugString() const {
- std::string r;
- for (int level = 0; level < config::kNumLevels; level++) {
- // E.g.,
- // --- level 1 ---
- // 17:123['a' .. 'd']
- // 20:43['e' .. 'g']
- r.append("--- level ");
- AppendNumberTo(&r, level);
- r.append(" ---\n");
- const std::vector<FileMetaData*>& files = files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- r.push_back(' ');
- AppendNumberTo(&r, files[i]->number);
- r.push_back(':');
- AppendNumberTo(&r, files[i]->file_size);
- r.append("[");
- r.append(files[i]->smallest.DebugString());
- r.append(" .. ");
- r.append(files[i]->largest.DebugString());
- r.append("]\n");
- }
- }
- return r;
-}
-
-// A helper class so we can efficiently apply a whole sequence
-// of edits to a particular state without creating intermediate
-// Versions that contain full copies of the intermediate state.
-class VersionSet::Builder {
- private:
- // Helper to sort by v->files_[file_number].smallest
- struct BySmallestKey {
- const InternalKeyComparator* internal_comparator;
-
- bool operator()(FileMetaData* f1, FileMetaData* f2) const {
- int r = internal_comparator->Compare(f1->smallest, f2->smallest);
- if (r != 0) {
- return (r < 0);
- } else {
- // Break ties by file number
- return (f1->number < f2->number);
- }
- }
- };
-
- typedef std::set<FileMetaData*, BySmallestKey> FileSet;
- struct LevelState {
- std::set<uint64_t> deleted_files;
- FileSet* added_files;
- };
-
- VersionSet* vset_;
- Version* base_;
- LevelState levels_[config::kNumLevels];
-
- public:
- // Initialize a builder with the files from *base and other info from *vset
- Builder(VersionSet* vset, Version* base)
- : vset_(vset),
- base_(base) {
- base_->Ref();
- BySmallestKey cmp;
- cmp.internal_comparator = &vset_->icmp_;
- for (int level = 0; level < config::kNumLevels; level++) {
- levels_[level].added_files = new FileSet(cmp);
- }
- }
-
- ~Builder() {
- for (int level = 0; level < config::kNumLevels; level++) {
- const FileSet* added = levels_[level].added_files;
- std::vector<FileMetaData*> to_unref;
- to_unref.reserve(added->size());
- for (FileSet::const_iterator it = added->begin();
- it != added->end(); ++it) {
- to_unref.push_back(*it);
- }
- delete added;
- for (uint32_t i = 0; i < to_unref.size(); i++) {
- FileMetaData* f = to_unref[i];
- f->refs--;
- if (f->refs <= 0) {
- delete f;
- }
- }
- }
- base_->Unref();
- }
-
- // Apply all of the edits in *edit to the current state.
- void Apply(VersionEdit* edit) {
- // Update compaction pointers
- for (size_t i = 0; i < edit->compact_pointers_.size(); i++) {
- const int level = edit->compact_pointers_[i].first;
- vset_->compact_pointer_[level] =
- edit->compact_pointers_[i].second.Encode().ToString();
- }
-
- // Delete files
- const VersionEdit::DeletedFileSet& del = edit->deleted_files_;
- for (VersionEdit::DeletedFileSet::const_iterator iter = del.begin();
- iter != del.end();
- ++iter) {
- const int level = iter->first;
- const uint64_t number = iter->second;
- levels_[level].deleted_files.insert(number);
- }
-
- // Add new files
- for (size_t i = 0; i < edit->new_files_.size(); i++) {
- const int level = edit->new_files_[i].first;
- FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
- f->refs = 1;
-
- // We arrange to automatically compact this file after
- // a certain number of seeks. Let's assume:
- // (1) One seek costs 10ms
- // (2) Writing or reading 1MB costs 10ms (100MB/s)
- // (3) A compaction of 1MB does 25MB of IO:
- // 1MB read from this level
- // 10-12MB read from next level (boundaries may be misaligned)
- // 10-12MB written to next level
- // This implies that 25 seeks cost the same as the compaction
- // of 1MB of data. I.e., one seek costs approximately the
- // same as the compaction of 40KB of data. We are a little
- // conservative and allow approximately one seek for every 16KB
- // of data before triggering a compaction.
- f->allowed_seeks = (f->file_size / 16384);
- if (f->allowed_seeks < 100) f->allowed_seeks = 100;
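-      // Worked example (added for clarity): a 2MB file gets
-      // 2*1048576/16384 = 128 allowed seeks; files smaller than about
-      // 1.6MB hit the floor of 100.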
-
- levels_[level].deleted_files.erase(f->number);
- levels_[level].added_files->insert(f);
- }
- }
-
- // Save the current state in *v.
- void SaveTo(Version* v) {
- BySmallestKey cmp;
- cmp.internal_comparator = &vset_->icmp_;
- for (int level = 0; level < config::kNumLevels; level++) {
- // Merge the set of added files with the set of pre-existing files.
- // Drop any deleted files. Store the result in *v.
- const std::vector<FileMetaData*>& base_files = base_->files_[level];
- std::vector<FileMetaData*>::const_iterator base_iter = base_files.begin();
- std::vector<FileMetaData*>::const_iterator base_end = base_files.end();
- const FileSet* added = levels_[level].added_files;
- v->files_[level].reserve(base_files.size() + added->size());
- for (FileSet::const_iterator added_iter = added->begin();
- added_iter != added->end();
- ++added_iter) {
- // Add all smaller files listed in base_
- for (std::vector<FileMetaData*>::const_iterator bpos
- = std::upper_bound(base_iter, base_end, *added_iter, cmp);
- base_iter != bpos;
- ++base_iter) {
- MaybeAddFile(v, level, *base_iter);
- }
-
- MaybeAddFile(v, level, *added_iter);
- }
-
- // Add remaining base files
- for (; base_iter != base_end; ++base_iter) {
- MaybeAddFile(v, level, *base_iter);
- }
-
-#ifndef NDEBUG
- // Make sure there is no overlap in levels > 0
- if (level > 0) {
- for (uint32_t i = 1; i < v->files_[level].size(); i++) {
- const InternalKey& prev_end = v->files_[level][i-1]->largest;
- const InternalKey& this_begin = v->files_[level][i]->smallest;
- if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) {
- fprintf(stderr, "overlapping ranges in same level %s vs. %s\n",
- prev_end.DebugString().c_str(),
- this_begin.DebugString().c_str());
- abort();
- }
- }
- }
-#endif
- }
- }
-
- void MaybeAddFile(Version* v, int level, FileMetaData* f) {
- if (levels_[level].deleted_files.count(f->number) > 0) {
- // File is deleted: do nothing
- } else {
- std::vector<FileMetaData*>* files = &v->files_[level];
- if (level > 0 && !files->empty()) {
- // Must not overlap
- assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest,
- f->smallest) < 0);
- }
- f->refs++;
- files->push_back(f);
- }
- }
-};
-
-VersionSet::VersionSet(const std::string& dbname,
- const Options* options,
- TableCache* table_cache,
- const InternalKeyComparator* cmp)
- : env_(options->env),
- dbname_(dbname),
- options_(options),
- table_cache_(table_cache),
- icmp_(*cmp),
- next_file_number_(2),
- manifest_file_number_(0), // Filled by Recover()
- last_sequence_(0),
- log_number_(0),
- prev_log_number_(0),
- descriptor_file_(NULL),
- descriptor_log_(NULL),
- dummy_versions_(this),
- current_(NULL) {
- AppendVersion(new Version(this));
-}
-
-VersionSet::~VersionSet() {
- current_->Unref();
- assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty
- delete descriptor_log_;
- delete descriptor_file_;
-}
-
-void VersionSet::AppendVersion(Version* v) {
- // Make "v" current
- assert(v->refs_ == 0);
- assert(v != current_);
- if (current_ != NULL) {
- current_->Unref();
- }
- current_ = v;
- v->Ref();
-
- // Append to linked list
- v->prev_ = dummy_versions_.prev_;
- v->next_ = &dummy_versions_;
- v->prev_->next_ = v;
- v->next_->prev_ = v;
-}
-
-Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
- if (edit->has_log_number_) {
- assert(edit->log_number_ >= log_number_);
- assert(edit->log_number_ < next_file_number_);
- } else {
- edit->SetLogNumber(log_number_);
- }
-
- if (!edit->has_prev_log_number_) {
- edit->SetPrevLogNumber(prev_log_number_);
- }
-
- edit->SetNextFile(next_file_number_);
- edit->SetLastSequence(last_sequence_);
-
- Version* v = new Version(this);
- {
- Builder builder(this, current_);
- builder.Apply(edit);
- builder.SaveTo(v);
- }
- Finalize(v);
-
- // Initialize new descriptor log file if necessary by creating
- // a temporary file that contains a snapshot of the current version.
- std::string new_manifest_file;
- Status s;
- if (descriptor_log_ == NULL) {
- // No reason to unlock *mu here since we only hit this path in the
- // first call to LogAndApply (when opening the database).
- assert(descriptor_file_ == NULL);
- new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
- edit->SetNextFile(next_file_number_);
- s = env_->NewWritableFile(new_manifest_file, &descriptor_file_);
- if (s.ok()) {
- descriptor_log_ = new log::Writer(descriptor_file_);
- s = WriteSnapshot(descriptor_log_);
- }
- }
-
- // Unlock during expensive MANIFEST log write
- {
- mu->Unlock();
-
- // Write new record to MANIFEST log
- if (s.ok()) {
- std::string record;
- edit->EncodeTo(&record);
- s = descriptor_log_->AddRecord(record);
- if (s.ok()) {
- s = descriptor_file_->Sync();
- }
- if (!s.ok()) {
- Log(options_->info_log, "MANIFEST write: %s\n", s.ToString().c_str());
- if (ManifestContains(record)) {
- Log(options_->info_log,
- "MANIFEST contains log record despite error; advancing to new "
- "version to prevent mismatch between in-memory and logged state");
- s = Status::OK();
- }
- }
- }
-
- // If we just created a new descriptor file, install it by writing a
- // new CURRENT file that points to it.
- if (s.ok() && !new_manifest_file.empty()) {
- s = SetCurrentFile(env_, dbname_, manifest_file_number_);
- // No need to double-check MANIFEST in case of error since it
- // will be discarded below.
- }
-
- mu->Lock();
- }
-
- // Install the new version
- if (s.ok()) {
- AppendVersion(v);
- log_number_ = edit->log_number_;
- prev_log_number_ = edit->prev_log_number_;
- } else {
- delete v;
- if (!new_manifest_file.empty()) {
- delete descriptor_log_;
- delete descriptor_file_;
- descriptor_log_ = NULL;
- descriptor_file_ = NULL;
- env_->DeleteFile(new_manifest_file);
- }
- }
-
- return s;
-}
-
-Status VersionSet::Recover() {
- struct LogReporter : public log::Reader::Reporter {
- Status* status;
- virtual void Corruption(size_t bytes, const Status& s) {
- if (this->status->ok()) *this->status = s;
- }
- };
-
- // Read "CURRENT" file, which contains a pointer to the current manifest file
- std::string current;
- Status s = ReadFileToString(env_, CurrentFileName(dbname_), &current);
- if (!s.ok()) {
- return s;
- }
- if (current.empty() || current[current.size()-1] != '\n') {
- return Status::Corruption("CURRENT file does not end with newline");
- }
- current.resize(current.size() - 1);
-
- std::string dscname = dbname_ + "/" + current;
- SequentialFile* file;
- s = env_->NewSequentialFile(dscname, &file);
- if (!s.ok()) {
- return s;
- }
-
- bool have_log_number = false;
- bool have_prev_log_number = false;
- bool have_next_file = false;
- bool have_last_sequence = false;
- uint64_t next_file = 0;
- uint64_t last_sequence = 0;
- uint64_t log_number = 0;
- uint64_t prev_log_number = 0;
- Builder builder(this, current_);
-
- {
- LogReporter reporter;
- reporter.status = &s;
- log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/);
- Slice record;
- std::string scratch;
- while (reader.ReadRecord(&record, &scratch) && s.ok()) {
- VersionEdit edit;
- s = edit.DecodeFrom(record);
- if (s.ok()) {
- if (edit.has_comparator_ &&
- edit.comparator_ != icmp_.user_comparator()->Name()) {
- s = Status::InvalidArgument(
- edit.comparator_ + " does not match existing comparator ",
- icmp_.user_comparator()->Name());
- }
- }
-
- if (s.ok()) {
- builder.Apply(&edit);
- }
-
- if (edit.has_log_number_) {
- log_number = edit.log_number_;
- have_log_number = true;
- }
-
- if (edit.has_prev_log_number_) {
- prev_log_number = edit.prev_log_number_;
- have_prev_log_number = true;
- }
-
- if (edit.has_next_file_number_) {
- next_file = edit.next_file_number_;
- have_next_file = true;
- }
-
- if (edit.has_last_sequence_) {
- last_sequence = edit.last_sequence_;
- have_last_sequence = true;
- }
- }
- }
- delete file;
- file = NULL;
-
- if (s.ok()) {
- if (!have_next_file) {
- s = Status::Corruption("no meta-nextfile entry in descriptor");
- } else if (!have_log_number) {
- s = Status::Corruption("no meta-lognumber entry in descriptor");
- } else if (!have_last_sequence) {
- s = Status::Corruption("no last-sequence-number entry in descriptor");
- }
-
- if (!have_prev_log_number) {
- prev_log_number = 0;
- }
-
- MarkFileNumberUsed(prev_log_number);
- MarkFileNumberUsed(log_number);
- }
-
- if (s.ok()) {
- Version* v = new Version(this);
- builder.SaveTo(v);
- // Install recovered version
- Finalize(v);
- AppendVersion(v);
- manifest_file_number_ = next_file;
- next_file_number_ = next_file + 1;
- last_sequence_ = last_sequence;
- log_number_ = log_number;
- prev_log_number_ = prev_log_number;
- }
-
- return s;
-}
-
-void VersionSet::MarkFileNumberUsed(uint64_t number) {
- if (next_file_number_ <= number) {
- next_file_number_ = number + 1;
- }
-}
-
-void VersionSet::Finalize(Version* v) {
- // Precomputed best level for next compaction
- int best_level = -1;
- double best_score = -1;
-
- for (int level = 0; level < config::kNumLevels-1; level++) {
- double score;
- if (level == 0) {
- // We treat level-0 specially by bounding the number of files
- // instead of number of bytes for two reasons:
- //
- // (1) With larger write-buffer sizes, it is nice not to do too
- // many level-0 compactions.
- //
- // (2) The files in level-0 are merged on every read and
- // therefore we wish to avoid too many files when the individual
- // file size is small (perhaps because of a small write-buffer
- // setting, or very high compression ratios, or lots of
- // overwrites/deletions).
- score = v->files_[level].size() /
- static_cast<double>(config::kL0_CompactionTrigger);
- } else {
- // Compute the ratio of current size to size limit.
- const uint64_t level_bytes = TotalFileSize(v->files_[level]);
- score = static_cast<double>(level_bytes) / MaxBytesForLevel(level);
- }
-
- if (score > best_score) {
- best_level = level;
- best_score = score;
- }
- }
-
- v->compaction_level_ = best_level;
- v->compaction_score_ = best_score;
-}
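-
-// Example (added for exposition, assuming config::kL0_CompactionTrigger
-// is 4): a version with six level-0 files scores 6/4 = 1.5 for level 0,
-// while a level 1 holding 25MB scores 25/10 = 2.5, so level 1 becomes
-// compaction_level_.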
-
-Status VersionSet::WriteSnapshot(log::Writer* log) {
- // TODO: Break up into multiple records to reduce memory usage on recovery?
-
- // Save metadata
- VersionEdit edit;
- edit.SetComparatorName(icmp_.user_comparator()->Name());
-
- // Save compaction pointers
- for (int level = 0; level < config::kNumLevels; level++) {
- if (!compact_pointer_[level].empty()) {
- InternalKey key;
- key.DecodeFrom(compact_pointer_[level]);
- edit.SetCompactPointer(level, key);
- }
- }
-
- // Save files
- for (int level = 0; level < config::kNumLevels; level++) {
- const std::vector<FileMetaData*>& files = current_->files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- const FileMetaData* f = files[i];
- edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
- }
- }
-
- std::string record;
- edit.EncodeTo(&record);
- return log->AddRecord(record);
-}
-
-int VersionSet::NumLevelFiles(int level) const {
- assert(level >= 0);
- assert(level < config::kNumLevels);
- return current_->files_[level].size();
-}
-
-const char* VersionSet::LevelSummary(LevelSummaryStorage* scratch) const {
- // Update code if kNumLevels changes
- assert(config::kNumLevels == 7);
- snprintf(scratch->buffer, sizeof(scratch->buffer),
- "files[ %d %d %d %d %d %d %d ]",
- int(current_->files_[0].size()),
- int(current_->files_[1].size()),
- int(current_->files_[2].size()),
- int(current_->files_[3].size()),
- int(current_->files_[4].size()),
- int(current_->files_[5].size()),
- int(current_->files_[6].size()));
- return scratch->buffer;
-}
-
-// Return true iff the manifest contains the specified record.
-bool VersionSet::ManifestContains(const std::string& record) const {
- std::string fname = DescriptorFileName(dbname_, manifest_file_number_);
- Log(options_->info_log, "ManifestContains: checking %s\n", fname.c_str());
- SequentialFile* file = NULL;
- Status s = env_->NewSequentialFile(fname, &file);
- if (!s.ok()) {
- Log(options_->info_log, "ManifestContains: %s\n", s.ToString().c_str());
- return false;
- }
- log::Reader reader(file, NULL, true/*checksum*/, 0);
- Slice r;
- std::string scratch;
- bool result = false;
- while (reader.ReadRecord(&r, &scratch)) {
- if (r == Slice(record)) {
- result = true;
- break;
- }
- }
- delete file;
- Log(options_->info_log, "ManifestContains: result = %d\n", result ? 1 : 0);
- return result;
-}
-
-uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
- uint64_t result = 0;
- for (int level = 0; level < config::kNumLevels; level++) {
- const std::vector<FileMetaData*>& files = v->files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
- // Entire file is before "ikey", so just add the file size
- result += files[i]->file_size;
- } else if (icmp_.Compare(files[i]->smallest, ikey) > 0) {
- // Entire file is after "ikey", so ignore
- if (level > 0) {
- // Files other than level 0 are sorted by meta->smallest, so
- // no further files in this level will contain data for
- // "ikey".
- break;
- }
- } else {
- // "ikey" falls in the range for this table. Add the
- // approximate offset of "ikey" within the table.
- Table* tableptr;
- Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), files[i]->number, files[i]->file_size, &tableptr);
- if (tableptr != NULL) {
- result += tableptr->ApproximateOffsetOf(ikey.Encode());
- }
- delete iter;
- }
- }
- }
- return result;
-}
-
-void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
- for (Version* v = dummy_versions_.next_;
- v != &dummy_versions_;
- v = v->next_) {
- for (int level = 0; level < config::kNumLevels; level++) {
- const std::vector<FileMetaData*>& files = v->files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- live->insert(files[i]->number);
- }
- }
- }
-}
-
-int64_t VersionSet::NumLevelBytes(int level) const {
- assert(level >= 0);
- assert(level < config::kNumLevels);
- return TotalFileSize(current_->files_[level]);
-}
-
-int64_t VersionSet::MaxNextLevelOverlappingBytes() {
- int64_t result = 0;
- std::vector<FileMetaData*> overlaps;
- for (int level = 1; level < config::kNumLevels - 1; level++) {
- for (size_t i = 0; i < current_->files_[level].size(); i++) {
- const FileMetaData* f = current_->files_[level][i];
- current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest,
- &overlaps);
- const int64_t sum = TotalFileSize(overlaps);
- if (sum > result) {
- result = sum;
- }
- }
- }
- return result;
-}
-
-// Stores the minimal range that covers all entries in inputs in
-// *smallest, *largest.
-// REQUIRES: inputs is not empty
-void VersionSet::GetRange(const std::vector<FileMetaData*>& inputs,
- InternalKey* smallest,
- InternalKey* largest) {
- assert(!inputs.empty());
- smallest->Clear();
- largest->Clear();
- for (size_t i = 0; i < inputs.size(); i++) {
- FileMetaData* f = inputs[i];
- if (i == 0) {
- *smallest = f->smallest;
- *largest = f->largest;
- } else {
- if (icmp_.Compare(f->smallest, *smallest) < 0) {
- *smallest = f->smallest;
- }
- if (icmp_.Compare(f->largest, *largest) > 0) {
- *largest = f->largest;
- }
- }
- }
-}
-
-// Stores the minimal range that covers all entries in inputs1 and inputs2
-// in *smallest, *largest.
-// REQUIRES: inputs1 and inputs2 are not both empty
-void VersionSet::GetRange2(const std::vector<FileMetaData*>& inputs1,
- const std::vector<FileMetaData*>& inputs2,
- InternalKey* smallest,
- InternalKey* largest) {
- std::vector<FileMetaData*> all = inputs1;
- all.insert(all.end(), inputs2.begin(), inputs2.end());
- GetRange(all, smallest, largest);
-}
-
-Iterator* VersionSet::MakeInputIterator(Compaction* c) {
- ReadOptions options;
- options.verify_checksums = options_->paranoid_checks;
- options.fill_cache = false;
-
- // Level-0 files have to be merged together. For other levels,
- // we will make a concatenating iterator per level.
- // TODO(opt): use concatenating iterator for level-0 if there is no overlap
- const int space = (c->level() == 0 ? c->inputs_[0].size() + 1 : 2);
- Iterator** list = new Iterator*[space];
- int num = 0;
- for (int which = 0; which < 2; which++) {
- if (!c->inputs_[which].empty()) {
- if (c->level() + which == 0) {
- const std::vector<FileMetaData*>& files = c->inputs_[which];
- for (size_t i = 0; i < files.size(); i++) {
- list[num++] = table_cache_->NewIterator(
- options, files[i]->number, files[i]->file_size);
- }
- } else {
- // Create concatenating iterator for the files from this level
- list[num++] = NewTwoLevelIterator(
- new Version::LevelFileNumIterator(icmp_, &c->inputs_[which]),
- &GetFileIterator, table_cache_, options);
- }
- }
- }
- assert(num <= space);
- Iterator* result = NewMergingIterator(&icmp_, list, num);
- delete[] list;
- return result;
-}
-
-Compaction* VersionSet::PickCompaction() {
- Compaction* c;
- int level;
-
- // We prefer compactions triggered by too much data in a level over
-  // compactions triggered by seeks.
- const bool size_compaction = (current_->compaction_score_ >= 1);
- const bool seek_compaction = (current_->file_to_compact_ != NULL);
- if (size_compaction) {
- level = current_->compaction_level_;
- assert(level >= 0);
- assert(level+1 < config::kNumLevels);
- c = new Compaction(level);
-
- // Pick the first file that comes after compact_pointer_[level]
- for (size_t i = 0; i < current_->files_[level].size(); i++) {
- FileMetaData* f = current_->files_[level][i];
- if (compact_pointer_[level].empty() ||
- icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
- c->inputs_[0].push_back(f);
- break;
- }
- }
- if (c->inputs_[0].empty()) {
- // Wrap-around to the beginning of the key space
- c->inputs_[0].push_back(current_->files_[level][0]);
- }
- } else if (seek_compaction) {
- level = current_->file_to_compact_level_;
- c = new Compaction(level);
- c->inputs_[0].push_back(current_->file_to_compact_);
- } else {
- return NULL;
- }
-
- c->input_version_ = current_;
- c->input_version_->Ref();
-
- // Files in level 0 may overlap each other, so pick up all overlapping ones
- if (level == 0) {
- InternalKey smallest, largest;
- GetRange(c->inputs_[0], &smallest, &largest);
- // Note that the next call will discard the file we placed in
- // c->inputs_[0] earlier and replace it with an overlapping set
- // which will include the picked file.
- current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]);
- assert(!c->inputs_[0].empty());
- }
-
- SetupOtherInputs(c);
-
- return c;
-}
-
-void VersionSet::SetupOtherInputs(Compaction* c) {
- const int level = c->level();
- InternalKey smallest, largest;
- GetRange(c->inputs_[0], &smallest, &largest);
-
- current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]);
-
- // Get entire range covered by compaction
- InternalKey all_start, all_limit;
- GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit);
-
- // See if we can grow the number of inputs in "level" without
- // changing the number of "level+1" files we pick up.
- if (!c->inputs_[1].empty()) {
- std::vector<FileMetaData*> expanded0;
- current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0);
- const int64_t inputs0_size = TotalFileSize(c->inputs_[0]);
- const int64_t inputs1_size = TotalFileSize(c->inputs_[1]);
- const int64_t expanded0_size = TotalFileSize(expanded0);
- if (expanded0.size() > c->inputs_[0].size() &&
- inputs1_size + expanded0_size < kExpandedCompactionByteSizeLimit) {
- InternalKey new_start, new_limit;
- GetRange(expanded0, &new_start, &new_limit);
- std::vector<FileMetaData*> expanded1;
- current_->GetOverlappingInputs(level+1, &new_start, &new_limit,
- &expanded1);
- if (expanded1.size() == c->inputs_[1].size()) {
- Log(options_->info_log,
- "Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n",
- level,
- int(c->inputs_[0].size()),
- int(c->inputs_[1].size()),
- long(inputs0_size), long(inputs1_size),
- int(expanded0.size()),
- int(expanded1.size()),
- long(expanded0_size), long(inputs1_size));
- smallest = new_start;
- largest = new_limit;
- c->inputs_[0] = expanded0;
- c->inputs_[1] = expanded1;
- GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit);
- }
- }
- }
-
- // Compute the set of grandparent files that overlap this compaction
- // (parent == level+1; grandparent == level+2)
- if (level + 2 < config::kNumLevels) {
- current_->GetOverlappingInputs(level + 2, &all_start, &all_limit,
- &c->grandparents_);
- }
-
- if (false) {
- Log(options_->info_log, "Compacting %d '%s' .. '%s'",
- level,
- smallest.DebugString().c_str(),
- largest.DebugString().c_str());
- }
-
- // Update the place where we will do the next compaction for this level.
- // We update this immediately instead of waiting for the VersionEdit
- // to be applied so that if the compaction fails, we will try a different
- // key range next time.
- compact_pointer_[level] = largest.Encode().ToString();
- c->edit_.SetCompactPointer(level, largest);
-}
-
-Compaction* VersionSet::CompactRange(
- int level,
- const InternalKey* begin,
- const InternalKey* end) {
- std::vector<FileMetaData*> inputs;
- current_->GetOverlappingInputs(level, begin, end, &inputs);
- if (inputs.empty()) {
- return NULL;
- }
-
- // Avoid compacting too much in one shot in case the range is large.
- const uint64_t limit = MaxFileSizeForLevel(level);
- uint64_t total = 0;
- for (size_t i = 0; i < inputs.size(); i++) {
- uint64_t s = inputs[i]->file_size;
- total += s;
- if (total >= limit) {
- inputs.resize(i + 1);
- break;
- }
- }
-
- Compaction* c = new Compaction(level);
- c->input_version_ = current_;
- c->input_version_->Ref();
- c->inputs_[0] = inputs;
- SetupOtherInputs(c);
- return c;
-}
-
-Compaction::Compaction(int level)
- : level_(level),
- max_output_file_size_(MaxFileSizeForLevel(level)),
- input_version_(NULL),
- grandparent_index_(0),
- seen_key_(false),
- overlapped_bytes_(0) {
- for (int i = 0; i < config::kNumLevels; i++) {
- level_ptrs_[i] = 0;
- }
-}
-
-Compaction::~Compaction() {
- if (input_version_ != NULL) {
- input_version_->Unref();
- }
-}
-
-bool Compaction::IsTrivialMove() const {
- // Avoid a move if there is lots of overlapping grandparent data.
- // Otherwise, the move could create a parent file that will require
- // a very expensive merge later on.
- return (num_input_files(0) == 1 &&
- num_input_files(1) == 0 &&
- TotalFileSize(grandparents_) <= kMaxGrandParentOverlapBytes);
-}
-
-void Compaction::AddInputDeletions(VersionEdit* edit) {
- for (int which = 0; which < 2; which++) {
- for (size_t i = 0; i < inputs_[which].size(); i++) {
- edit->DeleteFile(level_ + which, inputs_[which][i]->number);
- }
- }
-}
-
-bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
- // Maybe use binary search to find right entry instead of linear search?
- const Comparator* user_cmp = input_version_->vset_->icmp_.user_comparator();
- for (int lvl = level_ + 2; lvl < config::kNumLevels; lvl++) {
- const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
- while (level_ptrs_[lvl] < files.size()) {
- FileMetaData* f = files[level_ptrs_[lvl]];
- if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
- // We've advanced far enough
- if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
- // Key falls in this file's range, so definitely not base level
- return false;
- }
- break;
- }
- level_ptrs_[lvl]++;
- }
- }
- return true;
-}
-
-bool Compaction::ShouldStopBefore(const Slice& internal_key) {
- // Scan to find earliest grandparent file that contains key.
- const InternalKeyComparator* icmp = &input_version_->vset_->icmp_;
- while (grandparent_index_ < grandparents_.size() &&
- icmp->Compare(internal_key,
- grandparents_[grandparent_index_]->largest.Encode()) > 0) {
- if (seen_key_) {
- overlapped_bytes_ += grandparents_[grandparent_index_]->file_size;
- }
- grandparent_index_++;
- }
- seen_key_ = true;
-
- if (overlapped_bytes_ > kMaxGrandParentOverlapBytes) {
- // Too much overlap for current output; start new output
- overlapped_bytes_ = 0;
- return true;
- } else {
- return false;
- }
-}
-
-void Compaction::ReleaseInputs() {
- if (input_version_ != NULL) {
- input_version_->Unref();
- input_version_ = NULL;
- }
-}
-
-} // namespace leveldb
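
PickCompaction above prefers size-triggered compactions (compaction_score_ >= 1) over seek-triggered ones, and returns NULL when neither applies. A minimal sketch of that priority rule, lifted out of VersionSet state; ChooseTrigger and the enum names are illustrative stand-ins, not LevelDB API:

#include <cstddef>

enum CompactionTrigger { kTriggerNone, kTriggerSize, kTriggerSeek };

// Mirrors the decision at the top of VersionSet::PickCompaction():
// a level whose score reached 1 wins over a file that exhausted its
// seek allowance; absent both, there is nothing to compact.
CompactionTrigger ChooseTrigger(double compaction_score,
                                const void* file_to_compact) {
  if (compaction_score >= 1) return kTriggerSize;
  if (file_to_compact != NULL) return kTriggerSeek;
  return kTriggerNone;
}
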
diff --git a/src/leveldb/db/version_set.h b/src/leveldb/db/version_set.h
deleted file mode 100644
index 9d084fdb7d..0000000000
--- a/src/leveldb/db/version_set.h
+++ /dev/null
@@ -1,383 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// The representation of a DBImpl consists of a set of Versions. The
-// newest version is called "current". Older versions may be kept
-// around to provide a consistent view to live iterators.
-//
-// Each Version keeps track of a set of Table files per level. The
-// entire set of versions is maintained in a VersionSet.
-//
-// Version and VersionSet are thread-compatible, but require external
-// synchronization on all accesses.
-
-#ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_
-#define STORAGE_LEVELDB_DB_VERSION_SET_H_
-
-#include <map>
-#include <set>
-#include <vector>
-#include "db/dbformat.h"
-#include "db/version_edit.h"
-#include "port/port.h"
-#include "port/thread_annotations.h"
-
-namespace leveldb {
-
-namespace log { class Writer; }
-
-class Compaction;
-class Iterator;
-class MemTable;
-class TableBuilder;
-class TableCache;
-class Version;
-class VersionSet;
-class WritableFile;
-
-// Return the smallest index i such that files[i]->largest >= key.
-// Return files.size() if there is no such file.
-// REQUIRES: "files" contains a sorted list of non-overlapping files.
-extern int FindFile(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>& files,
- const Slice& key);
-
-// Returns true iff some file in "files" overlaps the user key range
-// [*smallest_user_key,*largest_user_key].
-// smallest_user_key==NULL represents a key smaller than all keys in the DB.
-// largest_user_key==NULL represents a key larger than all keys in the DB.
-// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
-// in sorted order.
-extern bool SomeFileOverlapsRange(
- const InternalKeyComparator& icmp,
- bool disjoint_sorted_files,
- const std::vector<FileMetaData*>& files,
- const Slice* smallest_user_key,
- const Slice* largest_user_key);
-
-class Version {
- public:
- // Append to *iters a sequence of iterators that will
- // yield the contents of this Version when merged together.
- // REQUIRES: This version has been saved (see VersionSet::SaveTo)
- void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
-
- // Look up the value for key. If found, store it in *val and
- // return OK. Else return a non-OK status. Fills *stats.
- // REQUIRES: lock is not held
- struct GetStats {
- FileMetaData* seek_file;
- int seek_file_level;
- };
- Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
- GetStats* stats);
-
- // Adds "stats" into the current state. Returns true if a new
- // compaction may need to be triggered, false otherwise.
- // REQUIRES: lock is held
- bool UpdateStats(const GetStats& stats);
-
- // Reference count management (so Versions do not disappear out from
- // under live iterators)
- void Ref();
- void Unref();
-
- void GetOverlappingInputs(
- int level,
- const InternalKey* begin, // NULL means before all keys
- const InternalKey* end, // NULL means after all keys
- std::vector<FileMetaData*>* inputs);
-
- // Returns true iff some file in the specified level overlaps
- // some part of [*smallest_user_key,*largest_user_key].
- // smallest_user_key==NULL represents a key smaller than all keys in the DB.
- // largest_user_key==NULL represents a key larger than all keys in the DB.
- bool OverlapInLevel(int level,
- const Slice* smallest_user_key,
- const Slice* largest_user_key);
-
- // Return the level at which we should place a new memtable compaction
- // result that covers the range [smallest_user_key,largest_user_key].
- int PickLevelForMemTableOutput(const Slice& smallest_user_key,
- const Slice& largest_user_key);
-
- int NumFiles(int level) const { return files_[level].size(); }
-
- // Return a human readable string that describes this version's contents.
- std::string DebugString() const;
-
- private:
- friend class Compaction;
- friend class VersionSet;
-
- class LevelFileNumIterator;
- Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
-
- VersionSet* vset_; // VersionSet to which this Version belongs
- Version* next_; // Next version in linked list
- Version* prev_; // Previous version in linked list
- int refs_; // Number of live refs to this version
-
- // List of files per level
- std::vector<FileMetaData*> files_[config::kNumLevels];
-
- // Next file to compact based on seek stats.
- FileMetaData* file_to_compact_;
- int file_to_compact_level_;
-
- // Level that should be compacted next and its compaction score.
- // Score < 1 means compaction is not strictly needed. These fields
- // are initialized by Finalize().
- double compaction_score_;
- int compaction_level_;
-
- explicit Version(VersionSet* vset)
- : vset_(vset), next_(this), prev_(this), refs_(0),
- file_to_compact_(NULL),
- file_to_compact_level_(-1),
- compaction_score_(-1),
- compaction_level_(-1) {
- }
-
- ~Version();
-
- // No copying allowed
- Version(const Version&);
- void operator=(const Version&);
-};
-
-class VersionSet {
- public:
- VersionSet(const std::string& dbname,
- const Options* options,
- TableCache* table_cache,
- const InternalKeyComparator*);
- ~VersionSet();
-
- // Apply *edit to the current version to form a new descriptor that
- // is both saved to persistent state and installed as the new
- // current version. Will release *mu while actually writing to the file.
- // REQUIRES: *mu is held on entry.
- // REQUIRES: no other thread concurrently calls LogAndApply()
- Status LogAndApply(VersionEdit* edit, port::Mutex* mu)
- EXCLUSIVE_LOCKS_REQUIRED(mu);
-
- // Recover the last saved descriptor from persistent storage.
- Status Recover();
-
- // Return the current version.
- Version* current() const { return current_; }
-
- // Return the current manifest file number
- uint64_t ManifestFileNumber() const { return manifest_file_number_; }
-
- // Allocate and return a new file number
- uint64_t NewFileNumber() { return next_file_number_++; }
-
- // Arrange to reuse "file_number" unless a newer file number has
- // already been allocated.
- // REQUIRES: "file_number" was returned by a call to NewFileNumber().
- void ReuseFileNumber(uint64_t file_number) {
- if (next_file_number_ == file_number + 1) {
- next_file_number_ = file_number;
- }
- }
-
- // Return the number of Table files at the specified level.
- int NumLevelFiles(int level) const;
-
- // Return the combined file size of all files at the specified level.
- int64_t NumLevelBytes(int level) const;
-
- // Return the last sequence number.
- uint64_t LastSequence() const { return last_sequence_; }
-
- // Set the last sequence number to s.
- void SetLastSequence(uint64_t s) {
- assert(s >= last_sequence_);
- last_sequence_ = s;
- }
-
- // Mark the specified file number as used.
- void MarkFileNumberUsed(uint64_t number);
-
- // Return the current log file number.
- uint64_t LogNumber() const { return log_number_; }
-
- // Return the log file number for the log file that is currently
- // being compacted, or zero if there is no such log file.
- uint64_t PrevLogNumber() const { return prev_log_number_; }
-
- // Pick level and inputs for a new compaction.
- // Returns NULL if there is no compaction to be done.
- // Otherwise returns a pointer to a heap-allocated object that
- // describes the compaction. Caller should delete the result.
- Compaction* PickCompaction();
-
- // Return a compaction object for compacting the range [begin,end] in
- // the specified level. Returns NULL if there is nothing in that
- // level that overlaps the specified range. Caller should delete
- // the result.
- Compaction* CompactRange(
- int level,
- const InternalKey* begin,
- const InternalKey* end);
-
- // Return the maximum overlapping data (in bytes) at next level for any
- // file at a level >= 1.
- int64_t MaxNextLevelOverlappingBytes();
-
- // Create an iterator that reads over the compaction inputs for "*c".
- // The caller should delete the iterator when no longer needed.
- Iterator* MakeInputIterator(Compaction* c);
-
- // Returns true iff some level needs a compaction.
- bool NeedsCompaction() const {
- Version* v = current_;
- return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL);
- }
-
- // Add all files listed in any live version to *live.
- // May also mutate some internal state.
- void AddLiveFiles(std::set<uint64_t>* live);
-
- // Return the approximate offset in the database of the data for
- // "key" as of version "v".
- uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
-
- // Return a human-readable short (single-line) summary of the number
- // of files per level. Uses *scratch as backing store.
- struct LevelSummaryStorage {
- char buffer[100];
- };
- const char* LevelSummary(LevelSummaryStorage* scratch) const;
-
- private:
- class Builder;
-
- friend class Compaction;
- friend class Version;
-
- void Finalize(Version* v);
-
- void GetRange(const std::vector<FileMetaData*>& inputs,
- InternalKey* smallest,
- InternalKey* largest);
-
- void GetRange2(const std::vector<FileMetaData*>& inputs1,
- const std::vector<FileMetaData*>& inputs2,
- InternalKey* smallest,
- InternalKey* largest);
-
- void SetupOtherInputs(Compaction* c);
-
- // Save current contents to *log
- Status WriteSnapshot(log::Writer* log);
-
- void AppendVersion(Version* v);
-
- bool ManifestContains(const std::string& record) const;
-
- Env* const env_;
- const std::string dbname_;
- const Options* const options_;
- TableCache* const table_cache_;
- const InternalKeyComparator icmp_;
- uint64_t next_file_number_;
- uint64_t manifest_file_number_;
- uint64_t last_sequence_;
- uint64_t log_number_;
- uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
-
- // Opened lazily
- WritableFile* descriptor_file_;
- log::Writer* descriptor_log_;
- Version dummy_versions_; // Head of circular doubly-linked list of versions.
- Version* current_; // == dummy_versions_.prev_
-
- // Per-level key at which the next compaction at that level should start.
- // Either an empty string, or a valid InternalKey.
- std::string compact_pointer_[config::kNumLevels];
-
- // No copying allowed
- VersionSet(const VersionSet&);
- void operator=(const VersionSet&);
-};
-
-// A Compaction encapsulates information about a compaction.
-class Compaction {
- public:
- ~Compaction();
-
- // Return the level that is being compacted. Inputs from "level"
- // and "level+1" will be merged to produce a set of "level+1" files.
- int level() const { return level_; }
-
- // Return the object that holds the edits to the descriptor done
- // by this compaction.
- VersionEdit* edit() { return &edit_; }
-
- // "which" must be either 0 or 1
- int num_input_files(int which) const { return inputs_[which].size(); }
-
- // Return the ith input file at "level()+which" ("which" must be 0 or 1).
- FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
-
- // Maximum size of files to build during this compaction.
- uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
-
- // Is this a trivial compaction that can be implemented by just
- // moving a single input file to the next level (no merging or splitting)?
- bool IsTrivialMove() const;
-
- // Add all inputs to this compaction as delete operations to *edit.
- void AddInputDeletions(VersionEdit* edit);
-
- // Returns true if the information we have available guarantees that
- // the compaction is producing data in "level+1" for which no data exists
- // in levels greater than "level+1".
- bool IsBaseLevelForKey(const Slice& user_key);
-
- // Returns true iff we should stop building the current output
- // before processing "internal_key".
- bool ShouldStopBefore(const Slice& internal_key);
-
- // Release the input version for the compaction, once the compaction
- // is successful.
- void ReleaseInputs();
-
- private:
- friend class Version;
- friend class VersionSet;
-
- explicit Compaction(int level);
-
- int level_;
- uint64_t max_output_file_size_;
- Version* input_version_;
- VersionEdit edit_;
-
- // Each compaction reads inputs from "level_" and "level_+1"
- std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
-
- // State used to check for the number of overlapping grandparent files
- // (parent == level_ + 1, grandparent == level_ + 2)
- std::vector<FileMetaData*> grandparents_;
- size_t grandparent_index_; // Index into grandparents_
- bool seen_key_; // Some output key has been seen
- int64_t overlapped_bytes_; // Bytes of overlap between current output
- // and grandparent files
-
- // State for implementing IsBaseLevelForKey
-
- // level_ptrs_ holds indices into input_version_->files_: our state
- // is that we are positioned at one of the file ranges for each
- // higher level than the ones involved in this compaction (i.e. for
- // all L >= level_ + 2).
- size_t level_ptrs_[config::kNumLevels];
-};
-
-} // namespace leveldb
-
-#endif // STORAGE_LEVELDB_DB_VERSION_SET_H_
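
The FindFile declaration above is a lower-bound binary search: it returns the smallest index i with files[i]->largest >= key, or files.size() if no file qualifies. A self-contained sketch of that contract over plain ints (FindFirstAtLeast is a hypothetical name, with InternalKey comparison replaced by int comparison):

#include <cstddef>
#include <vector>

// Returns the smallest index i such that largest[i] >= key, or
// largest.size() if no element qualifies.
size_t FindFirstAtLeast(const std::vector<int>& largest, int key) {
  size_t left = 0;
  size_t right = largest.size();
  while (left < right) {
    const size_t mid = left + (right - left) / 2;
    if (largest[mid] < key) {
      left = mid + 1;   // files at or before mid end before key
    } else {
      right = mid;      // mid is a candidate; keep searching left
    }
  }
  return right;
}
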
diff --git a/src/leveldb/db/version_set_test.cc b/src/leveldb/db/version_set_test.cc
deleted file mode 100644
index 501e34d133..0000000000
--- a/src/leveldb/db/version_set_test.cc
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "db/version_set.h"
-#include "util/logging.h"
-#include "util/testharness.h"
-#include "util/testutil.h"
-
-namespace leveldb {
-
-class FindFileTest {
- public:
- std::vector<FileMetaData*> files_;
- bool disjoint_sorted_files_;
-
- FindFileTest() : disjoint_sorted_files_(true) { }
-
- ~FindFileTest() {
- for (size_t i = 0; i < files_.size(); i++) {
- delete files_[i];
- }
- }
-
- void Add(const char* smallest, const char* largest,
- SequenceNumber smallest_seq = 100,
- SequenceNumber largest_seq = 100) {
- FileMetaData* f = new FileMetaData;
- f->number = files_.size() + 1;
- f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
- f->largest = InternalKey(largest, largest_seq, kTypeValue);
- files_.push_back(f);
- }
-
- int Find(const char* key) {
- InternalKey target(key, 100, kTypeValue);
- InternalKeyComparator cmp(BytewiseComparator());
- return FindFile(cmp, files_, target.Encode());
- }
-
- bool Overlaps(const char* smallest, const char* largest) {
- InternalKeyComparator cmp(BytewiseComparator());
- Slice s(smallest != NULL ? smallest : "");
- Slice l(largest != NULL ? largest : "");
- return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_,
- (smallest != NULL ? &s : NULL),
- (largest != NULL ? &l : NULL));
- }
-};
-
-TEST(FindFileTest, Empty) {
- ASSERT_EQ(0, Find("foo"));
- ASSERT_TRUE(! Overlaps("a", "z"));
- ASSERT_TRUE(! Overlaps(NULL, "z"));
- ASSERT_TRUE(! Overlaps("a", NULL));
- ASSERT_TRUE(! Overlaps(NULL, NULL));
-}
-
-TEST(FindFileTest, Single) {
- Add("p", "q");
- ASSERT_EQ(0, Find("a"));
- ASSERT_EQ(0, Find("p"));
- ASSERT_EQ(0, Find("p1"));
- ASSERT_EQ(0, Find("q"));
- ASSERT_EQ(1, Find("q1"));
- ASSERT_EQ(1, Find("z"));
-
- ASSERT_TRUE(! Overlaps("a", "b"));
- ASSERT_TRUE(! Overlaps("z1", "z2"));
- ASSERT_TRUE(Overlaps("a", "p"));
- ASSERT_TRUE(Overlaps("a", "q"));
- ASSERT_TRUE(Overlaps("a", "z"));
- ASSERT_TRUE(Overlaps("p", "p1"));
- ASSERT_TRUE(Overlaps("p", "q"));
- ASSERT_TRUE(Overlaps("p", "z"));
- ASSERT_TRUE(Overlaps("p1", "p2"));
- ASSERT_TRUE(Overlaps("p1", "z"));
- ASSERT_TRUE(Overlaps("q", "q"));
- ASSERT_TRUE(Overlaps("q", "q1"));
-
- ASSERT_TRUE(! Overlaps(NULL, "j"));
- ASSERT_TRUE(! Overlaps("r", NULL));
- ASSERT_TRUE(Overlaps(NULL, "p"));
- ASSERT_TRUE(Overlaps(NULL, "p1"));
- ASSERT_TRUE(Overlaps("q", NULL));
- ASSERT_TRUE(Overlaps(NULL, NULL));
-}
-
-
-TEST(FindFileTest, Multiple) {
- Add("150", "200");
- Add("200", "250");
- Add("300", "350");
- Add("400", "450");
- ASSERT_EQ(0, Find("100"));
- ASSERT_EQ(0, Find("150"));
- ASSERT_EQ(0, Find("151"));
- ASSERT_EQ(0, Find("199"));
- ASSERT_EQ(0, Find("200"));
- ASSERT_EQ(1, Find("201"));
- ASSERT_EQ(1, Find("249"));
- ASSERT_EQ(1, Find("250"));
- ASSERT_EQ(2, Find("251"));
- ASSERT_EQ(2, Find("299"));
- ASSERT_EQ(2, Find("300"));
- ASSERT_EQ(2, Find("349"));
- ASSERT_EQ(2, Find("350"));
- ASSERT_EQ(3, Find("351"));
- ASSERT_EQ(3, Find("400"));
- ASSERT_EQ(3, Find("450"));
- ASSERT_EQ(4, Find("451"));
-
- ASSERT_TRUE(! Overlaps("100", "149"));
- ASSERT_TRUE(! Overlaps("251", "299"));
- ASSERT_TRUE(! Overlaps("451", "500"));
- ASSERT_TRUE(! Overlaps("351", "399"));
-
- ASSERT_TRUE(Overlaps("100", "150"));
- ASSERT_TRUE(Overlaps("100", "200"));
- ASSERT_TRUE(Overlaps("100", "300"));
- ASSERT_TRUE(Overlaps("100", "400"));
- ASSERT_TRUE(Overlaps("100", "500"));
- ASSERT_TRUE(Overlaps("375", "400"));
- ASSERT_TRUE(Overlaps("450", "450"));
- ASSERT_TRUE(Overlaps("450", "500"));
-}
-
-TEST(FindFileTest, MultipleNullBoundaries) {
- Add("150", "200");
- Add("200", "250");
- Add("300", "350");
- Add("400", "450");
- ASSERT_TRUE(! Overlaps(NULL, "149"));
- ASSERT_TRUE(! Overlaps("451", NULL));
- ASSERT_TRUE(Overlaps(NULL, NULL));
- ASSERT_TRUE(Overlaps(NULL, "150"));
- ASSERT_TRUE(Overlaps(NULL, "199"));
- ASSERT_TRUE(Overlaps(NULL, "200"));
- ASSERT_TRUE(Overlaps(NULL, "201"));
- ASSERT_TRUE(Overlaps(NULL, "400"));
- ASSERT_TRUE(Overlaps(NULL, "800"));
- ASSERT_TRUE(Overlaps("100", NULL));
- ASSERT_TRUE(Overlaps("200", NULL));
- ASSERT_TRUE(Overlaps("449", NULL));
- ASSERT_TRUE(Overlaps("450", NULL));
-}
-
-TEST(FindFileTest, OverlapSequenceChecks) {
- Add("200", "200", 5000, 3000);
- ASSERT_TRUE(! Overlaps("199", "199"));
- ASSERT_TRUE(! Overlaps("201", "300"));
- ASSERT_TRUE(Overlaps("200", "200"));
- ASSERT_TRUE(Overlaps("190", "200"));
- ASSERT_TRUE(Overlaps("200", "210"));
-}
-
-TEST(FindFileTest, OverlappingFiles) {
- Add("150", "600");
- Add("400", "500");
- disjoint_sorted_files_ = false;
- ASSERT_TRUE(! Overlaps("100", "149"));
- ASSERT_TRUE(! Overlaps("601", "700"));
- ASSERT_TRUE(Overlaps("100", "150"));
- ASSERT_TRUE(Overlaps("100", "200"));
- ASSERT_TRUE(Overlaps("100", "300"));
- ASSERT_TRUE(Overlaps("100", "400"));
- ASSERT_TRUE(Overlaps("100", "500"));
- ASSERT_TRUE(Overlaps("375", "400"));
- ASSERT_TRUE(Overlaps("450", "450"));
- ASSERT_TRUE(Overlaps("450", "500"));
- ASSERT_TRUE(Overlaps("450", "700"));
- ASSERT_TRUE(Overlaps("600", "700"));
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
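
The NULL-boundary tests above rely on the convention that a NULL bound stands for a key before (begin) or after (end) every key in the DB. A sketch of the closed-interval check this reduces to, with ints standing in for user keys and RangesOverlap an illustrative name:

#include <climits>
#include <cstddef>

// NULL begin acts as -infinity, NULL end as +infinity over the key
// space. Two closed intervals overlap iff each starts no later than
// the other ends.
bool RangesOverlap(const int* begin, const int* end,
                   int file_smallest, int file_largest) {
  const int lo = (begin != NULL) ? *begin : INT_MIN;
  const int hi = (end != NULL) ? *end : INT_MAX;
  return lo <= file_largest && hi >= file_smallest;
}
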
diff --git a/src/leveldb/db/write_batch.cc b/src/leveldb/db/write_batch.cc
deleted file mode 100644
index 33f4a4257e..0000000000
--- a/src/leveldb/db/write_batch.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
-// WriteBatch::rep_ :=
-// sequence: fixed64
-// count: fixed32
-// data: record[count]
-// record :=
-// kTypeValue varstring varstring |
-// kTypeDeletion varstring
-// varstring :=
-// len: varint32
-// data: uint8[len]
-
-#include "leveldb/write_batch.h"
-
-#include "leveldb/db.h"
-#include "db/dbformat.h"
-#include "db/memtable.h"
-#include "db/write_batch_internal.h"
-#include "util/coding.h"
-
-namespace leveldb {
-
-// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
-static const size_t kHeader = 12;
-
-WriteBatch::WriteBatch() {
- Clear();
-}
-
-WriteBatch::~WriteBatch() { }
-
-WriteBatch::Handler::~Handler() { }
-
-void WriteBatch::Clear() {
- rep_.clear();
- rep_.resize(kHeader);
-}
-
-Status WriteBatch::Iterate(Handler* handler) const {
- Slice input(rep_);
- if (input.size() < kHeader) {
- return Status::Corruption("malformed WriteBatch (too small)");
- }
-
- input.remove_prefix(kHeader);
- Slice key, value;
- int found = 0;
- while (!input.empty()) {
- found++;
- char tag = input[0];
- input.remove_prefix(1);
- switch (tag) {
- case kTypeValue:
- if (GetLengthPrefixedSlice(&input, &key) &&
- GetLengthPrefixedSlice(&input, &value)) {
- handler->Put(key, value);
- } else {
- return Status::Corruption("bad WriteBatch Put");
- }
- break;
- case kTypeDeletion:
- if (GetLengthPrefixedSlice(&input, &key)) {
- handler->Delete(key);
- } else {
- return Status::Corruption("bad WriteBatch Delete");
- }
- break;
- default:
- return Status::Corruption("unknown WriteBatch tag");
- }
- }
- if (found != WriteBatchInternal::Count(this)) {
- return Status::Corruption("WriteBatch has wrong count");
- } else {
- return Status::OK();
- }
-}
-
-int WriteBatchInternal::Count(const WriteBatch* b) {
- return DecodeFixed32(b->rep_.data() + 8);
-}
-
-void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
- EncodeFixed32(&b->rep_[8], n);
-}
-
-SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) {
- return SequenceNumber(DecodeFixed64(b->rep_.data()));
-}
-
-void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
- EncodeFixed64(&b->rep_[0], seq);
-}
-
-void WriteBatch::Put(const Slice& key, const Slice& value) {
- WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
- rep_.push_back(static_cast<char>(kTypeValue));
- PutLengthPrefixedSlice(&rep_, key);
- PutLengthPrefixedSlice(&rep_, value);
-}
-
-void WriteBatch::Delete(const Slice& key) {
- WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
- rep_.push_back(static_cast<char>(kTypeDeletion));
- PutLengthPrefixedSlice(&rep_, key);
-}
-
-namespace {
-class MemTableInserter : public WriteBatch::Handler {
- public:
- SequenceNumber sequence_;
- MemTable* mem_;
-
- virtual void Put(const Slice& key, const Slice& value) {
- mem_->Add(sequence_, kTypeValue, key, value);
- sequence_++;
- }
- virtual void Delete(const Slice& key) {
- mem_->Add(sequence_, kTypeDeletion, key, Slice());
- sequence_++;
- }
-};
-} // namespace
-
-Status WriteBatchInternal::InsertInto(const WriteBatch* b,
- MemTable* memtable) {
- MemTableInserter inserter;
- inserter.sequence_ = WriteBatchInternal::Sequence(b);
- inserter.mem_ = memtable;
- return b->Iterate(&inserter);
-}
-
-void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
- assert(contents.size() >= kHeader);
- b->rep_.assign(contents.data(), contents.size());
-}
-
-void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
- SetCount(dst, Count(dst) + Count(src));
- assert(src->rep_.size() >= kHeader);
- dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader);
-}
-
-} // namespace leveldb
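
The rep_ format documented at the top of write_batch.cc is a 12-byte header (fixed64 sequence, fixed32 count) followed by the records. A sketch that decodes the header by hand, assuming the little-endian fixed-width encoding used by util/coding.h and a rep of at least kHeader bytes:

#include <stdint.h>
#include <string>

// Little-endian decode: bytes 0..7 hold the starting sequence number,
// bytes 8..11 hold the record count. Assumes rep.size() >= 12.
uint64_t BatchSequence(const std::string& rep) {
  uint64_t seq = 0;
  for (int i = 7; i >= 0; i--) {
    seq = (seq << 8) | static_cast<unsigned char>(rep[i]);
  }
  return seq;
}

uint32_t BatchCount(const std::string& rep) {
  uint32_t n = 0;
  for (int i = 11; i >= 8; i--) {
    n = (n << 8) | static_cast<unsigned char>(rep[i]);
  }
  return n;
}
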
diff --git a/src/leveldb/db/write_batch_internal.h b/src/leveldb/db/write_batch_internal.h
deleted file mode 100644
index 4423a7f318..0000000000
--- a/src/leveldb/db/write_batch_internal.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
-#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
-
-#include "leveldb/write_batch.h"
-
-namespace leveldb {
-
-class MemTable;
-
-// WriteBatchInternal provides static methods for manipulating a
-// WriteBatch that we don't want in the public WriteBatch interface.
-class WriteBatchInternal {
- public:
- // Return the number of entries in the batch.
- static int Count(const WriteBatch* batch);
-
- // Set the count for the number of entries in the batch.
- static void SetCount(WriteBatch* batch, int n);
-
- // Return the sequence number for the start of this batch.
- static SequenceNumber Sequence(const WriteBatch* batch);
-
- // Store the specified number as the sequence number for the start of
- // this batch.
- static void SetSequence(WriteBatch* batch, SequenceNumber seq);
-
- static Slice Contents(const WriteBatch* batch) {
- return Slice(batch->rep_);
- }
-
- static size_t ByteSize(const WriteBatch* batch) {
- return batch->rep_.size();
- }
-
- static void SetContents(WriteBatch* batch, const Slice& contents);
-
- static Status InsertInto(const WriteBatch* batch, MemTable* memtable);
-
- static void Append(WriteBatch* dst, const WriteBatch* src);
-};
-
-} // namespace leveldb
-
-
-#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
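
WriteBatchInternal keeps sequence stamping and memtable replay out of the public WriteBatch interface; clients only build batches and hand them to DB::Write. A sketch of typical public-API use (AtomicRename is an illustrative helper, not part of LevelDB):

#include "leveldb/db.h"
#include "leveldb/write_batch.h"

// Both mutations land in one log record and apply atomically: either
// the key is renamed or nothing changes.
leveldb::Status AtomicRename(leveldb::DB* db,
                             const leveldb::Slice& old_key,
                             const leveldb::Slice& new_key,
                             const leveldb::Slice& value) {
  leveldb::WriteBatch batch;
  batch.Delete(old_key);
  batch.Put(new_key, value);
  return db->Write(leveldb::WriteOptions(), &batch);
}
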
diff --git a/src/leveldb/db/write_batch_test.cc b/src/leveldb/db/write_batch_test.cc
deleted file mode 100644
index 9064e3d85e..0000000000
--- a/src/leveldb/db/write_batch_test.cc
+++ /dev/null
@@ -1,120 +0,0 @@
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#include "leveldb/db.h"
-
-#include "db/memtable.h"
-#include "db/write_batch_internal.h"
-#include "leveldb/env.h"
-#include "util/logging.h"
-#include "util/testharness.h"
-
-namespace leveldb {
-
-static std::string PrintContents(WriteBatch* b) {
- InternalKeyComparator cmp(BytewiseComparator());
- MemTable* mem = new MemTable(cmp);
- mem->Ref();
- std::string state;
- Status s = WriteBatchInternal::InsertInto(b, mem);
- int count = 0;
- Iterator* iter = mem->NewIterator();
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- ParsedInternalKey ikey;
- ASSERT_TRUE(ParseInternalKey(iter->key(), &ikey));
- switch (ikey.type) {
- case kTypeValue:
- state.append("Put(");
- state.append(ikey.user_key.ToString());
- state.append(", ");
- state.append(iter->value().ToString());
- state.append(")");
- count++;
- break;
- case kTypeDeletion:
- state.append("Delete(");
- state.append(ikey.user_key.ToString());
- state.append(")");
- count++;
- break;
- }
- state.append("@");
- state.append(NumberToString(ikey.sequence));
- }
- delete iter;
- if (!s.ok()) {
- state.append("ParseError()");
- } else if (count != WriteBatchInternal::Count(b)) {
- state.append("CountMismatch()");
- }
- mem->Unref();
- return state;
-}
-
-class WriteBatchTest { };
-
-TEST(WriteBatchTest, Empty) {
- WriteBatch batch;
- ASSERT_EQ("", PrintContents(&batch));
- ASSERT_EQ(0, WriteBatchInternal::Count(&batch));
-}
-
-TEST(WriteBatchTest, Multiple) {
- WriteBatch batch;
- batch.Put(Slice("foo"), Slice("bar"));
- batch.Delete(Slice("box"));
- batch.Put(Slice("baz"), Slice("boo"));
- WriteBatchInternal::SetSequence(&batch, 100);
- ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
- ASSERT_EQ(3, WriteBatchInternal::Count(&batch));
- ASSERT_EQ("Put(baz, boo)@102"
- "Delete(box)@101"
- "Put(foo, bar)@100",
- PrintContents(&batch));
-}
-
-TEST(WriteBatchTest, Corruption) {
- WriteBatch batch;
- batch.Put(Slice("foo"), Slice("bar"));
- batch.Delete(Slice("box"));
- WriteBatchInternal::SetSequence(&batch, 200);
- Slice contents = WriteBatchInternal::Contents(&batch);
- WriteBatchInternal::SetContents(&batch,
- Slice(contents.data(), contents.size() - 1));
- ASSERT_EQ("Put(foo, bar)@200"
- "ParseError()",
- PrintContents(&batch));
-}
-
-TEST(WriteBatchTest, Append) {
- WriteBatch b1, b2;
- WriteBatchInternal::SetSequence(&b1, 200);
- WriteBatchInternal::SetSequence(&b2, 300);
- WriteBatchInternal::Append(&b1, &b2);
- ASSERT_EQ("",
- PrintContents(&b1));
- b2.Put("a", "va");
- WriteBatchInternal::Append(&b1, &b2);
- ASSERT_EQ("Put(a, va)@200",
- PrintContents(&b1));
- b2.Clear();
- b2.Put("b", "vb");
- WriteBatchInternal::Append(&b1, &b2);
- ASSERT_EQ("Put(a, va)@200"
- "Put(b, vb)@201",
- PrintContents(&b1));
- b2.Delete("foo");
- WriteBatchInternal::Append(&b1, &b2);
- ASSERT_EQ("Put(a, va)@200"
- "Put(b, vb)@202"
- "Put(b, vb)@201"
- "Delete(foo)@203",
- PrintContents(&b1));
-}
-
-} // namespace leveldb
-
-int main(int argc, char** argv) {
- return leveldb::test::RunAllTests();
-}
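
The Append test above pins down a subtle point: the destination batch keeps its own sequence base, so records copied from src are replayed under dst's numbering and src's own sequence (300 in the test) never surfaces. A short sketch of the same behavior using the internal helpers declared in write_batch_internal.h:

#include "db/write_batch_internal.h"
#include "leveldb/write_batch.h"

void DemoAppendSequencing() {
  leveldb::WriteBatch dst;
  leveldb::WriteBatch src;
  leveldb::WriteBatchInternal::SetSequence(&dst, 200);
  leveldb::WriteBatchInternal::SetSequence(&src, 300);  // ignored on append
  src.Put("k", "v");
  leveldb::WriteBatchInternal::Append(&dst, &src);
  // dst now holds one Put; replaying dst applies it at sequence 200.
}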