aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/bitcoinrpc.cpp1
-rw-r--r--src/bitcoinrpc.h1
-rw-r--r--src/leveldb/Makefile6
-rw-r--r--src/leveldb/db/autocompact_test.cc118
-rw-r--r--src/leveldb/db/corruption_test.cc51
-rw-r--r--src/leveldb/db/db_impl.cc41
-rw-r--r--src/leveldb/db/db_impl.h9
-rw-r--r--src/leveldb/db/db_iter.cc41
-rw-r--r--src/leveldb/db/db_iter.h8
-rw-r--r--src/leveldb/db/dbformat.h3
-rw-r--r--src/leveldb/db/version_set.cc96
-rw-r--r--src/leveldb/db/version_set.h15
-rw-r--r--src/leveldb/include/leveldb/db.h2
-rw-r--r--src/leveldb/util/env_posix.cc33
-rw-r--r--src/leveldb/util/random.h7
-rw-r--r--src/main.cpp85
-rw-r--r--src/net.cpp15
-rw-r--r--src/net.h12
-rw-r--r--src/rpcnet.cpp21
19 files changed, 494 insertions, 71 deletions
diff --git a/src/bitcoinrpc.cpp b/src/bitcoinrpc.cpp
index ef50ccd07f..b0df9c5e28 100644
--- a/src/bitcoinrpc.cpp
+++ b/src/bitcoinrpc.cpp
@@ -229,6 +229,7 @@ static const CRPCCommand vRPCCommands[] =
{ "getbestblockhash", &getbestblockhash, true, false, false },
{ "getconnectioncount", &getconnectioncount, true, false, false },
{ "getpeerinfo", &getpeerinfo, true, false, false },
+ { "ping", &ping, true, false, false },
{ "addnode", &addnode, true, true, false },
{ "getaddednodeinfo", &getaddednodeinfo, true, true, false },
{ "getdifficulty", &getdifficulty, true, false, false },
diff --git a/src/bitcoinrpc.h b/src/bitcoinrpc.h
index ce1c0e68b7..f495870b38 100644
--- a/src/bitcoinrpc.h
+++ b/src/bitcoinrpc.h
@@ -153,6 +153,7 @@ extern void EnsureWalletIsUnlocked();
extern json_spirit::Value getconnectioncount(const json_spirit::Array& params, bool fHelp); // in rpcnet.cpp
extern json_spirit::Value getpeerinfo(const json_spirit::Array& params, bool fHelp);
+extern json_spirit::Value ping(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value addnode(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value getaddednodeinfo(const json_spirit::Array& params, bool fHelp);
diff --git a/src/leveldb/Makefile b/src/leveldb/Makefile
index 96af7765be..20c9c4f287 100644
--- a/src/leveldb/Makefile
+++ b/src/leveldb/Makefile
@@ -31,6 +31,7 @@ TESTHARNESS = ./util/testharness.o $(TESTUTIL)
TESTS = \
arena_test \
+ autocompact_test \
bloom_test \
c_test \
cache_test \
@@ -70,7 +71,7 @@ SHARED = $(SHARED1)
else
# Update db.h if you change these.
SHARED_MAJOR = 1
-SHARED_MINOR = 12
+SHARED_MINOR = 13
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
@@ -114,6 +115,9 @@ leveldbutil: db/leveldb_main.o $(LIBOBJECTS)
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
diff --git a/src/leveldb/db/autocompact_test.cc b/src/leveldb/db/autocompact_test.cc
new file mode 100644
index 0000000000..d20a2362c3
--- /dev/null
+++ b/src/leveldb/db/autocompact_test.cc
@@ -0,0 +1,118 @@
+// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "leveldb/db.h"
+#include "db/db_impl.h"
+#include "leveldb/cache.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+class AutoCompactTest {
+ public:
+ std::string dbname_;
+ Cache* tiny_cache_;
+ Options options_;
+ DB* db_;
+
+ AutoCompactTest() {
+ dbname_ = test::TmpDir() + "/autocompact_test";
+ tiny_cache_ = NewLRUCache(100);
+ options_.block_cache = tiny_cache_;
+ DestroyDB(dbname_, options_);
+ options_.create_if_missing = true;
+ options_.compression = kNoCompression;
+ ASSERT_OK(DB::Open(options_, dbname_, &db_));
+ }
+
+ ~AutoCompactTest() {
+ delete db_;
+ DestroyDB(dbname_, Options());
+ delete tiny_cache_;
+ }
+
+ std::string Key(int i) {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "key%06d", i);
+ return std::string(buf);
+ }
+
+ uint64_t Size(const Slice& start, const Slice& limit) {
+ Range r(start, limit);
+ uint64_t size;
+ db_->GetApproximateSizes(&r, 1, &size);
+ return size;
+ }
+
+ void DoReads(int n);
+};
+
+static const int kValueSize = 200 * 1024;
+static const int kTotalSize = 100 * 1024 * 1024;
+static const int kCount = kTotalSize / kValueSize;
+
+// Read through the first n keys repeatedly and check that they get
+// compacted (verified by checking the size of the key space).
+void AutoCompactTest::DoReads(int n) {
+ std::string value(kValueSize, 'x');
+ DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
+
+ // Fill database
+ for (int i = 0; i < kCount; i++) {
+ ASSERT_OK(db_->Put(WriteOptions(), Key(i), value));
+ }
+ ASSERT_OK(dbi->TEST_CompactMemTable());
+
+ // Delete everything
+ for (int i = 0; i < kCount; i++) {
+ ASSERT_OK(db_->Delete(WriteOptions(), Key(i)));
+ }
+ ASSERT_OK(dbi->TEST_CompactMemTable());
+
+ // Get initial measurement of the space we will be reading.
+ const int64_t initial_size = Size(Key(0), Key(n));
+ const int64_t initial_other_size = Size(Key(n), Key(kCount));
+
+ // Read until size drops significantly.
+ std::string limit_key = Key(n);
+ for (int read = 0; true; read++) {
+ ASSERT_LT(read, 100) << "Taking too long to compact";
+ Iterator* iter = db_->NewIterator(ReadOptions());
+ for (iter->SeekToFirst();
+ iter->Valid() && iter->key().ToString() < limit_key;
+ iter->Next()) {
+ // Drop data
+ }
+ delete iter;
+ // Wait a little bit to allow any triggered compactions to complete.
+ Env::Default()->SleepForMicroseconds(1000000);
+ uint64_t size = Size(Key(0), Key(n));
+ fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n",
+ read+1, size/1048576.0, Size(Key(n), Key(kCount))/1048576.0);
+ if (size <= initial_size/10) {
+ break;
+ }
+ }
+
+ // Verify that the size of the key space not touched by the reads
+ // is pretty much unchanged.
+ const int64_t final_other_size = Size(Key(n), Key(kCount));
+ ASSERT_LE(final_other_size, initial_other_size + 1048576);
+ ASSERT_GE(final_other_size, initial_other_size/5 - 1048576);
+}
+
+TEST(AutoCompactTest, ReadAll) {
+ DoReads(kCount);
+}
+
+TEST(AutoCompactTest, ReadHalf) {
+ DoReads(kCount/2);
+}
+
+} // namespace leveldb
+
+int main(int argc, char** argv) {
+ return leveldb::test::RunAllTests();
+}
diff --git a/src/leveldb/db/corruption_test.cc b/src/leveldb/db/corruption_test.cc
index 31b2d5f416..b37ffdfe64 100644
--- a/src/leveldb/db/corruption_test.cc
+++ b/src/leveldb/db/corruption_test.cc
@@ -35,6 +35,7 @@ class CorruptionTest {
CorruptionTest() {
tiny_cache_ = NewLRUCache(100);
options_.env = &env_;
+ options_.block_cache = tiny_cache_;
dbname_ = test::TmpDir() + "/db_test";
DestroyDB(dbname_, options_);
@@ -50,17 +51,14 @@ class CorruptionTest {
delete tiny_cache_;
}
- Status TryReopen(Options* options = NULL) {
+ Status TryReopen() {
delete db_;
db_ = NULL;
- Options opt = (options ? *options : options_);
- opt.env = &env_;
- opt.block_cache = tiny_cache_;
- return DB::Open(opt, dbname_, &db_);
+ return DB::Open(options_, dbname_, &db_);
}
- void Reopen(Options* options = NULL) {
- ASSERT_OK(TryReopen(options));
+ void Reopen() {
+ ASSERT_OK(TryReopen());
}
void RepairDB() {
@@ -92,6 +90,10 @@ class CorruptionTest {
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
uint64_t key;
Slice in(iter->key());
+ if (in == "" || in == "~") {
+ // Ignore boundary keys.
+ continue;
+ }
if (!ConsumeDecimalNumber(&in, &key) ||
!in.empty() ||
key < next_expected) {
@@ -233,7 +235,7 @@ TEST(CorruptionTest, TableFile) {
dbi->TEST_CompactRange(1, NULL, NULL);
Corrupt(kTableFile, 100, 1);
- Check(99, 99);
+ Check(90, 99);
}
TEST(CorruptionTest, TableFileIndexData) {
@@ -299,7 +301,7 @@ TEST(CorruptionTest, CompactionInputError) {
ASSERT_EQ(1, Property("leveldb.num-files-at-level" + NumberToString(last)));
Corrupt(kTableFile, 100, 1);
- Check(9, 9);
+ Check(5, 9);
// Force compactions by writing lots of values
Build(10000);
@@ -307,32 +309,23 @@ TEST(CorruptionTest, CompactionInputError) {
}
TEST(CorruptionTest, CompactionInputErrorParanoid) {
- Options options;
- options.paranoid_checks = true;
- options.write_buffer_size = 1048576;
- Reopen(&options);
+ options_.paranoid_checks = true;
+ options_.write_buffer_size = 512 << 10;
+ Reopen();
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
- // Fill levels >= 1 so memtable compaction outputs to level 1
- for (int level = 1; level < config::kNumLevels; level++) {
- dbi->Put(WriteOptions(), "", "begin");
- dbi->Put(WriteOptions(), "~", "end");
+ // Make multiple inputs so we need to compact.
+ for (int i = 0; i < 2; i++) {
+ Build(10);
dbi->TEST_CompactMemTable();
+ Corrupt(kTableFile, 100, 1);
+ env_.SleepForMicroseconds(100000);
}
+ dbi->CompactRange(NULL, NULL);
- Build(10);
- dbi->TEST_CompactMemTable();
- ASSERT_EQ(1, Property("leveldb.num-files-at-level0"));
-
- Corrupt(kTableFile, 100, 1);
- Check(9, 9);
-
- // Write must eventually fail because of corrupted table
- Status s;
+ // Write must fail because of corrupted table
std::string tmp1, tmp2;
- for (int i = 0; i < 10000 && s.ok(); i++) {
- s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2));
- }
+ Status s = db_->Put(WriteOptions(), Key(5, &tmp1), Value(5, &tmp2));
ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
}
diff --git a/src/leveldb/db/db_impl.cc b/src/leveldb/db/db_impl.cc
index 395d3172ad..fa1351038b 100644
--- a/src/leveldb/db/db_impl.cc
+++ b/src/leveldb/db/db_impl.cc
@@ -113,14 +113,14 @@ Options SanitizeOptions(const std::string& dbname,
return result;
}
-DBImpl::DBImpl(const Options& options, const std::string& dbname)
- : env_(options.env),
- internal_comparator_(options.comparator),
- internal_filter_policy_(options.filter_policy),
- options_(SanitizeOptions(
- dbname, &internal_comparator_, &internal_filter_policy_, options)),
- owns_info_log_(options_.info_log != options.info_log),
- owns_cache_(options_.block_cache != options.block_cache),
+DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
+ : env_(raw_options.env),
+ internal_comparator_(raw_options.comparator),
+ internal_filter_policy_(raw_options.filter_policy),
+ options_(SanitizeOptions(dbname, &internal_comparator_,
+ &internal_filter_policy_, raw_options)),
+ owns_info_log_(options_.info_log != raw_options.info_log),
+ owns_cache_(options_.block_cache != raw_options.block_cache),
dbname_(dbname),
db_lock_(NULL),
shutting_down_(NULL),
@@ -130,6 +130,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
logfile_(NULL),
logfile_number_(0),
log_(NULL),
+ seed_(0),
tmp_batch_(new WriteBatch),
bg_compaction_scheduled_(false),
manual_compaction_(NULL),
@@ -138,7 +139,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
has_imm_.Release_Store(NULL);
// Reserve ten files or so for other uses and give the rest to TableCache.
- const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles;
+ const int table_cache_size = options_.max_open_files - kNumNonTableCacheFiles;
table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
versions_ = new VersionSet(dbname_, &options_, table_cache_,
@@ -1027,7 +1028,8 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
} // namespace
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
- SequenceNumber* latest_snapshot) {
+ SequenceNumber* latest_snapshot,
+ uint32_t* seed) {
IterState* cleanup = new IterState;
mutex_.Lock();
*latest_snapshot = versions_->LastSequence();
@@ -1051,13 +1053,15 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
cleanup->version = versions_->current();
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL);
+ *seed = ++seed_;
mutex_.Unlock();
return internal_iter;
}
Iterator* DBImpl::TEST_NewInternalIterator() {
SequenceNumber ignored;
- return NewInternalIterator(ReadOptions(), &ignored);
+ uint32_t ignored_seed;
+ return NewInternalIterator(ReadOptions(), &ignored, &ignored_seed);
}
int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
@@ -1114,12 +1118,21 @@ Status DBImpl::Get(const ReadOptions& options,
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
SequenceNumber latest_snapshot;
- Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
+ uint32_t seed;
+ Iterator* iter = NewInternalIterator(options, &latest_snapshot, &seed);
return NewDBIterator(
- &dbname_, env_, user_comparator(), internal_iter,
+ this, user_comparator(), iter,
(options.snapshot != NULL
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
- : latest_snapshot));
+ : latest_snapshot),
+ seed);
+}
+
+void DBImpl::RecordReadSample(Slice key) {
+ MutexLock l(&mutex_);
+ if (versions_->current()->RecordReadSample(key)) {
+ MaybeScheduleCompaction();
+ }
}
const Snapshot* DBImpl::GetSnapshot() {
diff --git a/src/leveldb/db/db_impl.h b/src/leveldb/db/db_impl.h
index 3c8d711ae0..75fd30abe9 100644
--- a/src/leveldb/db/db_impl.h
+++ b/src/leveldb/db/db_impl.h
@@ -59,13 +59,19 @@ class DBImpl : public DB {
// file at a level >= 1.
int64_t TEST_MaxNextLevelOverlappingBytes();
+ // Record a sample of bytes read at the specified internal key.
+ // Samples are taken approximately once every config::kReadBytesPeriod
+ // bytes.
+ void RecordReadSample(Slice key);
+
private:
friend class DB;
struct CompactionState;
struct Writer;
Iterator* NewInternalIterator(const ReadOptions&,
- SequenceNumber* latest_snapshot);
+ SequenceNumber* latest_snapshot,
+ uint32_t* seed);
Status NewDB();
@@ -135,6 +141,7 @@ class DBImpl : public DB {
WritableFile* logfile_;
uint64_t logfile_number_;
log::Writer* log_;
+ uint32_t seed_; // For sampling.
// Queue of writers.
std::deque<Writer*> writers_;
diff --git a/src/leveldb/db/db_iter.cc b/src/leveldb/db/db_iter.cc
index 87dca2ded4..071a54e3f4 100644
--- a/src/leveldb/db/db_iter.cc
+++ b/src/leveldb/db/db_iter.cc
@@ -5,12 +5,14 @@
#include "db/db_iter.h"
#include "db/filename.h"
+#include "db/db_impl.h"
#include "db/dbformat.h"
#include "leveldb/env.h"
#include "leveldb/iterator.h"
#include "port/port.h"
#include "util/logging.h"
#include "util/mutexlock.h"
+#include "util/random.h"
namespace leveldb {
@@ -46,15 +48,16 @@ class DBIter: public Iterator {
kReverse
};
- DBIter(const std::string* dbname, Env* env,
- const Comparator* cmp, Iterator* iter, SequenceNumber s)
- : dbname_(dbname),
- env_(env),
+ DBIter(DBImpl* db, const Comparator* cmp, Iterator* iter, SequenceNumber s,
+ uint32_t seed)
+ : db_(db),
user_comparator_(cmp),
iter_(iter),
sequence_(s),
direction_(kForward),
- valid_(false) {
+ valid_(false),
+ rnd_(seed),
+ bytes_counter_(RandomPeriod()) {
}
virtual ~DBIter() {
delete iter_;
@@ -100,8 +103,12 @@ class DBIter: public Iterator {
}
}
- const std::string* const dbname_;
- Env* const env_;
+ // Pick next gap with average value of config::kReadBytesPeriod.
+ ssize_t RandomPeriod() {
+ return rnd_.Uniform(2*config::kReadBytesPeriod);
+ }
+
+ DBImpl* db_;
const Comparator* const user_comparator_;
Iterator* const iter_;
SequenceNumber const sequence_;
@@ -112,13 +119,23 @@ class DBIter: public Iterator {
Direction direction_;
bool valid_;
+ Random rnd_;
+ ssize_t bytes_counter_;
+
// No copying allowed
DBIter(const DBIter&);
void operator=(const DBIter&);
};
inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
- if (!ParseInternalKey(iter_->key(), ikey)) {
+ Slice k = iter_->key();
+ ssize_t n = k.size() + iter_->value().size();
+ bytes_counter_ -= n;
+ while (bytes_counter_ < 0) {
+ bytes_counter_ += RandomPeriod();
+ db_->RecordReadSample(k);
+ }
+ if (!ParseInternalKey(k, ikey)) {
status_ = Status::Corruption("corrupted internal key in DBIter");
return false;
} else {
@@ -288,12 +305,12 @@ void DBIter::SeekToLast() {
} // anonymous namespace
Iterator* NewDBIterator(
- const std::string* dbname,
- Env* env,
+ DBImpl* db,
const Comparator* user_key_comparator,
Iterator* internal_iter,
- const SequenceNumber& sequence) {
- return new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
+ SequenceNumber sequence,
+ uint32_t seed) {
+ return new DBIter(db, user_key_comparator, internal_iter, sequence, seed);
}
} // namespace leveldb
diff --git a/src/leveldb/db/db_iter.h b/src/leveldb/db/db_iter.h
index d9e1b174ab..04927e937b 100644
--- a/src/leveldb/db/db_iter.h
+++ b/src/leveldb/db/db_iter.h
@@ -11,15 +11,17 @@
namespace leveldb {
+class DBImpl;
+
// Return a new iterator that converts internal keys (yielded by
// "*internal_iter") that were live at the specified "sequence" number
// into appropriate user keys.
extern Iterator* NewDBIterator(
- const std::string* dbname,
- Env* env,
+ DBImpl* db,
const Comparator* user_key_comparator,
Iterator* internal_iter,
- const SequenceNumber& sequence);
+ SequenceNumber sequence,
+ uint32_t seed);
} // namespace leveldb
diff --git a/src/leveldb/db/dbformat.h b/src/leveldb/db/dbformat.h
index f7f64dafb6..5d8a032bd3 100644
--- a/src/leveldb/db/dbformat.h
+++ b/src/leveldb/db/dbformat.h
@@ -38,6 +38,9 @@ static const int kL0_StopWritesTrigger = 12;
// space if the same key space is being repeatedly overwritten.
static const int kMaxMemCompactLevel = 2;
+// Approximate gap in bytes between samples of data read during iteration.
+static const int kReadBytesPeriod = 1048576;
+
} // namespace config
class InternalKey;
diff --git a/src/leveldb/db/version_set.cc b/src/leveldb/db/version_set.cc
index 4fd1ddef21..66d73be71f 100644
--- a/src/leveldb/db/version_set.cc
+++ b/src/leveldb/db/version_set.cc
@@ -289,6 +289,51 @@ static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
return a->number > b->number;
}
+void Version::ForEachOverlapping(Slice user_key, Slice internal_key,
+ void* arg,
+ bool (*func)(void*, int, FileMetaData*)) {
+ // TODO(sanjay): Change Version::Get() to use this function.
+ const Comparator* ucmp = vset_->icmp_.user_comparator();
+
+ // Search level-0 in order from newest to oldest.
+ std::vector<FileMetaData*> tmp;
+ tmp.reserve(files_[0].size());
+ for (uint32_t i = 0; i < files_[0].size(); i++) {
+ FileMetaData* f = files_[0][i];
+ if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
+ ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
+ tmp.push_back(f);
+ }
+ }
+ if (!tmp.empty()) {
+ std::sort(tmp.begin(), tmp.end(), NewestFirst);
+ for (uint32_t i = 0; i < tmp.size(); i++) {
+ if (!(*func)(arg, 0, tmp[i])) {
+ return;
+ }
+ }
+ }
+
+ // Search other levels.
+ for (int level = 1; level < config::kNumLevels; level++) {
+ size_t num_files = files_[level].size();
+ if (num_files == 0) continue;
+
+ // Binary search to find earliest index whose largest key >= internal_key.
+ uint32_t index = FindFile(vset_->icmp_, files_[level], internal_key);
+ if (index < num_files) {
+ FileMetaData* f = files_[level][index];
+ if (ucmp->Compare(user_key, f->smallest.user_key()) < 0) {
+ // All of "f" is past any data for user_key
+ } else {
+ if (!(*func)(arg, level, f)) {
+ return;
+ }
+ }
+ }
+ }
+}
+
Status Version::Get(const ReadOptions& options,
const LookupKey& k,
std::string* value,
@@ -401,6 +446,44 @@ bool Version::UpdateStats(const GetStats& stats) {
return false;
}
+bool Version::RecordReadSample(Slice internal_key) {
+ ParsedInternalKey ikey;
+ if (!ParseInternalKey(internal_key, &ikey)) {
+ return false;
+ }
+
+ struct State {
+ GetStats stats; // Holds first matching file
+ int matches;
+
+ static bool Match(void* arg, int level, FileMetaData* f) {
+ State* state = reinterpret_cast<State*>(arg);
+ state->matches++;
+ if (state->matches == 1) {
+ // Remember first match.
+ state->stats.seek_file = f;
+ state->stats.seek_file_level = level;
+ }
+ // We can stop iterating once we have a second match.
+ return state->matches < 2;
+ }
+ };
+
+ State state;
+ state.matches = 0;
+ ForEachOverlapping(ikey.user_key, internal_key, &state, &State::Match);
+
+ // Must have at least two matches since we want to merge across
+ // files. But what if we have a single file that contains many
+ // overwrites and deletions? Should we have another mechanism for
+ // finding such files?
+ if (state.matches >= 2) {
+ // 1MB cost is about 1 seek (see comment in Builder::Apply).
+ return UpdateStats(state.stats);
+ }
+ return false;
+}
+
void Version::Ref() {
++refs_;
}
@@ -435,10 +518,13 @@ int Version::PickLevelForMemTableOutput(
if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
break;
}
- GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
- const int64_t sum = TotalFileSize(overlaps);
- if (sum > kMaxGrandParentOverlapBytes) {
- break;
+ if (level + 2 < config::kNumLevels) {
+ // Check that file does not overlap too many grandparent bytes.
+ GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
+ const int64_t sum = TotalFileSize(overlaps);
+ if (sum > kMaxGrandParentOverlapBytes) {
+ break;
+ }
}
level++;
}
@@ -452,6 +538,8 @@ void Version::GetOverlappingInputs(
const InternalKey* begin,
const InternalKey* end,
std::vector<FileMetaData*>* inputs) {
+ assert(level >= 0);
+ assert(level < config::kNumLevels);
inputs->clear();
Slice user_begin, user_end;
if (begin != NULL) {
diff --git a/src/leveldb/db/version_set.h b/src/leveldb/db/version_set.h
index 9d084fdb7d..20de0e2629 100644
--- a/src/leveldb/db/version_set.h
+++ b/src/leveldb/db/version_set.h
@@ -78,6 +78,12 @@ class Version {
// REQUIRES: lock is held
bool UpdateStats(const GetStats& stats);
+ // Record a sample of bytes read at the specified internal key.
+ // Samples are taken approximately once every config::kReadBytesPeriod
+ // bytes. Returns true if a new compaction may need to be triggered.
+ // REQUIRES: lock is held
+ bool RecordReadSample(Slice key);
+
// Reference count management (so Versions do not disappear out from
// under live iterators)
void Ref();
@@ -114,6 +120,15 @@ class Version {
class LevelFileNumIterator;
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
+ // Call func(arg, level, f) for every file that overlaps user_key in
+ // order from newest to oldest. If an invocation of func returns
+ // false, makes no more calls.
+ //
+ // REQUIRES: user portion of internal_key == user_key.
+ void ForEachOverlapping(Slice user_key, Slice internal_key,
+ void* arg,
+ bool (*func)(void*, int, FileMetaData*));
+
VersionSet* vset_; // VersionSet to which this Version belongs
Version* next_; // Next version in linked list
Version* prev_; // Previous version in linked list
diff --git a/src/leveldb/include/leveldb/db.h b/src/leveldb/include/leveldb/db.h
index da8b11a8c0..57c00a5da0 100644
--- a/src/leveldb/include/leveldb/db.h
+++ b/src/leveldb/include/leveldb/db.h
@@ -14,7 +14,7 @@ namespace leveldb {
// Update Makefile if you change these
static const int kMajorVersion = 1;
-static const int kMinorVersion = 12;
+static const int kMinorVersion = 13;
struct Options;
struct ReadOptions;
diff --git a/src/leveldb/util/env_posix.cc b/src/leveldb/util/env_posix.cc
index 6badfdc230..0f5dcfac5a 100644
--- a/src/leveldb/util/env_posix.cc
+++ b/src/leveldb/util/env_posix.cc
@@ -320,8 +320,39 @@ class PosixMmapFile : public WritableFile {
return Status::OK();
}
- virtual Status Sync() {
+ Status SyncDirIfManifest() {
+ const char* f = filename_.c_str();
+ const char* sep = strrchr(f, '/');
+ Slice basename;
+ std::string dir;
+ if (sep == NULL) {
+ dir = ".";
+ basename = f;
+ } else {
+ dir = std::string(f, sep - f);
+ basename = sep + 1;
+ }
Status s;
+ if (basename.starts_with("MANIFEST")) {
+ int fd = open(dir.c_str(), O_RDONLY);
+ if (fd < 0) {
+ s = IOError(dir, errno);
+ } else {
+ if (fsync(fd) < 0) {
+ s = IOError(dir, errno);
+ }
+ close(fd);
+ }
+ }
+ return s;
+ }
+
+ virtual Status Sync() {
+ // Ensure new files referred to by the manifest are in the filesystem.
+ Status s = SyncDirIfManifest();
+ if (!s.ok()) {
+ return s;
+ }
if (pending_sync_) {
// Some unmapped data was not synced
diff --git a/src/leveldb/util/random.h b/src/leveldb/util/random.h
index 07538242ea..ddd51b1c7b 100644
--- a/src/leveldb/util/random.h
+++ b/src/leveldb/util/random.h
@@ -16,7 +16,12 @@ class Random {
private:
uint32_t seed_;
public:
- explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { }
+ explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
+ // Avoid bad seeds.
+ if (seed_ == 0 || seed_ == 2147483647L) {
+ seed_ = 1;
+ }
+ }
uint32_t Next() {
static const uint32_t M = 2147483647L; // 2^31-1
static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
diff --git a/src/main.cpp b/src/main.cpp
index d0cf3f8eb2..732a828524 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3862,6 +3862,63 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
}
+ else if (strCommand == "pong")
+ {
+ int64 pingUsecEnd = GetTimeMicros();
+ uint64 nonce = 0;
+ size_t nAvail = vRecv.in_avail();
+ bool bPingFinished = false;
+ std::string sProblem;
+
+ if (nAvail >= sizeof(nonce)) {
+ vRecv >> nonce;
+
+ // Only process pong message if there is an outstanding ping (old ping without nonce should never pong)
+ if (pfrom->nPingNonceSent != 0) {
+ if (nonce == pfrom->nPingNonceSent) {
+ // Matching pong received, this ping is no longer outstanding
+ bPingFinished = true;
+ int64 pingUsecTime = pingUsecEnd - pfrom->nPingUsecStart;
+ if (pingUsecTime > 0) {
+ // Successful ping time measurement, replace previous
+ pfrom->nPingUsecTime = pingUsecTime;
+ } else {
+ // This should never happen
+ sProblem = "Timing mishap";
+ }
+ } else {
+ // Nonce mismatches are normal when pings are overlapping
+ sProblem = "Nonce mismatch";
+ if (nonce == 0) {
+ // This is most likely a bug in another implementation somewhere, cancel this ping
+ bPingFinished = true;
+ sProblem = "Nonce zero";
+ }
+ }
+ } else {
+ sProblem = "Unsolicited pong without ping";
+ }
+ } else {
+ // This is most likely a bug in another implementation somewhere, cancel this ping
+ bPingFinished = true;
+ sProblem = "Short payload";
+ }
+
+ if (!(sProblem.empty())) {
+ LogPrint("net", "pong %s %s: %s, %"PRI64x" expected, %"PRI64x" received, %zu bytes\n"
+ , pfrom->addr.ToString().c_str()
+ , pfrom->strSubVer.c_str()
+ , sProblem.c_str()
+ , pfrom->nPingNonceSent
+ , nonce
+ , nAvail);
+ }
+ if (bPingFinished) {
+ pfrom->nPingNonceSent = 0;
+ }
+ }
+
+
else if (strCommand == "alert")
{
CAlert alert;
@@ -4083,14 +4140,34 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
if (pto->nVersion == 0)
return true;
- // Keep-alive ping. We send a nonce of zero because we don't use it anywhere
- // right now.
+ //
+ // Message: ping
+ //
+ bool pingSend = false;
+ if (pto->fPingQueued) {
+ // RPC ping request by user
+ pingSend = true;
+ }
if (pto->nLastSend && GetTime() - pto->nLastSend > 30 * 60 && pto->vSendMsg.empty()) {
+ // Ping automatically sent as a keepalive
+ pingSend = true;
+ }
+ if (pingSend) {
uint64 nonce = 0;
- if (pto->nVersion > BIP0031_VERSION)
+ while (nonce == 0) {
+ RAND_bytes((unsigned char*)&nonce, sizeof(nonce));
+ }
+ pto->nPingNonceSent = nonce;
+ pto->fPingQueued = false;
+ if (pto->nVersion > BIP0031_VERSION) {
+ // Take timestamp as close as possible before transmitting ping
+ pto->nPingUsecStart = GetTimeMicros();
pto->PushMessage("ping", nonce);
- else
+ } else {
+ // Peer is too old to support ping command with nonce, pong will never arrive, disable timing
+ pto->nPingUsecStart = 0;
pto->PushMessage("ping");
+ }
}
// Start block sync
diff --git a/src/net.cpp b/src/net.cpp
index 4f77f42f9a..5d6f1f46a8 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -622,6 +622,21 @@ void CNode::copyStats(CNodeStats &stats)
X(nSendBytes);
X(nRecvBytes);
stats.fSyncNode = (this == pnodeSync);
+
+ // It is common for nodes with good ping times to suddenly become lagged,
+ // due to a new block arriving or other large transfer.
+ // Merely reporting pingtime might fool the caller into thinking the node was still responsive,
+ // since pingtime does not update until the ping is complete, which might take a while.
+ // So, if a ping is taking an unusually long time in flight,
+ // the caller can immediately detect that this is happening.
+ int64 nPingUsecWait = 0;
+ if ((0 != nPingNonceSent) && (0 != nPingUsecStart)) {
+ nPingUsecWait = GetTimeMicros() - nPingUsecStart;
+ }
+
+ // Raw ping time is in microseconds, but show it to user as whole seconds (Bitcoin users should be well used to small numbers with many decimal places by now :)
+ stats.dPingTime = (((double)nPingUsecTime) / 1e6);
+ stats.dPingWait = (((double)nPingUsecWait) / 1e6);
}
#undef X
diff --git a/src/net.h b/src/net.h
index 2bee212018..c4dd2d8a24 100644
--- a/src/net.h
+++ b/src/net.h
@@ -119,6 +119,8 @@ public:
uint64 nSendBytes;
uint64 nRecvBytes;
bool fSyncNode;
+ double dPingTime;
+ double dPingWait;
};
@@ -234,6 +236,12 @@ public:
CCriticalSection cs_inventory;
std::multimap<int64, CInv> mapAskFor;
+ // Ping time measurement
+ uint64 nPingNonceSent;
+ int64 nPingUsecStart;
+ int64 nPingUsecTime;
+ bool fPingQueued;
+
CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn = "", bool fInboundIn=false) : ssSend(SER_NETWORK, MIN_PROTO_VERSION)
{
nServices = 0;
@@ -268,6 +276,10 @@ public:
fRelayTxes = false;
setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = new CBloomFilter();
+ nPingNonceSent = 0;
+ nPingUsecStart = 0;
+ nPingUsecTime = 0;
+ fPingQueued = false;
// Be shy and don't send version until we hear
if (hSocket != INVALID_SOCKET && !fInbound)
diff --git a/src/rpcnet.cpp b/src/rpcnet.cpp
index bd7bc0ba10..7df86cc2db 100644
--- a/src/rpcnet.cpp
+++ b/src/rpcnet.cpp
@@ -19,6 +19,24 @@ Value getconnectioncount(const Array& params, bool fHelp)
return (int)vNodes.size();
}
+Value ping(const Array& params, bool fHelp)
+{
+ if (fHelp || params.size() != 0)
+ throw runtime_error(
+ "ping\n"
+ "Requests that a ping be sent to all other nodes, to measure ping time.\n"
+ "Results provided in getpeerinfo, pingtime and pingwait fields are decimal seconds.\n"
+ "Ping command is handled in queue with all other commands, so it measures processing backlog, not just network ping.");
+
+ // Request that each node send a ping during next message processing pass
+ LOCK(cs_vNodes);
+ BOOST_FOREACH(CNode* pNode, vNodes) {
+ pNode->fPingQueued = true;
+ }
+
+ return Value::null;
+}
+
static void CopyNodeStats(std::vector<CNodeStats>& vstats)
{
vstats.clear();
@@ -54,6 +72,9 @@ Value getpeerinfo(const Array& params, bool fHelp)
obj.push_back(Pair("bytessent", (boost::int64_t)stats.nSendBytes));
obj.push_back(Pair("bytesrecv", (boost::int64_t)stats.nRecvBytes));
obj.push_back(Pair("conntime", (boost::int64_t)stats.nTimeConnected));
+ obj.push_back(Pair("pingtime", stats.dPingTime));
+ if (stats.dPingWait > 0.0)
+ obj.push_back(Pair("pingwait", stats.dPingWait));
obj.push_back(Pair("version", stats.nVersion));
obj.push_back(Pair("subver", stats.strSubVer));
obj.push_back(Pair("inbound", stats.fInbound));