diff options
author | Vinnie Falco <vinnie.falco@gmail.com> | 2013-07-01 08:36:32 -0700 |
---|---|---|
committer | Vinnie Falco <vinnie.falco@gmail.com> | 2013-07-01 08:36:32 -0700 |
commit | fb1da62318f5a7f6e3ec31cdc02178a5445870e4 (patch) | |
tree | 4ef6f0b1017c747e84132427666eea4a36d10b0d /src/leveldb/db | |
parent | 28bcf3b7ef804f7084938b21708b29c1f5d7c8f8 (diff) | |
parent | adae78ea9940f4d44382967d1296e7db0b54a4de (diff) |
Merge commit 'adae78ea9940f4d44382967d1296e7db0b54a4de' into leveldb-squashed
Diffstat (limited to 'src/leveldb/db')
-rw-r--r-- | src/leveldb/db/db_impl.cc | 41 | ||||
-rw-r--r-- | src/leveldb/db/db_impl.h | 1 | ||||
-rw-r--r-- | src/leveldb/db/db_test.cc | 85 | ||||
-rw-r--r-- | src/leveldb/db/dbformat.cc | 2 | ||||
-rw-r--r-- | src/leveldb/db/filename_test.cc | 2 | ||||
-rw-r--r-- | src/leveldb/db/version_set.cc | 21 |
6 files changed, 122 insertions, 30 deletions
diff --git a/src/leveldb/db/db_impl.cc b/src/leveldb/db/db_impl.cc index c9de169f29..af02467b33 100644 --- a/src/leveldb/db/db_impl.cc +++ b/src/leveldb/db/db_impl.cc @@ -35,6 +35,8 @@ namespace leveldb { +const int kNumNonTableCacheFiles = 10; + // Information kept for every waiting writer struct DBImpl::Writer { Status status; @@ -92,9 +94,9 @@ Options SanitizeOptions(const std::string& dbname, Options result = src; result.comparator = icmp; result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL; - ClipToRange(&result.max_open_files, 20, 50000); - ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); - ClipToRange(&result.block_size, 1<<10, 4<<20); + ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000); + ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); + ClipToRange(&result.block_size, 1<<10, 4<<20); if (result.info_log == NULL) { // Open a log file in the same directory as the db src.env->CreateDir(dbname); // In case it does not exist @@ -130,12 +132,13 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) log_(NULL), tmp_batch_(new WriteBatch), bg_compaction_scheduled_(false), - manual_compaction_(NULL) { + manual_compaction_(NULL), + consecutive_compaction_errors_(0) { mem_->Ref(); has_imm_.Release_Store(NULL); // Reserve ten files or so for other uses and give the rest to TableCache. - const int table_cache_size = options.max_open_files - 10; + const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles; table_cache_ = new TableCache(dbname_, &options_, table_cache_size); versions_ = new VersionSet(dbname_, &options_, table_cache_, @@ -310,16 +313,24 @@ Status DBImpl::Recover(VersionEdit* edit) { if (!s.ok()) { return s; } + std::set<uint64_t> expected; + versions_->AddLiveFiles(&expected); uint64_t number; FileType type; std::vector<uint64_t> logs; for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) - && type == kLogFile - && ((number >= min_log) || (number == prev_log))) { + if (ParseFileName(filenames[i], &number, &type)) { + expected.erase(number); + if (type == kLogFile && ((number >= min_log) || (number == prev_log))) logs.push_back(number); } } + if (!expected.empty()) { + char buf[50]; + snprintf(buf, sizeof(buf), "%d missing files; e.g.", + static_cast<int>(expected.size())); + return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin()))); + } // Recover in the order in which the logs were generated std::sort(logs.begin(), logs.end()); @@ -611,6 +622,7 @@ void DBImpl::BackgroundCall() { Status s = BackgroundCompaction(); if (s.ok()) { // Success + consecutive_compaction_errors_ = 0; } else if (shutting_down_.Acquire_Load()) { // Error most likely due to shutdown; do not wait } else { @@ -622,7 +634,12 @@ void DBImpl::BackgroundCall() { Log(options_.info_log, "Waiting after background compaction error: %s", s.ToString().c_str()); mutex_.Unlock(); - env_->SleepForMicroseconds(1000000); + ++consecutive_compaction_errors_; + int seconds_to_sleep = 1; + for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) { + seconds_to_sleep *= 2; + } + env_->SleepForMicroseconds(seconds_to_sleep * 1000000); mutex_.Lock(); } } @@ -805,6 +822,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, (unsigned long long) output_number, (unsigned long long) current_entries, (unsigned long long) current_bytes); + + // rate-limit compaction file creation with a 100ms pause + env_->SleepForMicroseconds(100000); } } return s; @@ -1268,10 +1288,11 @@ Status DBImpl::MakeRoomForWrite(bool force) { } else if (imm_ != NULL) { // We have filled up the current memtable, but the previous // one is still being compacted, so we wait. + Log(options_.info_log, "Current memtable full; waiting...\n"); bg_cv_.Wait(); } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) { // There are too many level-0 files. - Log(options_.info_log, "waiting...\n"); + Log(options_.info_log, "Too many L0 files; waiting...\n"); bg_cv_.Wait(); } else { // Attempt to switch to a new memtable and trigger compaction of old diff --git a/src/leveldb/db/db_impl.h b/src/leveldb/db/db_impl.h index bd29dd8055..3c8d711ae0 100644 --- a/src/leveldb/db/db_impl.h +++ b/src/leveldb/db/db_impl.h @@ -163,6 +163,7 @@ class DBImpl : public DB { // Have we encountered a background error in paranoid mode? Status bg_error_; + int consecutive_compaction_errors_; // Per level compaction stats. stats_[level] stores the stats for // compactions that produced data for the specified "level". diff --git a/src/leveldb/db/db_test.cc b/src/leveldb/db/db_test.cc index 684ea3bdbc..49aae04dbd 100644 --- a/src/leveldb/db/db_test.cc +++ b/src/leveldb/db/db_test.cc @@ -33,8 +33,11 @@ class AtomicCounter { public: AtomicCounter() : count_(0) { } void Increment() { + IncrementBy(1); + } + void IncrementBy(int count) { MutexLock l(&mu_); - count_++; + count_ += count; } int Read() { MutexLock l(&mu_); @@ -45,6 +48,10 @@ class AtomicCounter { count_ = 0; } }; + +void DelayMilliseconds(int millis) { + Env::Default()->SleepForMicroseconds(millis * 1000); +} } // Special Env used to delay background operations @@ -69,6 +76,7 @@ class SpecialEnv : public EnvWrapper { AtomicCounter random_read_counter_; AtomicCounter sleep_counter_; + AtomicCounter sleep_time_counter_; explicit SpecialEnv(Env* base) : EnvWrapper(base) { delay_sstable_sync_.Release_Store(NULL); @@ -103,7 +111,7 @@ class SpecialEnv : public EnvWrapper { Status Flush() { return base_->Flush(); } Status Sync() { while (env_->delay_sstable_sync_.Acquire_Load() != NULL) { - env_->SleepForMicroseconds(100000); + DelayMilliseconds(100); } return base_->Sync(); } @@ -174,8 +182,9 @@ class SpecialEnv : public EnvWrapper { virtual void SleepForMicroseconds(int micros) { sleep_counter_.Increment(); - target()->SleepForMicroseconds(micros); + sleep_time_counter_.IncrementBy(micros); } + }; class DBTest { @@ -461,6 +470,20 @@ class DBTest { } return result; } + + bool DeleteAnSSTFile() { + std::vector<std::string> filenames; + ASSERT_OK(env_->GetChildren(dbname_, &filenames)); + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) { + ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number))); + return true; + } + } + return false; + } }; TEST(DBTest, Empty) { @@ -611,7 +634,7 @@ TEST(DBTest, GetEncountersEmptyLevel) { } // Step 4: Wait for compaction to finish - env_->SleepForMicroseconds(1000000); + DelayMilliseconds(1000); ASSERT_EQ(NumTableFilesAtLevel(0), 0); } while (ChangeOptions()); @@ -1295,7 +1318,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_a) { Reopen(); Reopen(); ASSERT_EQ("(a->v)", Contents()); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish + DelayMilliseconds(1000); // Wait for compaction to finish ASSERT_EQ("(a->v)", Contents()); } @@ -1311,7 +1334,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) { Put("",""); Reopen(); Put("",""); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish + DelayMilliseconds(1000); // Wait for compaction to finish Reopen(); Put("d","dv"); Reopen(); @@ -1321,7 +1344,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) { Delete("b"); Reopen(); ASSERT_EQ("(->)(c->cv)", Contents()); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish + DelayMilliseconds(1000); // Wait for compaction to finish ASSERT_EQ("(->)(c->cv)", Contents()); } @@ -1506,6 +1529,30 @@ TEST(DBTest, NoSpace) { ASSERT_GE(env_->sleep_counter_.Read(), 5); } +TEST(DBTest, ExponentialBackoff) { + Options options = CurrentOptions(); + options.env = env_; + Reopen(&options); + + ASSERT_OK(Put("foo", "v1")); + ASSERT_EQ("v1", Get("foo")); + Compact("a", "z"); + env_->non_writable_.Release_Store(env_); // Force errors for new files + env_->sleep_counter_.Reset(); + env_->sleep_time_counter_.Reset(); + for (int i = 0; i < 5; i++) { + dbfull()->TEST_CompactRange(2, NULL, NULL); + } + env_->non_writable_.Release_Store(NULL); + + // Wait for compaction to finish + DelayMilliseconds(1000); + + ASSERT_GE(env_->sleep_counter_.Read(), 5); + ASSERT_LT(env_->sleep_counter_.Read(), 10); + ASSERT_GE(env_->sleep_time_counter_.Read(), 10e6); +} + TEST(DBTest, NonWritableFileSystem) { Options options = CurrentOptions(); options.write_buffer_size = 1000; @@ -1519,7 +1566,7 @@ TEST(DBTest, NonWritableFileSystem) { fprintf(stderr, "iter %d; errors %d\n", i, errors); if (!Put("foo", big).ok()) { errors++; - env_->SleepForMicroseconds(100000); + DelayMilliseconds(100); } } ASSERT_GT(errors, 0); @@ -1567,6 +1614,24 @@ TEST(DBTest, ManifestWriteError) { } } +TEST(DBTest, MissingSSTFile) { + ASSERT_OK(Put("foo", "bar")); + ASSERT_EQ("bar", Get("foo")); + + // Dump the memtable to disk. + dbfull()->TEST_CompactMemTable(); + ASSERT_EQ("bar", Get("foo")); + + Close(); + ASSERT_TRUE(DeleteAnSSTFile()); + Options options = CurrentOptions(); + options.paranoid_checks = true; + Status s = TryReopen(&options); + ASSERT_TRUE(!s.ok()); + ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) + << s.ToString(); +} + TEST(DBTest, FilesDeletedAfterCompaction) { ASSERT_OK(Put("foo", "v2")); Compact("a", "z"); @@ -1711,13 +1776,13 @@ TEST(DBTest, MultiThreaded) { } // Let them run for a while - env_->SleepForMicroseconds(kTestSeconds * 1000000); + DelayMilliseconds(kTestSeconds * 1000); // Stop the threads and wait for them to finish mt.stop.Release_Store(&mt); for (int id = 0; id < kNumThreads; id++) { while (mt.thread_done[id].Acquire_Load() == NULL) { - env_->SleepForMicroseconds(100000); + DelayMilliseconds(100); } } } while (ChangeOptions()); diff --git a/src/leveldb/db/dbformat.cc b/src/leveldb/db/dbformat.cc index 28e11b398d..20a7ca4462 100644 --- a/src/leveldb/db/dbformat.cc +++ b/src/leveldb/db/dbformat.cc @@ -26,7 +26,7 @@ std::string ParsedInternalKey::DebugString() const { (unsigned long long) sequence, int(type)); std::string result = "'"; - result += user_key.ToString(); + result += EscapeString(user_key.ToString()); result += buf; return result; } diff --git a/src/leveldb/db/filename_test.cc b/src/leveldb/db/filename_test.cc index 47353d6c9a..5a26da4728 100644 --- a/src/leveldb/db/filename_test.cc +++ b/src/leveldb/db/filename_test.cc @@ -70,7 +70,7 @@ TEST(FileNameTest, Parse) { for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { std::string f = errors[i]; ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; - }; + } } TEST(FileNameTest, Construction) { diff --git a/src/leveldb/db/version_set.cc b/src/leveldb/db/version_set.cc index 7d0a5de2b9..4fd1ddef21 100644 --- a/src/leveldb/db/version_set.cc +++ b/src/leveldb/db/version_set.cc @@ -1331,14 +1331,19 @@ Compaction* VersionSet::CompactRange( } // Avoid compacting too much in one shot in case the range is large. - const uint64_t limit = MaxFileSizeForLevel(level); - uint64_t total = 0; - for (size_t i = 0; i < inputs.size(); i++) { - uint64_t s = inputs[i]->file_size; - total += s; - if (total >= limit) { - inputs.resize(i + 1); - break; + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. + if (level > 0) { + const uint64_t limit = MaxFileSizeForLevel(level); + uint64_t total = 0; + for (size_t i = 0; i < inputs.size(); i++) { + uint64_t s = inputs[i]->file_size; + total += s; + if (total >= limit) { + inputs.resize(i + 1); + break; + } } } |