aboutsummaryrefslogtreecommitdiff
path: root/src/leveldb/db
diff options
context:
space:
mode:
authorVinnie Falco <vinnie.falco@gmail.com>2013-07-01 08:36:32 -0700
committerVinnie Falco <vinnie.falco@gmail.com>2013-07-01 08:36:32 -0700
commitfb1da62318f5a7f6e3ec31cdc02178a5445870e4 (patch)
tree4ef6f0b1017c747e84132427666eea4a36d10b0d /src/leveldb/db
parent28bcf3b7ef804f7084938b21708b29c1f5d7c8f8 (diff)
parentadae78ea9940f4d44382967d1296e7db0b54a4de (diff)
Merge commit 'adae78ea9940f4d44382967d1296e7db0b54a4de' into leveldb-squashed
Diffstat (limited to 'src/leveldb/db')
-rw-r--r--src/leveldb/db/db_impl.cc41
-rw-r--r--src/leveldb/db/db_impl.h1
-rw-r--r--src/leveldb/db/db_test.cc85
-rw-r--r--src/leveldb/db/dbformat.cc2
-rw-r--r--src/leveldb/db/filename_test.cc2
-rw-r--r--src/leveldb/db/version_set.cc21
6 files changed, 122 insertions, 30 deletions
diff --git a/src/leveldb/db/db_impl.cc b/src/leveldb/db/db_impl.cc
index c9de169f29..af02467b33 100644
--- a/src/leveldb/db/db_impl.cc
+++ b/src/leveldb/db/db_impl.cc
@@ -35,6 +35,8 @@
namespace leveldb {
+const int kNumNonTableCacheFiles = 10;
+
// Information kept for every waiting writer
struct DBImpl::Writer {
Status status;
@@ -92,9 +94,9 @@ Options SanitizeOptions(const std::string& dbname,
Options result = src;
result.comparator = icmp;
result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
- ClipToRange(&result.max_open_files, 20, 50000);
- ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
- ClipToRange(&result.block_size, 1<<10, 4<<20);
+ ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000);
+ ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
+ ClipToRange(&result.block_size, 1<<10, 4<<20);
if (result.info_log == NULL) {
// Open a log file in the same directory as the db
src.env->CreateDir(dbname); // In case it does not exist
@@ -130,12 +132,13 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
log_(NULL),
tmp_batch_(new WriteBatch),
bg_compaction_scheduled_(false),
- manual_compaction_(NULL) {
+ manual_compaction_(NULL),
+ consecutive_compaction_errors_(0) {
mem_->Ref();
has_imm_.Release_Store(NULL);
// Reserve ten files or so for other uses and give the rest to TableCache.
- const int table_cache_size = options.max_open_files - 10;
+ const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles;
table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
versions_ = new VersionSet(dbname_, &options_, table_cache_,
@@ -310,16 +313,24 @@ Status DBImpl::Recover(VersionEdit* edit) {
if (!s.ok()) {
return s;
}
+ std::set<uint64_t> expected;
+ versions_->AddLiveFiles(&expected);
uint64_t number;
FileType type;
std::vector<uint64_t> logs;
for (size_t i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type)
- && type == kLogFile
- && ((number >= min_log) || (number == prev_log))) {
+ if (ParseFileName(filenames[i], &number, &type)) {
+ expected.erase(number);
+ if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
logs.push_back(number);
}
}
+ if (!expected.empty()) {
+ char buf[50];
+ snprintf(buf, sizeof(buf), "%d missing files; e.g.",
+ static_cast<int>(expected.size()));
+ return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
+ }
// Recover in the order in which the logs were generated
std::sort(logs.begin(), logs.end());
@@ -611,6 +622,7 @@ void DBImpl::BackgroundCall() {
Status s = BackgroundCompaction();
if (s.ok()) {
// Success
+ consecutive_compaction_errors_ = 0;
} else if (shutting_down_.Acquire_Load()) {
// Error most likely due to shutdown; do not wait
} else {
@@ -622,7 +634,12 @@ void DBImpl::BackgroundCall() {
Log(options_.info_log, "Waiting after background compaction error: %s",
s.ToString().c_str());
mutex_.Unlock();
- env_->SleepForMicroseconds(1000000);
+ ++consecutive_compaction_errors_;
+ int seconds_to_sleep = 1;
+ for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) {
+ seconds_to_sleep *= 2;
+ }
+ env_->SleepForMicroseconds(seconds_to_sleep * 1000000);
mutex_.Lock();
}
}
@@ -805,6 +822,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
(unsigned long long) output_number,
(unsigned long long) current_entries,
(unsigned long long) current_bytes);
+
+ // rate-limit compaction file creation with a 100ms pause
+ env_->SleepForMicroseconds(100000);
}
}
return s;
@@ -1268,10 +1288,11 @@ Status DBImpl::MakeRoomForWrite(bool force) {
} else if (imm_ != NULL) {
// We have filled up the current memtable, but the previous
// one is still being compacted, so we wait.
+ Log(options_.info_log, "Current memtable full; waiting...\n");
bg_cv_.Wait();
} else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
// There are too many level-0 files.
- Log(options_.info_log, "waiting...\n");
+ Log(options_.info_log, "Too many L0 files; waiting...\n");
bg_cv_.Wait();
} else {
// Attempt to switch to a new memtable and trigger compaction of old
diff --git a/src/leveldb/db/db_impl.h b/src/leveldb/db/db_impl.h
index bd29dd8055..3c8d711ae0 100644
--- a/src/leveldb/db/db_impl.h
+++ b/src/leveldb/db/db_impl.h
@@ -163,6 +163,7 @@ class DBImpl : public DB {
// Have we encountered a background error in paranoid mode?
Status bg_error_;
+ int consecutive_compaction_errors_;
// Per level compaction stats. stats_[level] stores the stats for
// compactions that produced data for the specified "level".
diff --git a/src/leveldb/db/db_test.cc b/src/leveldb/db/db_test.cc
index 684ea3bdbc..49aae04dbd 100644
--- a/src/leveldb/db/db_test.cc
+++ b/src/leveldb/db/db_test.cc
@@ -33,8 +33,11 @@ class AtomicCounter {
public:
AtomicCounter() : count_(0) { }
void Increment() {
+ IncrementBy(1);
+ }
+ void IncrementBy(int count) {
MutexLock l(&mu_);
- count_++;
+ count_ += count;
}
int Read() {
MutexLock l(&mu_);
@@ -45,6 +48,10 @@ class AtomicCounter {
count_ = 0;
}
};
+
+void DelayMilliseconds(int millis) {
+ Env::Default()->SleepForMicroseconds(millis * 1000);
+}
}
// Special Env used to delay background operations
@@ -69,6 +76,7 @@ class SpecialEnv : public EnvWrapper {
AtomicCounter random_read_counter_;
AtomicCounter sleep_counter_;
+ AtomicCounter sleep_time_counter_;
explicit SpecialEnv(Env* base) : EnvWrapper(base) {
delay_sstable_sync_.Release_Store(NULL);
@@ -103,7 +111,7 @@ class SpecialEnv : public EnvWrapper {
Status Flush() { return base_->Flush(); }
Status Sync() {
while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
- env_->SleepForMicroseconds(100000);
+ DelayMilliseconds(100);
}
return base_->Sync();
}
@@ -174,8 +182,9 @@ class SpecialEnv : public EnvWrapper {
virtual void SleepForMicroseconds(int micros) {
sleep_counter_.Increment();
- target()->SleepForMicroseconds(micros);
+ sleep_time_counter_.IncrementBy(micros);
}
+
};
class DBTest {
@@ -461,6 +470,20 @@ class DBTest {
}
return result;
}
+
+ bool DeleteAnSSTFile() {
+ std::vector<std::string> filenames;
+ ASSERT_OK(env_->GetChildren(dbname_, &filenames));
+ uint64_t number;
+ FileType type;
+ for (size_t i = 0; i < filenames.size(); i++) {
+ if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
+ ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
+ return true;
+ }
+ }
+ return false;
+ }
};
TEST(DBTest, Empty) {
@@ -611,7 +634,7 @@ TEST(DBTest, GetEncountersEmptyLevel) {
}
// Step 4: Wait for compaction to finish
- env_->SleepForMicroseconds(1000000);
+ DelayMilliseconds(1000);
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
} while (ChangeOptions());
@@ -1295,7 +1318,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_a) {
Reopen();
Reopen();
ASSERT_EQ("(a->v)", Contents());
- env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
+ DelayMilliseconds(1000); // Wait for compaction to finish
ASSERT_EQ("(a->v)", Contents());
}
@@ -1311,7 +1334,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) {
Put("","");
Reopen();
Put("","");
- env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
+ DelayMilliseconds(1000); // Wait for compaction to finish
Reopen();
Put("d","dv");
Reopen();
@@ -1321,7 +1344,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) {
Delete("b");
Reopen();
ASSERT_EQ("(->)(c->cv)", Contents());
- env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
+ DelayMilliseconds(1000); // Wait for compaction to finish
ASSERT_EQ("(->)(c->cv)", Contents());
}
@@ -1506,6 +1529,30 @@ TEST(DBTest, NoSpace) {
ASSERT_GE(env_->sleep_counter_.Read(), 5);
}
+TEST(DBTest, ExponentialBackoff) {
+ Options options = CurrentOptions();
+ options.env = env_;
+ Reopen(&options);
+
+ ASSERT_OK(Put("foo", "v1"));
+ ASSERT_EQ("v1", Get("foo"));
+ Compact("a", "z");
+ env_->non_writable_.Release_Store(env_); // Force errors for new files
+ env_->sleep_counter_.Reset();
+ env_->sleep_time_counter_.Reset();
+ for (int i = 0; i < 5; i++) {
+ dbfull()->TEST_CompactRange(2, NULL, NULL);
+ }
+ env_->non_writable_.Release_Store(NULL);
+
+ // Wait for compaction to finish
+ DelayMilliseconds(1000);
+
+ ASSERT_GE(env_->sleep_counter_.Read(), 5);
+ ASSERT_LT(env_->sleep_counter_.Read(), 10);
+ ASSERT_GE(env_->sleep_time_counter_.Read(), 10e6);
+}
+
TEST(DBTest, NonWritableFileSystem) {
Options options = CurrentOptions();
options.write_buffer_size = 1000;
@@ -1519,7 +1566,7 @@ TEST(DBTest, NonWritableFileSystem) {
fprintf(stderr, "iter %d; errors %d\n", i, errors);
if (!Put("foo", big).ok()) {
errors++;
- env_->SleepForMicroseconds(100000);
+ DelayMilliseconds(100);
}
}
ASSERT_GT(errors, 0);
@@ -1567,6 +1614,24 @@ TEST(DBTest, ManifestWriteError) {
}
}
+TEST(DBTest, MissingSSTFile) {
+ ASSERT_OK(Put("foo", "bar"));
+ ASSERT_EQ("bar", Get("foo"));
+
+ // Dump the memtable to disk.
+ dbfull()->TEST_CompactMemTable();
+ ASSERT_EQ("bar", Get("foo"));
+
+ Close();
+ ASSERT_TRUE(DeleteAnSSTFile());
+ Options options = CurrentOptions();
+ options.paranoid_checks = true;
+ Status s = TryReopen(&options);
+ ASSERT_TRUE(!s.ok());
+ ASSERT_TRUE(s.ToString().find("issing") != std::string::npos)
+ << s.ToString();
+}
+
TEST(DBTest, FilesDeletedAfterCompaction) {
ASSERT_OK(Put("foo", "v2"));
Compact("a", "z");
@@ -1711,13 +1776,13 @@ TEST(DBTest, MultiThreaded) {
}
// Let them run for a while
- env_->SleepForMicroseconds(kTestSeconds * 1000000);
+ DelayMilliseconds(kTestSeconds * 1000);
// Stop the threads and wait for them to finish
mt.stop.Release_Store(&mt);
for (int id = 0; id < kNumThreads; id++) {
while (mt.thread_done[id].Acquire_Load() == NULL) {
- env_->SleepForMicroseconds(100000);
+ DelayMilliseconds(100);
}
}
} while (ChangeOptions());
diff --git a/src/leveldb/db/dbformat.cc b/src/leveldb/db/dbformat.cc
index 28e11b398d..20a7ca4462 100644
--- a/src/leveldb/db/dbformat.cc
+++ b/src/leveldb/db/dbformat.cc
@@ -26,7 +26,7 @@ std::string ParsedInternalKey::DebugString() const {
(unsigned long long) sequence,
int(type));
std::string result = "'";
- result += user_key.ToString();
+ result += EscapeString(user_key.ToString());
result += buf;
return result;
}
diff --git a/src/leveldb/db/filename_test.cc b/src/leveldb/db/filename_test.cc
index 47353d6c9a..5a26da4728 100644
--- a/src/leveldb/db/filename_test.cc
+++ b/src/leveldb/db/filename_test.cc
@@ -70,7 +70,7 @@ TEST(FileNameTest, Parse) {
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
std::string f = errors[i];
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
- };
+ }
}
TEST(FileNameTest, Construction) {
diff --git a/src/leveldb/db/version_set.cc b/src/leveldb/db/version_set.cc
index 7d0a5de2b9..4fd1ddef21 100644
--- a/src/leveldb/db/version_set.cc
+++ b/src/leveldb/db/version_set.cc
@@ -1331,14 +1331,19 @@ Compaction* VersionSet::CompactRange(
}
// Avoid compacting too much in one shot in case the range is large.
- const uint64_t limit = MaxFileSizeForLevel(level);
- uint64_t total = 0;
- for (size_t i = 0; i < inputs.size(); i++) {
- uint64_t s = inputs[i]->file_size;
- total += s;
- if (total >= limit) {
- inputs.resize(i + 1);
- break;
+ // But we cannot do this for level-0 since level-0 files can overlap
+ // and we must not pick one file and drop another older file if the
+ // two files overlap.
+ if (level > 0) {
+ const uint64_t limit = MaxFileSizeForLevel(level);
+ uint64_t total = 0;
+ for (size_t i = 0; i < inputs.size(); i++) {
+ uint64_t s = inputs[i]->file_size;
+ total += s;
+ if (total >= limit) {
+ inputs.resize(i + 1);
+ break;
+ }
}
}