aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml13
-rw-r--r--Makefile467
-rw-r--r--README51
-rw-r--r--README.md39
-rwxr-xr-xbuild_detect_platform2
-rw-r--r--db/corruption_test.cc2
-rw-r--r--db/db_bench.cc24
-rw-r--r--db/db_impl.cc194
-rw-r--r--db/db_impl.h8
-rw-r--r--db/db_test.cc32
-rw-r--r--db/fault_injection_test.cc554
-rw-r--r--db/leveldbutil.cc (renamed from db/leveldb_main.cc)0
-rw-r--r--db/log_reader.cc22
-rw-r--r--db/log_reader.h5
-rw-r--r--db/log_test.cc101
-rw-r--r--db/log_writer.cc17
-rw-r--r--db/log_writer.h6
-rw-r--r--db/memtable.h5
-rw-r--r--db/recovery_test.cc324
-rw-r--r--db/skiplist.h8
-rw-r--r--db/skiplist_test.cc2
-rw-r--r--db/snapshot.h1
-rw-r--r--db/version_set.cc40
-rw-r--r--db/version_set.h4
-rw-r--r--db/write_batch_internal.h1
-rw-r--r--doc/index.html2
-rw-r--r--helpers/memenv/memenv.cc13
-rw-r--r--helpers/memenv/memenv_test.cc13
-rw-r--r--include/leveldb/cache.h11
-rw-r--r--include/leveldb/db.h4
-rw-r--r--include/leveldb/env.h18
-rw-r--r--include/leveldb/iterator.h2
-rw-r--r--include/leveldb/options.h6
-rw-r--r--include/leveldb/status.h6
-rw-r--r--port/atomic_pointer.h19
-rw-r--r--port/port_posix.cc1
-rw-r--r--table/filter_block.cc4
-rw-r--r--table/format.cc3
-rw-r--r--table/iterator_wrapper.h3
-rw-r--r--table/table.cc2
-rw-r--r--table/table_test.cc20
-rw-r--r--util/arena.cc6
-rw-r--r--util/arena.h10
-rw-r--r--util/bloom.cc2
-rw-r--r--util/bloom_test.cc3
-rw-r--r--util/cache.cc136
-rw-r--r--util/cache_test.cc42
-rw-r--r--util/env.cc4
-rw-r--r--util/env_posix.cc13
-rw-r--r--util/env_win.cc37
-rw-r--r--util/options.cc2
-rw-r--r--util/testutil.h10
52 files changed, 1942 insertions, 372 deletions
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000000..f5bd74c454
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,13 @@
+language: cpp
+compiler:
+- clang
+- gcc
+os:
+- linux
+- osx
+sudo: false
+before_install:
+- echo $LANG
+- echo $LC_ALL
+script:
+- make -j 4 check
diff --git a/Makefile b/Makefile
index 2bd2cadcdd..07a5a1ead6 100644
--- a/Makefile
+++ b/Makefile
@@ -20,208 +20,395 @@ $(shell CC="$(CC)" CXX="$(CXX)" TARGET_OS="$(TARGET_OS)" \
# this file is generated by the previous line to set build flags and sources
include build_config.mk
+TESTS = \
+ db/autocompact_test \
+ db/c_test \
+ db/corruption_test \
+ db/db_test \
+ db/dbformat_test \
+ db/fault_injection_test \
+ db/filename_test \
+ db/log_test \
+ db/recovery_test \
+ db/skiplist_test \
+ db/version_edit_test \
+ db/version_set_test \
+ db/write_batch_test \
+ helpers/memenv/memenv_test \
+ issues/issue178_test \
+ issues/issue200_test \
+ table/filter_block_test \
+ table/table_test \
+ util/arena_test \
+ util/bloom_test \
+ util/cache_test \
+ util/coding_test \
+ util/crc32c_test \
+ util/env_test \
+ util/hash_test
+
+UTILS = \
+ db/db_bench \
+ db/leveldbutil
+
+# Put the object files in a subdirectory, but the application at the top of the object dir.
+PROGNAMES := $(notdir $(TESTS) $(UTILS))
+
+# On Linux may need libkyotocabinet-dev for dependency.
+BENCHMARKS = \
+ doc/bench/db_bench_sqlite3 \
+ doc/bench/db_bench_tree_db
+
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT)
LDFLAGS += $(PLATFORM_LDFLAGS)
LIBS += $(PLATFORM_LIBS)
-LIBOBJECTS = $(SOURCES:.cc=.o)
-MEMENVOBJECTS = $(MEMENV_SOURCES:.cc=.o)
-
-TESTUTIL = ./util/testutil.o
-TESTHARNESS = ./util/testharness.o $(TESTUTIL)
+SIMULATOR_OUTDIR=out-ios-x86
+DEVICE_OUTDIR=out-ios-arm
-# Note: iOS should probably be using libtool, not ar.
ifeq ($(PLATFORM), IOS)
+# Note: iOS should probably be using libtool, not ar.
AR=xcrun ar
+SIMULATORSDK=$(shell xcrun -sdk iphonesimulator --show-sdk-path)
+DEVICESDK=$(shell xcrun -sdk iphoneos --show-sdk-path)
+DEVICE_CFLAGS = -isysroot "$(DEVICESDK)" -arch armv6 -arch armv7 -arch armv7s -arch arm64
+SIMULATOR_CFLAGS = -isysroot "$(SIMULATORSDK)" -arch i686 -arch x86_64
+STATIC_OUTDIR=out-ios-universal
+else
+STATIC_OUTDIR=out-static
+SHARED_OUTDIR=out-shared
+STATIC_PROGRAMS := $(addprefix $(STATIC_OUTDIR)/, $(PROGNAMES))
+SHARED_PROGRAMS := $(addprefix $(SHARED_OUTDIR)/, db_bench)
endif
-TESTS = \
- arena_test \
- autocompact_test \
- bloom_test \
- c_test \
- cache_test \
- coding_test \
- corruption_test \
- crc32c_test \
- db_test \
- dbformat_test \
- env_test \
- filename_test \
- filter_block_test \
- hash_test \
- issue178_test \
- issue200_test \
- log_test \
- memenv_test \
- skiplist_test \
- table_test \
- version_edit_test \
- version_set_test \
- write_batch_test
-
-PROGRAMS = db_bench leveldbutil $(TESTS)
-BENCHMARKS = db_bench_sqlite3 db_bench_tree_db
-
-LIBRARY = libleveldb.a
-MEMENVLIBRARY = libmemenv.a
+STATIC_LIBOBJECTS := $(addprefix $(STATIC_OUTDIR)/, $(SOURCES:.cc=.o))
+STATIC_MEMENVOBJECTS := $(addprefix $(STATIC_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+DEVICE_LIBOBJECTS := $(addprefix $(DEVICE_OUTDIR)/, $(SOURCES:.cc=.o))
+DEVICE_MEMENVOBJECTS := $(addprefix $(DEVICE_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+SIMULATOR_LIBOBJECTS := $(addprefix $(SIMULATOR_OUTDIR)/, $(SOURCES:.cc=.o))
+SIMULATOR_MEMENVOBJECTS := $(addprefix $(SIMULATOR_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+SHARED_LIBOBJECTS := $(addprefix $(SHARED_OUTDIR)/, $(SOURCES:.cc=.o))
+SHARED_MEMENVOBJECTS := $(addprefix $(SHARED_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+TESTUTIL := $(STATIC_OUTDIR)/util/testutil.o
+TESTHARNESS := $(STATIC_OUTDIR)/util/testharness.o $(TESTUTIL)
+
+STATIC_TESTOBJS := $(addprefix $(STATIC_OUTDIR)/, $(addsuffix .o, $(TESTS)))
+STATIC_UTILOBJS := $(addprefix $(STATIC_OUTDIR)/, $(addsuffix .o, $(UTILS)))
+STATIC_ALLOBJS := $(STATIC_LIBOBJECTS) $(STATIC_MEMENVOBJECTS) $(STATIC_TESTOBJS) $(STATIC_UTILOBJS) $(TESTHARNESS)
+DEVICE_ALLOBJS := $(DEVICE_LIBOBJECTS) $(DEVICE_MEMENVOBJECTS)
+SIMULATOR_ALLOBJS := $(SIMULATOR_LIBOBJECTS) $(SIMULATOR_MEMENVOBJECTS)
default: all
# Should we build shared libraries?
ifneq ($(PLATFORM_SHARED_EXT),)
+# Many leveldb test apps use non-exported API's. Only build a subset for testing.
+SHARED_ALLOBJS := $(SHARED_LIBOBJECTS) $(SHARED_MEMENVOBJECTS) $(TESTHARNESS)
+
ifneq ($(PLATFORM_SHARED_VERSIONED),true)
-SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
-SHARED2 = $(SHARED1)
-SHARED3 = $(SHARED1)
-SHARED = $(SHARED1)
+SHARED_LIB1 = libleveldb.$(PLATFORM_SHARED_EXT)
+SHARED_LIB2 = $(SHARED_LIB1)
+SHARED_LIB3 = $(SHARED_LIB1)
+SHARED_LIBS = $(SHARED_LIB1)
+SHARED_MEMENVLIB = $(SHARED_OUTDIR)/libmemenv.a
else
# Update db.h if you change these.
-SHARED_MAJOR = 1
-SHARED_MINOR = 18
-SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
-SHARED2 = $(SHARED1).$(SHARED_MAJOR)
-SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
-SHARED = $(SHARED1) $(SHARED2) $(SHARED3)
-$(SHARED1): $(SHARED3)
- ln -fs $(SHARED3) $(SHARED1)
-$(SHARED2): $(SHARED3)
- ln -fs $(SHARED3) $(SHARED2)
+SHARED_VERSION_MAJOR = 1
+SHARED_VERSION_MINOR = 19
+SHARED_LIB1 = libleveldb.$(PLATFORM_SHARED_EXT)
+SHARED_LIB2 = $(SHARED_LIB1).$(SHARED_VERSION_MAJOR)
+SHARED_LIB3 = $(SHARED_LIB1).$(SHARED_VERSION_MAJOR).$(SHARED_VERSION_MINOR)
+SHARED_LIBS = $(SHARED_OUTDIR)/$(SHARED_LIB1) $(SHARED_OUTDIR)/$(SHARED_LIB2) $(SHARED_OUTDIR)/$(SHARED_LIB3)
+$(SHARED_OUTDIR)/$(SHARED_LIB1): $(SHARED_OUTDIR)/$(SHARED_LIB3)
+ ln -fs $(SHARED_LIB3) $(SHARED_OUTDIR)/$(SHARED_LIB1)
+$(SHARED_OUTDIR)/$(SHARED_LIB2): $(SHARED_OUTDIR)/$(SHARED_LIB3)
+ ln -fs $(SHARED_LIB3) $(SHARED_OUTDIR)/$(SHARED_LIB2)
+SHARED_MEMENVLIB = $(SHARED_OUTDIR)/libmemenv.a
endif
-$(SHARED3):
- $(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) $(LIBS)
+$(SHARED_OUTDIR)/$(SHARED_LIB3): $(SHARED_LIBOBJECTS)
+ $(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED_LIB2) $(SHARED_LIBOBJECTS) -o $(SHARED_OUTDIR)/$(SHARED_LIB3) $(LIBS)
endif # PLATFORM_SHARED_EXT
-all: $(SHARED) $(LIBRARY)
+all: $(SHARED_LIBS) $(SHARED_PROGRAMS) $(STATIC_OUTDIR)/libleveldb.a $(STATIC_OUTDIR)/libmemenv.a $(STATIC_PROGRAMS)
-check: all $(PROGRAMS) $(TESTS)
- for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done
+check: $(STATIC_PROGRAMS)
+ for t in $(notdir $(TESTS)); do echo "***** Running $$t"; $(STATIC_OUTDIR)/$$t || exit 1; done
clean:
- -rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk
- -rm -rf ios-x86/* ios-arm/*
+ -rm -rf out-static out-shared out-ios-x86 out-ios-arm out-ios-universal
+ -rm -f build_config.mk
+ -rm -rf ios-x86 ios-arm
-$(LIBRARY): $(LIBOBJECTS)
- rm -f $@
- $(AR) -rs $@ $(LIBOBJECTS)
+$(STATIC_OUTDIR):
+ mkdir $@
-db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
- $(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
+$(STATIC_OUTDIR)/db: | $(STATIC_OUTDIR)
+ mkdir $@
-db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
- $(CXX) $(LDFLAGS) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
+$(STATIC_OUTDIR)/helpers/memenv: | $(STATIC_OUTDIR)
+ mkdir -p $@
-db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
- $(CXX) $(LDFLAGS) doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
+$(STATIC_OUTDIR)/port: | $(STATIC_OUTDIR)
+ mkdir $@
-leveldbutil: db/leveldb_main.o $(LIBOBJECTS)
- $(CXX) $(LDFLAGS) db/leveldb_main.o $(LIBOBJECTS) -o $@ $(LIBS)
+$(STATIC_OUTDIR)/table: | $(STATIC_OUTDIR)
+ mkdir $@
-arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(STATIC_OUTDIR)/util: | $(STATIC_OUTDIR)
+ mkdir $@
-autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+.PHONY: STATIC_OBJDIRS
+STATIC_OBJDIRS: \
+ $(STATIC_OUTDIR)/db \
+ $(STATIC_OUTDIR)/port \
+ $(STATIC_OUTDIR)/table \
+ $(STATIC_OUTDIR)/util \
+ $(STATIC_OUTDIR)/helpers/memenv
-bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR):
+ mkdir $@
-c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/db: | $(SHARED_OUTDIR)
+ mkdir $@
-cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/helpers/memenv: | $(SHARED_OUTDIR)
+ mkdir -p $@
-coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/port: | $(SHARED_OUTDIR)
+ mkdir $@
-corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/table: | $(SHARED_OUTDIR)
+ mkdir $@
-crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/util: | $(SHARED_OUTDIR)
+ mkdir $@
-db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+.PHONY: SHARED_OBJDIRS
+SHARED_OBJDIRS: \
+ $(SHARED_OUTDIR)/db \
+ $(SHARED_OUTDIR)/port \
+ $(SHARED_OUTDIR)/table \
+ $(SHARED_OUTDIR)/util \
+ $(SHARED_OUTDIR)/helpers/memenv
-dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR):
+ mkdir $@
-env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/db: | $(DEVICE_OUTDIR)
+ mkdir $@
-filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/helpers/memenv: | $(DEVICE_OUTDIR)
+ mkdir -p $@
-filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/port: | $(DEVICE_OUTDIR)
+ mkdir $@
-hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/table: | $(DEVICE_OUTDIR)
+ mkdir $@
-issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/util: | $(DEVICE_OUTDIR)
+ mkdir $@
-issue200_test: issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+.PHONY: DEVICE_OBJDIRS
+DEVICE_OBJDIRS: \
+ $(DEVICE_OUTDIR)/db \
+ $(DEVICE_OUTDIR)/port \
+ $(DEVICE_OUTDIR)/table \
+ $(DEVICE_OUTDIR)/util \
+ $(DEVICE_OUTDIR)/helpers/memenv
-log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR):
+ mkdir $@
-table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/db: | $(SIMULATOR_OUTDIR)
+ mkdir $@
-skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/helpers/memenv: | $(SIMULATOR_OUTDIR)
+ mkdir -p $@
-version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/port: | $(SIMULATOR_OUTDIR)
+ mkdir $@
-version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/table: | $(SIMULATOR_OUTDIR)
+ mkdir $@
-write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/util: | $(SIMULATOR_OUTDIR)
+ mkdir $@
-$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
- rm -f $@
- $(AR) -rs $@ $(MEMENVOBJECTS)
+.PHONY: SIMULATOR_OBJDIRS
+SIMULATOR_OBJDIRS: \
+ $(SIMULATOR_OUTDIR)/db \
+ $(SIMULATOR_OUTDIR)/port \
+ $(SIMULATOR_OUTDIR)/table \
+ $(SIMULATOR_OUTDIR)/util \
+ $(SIMULATOR_OUTDIR)/helpers/memenv
-memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS)
- $(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LIBS)
+$(STATIC_ALLOBJS): | STATIC_OBJDIRS
+$(DEVICE_ALLOBJS): | DEVICE_OBJDIRS
+$(SIMULATOR_ALLOBJS): | SIMULATOR_OBJDIRS
+$(SHARED_ALLOBJS): | SHARED_OBJDIRS
ifeq ($(PLATFORM), IOS)
-# For iOS, create universal object files to be used on both the simulator and
+$(DEVICE_OUTDIR)/libleveldb.a: $(DEVICE_LIBOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(DEVICE_LIBOBJECTS)
+
+$(SIMULATOR_OUTDIR)/libleveldb.a: $(SIMULATOR_LIBOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(SIMULATOR_LIBOBJECTS)
+
+$(DEVICE_OUTDIR)/libmemenv.a: $(DEVICE_MEMENVOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(DEVICE_MEMENVOBJECTS)
+
+$(SIMULATOR_OUTDIR)/libmemenv.a: $(SIMULATOR_MEMENVOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(SIMULATOR_MEMENVOBJECTS)
+
+# For iOS, create universal object libraries to be used on both the simulator and
# a device.
-PLATFORMSROOT=/Applications/Xcode.app/Contents/Developer/Platforms
-SIMULATORROOT=$(PLATFORMSROOT)/iPhoneSimulator.platform/Developer
-DEVICEROOT=$(PLATFORMSROOT)/iPhoneOS.platform/Developer
-IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBundleShortVersionString)
-IOSARCH=-arch armv6 -arch armv7 -arch armv7s -arch arm64
-
-.cc.o:
- mkdir -p ios-x86/$(dir $@)
- xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
- mkdir -p ios-arm/$(dir $@)
- xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
- xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
-
-.c.o:
- mkdir -p ios-x86/$(dir $@)
- xcrun -sdk iphonesimulator $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
- mkdir -p ios-arm/$(dir $@)
- xcrun -sdk iphoneos $(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
- xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
+$(STATIC_OUTDIR)/libleveldb.a: $(STATIC_OUTDIR) $(DEVICE_OUTDIR)/libleveldb.a $(SIMULATOR_OUTDIR)/libleveldb.a
+ lipo -create $(DEVICE_OUTDIR)/libleveldb.a $(SIMULATOR_OUTDIR)/libleveldb.a -output $@
+$(STATIC_OUTDIR)/libmemenv.a: $(STATIC_OUTDIR) $(DEVICE_OUTDIR)/libmemenv.a $(SIMULATOR_OUTDIR)/libmemenv.a
+ lipo -create $(DEVICE_OUTDIR)/libmemenv.a $(SIMULATOR_OUTDIR)/libmemenv.a -output $@
else
-.cc.o:
+$(STATIC_OUTDIR)/libleveldb.a:$(STATIC_LIBOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(STATIC_LIBOBJECTS)
+
+$(STATIC_OUTDIR)/libmemenv.a:$(STATIC_MEMENVOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(STATIC_MEMENVOBJECTS)
+endif
+
+$(SHARED_MEMENVLIB):$(SHARED_MEMENVOBJECTS)
+ rm -f $@
+ $(AR) -rs $@ $(SHARED_MEMENVOBJECTS)
+
+$(STATIC_OUTDIR)/db_bench:db/db_bench.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/db_bench.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/db_bench_sqlite3:doc/bench/db_bench_sqlite3.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) doc/bench/db_bench_sqlite3.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
+
+$(STATIC_OUTDIR)/db_bench_tree_db:doc/bench/db_bench_tree_db.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) doc/bench/db_bench_tree_db.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
+
+$(STATIC_OUTDIR)/leveldbutil:db/leveldbutil.cc $(STATIC_LIBOBJECTS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/leveldbutil.cc $(STATIC_LIBOBJECTS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/arena_test:util/arena_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/arena_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/autocompact_test:db/autocompact_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/autocompact_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/bloom_test:util/bloom_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/bloom_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/c_test:$(STATIC_OUTDIR)/db/c_test.o $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(STATIC_OUTDIR)/db/c_test.o $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/cache_test:util/cache_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/cache_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/coding_test:util/coding_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/coding_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/corruption_test:db/corruption_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/corruption_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/crc32c_test:util/crc32c_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/crc32c_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/db_test:db/db_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/db_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/dbformat_test:db/dbformat_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/dbformat_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/env_test:util/env_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/env_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/fault_injection_test:db/fault_injection_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/fault_injection_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/filename_test:db/filename_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/filename_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/filter_block_test:table/filter_block_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) table/filter_block_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/hash_test:util/hash_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) util/hash_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/issue178_test:issues/issue178_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) issues/issue178_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/issue200_test:issues/issue200_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) issues/issue200_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/log_test:db/log_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/log_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/recovery_test:db/recovery_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/recovery_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/table_test:table/table_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) table/table_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/skiplist_test:db/skiplist_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/skiplist_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/version_edit_test:db/version_edit_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/version_edit_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/version_set_test:db/version_set_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/version_set_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/write_batch_test:db/write_batch_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+ $(CXX) $(LDFLAGS) $(CXXFLAGS) db/write_batch_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/memenv_test:$(STATIC_OUTDIR)/helpers/memenv/memenv_test.o $(STATIC_OUTDIR)/libmemenv.a $(STATIC_OUTDIR)/libleveldb.a $(TESTHARNESS)
+ $(XCRUN) $(CXX) $(LDFLAGS) $(STATIC_OUTDIR)/helpers/memenv/memenv_test.o $(STATIC_OUTDIR)/libmemenv.a $(STATIC_OUTDIR)/libleveldb.a $(TESTHARNESS) -o $@ $(LIBS)
+
+$(SHARED_OUTDIR)/db_bench:$(SHARED_OUTDIR)/db/db_bench.o $(SHARED_LIBS) $(TESTUTIL)
+ $(XCRUN) $(CXX) $(LDFLAGS) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SHARED_OUTDIR)/db/db_bench.o $(TESTUTIL) $(SHARED_OUTDIR)/$(SHARED_LIB3) -o $@ $(LIBS)
+
+.PHONY: run-shared
+run-shared: $(SHARED_OUTDIR)/db_bench
+ LD_LIBRARY_PATH=$(SHARED_OUTDIR) $(SHARED_OUTDIR)/db_bench
+
+$(SIMULATOR_OUTDIR)/%.o: %.cc
+ xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) $(SIMULATOR_CFLAGS) -c $< -o $@
+
+$(DEVICE_OUTDIR)/%.o: %.cc
+ xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) $(DEVICE_CFLAGS) -c $< -o $@
+
+$(SIMULATOR_OUTDIR)/%.o: %.c
+ xcrun -sdk iphonesimulator $(CC) $(CFLAGS) $(SIMULATOR_CFLAGS) -c $< -o $@
+
+$(DEVICE_OUTDIR)/%.o: %.c
+ xcrun -sdk iphoneos $(CC) $(CFLAGS) $(DEVICE_CFLAGS) -c $< -o $@
+
+$(STATIC_OUTDIR)/%.o: %.cc
$(CXX) $(CXXFLAGS) -c $< -o $@
-.c.o:
+$(STATIC_OUTDIR)/%.o: %.c
$(CC) $(CFLAGS) -c $< -o $@
-endif
+
+$(SHARED_OUTDIR)/%.o: %.cc
+ $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@
+
+$(SHARED_OUTDIR)/%.o: %.c
+ $(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@
diff --git a/README b/README
deleted file mode 100644
index 3618adeeed..0000000000
--- a/README
+++ /dev/null
@@ -1,51 +0,0 @@
-leveldb: A key-value store
-Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
-
-The code under this directory implements a system for maintaining a
-persistent key/value store.
-
-See doc/index.html for more explanation.
-See doc/impl.html for a brief overview of the implementation.
-
-The public interface is in include/*.h. Callers should not include or
-rely on the details of any other header files in this package. Those
-internal APIs may be changed without warning.
-
-Guide to header files:
-
-include/db.h
- Main interface to the DB: Start here
-
-include/options.h
- Control over the behavior of an entire database, and also
- control over the behavior of individual reads and writes.
-
-include/comparator.h
- Abstraction for user-specified comparison function. If you want
- just bytewise comparison of keys, you can use the default comparator,
- but clients can write their own comparator implementations if they
- want custom ordering (e.g. to handle different character
- encodings, etc.)
-
-include/iterator.h
- Interface for iterating over data. You can get an iterator
- from a DB object.
-
-include/write_batch.h
- Interface for atomically applying multiple updates to a database.
-
-include/slice.h
- A simple module for maintaining a pointer and a length into some
- other byte array.
-
-include/status.h
- Status is returned from many of the public interfaces and is used
- to report success and various kinds of errors.
-
-include/env.h
- Abstraction of the OS environment. A posix implementation of
- this interface is in util/env_posix.cc
-
-include/table.h
-include/table_builder.h
- Lower-level modules that most clients probably won't use directly
diff --git a/README.md b/README.md
index 480affb5ca..c75b185e0e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
**LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.**
+[![Build Status](https://travis-ci.org/google/leveldb.svg?branch=master)](https://travis-ci.org/google/leveldb)
+
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
# Features
@@ -10,9 +12,11 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
* Multiple changes can be made in one atomic batch.
* Users can create a transient snapshot to get a consistent view of data.
* Forward and backward iteration is supported over the data.
- * Data is automatically compressed using the [Snappy compression library](http://code.google.com/p/snappy).
+ * Data is automatically compressed using the [Snappy compression library](http://google.github.io/snappy/).
* External activity (file system operations etc.) is relayed through a virtual interface so users can customize the operating system interactions.
- * [Detailed documentation](http://htmlpreview.github.io/?https://github.com/google/leveldb/blob/master/doc/index.html) about how to use the library is included with the source code.
+
+# Documentation
+ [LevelDB library documentation](https://rawgit.com/google/leveldb/master/doc/index.html) is online and bundled with the source code.
# Limitations
@@ -20,6 +24,37 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
* Only a single process (possibly multi-threaded) can access a particular database at a time.
* There is no client-server support builtin to the library. An application that needs such support will have to wrap their own server around the library.
+# Contributing to the leveldb Project
+The leveldb project welcomes contributions. leveldb's primary goal is to be
+a reliable and fast key/value store. Changes that are in line with the
+features/limitations outlined above, and meet the requirements below,
+will be considered.
+
+Contribution requirements:
+
+1. **POSIX only**. We _generally_ will only accept changes that are both
+ compiled, and tested on a POSIX platform - usually Linux. Very small
+ changes will sometimes be accepted, but consider that more of an
+ exception than the rule.
+
+2. **Stable API**. We strive very hard to maintain a stable API. Changes that
+ require changes for projects using leveldb _might_ be rejected without
+ sufficient benefit to the project.
+
+3. **Tests**: All changes must be accompanied by a new (or changed) test, or
+ a sufficient explanation as to why a new (or changed) test is not required.
+
+## Submitting a Pull Request
+Before any pull request will be accepted the author must first sign a
+Contributor License Agreement (CLA) at https://cla.developers.google.com/.
+
+In order to keep the commit timeline linear
+[squash](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Squashing-Commits)
+your changes down to a single commit and [rebase](https://git-scm.com/docs/git-rebase)
+on google/leveldb/master. This keeps the commit timeline linear and more easily sync'ed
+with the internal repository at Google. More information at GitHub's
+[About Git rebase](https://help.github.com/articles/about-git-rebase/) page.
+
# Performance
Here is a performance report (with explanations) from the run of the
diff --git a/build_detect_platform b/build_detect_platform
index a1101c1bda..d7edab1d87 100755
--- a/build_detect_platform
+++ b/build_detect_platform
@@ -175,7 +175,7 @@ DIRS="$PREFIX/db $PREFIX/util $PREFIX/table"
set -f # temporarily disable globbing so that our patterns aren't expanded
PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *_bench.cc -prune"
-PRUNE_TOOL="-name leveldb_main.cc -prune"
+PRUNE_TOOL="-name leveldbutil.cc -prune"
PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o $PRUNE_TOOL -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "`
set +f # re-enable globbing
diff --git a/db/corruption_test.cc b/db/corruption_test.cc
index 96afc68913..37a484d25f 100644
--- a/db/corruption_test.cc
+++ b/db/corruption_test.cc
@@ -36,7 +36,7 @@ class CorruptionTest {
tiny_cache_ = NewLRUCache(100);
options_.env = &env_;
options_.block_cache = tiny_cache_;
- dbname_ = test::TmpDir() + "/db_test";
+ dbname_ = test::TmpDir() + "/corruption_test";
DestroyDB(dbname_, options_);
db_ = NULL;
diff --git a/db/db_bench.cc b/db/db_bench.cc
index 705a170aae..7a0f5e08cd 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -33,6 +33,7 @@
// readmissing -- read N missing keys in random order
// readhot -- read N times in random order from 1% section of DB
// seekrandom -- N random seeks
+// open -- cost of opening a DB
// crc32c -- repeated crc32c of 4K of data
// acquireload -- load N*1000 times
// Meta operations:
@@ -99,6 +100,9 @@ static int FLAGS_bloom_bits = -1;
// benchmark will fail.
static bool FLAGS_use_existing_db = false;
+// If true, reuse existing log/MANIFEST files when re-opening a database.
+static bool FLAGS_reuse_logs = false;
+
// Use the db with the following name.
static const char* FLAGS_db = NULL;
@@ -138,6 +142,7 @@ class RandomGenerator {
}
};
+#if defined(__linux)
static Slice TrimSpace(Slice s) {
size_t start = 0;
while (start < s.size() && isspace(s[start])) {
@@ -149,6 +154,7 @@ static Slice TrimSpace(Slice s) {
}
return Slice(s.data() + start, limit - start);
}
+#endif
static void AppendWithSpace(std::string* str, Slice msg) {
if (msg.empty()) return;
@@ -442,7 +448,11 @@ class Benchmark {
bool fresh_db = false;
int num_threads = FLAGS_threads;
- if (name == Slice("fillseq")) {
+ if (name == Slice("open")) {
+ method = &Benchmark::OpenBench;
+ num_ /= 10000;
+ if (num_ < 1) num_ = 1;
+ } else if (name == Slice("fillseq")) {
fresh_db = true;
method = &Benchmark::WriteSeq;
} else if (name == Slice("fillbatch")) {
@@ -695,6 +705,7 @@ class Benchmark {
options.write_buffer_size = FLAGS_write_buffer_size;
options.max_open_files = FLAGS_open_files;
options.filter_policy = filter_policy_;
+ options.reuse_logs = FLAGS_reuse_logs;
Status s = DB::Open(options, FLAGS_db, &db_);
if (!s.ok()) {
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
@@ -702,6 +713,14 @@ class Benchmark {
}
}
+ void OpenBench(ThreadState* thread) {
+ for (int i = 0; i < num_; i++) {
+ delete db_;
+ Open();
+ thread->stats.FinishedSingleOp();
+ }
+ }
+
void WriteSeq(ThreadState* thread) {
DoWrite(thread, true);
}
@@ -941,6 +960,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
(n == 0 || n == 1)) {
FLAGS_use_existing_db = n;
+ } else if (sscanf(argv[i], "--reuse_logs=%d%c", &n, &junk) == 1 &&
+ (n == 0 || n == 1)) {
+ FLAGS_reuse_logs = n;
} else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
FLAGS_num = n;
} else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 49b95953b4..60f4e66e55 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -125,7 +125,7 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
db_lock_(NULL),
shutting_down_(NULL),
bg_cv_(&mutex_),
- mem_(new MemTable(internal_comparator_)),
+ mem_(NULL),
imm_(NULL),
logfile_(NULL),
logfile_number_(0),
@@ -134,7 +134,6 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
tmp_batch_(new WriteBatch),
bg_compaction_scheduled_(false),
manual_compaction_(NULL) {
- mem_->Ref();
has_imm_.Release_Store(NULL);
// Reserve ten files or so for other uses and give the rest to TableCache.
@@ -271,7 +270,7 @@ void DBImpl::DeleteObsoleteFiles() {
}
}
-Status DBImpl::Recover(VersionEdit* edit) {
+Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) {
mutex_.AssertHeld();
// Ignore error from CreateDir since the creation of the DB is
@@ -301,66 +300,69 @@ Status DBImpl::Recover(VersionEdit* edit) {
}
}
- s = versions_->Recover();
- if (s.ok()) {
- SequenceNumber max_sequence(0);
-
- // Recover from all newer log files than the ones named in the
- // descriptor (new log files may have been added by the previous
- // incarnation without registering them in the descriptor).
- //
- // Note that PrevLogNumber() is no longer used, but we pay
- // attention to it in case we are recovering a database
- // produced by an older version of leveldb.
- const uint64_t min_log = versions_->LogNumber();
- const uint64_t prev_log = versions_->PrevLogNumber();
- std::vector<std::string> filenames;
- s = env_->GetChildren(dbname_, &filenames);
+ s = versions_->Recover(save_manifest);
+ if (!s.ok()) {
+ return s;
+ }
+ SequenceNumber max_sequence(0);
+
+ // Recover from all newer log files than the ones named in the
+ // descriptor (new log files may have been added by the previous
+ // incarnation without registering them in the descriptor).
+ //
+ // Note that PrevLogNumber() is no longer used, but we pay
+ // attention to it in case we are recovering a database
+ // produced by an older version of leveldb.
+ const uint64_t min_log = versions_->LogNumber();
+ const uint64_t prev_log = versions_->PrevLogNumber();
+ std::vector<std::string> filenames;
+ s = env_->GetChildren(dbname_, &filenames);
+ if (!s.ok()) {
+ return s;
+ }
+ std::set<uint64_t> expected;
+ versions_->AddLiveFiles(&expected);
+ uint64_t number;
+ FileType type;
+ std::vector<uint64_t> logs;
+ for (size_t i = 0; i < filenames.size(); i++) {
+ if (ParseFileName(filenames[i], &number, &type)) {
+ expected.erase(number);
+ if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
+ logs.push_back(number);
+ }
+ }
+ if (!expected.empty()) {
+ char buf[50];
+ snprintf(buf, sizeof(buf), "%d missing files; e.g.",
+ static_cast<int>(expected.size()));
+ return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
+ }
+
+ // Recover in the order in which the logs were generated
+ std::sort(logs.begin(), logs.end());
+ for (size_t i = 0; i < logs.size(); i++) {
+ s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit,
+ &max_sequence);
if (!s.ok()) {
return s;
}
- std::set<uint64_t> expected;
- versions_->AddLiveFiles(&expected);
- uint64_t number;
- FileType type;
- std::vector<uint64_t> logs;
- for (size_t i = 0; i < filenames.size(); i++) {
- if (ParseFileName(filenames[i], &number, &type)) {
- expected.erase(number);
- if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
- logs.push_back(number);
- }
- }
- if (!expected.empty()) {
- char buf[50];
- snprintf(buf, sizeof(buf), "%d missing files; e.g.",
- static_cast<int>(expected.size()));
- return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
- }
-
- // Recover in the order in which the logs were generated
- std::sort(logs.begin(), logs.end());
- for (size_t i = 0; i < logs.size(); i++) {
- s = RecoverLogFile(logs[i], edit, &max_sequence);
- // The previous incarnation may not have written any MANIFEST
- // records after allocating this log number. So we manually
- // update the file number allocation counter in VersionSet.
- versions_->MarkFileNumberUsed(logs[i]);
- }
+ // The previous incarnation may not have written any MANIFEST
+ // records after allocating this log number. So we manually
+ // update the file number allocation counter in VersionSet.
+ versions_->MarkFileNumberUsed(logs[i]);
+ }
- if (s.ok()) {
- if (versions_->LastSequence() < max_sequence) {
- versions_->SetLastSequence(max_sequence);
- }
- }
+ if (versions_->LastSequence() < max_sequence) {
+ versions_->SetLastSequence(max_sequence);
}
- return s;
+ return Status::OK();
}
-Status DBImpl::RecoverLogFile(uint64_t log_number,
- VersionEdit* edit,
+Status DBImpl::RecoverLogFile(uint64_t log_number, bool last_log,
+ bool* save_manifest, VersionEdit* edit,
SequenceNumber* max_sequence) {
struct LogReporter : public log::Reader::Reporter {
Env* env;
@@ -405,6 +407,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
std::string scratch;
Slice record;
WriteBatch batch;
+ int compactions = 0;
MemTable* mem = NULL;
while (reader.ReadRecord(&record, &scratch) &&
status.ok()) {
@@ -432,25 +435,52 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
}
if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
+ compactions++;
+ *save_manifest = true;
status = WriteLevel0Table(mem, edit, NULL);
+ mem->Unref();
+ mem = NULL;
if (!status.ok()) {
// Reflect errors immediately so that conditions like full
// file-systems cause the DB::Open() to fail.
break;
}
- mem->Unref();
- mem = NULL;
}
}
- if (status.ok() && mem != NULL) {
- status = WriteLevel0Table(mem, edit, NULL);
- // Reflect errors immediately so that conditions like full
- // file-systems cause the DB::Open() to fail.
+ delete file;
+
+ // See if we should keep reusing the last log file.
+ if (status.ok() && options_.reuse_logs && last_log && compactions == 0) {
+ assert(logfile_ == NULL);
+ assert(log_ == NULL);
+ assert(mem_ == NULL);
+ uint64_t lfile_size;
+ if (env_->GetFileSize(fname, &lfile_size).ok() &&
+ env_->NewAppendableFile(fname, &logfile_).ok()) {
+ Log(options_.info_log, "Reusing old log %s \n", fname.c_str());
+ log_ = new log::Writer(logfile_, lfile_size);
+ logfile_number_ = log_number;
+ if (mem != NULL) {
+ mem_ = mem;
+ mem = NULL;
+ } else {
+ // mem can be NULL if lognum exists but was empty.
+ mem_ = new MemTable(internal_comparator_);
+ mem_->Ref();
+ }
+ }
+ }
+
+ if (mem != NULL) {
+ // mem did not get reused; compact it.
+ if (status.ok()) {
+ *save_manifest = true;
+ status = WriteLevel0Table(mem, edit, NULL);
+ }
+ mem->Unref();
}
- if (mem != NULL) mem->Unref();
- delete file;
return status;
}
@@ -821,8 +851,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
delete iter;
if (s.ok()) {
Log(options_.info_log,
- "Generated table #%llu: %lld keys, %lld bytes",
+ "Generated table #%llu@%d: %lld keys, %lld bytes",
(unsigned long long) output_number,
+ compact->compaction->level(),
(unsigned long long) current_entries,
(unsigned long long) current_bytes);
}
@@ -1395,6 +1426,19 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
} else if (in == "sstables") {
*value = versions_->current()->DebugString();
return true;
+ } else if (in == "approximate-memory-usage") {
+ size_t total_usage = options_.block_cache->TotalCharge();
+ if (mem_) {
+ total_usage += mem_->ApproximateMemoryUsage();
+ }
+ if (imm_) {
+ total_usage += imm_->ApproximateMemoryUsage();
+ }
+ char buf[50];
+ snprintf(buf, sizeof(buf), "%llu",
+ static_cast<unsigned long long>(total_usage));
+ value->append(buf);
+ return true;
}
return false;
@@ -1449,8 +1493,11 @@ Status DB::Open(const Options& options, const std::string& dbname,
DBImpl* impl = new DBImpl(options, dbname);
impl->mutex_.Lock();
VersionEdit edit;
- Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists
- if (s.ok()) {
+ // Recover handles create_if_missing, error_if_exists
+ bool save_manifest = false;
+ Status s = impl->Recover(&edit, &save_manifest);
+ if (s.ok() && impl->mem_ == NULL) {
+ // Create new log and a corresponding memtable.
uint64_t new_log_number = impl->versions_->NewFileNumber();
WritableFile* lfile;
s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
@@ -1460,15 +1507,22 @@ Status DB::Open(const Options& options, const std::string& dbname,
impl->logfile_ = lfile;
impl->logfile_number_ = new_log_number;
impl->log_ = new log::Writer(lfile);
- s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
- }
- if (s.ok()) {
- impl->DeleteObsoleteFiles();
- impl->MaybeScheduleCompaction();
+ impl->mem_ = new MemTable(impl->internal_comparator_);
+ impl->mem_->Ref();
}
}
+ if (s.ok() && save_manifest) {
+ edit.SetPrevLogNumber(0); // No older logs needed after recovery.
+ edit.SetLogNumber(impl->logfile_number_);
+ s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
+ }
+ if (s.ok()) {
+ impl->DeleteObsoleteFiles();
+ impl->MaybeScheduleCompaction();
+ }
impl->mutex_.Unlock();
if (s.ok()) {
+ assert(impl->mem_ != NULL);
*dbptr = impl;
} else {
delete impl;
diff --git a/db/db_impl.h b/db/db_impl.h
index cfc998164a..8ff323e728 100644
--- a/db/db_impl.h
+++ b/db/db_impl.h
@@ -78,7 +78,8 @@ class DBImpl : public DB {
// Recover the descriptor from persistent storage. May do a significant
// amount of work to recover recently logged updates. Any changes to
// be made to the descriptor are added to *edit.
- Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ Status Recover(VersionEdit* edit, bool* save_manifest)
+ EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void MaybeIgnoreError(Status* s) const;
@@ -90,9 +91,8 @@ class DBImpl : public DB {
// Errors are recorded in bg_error_.
void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- Status RecoverLogFile(uint64_t log_number,
- VersionEdit* edit,
- SequenceNumber* max_sequence)
+ Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
+ VersionEdit* edit, SequenceNumber* max_sequence)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
diff --git a/db/db_test.cc b/db/db_test.cc
index 0fed9137d5..a0b08bc19c 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -193,6 +193,7 @@ class DBTest {
// Sequence of option configurations to try
enum OptionConfig {
kDefault,
+ kReuse,
kFilter,
kUncompressed,
kEnd
@@ -237,7 +238,11 @@ class DBTest {
// Return the current option configuration.
Options CurrentOptions() {
Options options;
+ options.reuse_logs = false;
switch (option_config_) {
+ case kReuse:
+ options.reuse_logs = true;
+ break;
case kFilter:
options.filter_policy = filter_policy_;
break;
@@ -558,6 +563,17 @@ TEST(DBTest, GetFromVersions) {
} while (ChangeOptions());
}
+TEST(DBTest, GetMemUsage) {
+ do {
+ ASSERT_OK(Put("foo", "v1"));
+ std::string val;
+ ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
+ int mem_usage = atoi(val.c_str());
+ ASSERT_GT(mem_usage, 0);
+ ASSERT_LT(mem_usage, 5*1024*1024);
+ } while (ChangeOptions());
+}
+
TEST(DBTest, GetSnapshot) {
do {
// Try with both a short key and a long key
@@ -1080,6 +1096,14 @@ TEST(DBTest, ApproximateSizes) {
// 0 because GetApproximateSizes() does not account for memtable space
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
+ if (options.reuse_logs) {
+ // Recovery will reuse memtable, and GetApproximateSizes() does not
+ // account for memtable usage;
+ Reopen(&options);
+ ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
+ continue;
+ }
+
// Check sizes across recovery by reopening a few times
for (int run = 0; run < 3; run++) {
Reopen(&options);
@@ -1123,6 +1147,11 @@ TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
+ if (options.reuse_logs) {
+ // Need to force a memtable compaction since recovery does not do so.
+ ASSERT_OK(dbfull()->TEST_CompactMemTable());
+ }
+
// Check sizes across recovery by reopening a few times
for (int run = 0; run < 3; run++) {
Reopen(&options);
@@ -2084,7 +2113,8 @@ void BM_LogAndApply(int iters, int num_base_files) {
InternalKeyComparator cmp(BytewiseComparator());
Options options;
VersionSet vset(dbname, &options, NULL, &cmp);
- ASSERT_OK(vset.Recover());
+ bool save_manifest;
+ ASSERT_OK(vset.Recover(&save_manifest));
VersionEdit vbase;
uint64_t fnum = 1;
for (int i = 0; i < num_base_files; i++) {
diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc
new file mode 100644
index 0000000000..875dfe81ee
--- /dev/null
+++ b/db/fault_injection_test.cc
@@ -0,0 +1,554 @@
+// Copyright 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// This test uses a custom Env to keep track of the state of a filesystem as of
+// the last "sync". It then checks for data loss errors by purposely dropping
+// file data (or entire files) not protected by a "sync".
+
+#include "leveldb/db.h"
+
+#include <map>
+#include <set>
+#include "db/db_impl.h"
+#include "db/filename.h"
+#include "db/log_format.h"
+#include "db/version_set.h"
+#include "leveldb/cache.h"
+#include "leveldb/env.h"
+#include "leveldb/table.h"
+#include "leveldb/write_batch.h"
+#include "util/logging.h"
+#include "util/mutexlock.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+static const int kValueSize = 1000;
+static const int kMaxNumValues = 2000;
+static const size_t kNumIterations = 3;
+
+class FaultInjectionTestEnv;
+
+namespace {
+
+// Assume a filename, and not a directory name like "/foo/bar/"
+static std::string GetDirName(const std::string filename) {
+ size_t found = filename.find_last_of("/\\");
+ if (found == std::string::npos) {
+ return "";
+ } else {
+ return filename.substr(0, found);
+ }
+}
+
+Status SyncDir(const std::string& dir) {
+ // As this is a test it isn't required to *actually* sync this directory.
+ return Status::OK();
+}
+
+// A basic file truncation function suitable for this test.
+Status Truncate(const std::string& filename, uint64_t length) {
+ leveldb::Env* env = leveldb::Env::Default();
+
+ SequentialFile* orig_file;
+ Status s = env->NewSequentialFile(filename, &orig_file);
+ if (!s.ok())
+ return s;
+
+ char* scratch = new char[length];
+ leveldb::Slice result;
+ s = orig_file->Read(length, &result, scratch);
+ delete orig_file;
+ if (s.ok()) {
+ std::string tmp_name = GetDirName(filename) + "/truncate.tmp";
+ WritableFile* tmp_file;
+ s = env->NewWritableFile(tmp_name, &tmp_file);
+ if (s.ok()) {
+ s = tmp_file->Append(result);
+ delete tmp_file;
+ if (s.ok()) {
+ s = env->RenameFile(tmp_name, filename);
+ } else {
+ env->DeleteFile(tmp_name);
+ }
+ }
+ }
+
+ delete[] scratch;
+
+ return s;
+}
+
+struct FileState {
+ std::string filename_;
+ ssize_t pos_;
+ ssize_t pos_at_last_sync_;
+ ssize_t pos_at_last_flush_;
+
+ FileState(const std::string& filename)
+ : filename_(filename),
+ pos_(-1),
+ pos_at_last_sync_(-1),
+ pos_at_last_flush_(-1) { }
+
+ FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
+
+ bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; }
+
+ Status DropUnsyncedData() const;
+};
+
+} // anonymous namespace
+
+// A wrapper around WritableFile which informs another Env whenever this file
+// is written to or sync'ed.
+class TestWritableFile : public WritableFile {
+ public:
+ TestWritableFile(const FileState& state,
+ WritableFile* f,
+ FaultInjectionTestEnv* env);
+ virtual ~TestWritableFile();
+ virtual Status Append(const Slice& data);
+ virtual Status Close();
+ virtual Status Flush();
+ virtual Status Sync();
+
+ private:
+ FileState state_;
+ WritableFile* target_;
+ bool writable_file_opened_;
+ FaultInjectionTestEnv* env_;
+
+ Status SyncParent();
+};
+
+class FaultInjectionTestEnv : public EnvWrapper {
+ public:
+ FaultInjectionTestEnv() : EnvWrapper(Env::Default()), filesystem_active_(true) {}
+ virtual ~FaultInjectionTestEnv() { }
+ virtual Status NewWritableFile(const std::string& fname,
+ WritableFile** result);
+ virtual Status NewAppendableFile(const std::string& fname,
+ WritableFile** result);
+ virtual Status DeleteFile(const std::string& f);
+ virtual Status RenameFile(const std::string& s, const std::string& t);
+
+ void WritableFileClosed(const FileState& state);
+ Status DropUnsyncedFileData();
+ Status DeleteFilesCreatedAfterLastDirSync();
+ void DirWasSynced();
+ bool IsFileCreatedSinceLastDirSync(const std::string& filename);
+ void ResetState();
+ void UntrackFile(const std::string& f);
+ // Setting the filesystem to inactive is the test equivalent to simulating a
+ // system reset. Setting to inactive will freeze our saved filesystem state so
+ // that it will stop being recorded. It can then be reset back to the state at
+ // the time of the reset.
+ bool IsFilesystemActive() const { return filesystem_active_; }
+ void SetFilesystemActive(bool active) { filesystem_active_ = active; }
+
+ private:
+ port::Mutex mutex_;
+ std::map<std::string, FileState> db_file_state_;
+ std::set<std::string> new_files_since_last_dir_sync_;
+ bool filesystem_active_; // Record flushes, syncs, writes
+};
+
+TestWritableFile::TestWritableFile(const FileState& state,
+ WritableFile* f,
+ FaultInjectionTestEnv* env)
+ : state_(state),
+ target_(f),
+ writable_file_opened_(true),
+ env_(env) {
+ assert(f != NULL);
+}
+
+TestWritableFile::~TestWritableFile() {
+ if (writable_file_opened_) {
+ Close();
+ }
+ delete target_;
+}
+
+Status TestWritableFile::Append(const Slice& data) {
+ Status s = target_->Append(data);
+ if (s.ok() && env_->IsFilesystemActive()) {
+ state_.pos_ += data.size();
+ }
+ return s;
+}
+
+Status TestWritableFile::Close() {
+ writable_file_opened_ = false;
+ Status s = target_->Close();
+ if (s.ok()) {
+ env_->WritableFileClosed(state_);
+ }
+ return s;
+}
+
+Status TestWritableFile::Flush() {
+ Status s = target_->Flush();
+ if (s.ok() && env_->IsFilesystemActive()) {
+ state_.pos_at_last_flush_ = state_.pos_;
+ }
+ return s;
+}
+
+Status TestWritableFile::SyncParent() {
+ Status s = SyncDir(GetDirName(state_.filename_));
+ if (s.ok()) {
+ env_->DirWasSynced();
+ }
+ return s;
+}
+
+Status TestWritableFile::Sync() {
+ if (!env_->IsFilesystemActive()) {
+ return Status::OK();
+ }
+ // Ensure new files referred to by the manifest are in the filesystem.
+ Status s = target_->Sync();
+ if (s.ok()) {
+ state_.pos_at_last_sync_ = state_.pos_;
+ }
+ if (env_->IsFileCreatedSinceLastDirSync(state_.filename_)) {
+ Status ps = SyncParent();
+ if (s.ok() && !ps.ok()) {
+ s = ps;
+ }
+ }
+ return s;
+}
+
+Status FaultInjectionTestEnv::NewWritableFile(const std::string& fname,
+ WritableFile** result) {
+ WritableFile* actual_writable_file;
+ Status s = target()->NewWritableFile(fname, &actual_writable_file);
+ if (s.ok()) {
+ FileState state(fname);
+ state.pos_ = 0;
+ *result = new TestWritableFile(state, actual_writable_file, this);
+ // NewWritableFile doesn't append to files, so if the same file is
+ // opened again then it will be truncated - so forget our saved
+ // state.
+ UntrackFile(fname);
+ MutexLock l(&mutex_);
+ new_files_since_last_dir_sync_.insert(fname);
+ }
+ return s;
+}
+
+Status FaultInjectionTestEnv::NewAppendableFile(const std::string& fname,
+ WritableFile** result) {
+ WritableFile* actual_writable_file;
+ Status s = target()->NewAppendableFile(fname, &actual_writable_file);
+ if (s.ok()) {
+ FileState state(fname);
+ state.pos_ = 0;
+ {
+ MutexLock l(&mutex_);
+ if (db_file_state_.count(fname) == 0) {
+ new_files_since_last_dir_sync_.insert(fname);
+ } else {
+ state = db_file_state_[fname];
+ }
+ }
+ *result = new TestWritableFile(state, actual_writable_file, this);
+ }
+ return s;
+}
+
+Status FaultInjectionTestEnv::DropUnsyncedFileData() {
+ Status s;
+ MutexLock l(&mutex_);
+ for (std::map<std::string, FileState>::const_iterator it =
+ db_file_state_.begin();
+ s.ok() && it != db_file_state_.end(); ++it) {
+ const FileState& state = it->second;
+ if (!state.IsFullySynced()) {
+ s = state.DropUnsyncedData();
+ }
+ }
+ return s;
+}
+
+void FaultInjectionTestEnv::DirWasSynced() {
+ MutexLock l(&mutex_);
+ new_files_since_last_dir_sync_.clear();
+}
+
+bool FaultInjectionTestEnv::IsFileCreatedSinceLastDirSync(
+ const std::string& filename) {
+ MutexLock l(&mutex_);
+ return new_files_since_last_dir_sync_.find(filename) !=
+ new_files_since_last_dir_sync_.end();
+}
+
+void FaultInjectionTestEnv::UntrackFile(const std::string& f) {
+ MutexLock l(&mutex_);
+ db_file_state_.erase(f);
+ new_files_since_last_dir_sync_.erase(f);
+}
+
+Status FaultInjectionTestEnv::DeleteFile(const std::string& f) {
+ Status s = EnvWrapper::DeleteFile(f);
+ ASSERT_OK(s);
+ if (s.ok()) {
+ UntrackFile(f);
+ }
+ return s;
+}
+
+Status FaultInjectionTestEnv::RenameFile(const std::string& s,
+ const std::string& t) {
+ Status ret = EnvWrapper::RenameFile(s, t);
+
+ if (ret.ok()) {
+ MutexLock l(&mutex_);
+ if (db_file_state_.find(s) != db_file_state_.end()) {
+ db_file_state_[t] = db_file_state_[s];
+ db_file_state_.erase(s);
+ }
+
+ if (new_files_since_last_dir_sync_.erase(s) != 0) {
+ assert(new_files_since_last_dir_sync_.find(t) ==
+ new_files_since_last_dir_sync_.end());
+ new_files_since_last_dir_sync_.insert(t);
+ }
+ }
+
+ return ret;
+}
+
+void FaultInjectionTestEnv::ResetState() {
+ // Since we are not destroying the database, the existing files
+ // should keep their recorded synced/flushed state. Therefore
+ // we do not reset db_file_state_ and new_files_since_last_dir_sync_.
+ MutexLock l(&mutex_);
+ SetFilesystemActive(true);
+}
+
+Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() {
+ // Because DeleteFile access this container make a copy to avoid deadlock
+ mutex_.Lock();
+ std::set<std::string> new_files(new_files_since_last_dir_sync_.begin(),
+ new_files_since_last_dir_sync_.end());
+ mutex_.Unlock();
+ Status s;
+ std::set<std::string>::const_iterator it;
+ for (it = new_files.begin(); s.ok() && it != new_files.end(); ++it) {
+ s = DeleteFile(*it);
+ }
+ return s;
+}
+
+void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
+ MutexLock l(&mutex_);
+ db_file_state_[state.filename_] = state;
+}
+
+Status FileState::DropUnsyncedData() const {
+ ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
+ return Truncate(filename_, sync_pos);
+}
+
+class FaultInjectionTest {
+ public:
+ enum ExpectedVerifResult { VAL_EXPECT_NO_ERROR, VAL_EXPECT_ERROR };
+ enum ResetMethod { RESET_DROP_UNSYNCED_DATA, RESET_DELETE_UNSYNCED_FILES };
+
+ FaultInjectionTestEnv* env_;
+ std::string dbname_;
+ Cache* tiny_cache_;
+ Options options_;
+ DB* db_;
+
+ FaultInjectionTest()
+ : env_(new FaultInjectionTestEnv),
+ tiny_cache_(NewLRUCache(100)),
+ db_(NULL) {
+ dbname_ = test::TmpDir() + "/fault_test";
+ DestroyDB(dbname_, Options()); // Destroy any db from earlier run
+ options_.reuse_logs = true;
+ options_.env = env_;
+ options_.paranoid_checks = true;
+ options_.block_cache = tiny_cache_;
+ options_.create_if_missing = true;
+ }
+
+ ~FaultInjectionTest() {
+ CloseDB();
+ DestroyDB(dbname_, Options());
+ delete tiny_cache_;
+ delete env_;
+ }
+
+ void ReuseLogs(bool reuse) {
+ options_.reuse_logs = reuse;
+ }
+
+ void Build(int start_idx, int num_vals) {
+ std::string key_space, value_space;
+ WriteBatch batch;
+ for (int i = start_idx; i < start_idx + num_vals; i++) {
+ Slice key = Key(i, &key_space);
+ batch.Clear();
+ batch.Put(key, Value(i, &value_space));
+ WriteOptions options;
+ ASSERT_OK(db_->Write(options, &batch));
+ }
+ }
+
+ Status ReadValue(int i, std::string* val) const {
+ std::string key_space, value_space;
+ Slice key = Key(i, &key_space);
+ Value(i, &value_space);
+ ReadOptions options;
+ return db_->Get(options, key, val);
+ }
+
+ Status Verify(int start_idx, int num_vals,
+ ExpectedVerifResult expected) const {
+ std::string val;
+ std::string value_space;
+ Status s;
+ for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) {
+ Value(i, &value_space);
+ s = ReadValue(i, &val);
+ if (expected == VAL_EXPECT_NO_ERROR) {
+ if (s.ok()) {
+ ASSERT_EQ(value_space, val);
+ }
+ } else if (s.ok()) {
+ fprintf(stderr, "Expected an error at %d, but was OK\n", i);
+ s = Status::IOError(dbname_, "Expected value error:");
+ } else {
+ s = Status::OK(); // An expected error
+ }
+ }
+ return s;
+ }
+
+ // Return the ith key
+ Slice Key(int i, std::string* storage) const {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "%016d", i);
+ storage->assign(buf, strlen(buf));
+ return Slice(*storage);
+ }
+
+ // Return the value to associate with the specified key
+ Slice Value(int k, std::string* storage) const {
+ Random r(k);
+ return test::RandomString(&r, kValueSize, storage);
+ }
+
+ Status OpenDB() {
+ delete db_;
+ db_ = NULL;
+ env_->ResetState();
+ return DB::Open(options_, dbname_, &db_);
+ }
+
+ void CloseDB() {
+ delete db_;
+ db_ = NULL;
+ }
+
+ void DeleteAllData() {
+ Iterator* iter = db_->NewIterator(ReadOptions());
+ WriteOptions options;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
+ }
+
+ delete iter;
+ }
+
+ void ResetDBState(ResetMethod reset_method) {
+ switch (reset_method) {
+ case RESET_DROP_UNSYNCED_DATA:
+ ASSERT_OK(env_->DropUnsyncedFileData());
+ break;
+ case RESET_DELETE_UNSYNCED_FILES:
+ ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+ void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
+ DeleteAllData();
+ Build(0, num_pre_sync);
+ db_->CompactRange(NULL, NULL);
+ Build(num_pre_sync, num_post_sync);
+ }
+
+ void PartialCompactTestReopenWithFault(ResetMethod reset_method,
+ int num_pre_sync,
+ int num_post_sync) {
+ env_->SetFilesystemActive(false);
+ CloseDB();
+ ResetDBState(reset_method);
+ ASSERT_OK(OpenDB());
+ ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::VAL_EXPECT_NO_ERROR));
+ ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::VAL_EXPECT_ERROR));
+ }
+
+ void NoWriteTestPreFault() {
+ }
+
+ void NoWriteTestReopenWithFault(ResetMethod reset_method) {
+ CloseDB();
+ ResetDBState(reset_method);
+ ASSERT_OK(OpenDB());
+ }
+
+ void DoTest() {
+ Random rnd(0);
+ ASSERT_OK(OpenDB());
+ for (size_t idx = 0; idx < kNumIterations; idx++) {
+ int num_pre_sync = rnd.Uniform(kMaxNumValues);
+ int num_post_sync = rnd.Uniform(kMaxNumValues);
+
+ PartialCompactTestPreFault(num_pre_sync, num_post_sync);
+ PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA,
+ num_pre_sync,
+ num_post_sync);
+
+ NoWriteTestPreFault();
+ NoWriteTestReopenWithFault(RESET_DROP_UNSYNCED_DATA);
+
+ PartialCompactTestPreFault(num_pre_sync, num_post_sync);
+ // No new files created so we expect all values since no files will be
+ // dropped.
+ PartialCompactTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES,
+ num_pre_sync + num_post_sync,
+ 0);
+
+ NoWriteTestPreFault();
+ NoWriteTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES);
+ }
+ }
+};
+
+TEST(FaultInjectionTest, FaultTestNoLogReuse) {
+ ReuseLogs(false);
+ DoTest();
+}
+
+TEST(FaultInjectionTest, FaultTestWithLogReuse) {
+ ReuseLogs(true);
+ DoTest();
+}
+
+} // namespace leveldb
+
+int main(int argc, char** argv) {
+ return leveldb::test::RunAllTests();
+}
diff --git a/db/leveldb_main.cc b/db/leveldbutil.cc
index 9f4b7dd70c..9f4b7dd70c 100644
--- a/db/leveldb_main.cc
+++ b/db/leveldbutil.cc
diff --git a/db/log_reader.cc b/db/log_reader.cc
index e44b66c85b..a6d304545d 100644
--- a/db/log_reader.cc
+++ b/db/log_reader.cc
@@ -25,7 +25,8 @@ Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
eof_(false),
last_record_offset_(0),
end_of_buffer_offset_(0),
- initial_offset_(initial_offset) {
+ initial_offset_(initial_offset),
+ resyncing_(initial_offset > 0) {
}
Reader::~Reader() {
@@ -72,8 +73,25 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
Slice fragment;
while (true) {
- uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
const unsigned int record_type = ReadPhysicalRecord(&fragment);
+
+ // ReadPhysicalRecord may have only had an empty trailer remaining in its
+ // internal buffer. Calculate the offset of the next physical record now
+ // that it has returned, properly accounting for its header size.
+ uint64_t physical_record_offset =
+ end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();
+
+ if (resyncing_) {
+ if (record_type == kMiddleType) {
+ continue;
+ } else if (record_type == kLastType) {
+ resyncing_ = false;
+ continue;
+ } else {
+ resyncing_ = false;
+ }
+ }
+
switch (record_type) {
case kFullType:
if (in_fragmented_record) {
diff --git a/db/log_reader.h b/db/log_reader.h
index 6aff791716..8389d61f8f 100644
--- a/db/log_reader.h
+++ b/db/log_reader.h
@@ -73,6 +73,11 @@ class Reader {
// Offset at which to start looking for the first record to return
uint64_t const initial_offset_;
+ // True if we are resynchronizing after a seek (initial_offset_ > 0). In
+ // particular, a run of kMiddleType and kLastType records can be silently
+ // skipped in this mode
+ bool resyncing_;
+
// Extend record types with the following special values
enum {
kEof = kMaxRecordType + 1,
diff --git a/db/log_test.cc b/db/log_test.cc
index dcf0562652..48a5928657 100644
--- a/db/log_test.cc
+++ b/db/log_test.cc
@@ -79,7 +79,7 @@ class LogTest {
virtual Status Skip(uint64_t n) {
if (n > contents_.size()) {
contents_.clear();
- return Status::NotFound("in-memory file skipepd past end");
+ return Status::NotFound("in-memory file skipped past end");
}
contents_.remove_prefix(n);
@@ -104,23 +104,34 @@ class LogTest {
StringSource source_;
ReportCollector report_;
bool reading_;
- Writer writer_;
- Reader reader_;
+ Writer* writer_;
+ Reader* reader_;
// Record metadata for testing initial offset functionality
static size_t initial_offset_record_sizes_[];
static uint64_t initial_offset_last_record_offsets_[];
+ static int num_initial_offset_records_;
public:
LogTest() : reading_(false),
- writer_(&dest_),
- reader_(&source_, &report_, true/*checksum*/,
- 0/*initial_offset*/) {
+ writer_(new Writer(&dest_)),
+ reader_(new Reader(&source_, &report_, true/*checksum*/,
+ 0/*initial_offset*/)) {
+ }
+
+ ~LogTest() {
+ delete writer_;
+ delete reader_;
+ }
+
+ void ReopenForAppend() {
+ delete writer_;
+ writer_ = new Writer(&dest_, dest_.contents_.size());
}
void Write(const std::string& msg) {
ASSERT_TRUE(!reading_) << "Write() after starting to read";
- writer_.AddRecord(Slice(msg));
+ writer_->AddRecord(Slice(msg));
}
size_t WrittenBytes() const {
@@ -134,7 +145,7 @@ class LogTest {
}
std::string scratch;
Slice record;
- if (reader_.ReadRecord(&record, &scratch)) {
+ if (reader_->ReadRecord(&record, &scratch)) {
return record.ToString();
} else {
return "EOF";
@@ -182,13 +193,18 @@ class LogTest {
}
void WriteInitialOffsetLog() {
- for (int i = 0; i < 4; i++) {
+ for (int i = 0; i < num_initial_offset_records_; i++) {
std::string record(initial_offset_record_sizes_[i],
static_cast<char>('a' + i));
Write(record);
}
}
+ void StartReadingAt(uint64_t initial_offset) {
+ delete reader_;
+ reader_ = new Reader(&source_, &report_, true/*checksum*/, initial_offset);
+ }
+
void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
WriteInitialOffsetLog();
reading_ = true;
@@ -208,32 +224,48 @@ class LogTest {
source_.contents_ = Slice(dest_.contents_);
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
initial_offset);
- Slice record;
- std::string scratch;
- ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
- ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
- record.size());
- ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
- offset_reader->LastRecordOffset());
- ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
+
+ // Read all records from expected_record_offset through the last one.
+ ASSERT_LT(expected_record_offset, num_initial_offset_records_);
+ for (; expected_record_offset < num_initial_offset_records_;
+ ++expected_record_offset) {
+ Slice record;
+ std::string scratch;
+ ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
+ ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
+ record.size());
+ ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
+ offset_reader->LastRecordOffset());
+ ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
+ }
delete offset_reader;
}
-
};
size_t LogTest::initial_offset_record_sizes_[] =
{10000, // Two sizable records in first block
10000,
2 * log::kBlockSize - 1000, // Span three blocks
- 1};
+ 1,
+ 13716, // Consume all but two bytes of block 3.
+ log::kBlockSize - kHeaderSize, // Consume the entirety of block 4.
+ };
uint64_t LogTest::initial_offset_last_record_offsets_[] =
{0,
kHeaderSize + 10000,
2 * (kHeaderSize + 10000),
2 * (kHeaderSize + 10000) +
- (2 * log::kBlockSize - 1000) + 3 * kHeaderSize};
+ (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
+ 2 * (kHeaderSize + 10000) +
+ (2 * log::kBlockSize - 1000) + 3 * kHeaderSize
+ + kHeaderSize + 1,
+ 3 * log::kBlockSize,
+ };
+// LogTest::initial_offset_last_record_offsets_ must be defined before this.
+int LogTest::num_initial_offset_records_ =
+ sizeof(LogTest::initial_offset_last_record_offsets_)/sizeof(uint64_t);
TEST(LogTest, Empty) {
ASSERT_EQ("EOF", Read());
@@ -318,6 +350,15 @@ TEST(LogTest, AlignedEof) {
ASSERT_EQ("EOF", Read());
}
+TEST(LogTest, OpenForAppend) {
+ Write("hello");
+ ReopenForAppend();
+ Write("world");
+ ASSERT_EQ("hello", Read());
+ ASSERT_EQ("world", Read());
+ ASSERT_EQ("EOF", Read());
+}
+
TEST(LogTest, RandomRead) {
const int N = 500;
Random write_rnd(301);
@@ -445,6 +486,22 @@ TEST(LogTest, PartialLastIsIgnored) {
ASSERT_EQ(0, DroppedBytes());
}
+TEST(LogTest, SkipIntoMultiRecord) {
+ // Consider a fragmented record:
+ // first(R1), middle(R1), last(R1), first(R2)
+ // If initial_offset points to a record after first(R1) but before first(R2)
+ // incomplete fragment errors are not actual errors, and must be suppressed
+ // until a new first or full record is encountered.
+ Write(BigString("foo", 3*kBlockSize));
+ Write("correct");
+ StartReadingAt(kBlockSize);
+
+ ASSERT_EQ("correct", Read());
+ ASSERT_EQ("", ReportMessage());
+ ASSERT_EQ(0, DroppedBytes());
+ ASSERT_EQ("EOF", Read());
+}
+
TEST(LogTest, ErrorJoinsRecords) {
// Consider two fragmented records:
// first(R1) last(R1) first(R2) last(R2)
@@ -514,6 +571,10 @@ TEST(LogTest, ReadFourthStart) {
3);
}
+TEST(LogTest, ReadInitialOffsetIntoBlockPadding) {
+ CheckInitialOffsetRecord(3 * log::kBlockSize - 3, 5);
+}
+
TEST(LogTest, ReadEnd) {
CheckOffsetPastEndReturnsNoRecords(0);
}
diff --git a/db/log_writer.cc b/db/log_writer.cc
index 2da99ac088..74a03270da 100644
--- a/db/log_writer.cc
+++ b/db/log_writer.cc
@@ -12,15 +12,24 @@
namespace leveldb {
namespace log {
-Writer::Writer(WritableFile* dest)
- : dest_(dest),
- block_offset_(0) {
+static void InitTypeCrc(uint32_t* type_crc) {
for (int i = 0; i <= kMaxRecordType; i++) {
char t = static_cast<char>(i);
- type_crc_[i] = crc32c::Value(&t, 1);
+ type_crc[i] = crc32c::Value(&t, 1);
}
}
+Writer::Writer(WritableFile* dest)
+ : dest_(dest),
+ block_offset_(0) {
+ InitTypeCrc(type_crc_);
+}
+
+Writer::Writer(WritableFile* dest, uint64_t dest_length)
+ : dest_(dest), block_offset_(dest_length % kBlockSize) {
+ InitTypeCrc(type_crc_);
+}
+
Writer::~Writer() {
}
diff --git a/db/log_writer.h b/db/log_writer.h
index a3a954d967..9e7cc4705b 100644
--- a/db/log_writer.h
+++ b/db/log_writer.h
@@ -22,6 +22,12 @@ class Writer {
// "*dest" must be initially empty.
// "*dest" must remain live while this Writer is in use.
explicit Writer(WritableFile* dest);
+
+ // Create a writer that will append data to "*dest".
+ // "*dest" must have initial length "dest_length".
+ // "*dest" must remain live while this Writer is in use.
+ Writer(WritableFile* dest, uint64_t dest_length);
+
~Writer();
Status AddRecord(const Slice& slice);
diff --git a/db/memtable.h b/db/memtable.h
index 92e90bb099..9f41567cde 100644
--- a/db/memtable.h
+++ b/db/memtable.h
@@ -36,10 +36,7 @@ class MemTable {
}
// Returns an estimate of the number of bytes of data in use by this
- // data structure.
- //
- // REQUIRES: external synchronization to prevent simultaneous
- // operations on the same MemTable.
+ // data structure. It is safe to call when MemTable is being modified.
size_t ApproximateMemoryUsage();
// Return an iterator that yields the contents of the memtable.
diff --git a/db/recovery_test.cc b/db/recovery_test.cc
new file mode 100644
index 0000000000..9596f4288a
--- /dev/null
+++ b/db/recovery_test.cc
@@ -0,0 +1,324 @@
+// Copyright (c) 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_impl.h"
+#include "db/filename.h"
+#include "db/version_set.h"
+#include "db/write_batch_internal.h"
+#include "leveldb/db.h"
+#include "leveldb/env.h"
+#include "leveldb/write_batch.h"
+#include "util/logging.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+class RecoveryTest {
+ public:
+ RecoveryTest() : env_(Env::Default()), db_(NULL) {
+ dbname_ = test::TmpDir() + "/recovery_test";
+ DestroyDB(dbname_, Options());
+ Open();
+ }
+
+ ~RecoveryTest() {
+ Close();
+ DestroyDB(dbname_, Options());
+ }
+
+ DBImpl* dbfull() const { return reinterpret_cast<DBImpl*>(db_); }
+ Env* env() const { return env_; }
+
+ bool CanAppend() {
+ WritableFile* tmp;
+ Status s = env_->NewAppendableFile(CurrentFileName(dbname_), &tmp);
+ delete tmp;
+ if (s.IsNotSupportedError()) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ void Close() {
+ delete db_;
+ db_ = NULL;
+ }
+
+ void Open(Options* options = NULL) {
+ Close();
+ Options opts;
+ if (options != NULL) {
+ opts = *options;
+ } else {
+ opts.reuse_logs = true; // TODO(sanjay): test both ways
+ opts.create_if_missing = true;
+ }
+ if (opts.env == NULL) {
+ opts.env = env_;
+ }
+ ASSERT_OK(DB::Open(opts, dbname_, &db_));
+ ASSERT_EQ(1, NumLogs());
+ }
+
+ Status Put(const std::string& k, const std::string& v) {
+ return db_->Put(WriteOptions(), k, v);
+ }
+
+ std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
+ std::string result;
+ Status s = db_->Get(ReadOptions(), k, &result);
+ if (s.IsNotFound()) {
+ result = "NOT_FOUND";
+ } else if (!s.ok()) {
+ result = s.ToString();
+ }
+ return result;
+ }
+
+ std::string ManifestFileName() {
+ std::string current;
+ ASSERT_OK(ReadFileToString(env_, CurrentFileName(dbname_), &current));
+ size_t len = current.size();
+ if (len > 0 && current[len-1] == '\n') {
+ current.resize(len - 1);
+ }
+ return dbname_ + "/" + current;
+ }
+
+ std::string LogName(uint64_t number) {
+ return LogFileName(dbname_, number);
+ }
+
+ size_t DeleteLogFiles() {
+ std::vector<uint64_t> logs = GetFiles(kLogFile);
+ for (size_t i = 0; i < logs.size(); i++) {
+ ASSERT_OK(env_->DeleteFile(LogName(logs[i]))) << LogName(logs[i]);
+ }
+ return logs.size();
+ }
+
+ uint64_t FirstLogFile() {
+ return GetFiles(kLogFile)[0];
+ }
+
+ std::vector<uint64_t> GetFiles(FileType t) {
+ std::vector<std::string> filenames;
+ ASSERT_OK(env_->GetChildren(dbname_, &filenames));
+ std::vector<uint64_t> result;
+ for (size_t i = 0; i < filenames.size(); i++) {
+ uint64_t number;
+ FileType type;
+ if (ParseFileName(filenames[i], &number, &type) && type == t) {
+ result.push_back(number);
+ }
+ }
+ return result;
+ }
+
+ int NumLogs() {
+ return GetFiles(kLogFile).size();
+ }
+
+ int NumTables() {
+ return GetFiles(kTableFile).size();
+ }
+
+ uint64_t FileSize(const std::string& fname) {
+ uint64_t result;
+ ASSERT_OK(env_->GetFileSize(fname, &result)) << fname;
+ return result;
+ }
+
+ void CompactMemTable() {
+ dbfull()->TEST_CompactMemTable();
+ }
+
+ // Directly construct a log file that sets key to val.
+ void MakeLogFile(uint64_t lognum, SequenceNumber seq, Slice key, Slice val) {
+ std::string fname = LogFileName(dbname_, lognum);
+ WritableFile* file;
+ ASSERT_OK(env_->NewWritableFile(fname, &file));
+ log::Writer writer(file);
+ WriteBatch batch;
+ batch.Put(key, val);
+ WriteBatchInternal::SetSequence(&batch, seq);
+ ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
+ ASSERT_OK(file->Flush());
+ delete file;
+ }
+
+ private:
+ std::string dbname_;
+ Env* env_;
+ DB* db_;
+};
+
+TEST(RecoveryTest, ManifestReused) {
+ if (!CanAppend()) {
+ fprintf(stderr, "skipping test because env does not support appending\n");
+ return;
+ }
+ ASSERT_OK(Put("foo", "bar"));
+ Close();
+ std::string old_manifest = ManifestFileName();
+ Open();
+ ASSERT_EQ(old_manifest, ManifestFileName());
+ ASSERT_EQ("bar", Get("foo"));
+ Open();
+ ASSERT_EQ(old_manifest, ManifestFileName());
+ ASSERT_EQ("bar", Get("foo"));
+}
+
+TEST(RecoveryTest, LargeManifestCompacted) {
+ if (!CanAppend()) {
+ fprintf(stderr, "skipping test because env does not support appending\n");
+ return;
+ }
+ ASSERT_OK(Put("foo", "bar"));
+ Close();
+ std::string old_manifest = ManifestFileName();
+
+ // Pad with zeroes to make manifest file very big.
+ {
+ uint64_t len = FileSize(old_manifest);
+ WritableFile* file;
+ ASSERT_OK(env()->NewAppendableFile(old_manifest, &file));
+ std::string zeroes(3*1048576 - static_cast<size_t>(len), 0);
+ ASSERT_OK(file->Append(zeroes));
+ ASSERT_OK(file->Flush());
+ delete file;
+ }
+
+ Open();
+ std::string new_manifest = ManifestFileName();
+ ASSERT_NE(old_manifest, new_manifest);
+ ASSERT_GT(10000, FileSize(new_manifest));
+ ASSERT_EQ("bar", Get("foo"));
+
+ Open();
+ ASSERT_EQ(new_manifest, ManifestFileName());
+ ASSERT_EQ("bar", Get("foo"));
+}
+
+TEST(RecoveryTest, NoLogFiles) {
+ ASSERT_OK(Put("foo", "bar"));
+ ASSERT_EQ(1, DeleteLogFiles());
+ Open();
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ Open();
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+}
+
+TEST(RecoveryTest, LogFileReuse) {
+ if (!CanAppend()) {
+ fprintf(stderr, "skipping test because env does not support appending\n");
+ return;
+ }
+ for (int i = 0; i < 2; i++) {
+ ASSERT_OK(Put("foo", "bar"));
+ if (i == 0) {
+ // Compact to ensure current log is empty
+ CompactMemTable();
+ }
+ Close();
+ ASSERT_EQ(1, NumLogs());
+ uint64_t number = FirstLogFile();
+ if (i == 0) {
+ ASSERT_EQ(0, FileSize(LogName(number)));
+ } else {
+ ASSERT_LT(0, FileSize(LogName(number)));
+ }
+ Open();
+ ASSERT_EQ(1, NumLogs());
+ ASSERT_EQ(number, FirstLogFile()) << "did not reuse log file";
+ ASSERT_EQ("bar", Get("foo"));
+ Open();
+ ASSERT_EQ(1, NumLogs());
+ ASSERT_EQ(number, FirstLogFile()) << "did not reuse log file";
+ ASSERT_EQ("bar", Get("foo"));
+ }
+}
+
+TEST(RecoveryTest, MultipleMemTables) {
+ // Make a large log.
+ const int kNum = 1000;
+ for (int i = 0; i < kNum; i++) {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "%050d", i);
+ ASSERT_OK(Put(buf, buf));
+ }
+ ASSERT_EQ(0, NumTables());
+ Close();
+ ASSERT_EQ(0, NumTables());
+ ASSERT_EQ(1, NumLogs());
+ uint64_t old_log_file = FirstLogFile();
+
+ // Force creation of multiple memtables by reducing the write buffer size.
+ Options opt;
+ opt.reuse_logs = true;
+ opt.write_buffer_size = (kNum*100) / 2;
+ Open(&opt);
+ ASSERT_LE(2, NumTables());
+ ASSERT_EQ(1, NumLogs());
+ ASSERT_NE(old_log_file, FirstLogFile()) << "must not reuse log";
+ for (int i = 0; i < kNum; i++) {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "%050d", i);
+ ASSERT_EQ(buf, Get(buf));
+ }
+}
+
+TEST(RecoveryTest, MultipleLogFiles) {
+ ASSERT_OK(Put("foo", "bar"));
+ Close();
+ ASSERT_EQ(1, NumLogs());
+
+ // Make a bunch of uncompacted log files.
+ uint64_t old_log = FirstLogFile();
+ MakeLogFile(old_log+1, 1000, "hello", "world");
+ MakeLogFile(old_log+2, 1001, "hi", "there");
+ MakeLogFile(old_log+3, 1002, "foo", "bar2");
+
+ // Recover and check that all log files were processed.
+ Open();
+ ASSERT_LE(1, NumTables());
+ ASSERT_EQ(1, NumLogs());
+ uint64_t new_log = FirstLogFile();
+ ASSERT_LE(old_log+3, new_log);
+ ASSERT_EQ("bar2", Get("foo"));
+ ASSERT_EQ("world", Get("hello"));
+ ASSERT_EQ("there", Get("hi"));
+
+ // Test that previous recovery produced recoverable state.
+ Open();
+ ASSERT_LE(1, NumTables());
+ ASSERT_EQ(1, NumLogs());
+ if (CanAppend()) {
+ ASSERT_EQ(new_log, FirstLogFile());
+ }
+ ASSERT_EQ("bar2", Get("foo"));
+ ASSERT_EQ("world", Get("hello"));
+ ASSERT_EQ("there", Get("hi"));
+
+ // Check that introducing an older log file does not cause it to be re-read.
+ Close();
+ MakeLogFile(old_log+1, 2000, "hello", "stale write");
+ Open();
+ ASSERT_LE(1, NumTables());
+ ASSERT_EQ(1, NumLogs());
+ if (CanAppend()) {
+ ASSERT_EQ(new_log, FirstLogFile());
+ }
+ ASSERT_EQ("bar2", Get("foo"));
+ ASSERT_EQ("world", Get("hello"));
+ ASSERT_EQ("there", Get("hi"));
+}
+
+} // namespace leveldb
+
+int main(int argc, char** argv) {
+ return leveldb::test::RunAllTests();
+}
diff --git a/db/skiplist.h b/db/skiplist.h
index ed8b092203..8bd77764d8 100644
--- a/db/skiplist.h
+++ b/db/skiplist.h
@@ -1,10 +1,10 @@
-#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
-#define STORAGE_LEVELDB_DB_SKIPLIST_H_
-
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
+
+#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
+#define STORAGE_LEVELDB_DB_SKIPLIST_H_
+
// Thread safety
// -------------
//
diff --git a/db/skiplist_test.cc b/db/skiplist_test.cc
index c78f4b4fb1..aee1461e1b 100644
--- a/db/skiplist_test.cc
+++ b/db/skiplist_test.cc
@@ -250,7 +250,7 @@ class ConcurrentTest {
// Note that generation 0 is never inserted, so it is ok if
// <*,0,*> is missing.
ASSERT_TRUE((gen(pos) == 0) ||
- (gen(pos) > initial_state.Get(key(pos)))
+ (gen(pos) > static_cast<Key>(initial_state.Get(key(pos))))
) << "key: " << key(pos)
<< "; gen: " << gen(pos)
<< "; initgen: "
diff --git a/db/snapshot.h b/db/snapshot.h
index e7f8fd2c37..6ed413c42d 100644
--- a/db/snapshot.h
+++ b/db/snapshot.h
@@ -5,6 +5,7 @@
#ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_
#define STORAGE_LEVELDB_DB_SNAPSHOT_H_
+#include "db/dbformat.h"
#include "leveldb/db.h"
namespace leveldb {
diff --git a/db/version_set.cc b/db/version_set.cc
index aa83df55e4..a5e0f77a6a 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -893,7 +893,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
return s;
}
-Status VersionSet::Recover() {
+Status VersionSet::Recover(bool *save_manifest) {
struct LogReporter : public log::Reader::Reporter {
Status* status;
virtual void Corruption(size_t bytes, const Status& s) {
@@ -1003,11 +1003,49 @@ Status VersionSet::Recover() {
last_sequence_ = last_sequence;
log_number_ = log_number;
prev_log_number_ = prev_log_number;
+
+ // See if we can reuse the existing MANIFEST file.
+ if (ReuseManifest(dscname, current)) {
+ // No need to save new manifest
+ } else {
+ *save_manifest = true;
+ }
}
return s;
}
+bool VersionSet::ReuseManifest(const std::string& dscname,
+ const std::string& dscbase) {
+ if (!options_->reuse_logs) {
+ return false;
+ }
+ FileType manifest_type;
+ uint64_t manifest_number;
+ uint64_t manifest_size;
+ if (!ParseFileName(dscbase, &manifest_number, &manifest_type) ||
+ manifest_type != kDescriptorFile ||
+ !env_->GetFileSize(dscname, &manifest_size).ok() ||
+ // Make new compacted MANIFEST if old one is too big
+ manifest_size >= kTargetFileSize) {
+ return false;
+ }
+
+ assert(descriptor_file_ == NULL);
+ assert(descriptor_log_ == NULL);
+ Status r = env_->NewAppendableFile(dscname, &descriptor_file_);
+ if (!r.ok()) {
+ Log(options_->info_log, "Reuse MANIFEST: %s\n", r.ToString().c_str());
+ assert(descriptor_file_ == NULL);
+ return false;
+ }
+
+ Log(options_->info_log, "Reusing MANIFEST %s\n", dscname.c_str());
+ descriptor_log_ = new log::Writer(descriptor_file_, manifest_size);
+ manifest_file_number_ = manifest_number;
+ return true;
+}
+
void VersionSet::MarkFileNumberUsed(uint64_t number) {
if (next_file_number_ <= number) {
next_file_number_ = number + 1;
diff --git a/db/version_set.h b/db/version_set.h
index 8dc14b8e01..1dec745673 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -179,7 +179,7 @@ class VersionSet {
EXCLUSIVE_LOCKS_REQUIRED(mu);
// Recover the last saved descriptor from persistent storage.
- Status Recover();
+ Status Recover(bool *save_manifest);
// Return the current version.
Version* current() const { return current_; }
@@ -274,6 +274,8 @@ class VersionSet {
friend class Compaction;
friend class Version;
+ bool ReuseManifest(const std::string& dscname, const std::string& dscbase);
+
void Finalize(Version* v);
void GetRange(const std::vector<FileMetaData*>& inputs,
diff --git a/db/write_batch_internal.h b/db/write_batch_internal.h
index 310a3c8912..9448ef7b21 100644
--- a/db/write_batch_internal.h
+++ b/db/write_batch_internal.h
@@ -5,6 +5,7 @@
#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+#include "db/dbformat.h"
#include "leveldb/write_batch.h"
namespace leveldb {
diff --git a/doc/index.html b/doc/index.html
index 3ed0ed9d9e..2155192581 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -22,7 +22,7 @@ directory. The following example shows how to open a database,
creating it if necessary:
<p>
<pre>
- #include &lt;assert&gt;
+ #include &lt;cassert&gt;
#include "leveldb/db.h"
leveldb::DB* db;
diff --git a/helpers/memenv/memenv.cc b/helpers/memenv/memenv.cc
index 43ef2e0729..9a98884daf 100644
--- a/helpers/memenv/memenv.cc
+++ b/helpers/memenv/memenv.cc
@@ -277,6 +277,19 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
+ virtual Status NewAppendableFile(const std::string& fname,
+ WritableFile** result) {
+ MutexLock lock(&mutex_);
+ FileState** sptr = &file_map_[fname];
+ FileState* file = *sptr;
+ if (file == NULL) {
+ file = new FileState();
+ file->Ref();
+ }
+ *result = new WritableFileImpl(file);
+ return Status::OK();
+ }
+
virtual bool FileExists(const std::string& fname) {
MutexLock lock(&mutex_);
return file_map_.find(fname) != file_map_.end();
diff --git a/helpers/memenv/memenv_test.cc b/helpers/memenv/memenv_test.cc
index a44310fed8..5cff77613f 100644
--- a/helpers/memenv/memenv_test.cc
+++ b/helpers/memenv/memenv_test.cc
@@ -40,6 +40,8 @@ TEST(MemEnvTest, Basics) {
// Create a file.
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
+ ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
+ ASSERT_EQ(0, file_size);
delete writable_file;
// Check that the file exists.
@@ -55,9 +57,16 @@ TEST(MemEnvTest, Basics) {
ASSERT_OK(writable_file->Append("abc"));
delete writable_file;
- // Check for expected size.
+ // Check that append works.
+ ASSERT_OK(env_->NewAppendableFile("/dir/f", &writable_file));
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(3, file_size);
+ ASSERT_OK(writable_file->Append("hello"));
+ delete writable_file;
+
+ // Check for expected size.
+ ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
+ ASSERT_EQ(8, file_size);
// Check that renaming works.
ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok());
@@ -65,7 +74,7 @@ TEST(MemEnvTest, Basics) {
ASSERT_TRUE(!env_->FileExists("/dir/f"));
ASSERT_TRUE(env_->FileExists("/dir/g"));
ASSERT_OK(env_->GetFileSize("/dir/g", &file_size));
- ASSERT_EQ(3, file_size);
+ ASSERT_EQ(8, file_size);
// Check that opening non-existent file fails.
SequentialFile* seq_file;
diff --git a/include/leveldb/cache.h b/include/leveldb/cache.h
index 1a201e5e0a..6819d5bc49 100644
--- a/include/leveldb/cache.h
+++ b/include/leveldb/cache.h
@@ -81,6 +81,17 @@ class Cache {
// its cache keys.
virtual uint64_t NewId() = 0;
+ // Remove all cache entries that are not actively in use. Memory-constrained
+ // applications may wish to call this method to reduce memory usage.
+ // Default implementation of Prune() does nothing. Subclasses are strongly
+ // encouraged to override the default implementation. A future release of
+ // leveldb may change Prune() to a pure abstract method.
+ virtual void Prune() {}
+
+ // Return an estimate of the combined charges of all elements stored in the
+ // cache.
+ virtual size_t TotalCharge() const = 0;
+
private:
void LRU_Remove(Handle* e);
void LRU_Append(Handle* e);
diff --git a/include/leveldb/db.h b/include/leveldb/db.h
index 4c169bf22e..9752cbad51 100644
--- a/include/leveldb/db.h
+++ b/include/leveldb/db.h
@@ -14,7 +14,7 @@ namespace leveldb {
// Update Makefile if you change these
static const int kMajorVersion = 1;
-static const int kMinorVersion = 18;
+static const int kMinorVersion = 19;
struct Options;
struct ReadOptions;
@@ -115,6 +115,8 @@ class DB {
// about the internal operation of the DB.
// "leveldb.sstables" - returns a multi-line string that describes all
// of the sstables that make up the db contents.
+ // "leveldb.approximate-memory-usage" - returns the approximate number of
+ // bytes of memory in use by the DB.
virtual bool GetProperty(const Slice& property, std::string* value) = 0;
// For each i in [0,n-1], store in "sizes[i]", the approximate
diff --git a/include/leveldb/env.h b/include/leveldb/env.h
index f709514da6..99b6c21414 100644
--- a/include/leveldb/env.h
+++ b/include/leveldb/env.h
@@ -69,6 +69,21 @@ class Env {
virtual Status NewWritableFile(const std::string& fname,
WritableFile** result) = 0;
+ // Create an object that either appends to an existing file, or
+ // writes to a new file (if the file does not exist to begin with).
+ // On success, stores a pointer to the new file in *result and
+ // returns OK. On failure stores NULL in *result and returns
+ // non-OK.
+ //
+ // The returned file will only be accessed by one thread at a time.
+ //
+ // May return an IsNotSupportedError error if this Env does
+ // not allow appending to an existing file. Users of Env (including
+ // the leveldb implementation) must be prepared to deal with
+ // an Env that does not support appending.
+ virtual Status NewAppendableFile(const std::string& fname,
+ WritableFile** result);
+
// Returns true iff the named file exists.
virtual bool FileExists(const std::string& fname) = 0;
@@ -289,6 +304,9 @@ class EnvWrapper : public Env {
Status NewWritableFile(const std::string& f, WritableFile** r) {
return target_->NewWritableFile(f, r);
}
+ Status NewAppendableFile(const std::string& f, WritableFile** r) {
+ return target_->NewAppendableFile(f, r);
+ }
bool FileExists(const std::string& f) { return target_->FileExists(f); }
Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
return target_->GetChildren(dir, r);
diff --git a/include/leveldb/iterator.h b/include/leveldb/iterator.h
index 76aced04bd..da631ed9d8 100644
--- a/include/leveldb/iterator.h
+++ b/include/leveldb/iterator.h
@@ -37,7 +37,7 @@ class Iterator {
// Valid() after this call iff the source is not empty.
virtual void SeekToLast() = 0;
- // Position at the first key in the source that at or past target
+ // Position at the first key in the source that is at or past target.
// The iterator is Valid() after this call iff the source contains
// an entry that comes at or past target.
virtual void Seek(const Slice& target) = 0;
diff --git a/include/leveldb/options.h b/include/leveldb/options.h
index 7c9b973454..83a1ef39a4 100644
--- a/include/leveldb/options.h
+++ b/include/leveldb/options.h
@@ -128,6 +128,12 @@ struct Options {
// efficiently detect that and will switch to uncompressed mode.
CompressionType compression;
+ // EXPERIMENTAL: If true, append to existing MANIFEST and log files
+ // when a database is opened. This can significantly speed up open.
+ //
+ // Default: currently false, but may become true later.
+ bool reuse_logs;
+
// If non-NULL, use the specified filter policy to reduce disk reads.
// Many applications will benefit from passing the result of
// NewBloomFilterPolicy() here.
diff --git a/include/leveldb/status.h b/include/leveldb/status.h
index 11dbd4b47e..d9575f9753 100644
--- a/include/leveldb/status.h
+++ b/include/leveldb/status.h
@@ -60,6 +60,12 @@ class Status {
// Returns true iff the status indicates an IOError.
bool IsIOError() const { return code() == kIOError; }
+ // Returns true iff the status indicates a NotSupportedError.
+ bool IsNotSupportedError() const { return code() == kNotSupported; }
+
+ // Returns true iff the status indicates an InvalidArgument.
+ bool IsInvalidArgument() const { return code() == kInvalidArgument; }
+
// Return a string representation of this status suitable for printing.
// Returns the string "OK" for success.
std::string ToString() const;
diff --git a/port/atomic_pointer.h b/port/atomic_pointer.h
index 9bf091f757..1c4c7aafc6 100644
--- a/port/atomic_pointer.h
+++ b/port/atomic_pointer.h
@@ -35,8 +35,12 @@
#define ARCH_CPU_X86_FAMILY 1
#elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1
+#elif defined(__aarch64__)
+#define ARCH_CPU_ARM64_FAMILY 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
#define ARCH_CPU_PPC_FAMILY 1
+#elif defined(__mips__)
+#define ARCH_CPU_MIPS_FAMILY 1
#endif
namespace leveldb {
@@ -92,6 +96,13 @@ inline void MemoryBarrier() {
}
#define LEVELDB_HAVE_MEMORY_BARRIER
+// ARM64
+#elif defined(ARCH_CPU_ARM64_FAMILY)
+inline void MemoryBarrier() {
+ asm volatile("dmb sy" : : : "memory");
+}
+#define LEVELDB_HAVE_MEMORY_BARRIER
+
// PPC
#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
@@ -101,6 +112,13 @@ inline void MemoryBarrier() {
}
#define LEVELDB_HAVE_MEMORY_BARRIER
+// MIPS
+#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(__GNUC__)
+inline void MemoryBarrier() {
+ __asm__ __volatile__("sync" : : : "memory");
+}
+#define LEVELDB_HAVE_MEMORY_BARRIER
+
#endif
// AtomicPointer built using platform-specific MemoryBarrier()
@@ -215,6 +233,7 @@ class AtomicPointer {
#undef LEVELDB_HAVE_MEMORY_BARRIER
#undef ARCH_CPU_X86_FAMILY
#undef ARCH_CPU_ARM_FAMILY
+#undef ARCH_CPU_ARM64_FAMILY
#undef ARCH_CPU_PPC_FAMILY
} // namespace port
diff --git a/port/port_posix.cc b/port/port_posix.cc
index 5ba127a5b9..30e8007ae3 100644
--- a/port/port_posix.cc
+++ b/port/port_posix.cc
@@ -7,7 +7,6 @@
#include <cstdlib>
#include <stdio.h>
#include <string.h>
-#include "util/logging.h"
namespace leveldb {
namespace port {
diff --git a/table/filter_block.cc b/table/filter_block.cc
index 203e15c8bc..4e78b954f8 100644
--- a/table/filter_block.cc
+++ b/table/filter_block.cc
@@ -68,7 +68,7 @@ void FilterBlockBuilder::GenerateFilter() {
// Generate filter for current set of keys and append to result_.
filter_offsets_.push_back(result_.size());
- policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
+ policy_->CreateFilter(&tmp_keys_[0], static_cast<int>(num_keys), &result_);
tmp_keys_.clear();
keys_.clear();
@@ -97,7 +97,7 @@ bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
if (index < num_) {
uint32_t start = DecodeFixed32(offset_ + index*4);
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
- if (start <= limit && limit <= (offset_ - data_)) {
+ if (start <= limit && limit <= static_cast<size_t>(offset_ - data_)) {
Slice filter = Slice(data_ + start, limit - start);
return policy_->KeyMayMatch(key, filter);
} else if (start == limit) {
diff --git a/table/format.cc b/table/format.cc
index aa63144c9e..24e4e02445 100644
--- a/table/format.cc
+++ b/table/format.cc
@@ -30,15 +30,14 @@ Status BlockHandle::DecodeFrom(Slice* input) {
}
void Footer::EncodeTo(std::string* dst) const {
-#ifndef NDEBUG
const size_t original_size = dst->size();
-#endif
metaindex_handle_.EncodeTo(dst);
index_handle_.EncodeTo(dst);
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
assert(dst->size() == original_size + kEncodedLength);
+ (void)original_size; // Disable unused variable warning.
}
Status Footer::DecodeFrom(Slice* input) {
diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h
index 9e16b3dbed..f410c3fabe 100644
--- a/table/iterator_wrapper.h
+++ b/table/iterator_wrapper.h
@@ -5,6 +5,9 @@
#ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
#define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
+#include "leveldb/iterator.h"
+#include "leveldb/slice.h"
+
namespace leveldb {
// A internal wrapper class with an interface similar to Iterator that
diff --git a/table/table.cc b/table/table.cc
index dff8a82590..decf8082cc 100644
--- a/table/table.cc
+++ b/table/table.cc
@@ -82,7 +82,7 @@ Status Table::Open(const Options& options,
*table = new Table(rep);
(*table)->ReadMeta(footer);
} else {
- if (index_block) delete index_block;
+ delete index_block;
}
return s;
diff --git a/table/table_test.cc b/table/table_test.cc
index c723bf84cf..abf6e246ff 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -853,12 +853,20 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
options.compression = kSnappyCompression;
c.Finish(options, &keys, &kvmap);
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000));
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000));
+ // Expected upper and lower bounds of space used by compressible strings.
+ static const int kSlop = 1000; // Compressor effectiveness varies.
+ const int expected = 2500; // 10000 * compression ratio (0.25)
+ const int min_z = expected - kSlop;
+ const int max_z = expected + kSlop;
+
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, kSlop));
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, kSlop));
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, kSlop));
+ // Have now emitted a large compressible string, so adjust expected offset.
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), min_z, max_z));
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), min_z, max_z));
+ // Have now emitted two large compressible strings, so adjust expected offset.
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 2 * min_z, 2 * max_z));
}
} // namespace leveldb
diff --git a/util/arena.cc b/util/arena.cc
index 9367f71492..74078213ee 100644
--- a/util/arena.cc
+++ b/util/arena.cc
@@ -9,8 +9,7 @@ namespace leveldb {
static const int kBlockSize = 4096;
-Arena::Arena() {
- blocks_memory_ = 0;
+Arena::Arena() : memory_usage_(0) {
alloc_ptr_ = NULL; // First allocation will allocate a block
alloc_bytes_remaining_ = 0;
}
@@ -60,8 +59,9 @@ char* Arena::AllocateAligned(size_t bytes) {
char* Arena::AllocateNewBlock(size_t block_bytes) {
char* result = new char[block_bytes];
- blocks_memory_ += block_bytes;
blocks_.push_back(result);
+ memory_usage_.NoBarrier_Store(
+ reinterpret_cast<void*>(MemoryUsage() + block_bytes + sizeof(char*)));
return result;
}
diff --git a/util/arena.h b/util/arena.h
index 73bbf1cb9b..48bab33741 100644
--- a/util/arena.h
+++ b/util/arena.h
@@ -9,6 +9,7 @@
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
+#include "port/port.h"
namespace leveldb {
@@ -24,10 +25,9 @@ class Arena {
char* AllocateAligned(size_t bytes);
// Returns an estimate of the total memory usage of data allocated
- // by the arena (including space allocated but not yet used for user
- // allocations).
+ // by the arena.
size_t MemoryUsage() const {
- return blocks_memory_ + blocks_.capacity() * sizeof(char*);
+ return reinterpret_cast<uintptr_t>(memory_usage_.NoBarrier_Load());
}
private:
@@ -41,8 +41,8 @@ class Arena {
// Array of new[] allocated memory blocks
std::vector<char*> blocks_;
- // Bytes of memory in blocks allocated so far
- size_t blocks_memory_;
+ // Total memory usage of the arena.
+ port::AtomicPointer memory_usage_;
// No copying allowed
Arena(const Arena&);
diff --git a/util/bloom.cc b/util/bloom.cc
index a27a2ace28..bf3e4ca6e9 100644
--- a/util/bloom.cc
+++ b/util/bloom.cc
@@ -47,7 +47,7 @@ class BloomFilterPolicy : public FilterPolicy {
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
char* array = &(*dst)[init_size];
- for (size_t i = 0; i < n; i++) {
+ for (int i = 0; i < n; i++) {
// Use double-hashing to generate a sequence of hash values.
// See analysis in [Kirsch,Mitzenmacher 2006].
uint32_t h = BloomHash(keys[i]);
diff --git a/util/bloom_test.cc b/util/bloom_test.cc
index 77fb1b3159..1b87a2be3f 100644
--- a/util/bloom_test.cc
+++ b/util/bloom_test.cc
@@ -46,7 +46,8 @@ class BloomTest {
key_slices.push_back(Slice(keys_[i]));
}
filter_.clear();
- policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
+ policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
+ &filter_);
keys_.clear();
if (kVerbose >= 2) DumpFilter();
}
diff --git a/util/cache.cc b/util/cache.cc
index 8b197bc02a..ce46886171 100644
--- a/util/cache.cc
+++ b/util/cache.cc
@@ -19,6 +19,23 @@ Cache::~Cache() {
namespace {
// LRU cache implementation
+//
+// Cache entries have an "in_cache" boolean indicating whether the cache has a
+// reference on the entry. The only ways that this can become false without the
+// entry being passed to its "deleter" are via Erase(), via Insert() when
+// an element with a duplicate key is inserted, or on destruction of the cache.
+//
+// The cache keeps two linked lists of items in the cache. All items in the
+// cache are in one list or the other, and never both. Items still referenced
+// by clients but erased from the cache are in neither list. The lists are:
+// - in-use: contains the items currently referenced by clients, in no
+// particular order. (This list is used for invariant checking. If we
+// removed the check, elements that would otherwise be on this list could be
+// left as disconnected singleton lists.)
+// - LRU: contains the items not currently referenced by clients, in LRU order
+// Elements are moved between these lists by the Ref() and Unref() methods,
+// when they detect an element in the cache acquiring or losing its only
+// external reference.
// An entry is a variable length heap-allocated structure. Entries
// are kept in a circular doubly linked list ordered by access time.
@@ -30,7 +47,8 @@ struct LRUHandle {
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length;
- uint32_t refs;
+ bool in_cache; // Whether entry is in the cache.
+ uint32_t refs; // References, including cache reference, if present.
uint32_t hash; // Hash of key(); used for fast sharding and comparisons
char key_data[1]; // Beginning of key
@@ -147,49 +165,77 @@ class LRUCache {
Cache::Handle* Lookup(const Slice& key, uint32_t hash);
void Release(Cache::Handle* handle);
void Erase(const Slice& key, uint32_t hash);
+ void Prune();
+ size_t TotalCharge() const {
+ MutexLock l(&mutex_);
+ return usage_;
+ }
private:
void LRU_Remove(LRUHandle* e);
- void LRU_Append(LRUHandle* e);
+ void LRU_Append(LRUHandle*list, LRUHandle* e);
+ void Ref(LRUHandle* e);
void Unref(LRUHandle* e);
+ bool FinishErase(LRUHandle* e);
// Initialized before use.
size_t capacity_;
// mutex_ protects the following state.
- port::Mutex mutex_;
+ mutable port::Mutex mutex_;
size_t usage_;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
+ // Entries have refs==1 and in_cache==true.
LRUHandle lru_;
+ // Dummy head of in-use list.
+ // Entries are in use by clients, and have refs >= 2 and in_cache==true.
+ LRUHandle in_use_;
+
HandleTable table_;
};
LRUCache::LRUCache()
: usage_(0) {
- // Make empty circular linked list
+ // Make empty circular linked lists.
lru_.next = &lru_;
lru_.prev = &lru_;
+ in_use_.next = &in_use_;
+ in_use_.prev = &in_use_;
}
LRUCache::~LRUCache() {
+ assert(in_use_.next == &in_use_); // Error if caller has an unreleased handle
for (LRUHandle* e = lru_.next; e != &lru_; ) {
LRUHandle* next = e->next;
- assert(e->refs == 1); // Error if caller has an unreleased handle
+ assert(e->in_cache);
+ e->in_cache = false;
+ assert(e->refs == 1); // Invariant of lru_ list.
Unref(e);
e = next;
}
}
+void LRUCache::Ref(LRUHandle* e) {
+ if (e->refs == 1 && e->in_cache) { // If on lru_ list, move to in_use_ list.
+ LRU_Remove(e);
+ LRU_Append(&in_use_, e);
+ }
+ e->refs++;
+}
+
void LRUCache::Unref(LRUHandle* e) {
assert(e->refs > 0);
e->refs--;
- if (e->refs <= 0) {
- usage_ -= e->charge;
+ if (e->refs == 0) { // Deallocate.
+ assert(!e->in_cache);
(*e->deleter)(e->key(), e->value);
free(e);
+ } else if (e->in_cache && e->refs == 1) { // No longer in use; move to lru_ list.
+ LRU_Remove(e);
+ LRU_Append(&lru_, e);
}
}
@@ -198,10 +244,10 @@ void LRUCache::LRU_Remove(LRUHandle* e) {
e->prev->next = e->next;
}
-void LRUCache::LRU_Append(LRUHandle* e) {
- // Make "e" newest entry by inserting just before lru_
- e->next = &lru_;
- e->prev = lru_.prev;
+void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
+ // Make "e" newest entry by inserting just before *list
+ e->next = list;
+ e->prev = list->prev;
e->prev->next = e;
e->next->prev = e;
}
@@ -210,9 +256,7 @@ Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
MutexLock l(&mutex_);
LRUHandle* e = table_.Lookup(key, hash);
if (e != NULL) {
- e->refs++;
- LRU_Remove(e);
- LRU_Append(e);
+ Ref(e);
}
return reinterpret_cast<Cache::Handle*>(e);
}
@@ -234,34 +278,58 @@ Cache::Handle* LRUCache::Insert(
e->charge = charge;
e->key_length = key.size();
e->hash = hash;
- e->refs = 2; // One from LRUCache, one for the returned handle
+ e->in_cache = false;
+ e->refs = 1; // for the returned handle.
memcpy(e->key_data, key.data(), key.size());
- LRU_Append(e);
- usage_ += charge;
- LRUHandle* old = table_.Insert(e);
- if (old != NULL) {
- LRU_Remove(old);
- Unref(old);
- }
+ if (capacity_ > 0) {
+ e->refs++; // for the cache's reference.
+ e->in_cache = true;
+ LRU_Append(&in_use_, e);
+ usage_ += charge;
+ FinishErase(table_.Insert(e));
+ } // else don't cache. (Tests use capacity_==0 to turn off caching.)
while (usage_ > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
- LRU_Remove(old);
- table_.Remove(old->key(), old->hash);
- Unref(old);
+ assert(old->refs == 1);
+ bool erased = FinishErase(table_.Remove(old->key(), old->hash));
+ if (!erased) { // to avoid unused variable when compiled NDEBUG
+ assert(erased);
+ }
}
return reinterpret_cast<Cache::Handle*>(e);
}
-void LRUCache::Erase(const Slice& key, uint32_t hash) {
- MutexLock l(&mutex_);
- LRUHandle* e = table_.Remove(key, hash);
+// If e != NULL, finish removing *e from the cache; it has already been removed
+// from the hash table. Return whether e != NULL. Requires mutex_ held.
+bool LRUCache::FinishErase(LRUHandle* e) {
if (e != NULL) {
+ assert(e->in_cache);
LRU_Remove(e);
+ e->in_cache = false;
+ usage_ -= e->charge;
Unref(e);
}
+ return e != NULL;
+}
+
+void LRUCache::Erase(const Slice& key, uint32_t hash) {
+ MutexLock l(&mutex_);
+ FinishErase(table_.Remove(key, hash));
+}
+
+void LRUCache::Prune() {
+ MutexLock l(&mutex_);
+ while (lru_.next != &lru_) {
+ LRUHandle* e = lru_.next;
+ assert(e->refs == 1);
+ bool erased = FinishErase(table_.Remove(e->key(), e->hash));
+ if (!erased) { // to avoid unused variable when compiled NDEBUG
+ assert(erased);
+ }
+ }
}
static const int kNumShardBits = 4;
@@ -314,6 +382,18 @@ class ShardedLRUCache : public Cache {
MutexLock l(&id_mutex_);
return ++(last_id_);
}
+ virtual void Prune() {
+ for (int s = 0; s < kNumShards; s++) {
+ shard_[s].Prune();
+ }
+ }
+ virtual size_t TotalCharge() const {
+ size_t total = 0;
+ for (int s = 0; s < kNumShards; s++) {
+ total += shard_[s].TotalCharge();
+ }
+ return total;
+ }
};
} // end anonymous namespace
diff --git a/util/cache_test.cc b/util/cache_test.cc
index 43716715a8..468f7a6425 100644
--- a/util/cache_test.cc
+++ b/util/cache_test.cc
@@ -59,6 +59,11 @@ class CacheTest {
&CacheTest::Deleter));
}
+ Cache::Handle* InsertAndReturnHandle(int key, int value, int charge = 1) {
+ return cache_->Insert(EncodeKey(key), EncodeValue(value), charge,
+ &CacheTest::Deleter);
+ }
+
void Erase(int key) {
cache_->Erase(EncodeKey(key));
}
@@ -135,8 +140,11 @@ TEST(CacheTest, EntriesArePinned) {
TEST(CacheTest, EvictionPolicy) {
Insert(100, 101);
Insert(200, 201);
+ Insert(300, 301);
+ Cache::Handle* h = cache_->Lookup(EncodeKey(300));
- // Frequently used entry must be kept around
+ // Frequently used entry must be kept around,
+ // as must things that are still in use.
for (int i = 0; i < kCacheSize + 100; i++) {
Insert(1000+i, 2000+i);
ASSERT_EQ(2000+i, Lookup(1000+i));
@@ -144,6 +152,25 @@ TEST(CacheTest, EvictionPolicy) {
}
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(-1, Lookup(200));
+ ASSERT_EQ(301, Lookup(300));
+ cache_->Release(h);
+}
+
+TEST(CacheTest, UseExceedsCacheSize) {
+ // Overfill the cache, keeping handles on all inserted entries.
+ std::vector<Cache::Handle*> h;
+ for (int i = 0; i < kCacheSize + 100; i++) {
+ h.push_back(InsertAndReturnHandle(1000+i, 2000+i));
+ }
+
+ // Check that all the entries can be found in the cache.
+ for (int i = 0; i < h.size(); i++) {
+ ASSERT_EQ(2000+i, Lookup(1000+i));
+ }
+
+ for (int i = 0; i < h.size(); i++) {
+ cache_->Release(h[i]);
+ }
}
TEST(CacheTest, HeavyEntries) {
@@ -179,6 +206,19 @@ TEST(CacheTest, NewId) {
ASSERT_NE(a, b);
}
+TEST(CacheTest, Prune) {
+ Insert(1, 100);
+ Insert(2, 200);
+
+ Cache::Handle* handle = cache_->Lookup(EncodeKey(1));
+ ASSERT_TRUE(handle);
+ cache_->Prune();
+ cache_->Release(handle);
+
+ ASSERT_EQ(100, Lookup(1));
+ ASSERT_EQ(-1, Lookup(2));
+}
+
} // namespace leveldb
int main(int argc, char** argv) {
diff --git a/util/env.cc b/util/env.cc
index c2600e964a..c58a0821ef 100644
--- a/util/env.cc
+++ b/util/env.cc
@@ -9,6 +9,10 @@ namespace leveldb {
Env::~Env() {
}
+Status Env::NewAppendableFile(const std::string& fname, WritableFile** result) {
+ return Status::NotSupported("NewAppendableFile", fname);
+}
+
SequentialFile::~SequentialFile() {
}
diff --git a/util/env_posix.cc b/util/env_posix.cc
index ba2667864a..e0fca52f46 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -351,6 +351,19 @@ class PosixEnv : public Env {
return s;
}
+ virtual Status NewAppendableFile(const std::string& fname,
+ WritableFile** result) {
+ Status s;
+ FILE* f = fopen(fname.c_str(), "a");
+ if (f == NULL) {
+ *result = NULL;
+ s = IOError(fname, errno);
+ } else {
+ *result = new PosixWritableFile(fname, f);
+ }
+ return s;
+ }
+
virtual bool FileExists(const std::string& fname) {
return access(fname.c_str(), F_OK) == 0;
}
diff --git a/util/env_win.cc b/util/env_win.cc
index e11a96b791..b074b7579e 100644
--- a/util/env_win.cc
+++ b/util/env_win.cc
@@ -106,7 +106,7 @@ private:
class Win32WritableFile : public WritableFile
{
public:
- Win32WritableFile(const std::string& fname);
+ Win32WritableFile(const std::string& fname, bool append);
~Win32WritableFile();
virtual Status Append(const Slice& data);
@@ -158,6 +158,8 @@ public:
RandomAccessFile** result);
virtual Status NewWritableFile(const std::string& fname,
WritableFile** result);
+ virtual Status NewAppendableFile(const std::string& fname,
+ WritableFile** result);
virtual bool FileExists(const std::string& fname);
@@ -423,17 +425,23 @@ void Win32RandomAccessFile::_CleanUp()
}
}
-Win32WritableFile::Win32WritableFile(const std::string& fname)
+Win32WritableFile::Win32WritableFile(const std::string& fname, bool append)
: filename_(fname)
{
std::wstring path;
ToWidePath(fname, path);
- DWORD Flag = PathFileExistsW(path.c_str()) ? OPEN_EXISTING : CREATE_ALWAYS;
+ // NewAppendableFile: append to an existing file, or create a new one
+ // if none exists - this is OPEN_ALWAYS behavior, with
+ // FILE_APPEND_DATA to avoid having to manually position the file
+ // pointer at the end of the file.
+ // NewWritableFile: create a new file, delete if it exists - this is
+ // CREATE_ALWAYS behavior. This file is used for writing only so
+ // use GENERIC_WRITE.
_hFile = CreateFileW(path.c_str(),
- GENERIC_READ | GENERIC_WRITE,
+ append ? FILE_APPEND_DATA : GENERIC_WRITE,
FILE_SHARE_READ|FILE_SHARE_DELETE|FILE_SHARE_WRITE,
NULL,
- Flag,
+ append ? OPEN_ALWAYS : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL,
NULL);
// CreateFileW returns INVALID_HANDLE_VALUE in case of error, always check isEnable() before use
@@ -823,7 +831,9 @@ Status Win32Env::NewLogger( const std::string& fname, Logger** result )
{
Status sRet;
std::string path = fname;
- Win32WritableFile* pMapFile = new Win32WritableFile(ModifyPath(path));
+ // Logs are opened with write semantics, not with append semantics
+ // (see PosixEnv::NewLogger)
+ Win32WritableFile* pMapFile = new Win32WritableFile(ModifyPath(path), false);
if(!pMapFile->isEnable()){
delete pMapFile;
*result = NULL;
@@ -837,7 +847,20 @@ Status Win32Env::NewWritableFile( const std::string& fname, WritableFile** resul
{
Status sRet;
std::string path = fname;
- Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path));
+ Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path), false);
+ if(!pFile->isEnable()){
+ *result = NULL;
+ sRet = Status::IOError(fname,Win32::GetLastErrSz());
+ }else
+ *result = pFile;
+ return sRet;
+}
+
+Status Win32Env::NewAppendableFile( const std::string& fname, WritableFile** result )
+{
+ Status sRet;
+ std::string path = fname;
+ Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path), true);
if(!pFile->isEnable()){
*result = NULL;
sRet = Status::IOError(fname,Win32::GetLastErrSz());
diff --git a/util/options.cc b/util/options.cc
index 76af5b9302..8b618fb1ae 100644
--- a/util/options.cc
+++ b/util/options.cc
@@ -22,8 +22,8 @@ Options::Options()
block_size(4096),
block_restart_interval(16),
compression(kSnappyCompression),
+ reuse_logs(false),
filter_policy(NULL) {
}
-
} // namespace leveldb
diff --git a/util/testutil.h b/util/testutil.h
index adad3fc1ea..d7e4583702 100644
--- a/util/testutil.h
+++ b/util/testutil.h
@@ -45,6 +45,16 @@ class ErrorEnv : public EnvWrapper {
}
return target()->NewWritableFile(fname, result);
}
+
+ virtual Status NewAppendableFile(const std::string& fname,
+ WritableFile** result) {
+ if (writable_file_error_) {
+ ++num_writable_file_errors_;
+ *result = NULL;
+ return Status::IOError(fname, "fake error");
+ }
+ return target()->NewAppendableFile(fname, result);
+ }
};
} // namespace test