Squashed 'src/leveldb/' changes from 20ca81f..a31c8aa

a31c8aa Add NewAppendableFile for win32 environment 1913d71 Merge upstream LevelDB 1.19 3080a45 Increase leveldb version to 1.19. fa6dc01 A zippy change broke test assumptions about the size of compressed output. Fix the tests by allowing more slop in zippy's behavior. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=123432472 06a191b fix problems in LevelDB's caching code a7bff69 Fix LevelDB build when asserts are enabled in release builds. (#367) ea992b4 Change std::uint64_t to uint64_t (#354) e84b5bd This CL fixes a bug encountered when reading records from leveldb files that have been split, as in a [] input task split. 3211343 Deleted redundant null ptr check prior to delete. 7306ef8 Merge pull request #348 from randomascii/master 6b18316 Fix signed/unsigned mismatch on VC++ builds adbe3eb Putting build artifacts in subdirectory. 2d0320a Merge pull request #329 from ralphtheninja/travis-badge dd1c3c3 add travis build badge 43fcf23 Merge pull request #328 from cmumford/master 9fcae61 Added a Travis CI build file. dac40d2 Merge pull request #284 from ideawu/master 8ec241a Merge pull request #317 from falvojr/patch-1 5d36bed Merge pull request #272 from vapier/master 4753c9b Added a contributors section to README.md e2446d0 Merge pull request #275 from paulirish/patch-1 706b7f8 Resolve race when getting approximate-memory-usage property 3c9ff3c Only compiling TrimSpace on linux. f8d205c Including atomic_pointer.h in port_posix 889de31 Let LevelDB use xcrun to determine Xcode.app path instead of using a hardcoded path. 528c2bc Add "approximate-memory-usage" property to leveldb::DB::GetProperty 359b6bc Add leveldb::Cache::Prune 50e77a8 Fix size_t/int comparison/conversion issues in leveldb. 5208e79 Added leveldb::Status::IsInvalidArgument() method. ce45404 Suppress error reporting after seeking but before a valid First or Full record is encountered. b9afa1f include <assert> -> <cassert> edf2939 Update README.md 65190ac Will not reuse manifest if reuse_logs options is false. ac1d69d LevelDB now attempts to reuse the preceding MANIFEST and log file when re-opened. 76bba13 fix indent 8fcceb2 log compaction output file's level along with number 0e0f074 documentation. improved link c85addc readme: improved documentation link ceff6f1 Fix Android/MIPS build. 77948e7 Add benchmark that measures cost of repeatedly opening the database. 34ad72e Move header guard below copyright banner. a75d435 Clean up layering of storage/leveldb/... b234f65 Added a new fault injection test. c4c38f9 Add arm64 support to leveldb. cea9b10 Fixed incorrect comment wording for Iterator::Seek. c00c569 Deleted old README file. git-subtree-dir: src/leveldb git-subtree-split: a31c8aa408d5594830f7cb20ead1ef1dff51b79e
author: Pieter Wuille <pieter.wuille@gmail.com> 2016-12-01 16:14:45 -0800
committer: Pieter Wuille <pieter.wuille@gmail.com> 2016-12-01 16:14:45 -0800
commit: 634ad517037b319147816f1d112b066528e1724a (patch)
tree: 0088401632072effafed7c43d67a204467ffc460
parent: fb9857bfd68c13b52e14bc28dd981bc12501806a (diff)
52 files changed, 1942 insertions, 372 deletions
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000000..f5bd74c454
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,13 @@
+language: cpp
+compiler:
+- clang
+- gcc
+os:
+- linux
+- osx
+sudo: false
+before_install:
+- echo $LANG
+- echo $LC_ALL
+script:
+- make -j 4 check
diff --git a/Makefile b/Makefile
index 2bd2cadcdd..07a5a1ead6 100644
--- a/Makefile
+++ b/Makefile
@@ -20,208 +20,395 @@ $(shell CC="$(CC)" CXX="$(CXX)" TARGET_OS="$(TARGET_OS)" \
 # this file is generated by the previous line to set build flags and sources
 include build_config.mk
 
+TESTS = \
+	db/autocompact_test \
+	db/c_test \
+	db/corruption_test \
+	db/db_test \
+	db/dbformat_test \
+	db/fault_injection_test \
+	db/filename_test \
+	db/log_test \
+	db/recovery_test \
+	db/skiplist_test \
+	db/version_edit_test \
+	db/version_set_test \
+	db/write_batch_test \
+	helpers/memenv/memenv_test \
+	issues/issue178_test \
+	issues/issue200_test \
+	table/filter_block_test \
+	table/table_test \
+	util/arena_test \
+	util/bloom_test \
+	util/cache_test \
+	util/coding_test \
+	util/crc32c_test \
+	util/env_test \
+	util/hash_test
+
+UTILS = \
+	db/db_bench \
+	db/leveldbutil
+
+# Put the object files in a subdirectory, but the application at the top of the object dir.
+PROGNAMES := $(notdir $(TESTS) $(UTILS))
+
+# On Linux may need libkyotocabinet-dev for dependency.
+BENCHMARKS = \
+	doc/bench/db_bench_sqlite3 \
+	doc/bench/db_bench_tree_db
+
 CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
 CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT)
 
 LDFLAGS += $(PLATFORM_LDFLAGS)
 LIBS += $(PLATFORM_LIBS)
 
-LIBOBJECTS = $(SOURCES:.cc=.o)
-MEMENVOBJECTS = $(MEMENV_SOURCES:.cc=.o)
-
-TESTUTIL = ./util/testutil.o
-TESTHARNESS = ./util/testharness.o $(TESTUTIL)
+SIMULATOR_OUTDIR=out-ios-x86
+DEVICE_OUTDIR=out-ios-arm
 
-# Note: iOS should probably be using libtool, not ar.
 ifeq ($(PLATFORM), IOS)
+# Note: iOS should probably be using libtool, not ar.
 AR=xcrun ar
+SIMULATORSDK=$(shell xcrun -sdk iphonesimulator --show-sdk-path)
+DEVICESDK=$(shell xcrun -sdk iphoneos --show-sdk-path)
+DEVICE_CFLAGS = -isysroot "$(DEVICESDK)" -arch armv6 -arch armv7 -arch armv7s -arch arm64
+SIMULATOR_CFLAGS = -isysroot "$(SIMULATORSDK)" -arch i686 -arch x86_64
+STATIC_OUTDIR=out-ios-universal
+else
+STATIC_OUTDIR=out-static
+SHARED_OUTDIR=out-shared
+STATIC_PROGRAMS := $(addprefix $(STATIC_OUTDIR)/, $(PROGNAMES))
+SHARED_PROGRAMS := $(addprefix $(SHARED_OUTDIR)/, db_bench)
 endif
 
-TESTS = \
-	arena_test \
-	autocompact_test \
-	bloom_test \
-	c_test \
-	cache_test \
-	coding_test \
-	corruption_test \
-	crc32c_test \
-	db_test \
-	dbformat_test \
-	env_test \
-	filename_test \
-	filter_block_test \
-	hash_test \
-	issue178_test \
-	issue200_test \
-	log_test \
-	memenv_test \
-	skiplist_test \
-	table_test \
-	version_edit_test \
-	version_set_test \
-	write_batch_test
-
-PROGRAMS = db_bench leveldbutil $(TESTS)
-BENCHMARKS = db_bench_sqlite3 db_bench_tree_db
-
-LIBRARY = libleveldb.a
-MEMENVLIBRARY = libmemenv.a
+STATIC_LIBOBJECTS := $(addprefix $(STATIC_OUTDIR)/, $(SOURCES:.cc=.o))
+STATIC_MEMENVOBJECTS := $(addprefix $(STATIC_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+DEVICE_LIBOBJECTS := $(addprefix $(DEVICE_OUTDIR)/, $(SOURCES:.cc=.o))
+DEVICE_MEMENVOBJECTS := $(addprefix $(DEVICE_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+SIMULATOR_LIBOBJECTS := $(addprefix $(SIMULATOR_OUTDIR)/, $(SOURCES:.cc=.o))
+SIMULATOR_MEMENVOBJECTS := $(addprefix $(SIMULATOR_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+SHARED_LIBOBJECTS := $(addprefix $(SHARED_OUTDIR)/, $(SOURCES:.cc=.o))
+SHARED_MEMENVOBJECTS := $(addprefix $(SHARED_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
+
+TESTUTIL := $(STATIC_OUTDIR)/util/testutil.o
+TESTHARNESS := $(STATIC_OUTDIR)/util/testharness.o $(TESTUTIL)
+
+STATIC_TESTOBJS := $(addprefix $(STATIC_OUTDIR)/, $(addsuffix .o, $(TESTS)))
+STATIC_UTILOBJS := $(addprefix $(STATIC_OUTDIR)/, $(addsuffix .o, $(UTILS)))
+STATIC_ALLOBJS := $(STATIC_LIBOBJECTS) $(STATIC_MEMENVOBJECTS) $(STATIC_TESTOBJS) $(STATIC_UTILOBJS) $(TESTHARNESS)
+DEVICE_ALLOBJS := $(DEVICE_LIBOBJECTS) $(DEVICE_MEMENVOBJECTS)
+SIMULATOR_ALLOBJS := $(SIMULATOR_LIBOBJECTS) $(SIMULATOR_MEMENVOBJECTS)
 
 default: all
 
 # Should we build shared libraries?
 ifneq ($(PLATFORM_SHARED_EXT),)
 
+# Many leveldb test apps use non-exported API's. Only build a subset for testing.
+SHARED_ALLOBJS := $(SHARED_LIBOBJECTS) $(SHARED_MEMENVOBJECTS) $(TESTHARNESS)
+
 ifneq ($(PLATFORM_SHARED_VERSIONED),true)
-SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
-SHARED2 = $(SHARED1)
-SHARED3 = $(SHARED1)
-SHARED = $(SHARED1)
+SHARED_LIB1 = libleveldb.$(PLATFORM_SHARED_EXT)
+SHARED_LIB2 = $(SHARED_LIB1)
+SHARED_LIB3 = $(SHARED_LIB1)
+SHARED_LIBS = $(SHARED_LIB1)
+SHARED_MEMENVLIB = $(SHARED_OUTDIR)/libmemenv.a
 else
 # Update db.h if you change these.
-SHARED_MAJOR = 1
-SHARED_MINOR = 18
-SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
-SHARED2 = $(SHARED1).$(SHARED_MAJOR)
-SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
-SHARED = $(SHARED1) $(SHARED2) $(SHARED3)
-$(SHARED1): $(SHARED3)
-	ln -fs $(SHARED3) $(SHARED1)
-$(SHARED2): $(SHARED3)
-	ln -fs $(SHARED3) $(SHARED2)
+SHARED_VERSION_MAJOR = 1
+SHARED_VERSION_MINOR = 19
+SHARED_LIB1 = libleveldb.$(PLATFORM_SHARED_EXT)
+SHARED_LIB2 = $(SHARED_LIB1).$(SHARED_VERSION_MAJOR)
+SHARED_LIB3 = $(SHARED_LIB1).$(SHARED_VERSION_MAJOR).$(SHARED_VERSION_MINOR)
+SHARED_LIBS = $(SHARED_OUTDIR)/$(SHARED_LIB1) $(SHARED_OUTDIR)/$(SHARED_LIB2) $(SHARED_OUTDIR)/$(SHARED_LIB3)
+$(SHARED_OUTDIR)/$(SHARED_LIB1): $(SHARED_OUTDIR)/$(SHARED_LIB3)
+	ln -fs $(SHARED_LIB3) $(SHARED_OUTDIR)/$(SHARED_LIB1)
+$(SHARED_OUTDIR)/$(SHARED_LIB2): $(SHARED_OUTDIR)/$(SHARED_LIB3)
+	ln -fs $(SHARED_LIB3) $(SHARED_OUTDIR)/$(SHARED_LIB2)
+SHARED_MEMENVLIB = $(SHARED_OUTDIR)/libmemenv.a
 endif
 
-$(SHARED3):
-	$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) $(LIBS)
+$(SHARED_OUTDIR)/$(SHARED_LIB3): $(SHARED_LIBOBJECTS)
+	$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED_LIB2) $(SHARED_LIBOBJECTS) -o $(SHARED_OUTDIR)/$(SHARED_LIB3) $(LIBS)
 
 endif  # PLATFORM_SHARED_EXT
 
-all: $(SHARED) $(LIBRARY)
+all: $(SHARED_LIBS) $(SHARED_PROGRAMS) $(STATIC_OUTDIR)/libleveldb.a $(STATIC_OUTDIR)/libmemenv.a $(STATIC_PROGRAMS)
 
-check: all $(PROGRAMS) $(TESTS)
-	for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done
+check: $(STATIC_PROGRAMS)
+	for t in $(notdir $(TESTS)); do echo "***** Running $$t"; $(STATIC_OUTDIR)/$$t || exit 1; done
 
 clean:
-	-rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk
-	-rm -rf ios-x86/* ios-arm/*
+	-rm -rf out-static out-shared out-ios-x86 out-ios-arm out-ios-universal
+	-rm -f build_config.mk
+	-rm -rf ios-x86 ios-arm
 
-$(LIBRARY): $(LIBOBJECTS)
-	rm -f $@
-	$(AR) -rs $@ $(LIBOBJECTS)
+$(STATIC_OUTDIR):
+	mkdir $@
 
-db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
+$(STATIC_OUTDIR)/db: | $(STATIC_OUTDIR)
+	mkdir $@
 
-db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) $(LDFLAGS) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
+$(STATIC_OUTDIR)/helpers/memenv: | $(STATIC_OUTDIR)
+	mkdir -p $@
 
-db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) $(LDFLAGS) doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
+$(STATIC_OUTDIR)/port: | $(STATIC_OUTDIR)
+	mkdir $@
 
-leveldbutil: db/leveldb_main.o $(LIBOBJECTS)
-	$(CXX) $(LDFLAGS) db/leveldb_main.o $(LIBOBJECTS) -o $@ $(LIBS)
+$(STATIC_OUTDIR)/table: | $(STATIC_OUTDIR)
+	mkdir $@
 
-arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(STATIC_OUTDIR)/util: | $(STATIC_OUTDIR)
+	mkdir $@
 
-autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+.PHONY: STATIC_OBJDIRS
+STATIC_OBJDIRS: \
+	$(STATIC_OUTDIR)/db \
+	$(STATIC_OUTDIR)/port \
+	$(STATIC_OUTDIR)/table \
+	$(STATIC_OUTDIR)/util \
+	$(STATIC_OUTDIR)/helpers/memenv
 
-bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR):
+	mkdir $@
 
-c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/db: | $(SHARED_OUTDIR)
+	mkdir $@
 
-cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/helpers/memenv: | $(SHARED_OUTDIR)
+	mkdir -p $@
 
-coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/port: | $(SHARED_OUTDIR)
+	mkdir $@
 
-corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/table: | $(SHARED_OUTDIR)
+	mkdir $@
 
-crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SHARED_OUTDIR)/util: | $(SHARED_OUTDIR)
+	mkdir $@
 
-db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+.PHONY: SHARED_OBJDIRS
+SHARED_OBJDIRS: \
+	$(SHARED_OUTDIR)/db \
+	$(SHARED_OUTDIR)/port \
+	$(SHARED_OUTDIR)/table \
+	$(SHARED_OUTDIR)/util \
+	$(SHARED_OUTDIR)/helpers/memenv
 
-dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR):
+	mkdir $@
 
-env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/db: | $(DEVICE_OUTDIR)
+	mkdir $@
 
-filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/helpers/memenv: | $(DEVICE_OUTDIR)
+	mkdir -p $@
 
-filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/port: | $(DEVICE_OUTDIR)
+	mkdir $@
 
-hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/table: | $(DEVICE_OUTDIR)
+	mkdir $@
 
-issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(DEVICE_OUTDIR)/util: | $(DEVICE_OUTDIR)
+	mkdir $@
 
-issue200_test: issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+.PHONY: DEVICE_OBJDIRS
+DEVICE_OBJDIRS: \
+	$(DEVICE_OUTDIR)/db \
+	$(DEVICE_OUTDIR)/port \
+	$(DEVICE_OUTDIR)/table \
+	$(DEVICE_OUTDIR)/util \
+	$(DEVICE_OUTDIR)/helpers/memenv
 
-log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR):
+	mkdir $@
 
-table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/db: | $(SIMULATOR_OUTDIR)
+	mkdir $@
 
-skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/helpers/memenv: | $(SIMULATOR_OUTDIR)
+	mkdir -p $@
 
-version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/port: | $(SIMULATOR_OUTDIR)
+	mkdir $@
 
-version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/table: | $(SIMULATOR_OUTDIR)
+	mkdir $@
 
-write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+$(SIMULATOR_OUTDIR)/util: | $(SIMULATOR_OUTDIR)
+	mkdir $@
 
-$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
-	rm -f $@
-	$(AR) -rs $@ $(MEMENVOBJECTS)
+.PHONY: SIMULATOR_OBJDIRS
+SIMULATOR_OBJDIRS: \
+	$(SIMULATOR_OUTDIR)/db \
+	$(SIMULATOR_OUTDIR)/port \
+	$(SIMULATOR_OUTDIR)/table \
+	$(SIMULATOR_OUTDIR)/util \
+	$(SIMULATOR_OUTDIR)/helpers/memenv
 
-memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS)
-	$(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LIBS)
+$(STATIC_ALLOBJS): | STATIC_OBJDIRS
+$(DEVICE_ALLOBJS): | DEVICE_OBJDIRS
+$(SIMULATOR_ALLOBJS): | SIMULATOR_OBJDIRS
+$(SHARED_ALLOBJS): | SHARED_OBJDIRS
 
 ifeq ($(PLATFORM), IOS)
-# For iOS, create universal object files to be used on both the simulator and
+$(DEVICE_OUTDIR)/libleveldb.a: $(DEVICE_LIBOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(DEVICE_LIBOBJECTS)
+
+$(SIMULATOR_OUTDIR)/libleveldb.a: $(SIMULATOR_LIBOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(SIMULATOR_LIBOBJECTS)
+
+$(DEVICE_OUTDIR)/libmemenv.a: $(DEVICE_MEMENVOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(DEVICE_MEMENVOBJECTS)
+
+$(SIMULATOR_OUTDIR)/libmemenv.a: $(SIMULATOR_MEMENVOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(SIMULATOR_MEMENVOBJECTS)
+
+# For iOS, create universal object libraries to be used on both the simulator and
 # a device.
-PLATFORMSROOT=/Applications/Xcode.app/Contents/Developer/Platforms
-SIMULATORROOT=$(PLATFORMSROOT)/iPhoneSimulator.platform/Developer
-DEVICEROOT=$(PLATFORMSROOT)/iPhoneOS.platform/Developer
-IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBundleShortVersionString)
-IOSARCH=-arch armv6 -arch armv7 -arch armv7s -arch arm64
-
-.cc.o:
-	mkdir -p ios-x86/$(dir $@)
-	xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
-	mkdir -p ios-arm/$(dir $@)
-	xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
-	xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
-
-.c.o:
-	mkdir -p ios-x86/$(dir $@)
-	xcrun -sdk iphonesimulator $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
-	mkdir -p ios-arm/$(dir $@)
-	xcrun -sdk iphoneos $(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
-	xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
+$(STATIC_OUTDIR)/libleveldb.a: $(STATIC_OUTDIR) $(DEVICE_OUTDIR)/libleveldb.a $(SIMULATOR_OUTDIR)/libleveldb.a
+	lipo -create $(DEVICE_OUTDIR)/libleveldb.a $(SIMULATOR_OUTDIR)/libleveldb.a -output $@
 
+$(STATIC_OUTDIR)/libmemenv.a: $(STATIC_OUTDIR) $(DEVICE_OUTDIR)/libmemenv.a $(SIMULATOR_OUTDIR)/libmemenv.a
+	lipo -create $(DEVICE_OUTDIR)/libmemenv.a $(SIMULATOR_OUTDIR)/libmemenv.a -output $@
 else
-.cc.o:
+$(STATIC_OUTDIR)/libleveldb.a:$(STATIC_LIBOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(STATIC_LIBOBJECTS)
+
+$(STATIC_OUTDIR)/libmemenv.a:$(STATIC_MEMENVOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(STATIC_MEMENVOBJECTS)
+endif
+
+$(SHARED_MEMENVLIB):$(SHARED_MEMENVOBJECTS)
+	rm -f $@
+	$(AR) -rs $@ $(SHARED_MEMENVOBJECTS)
+
+$(STATIC_OUTDIR)/db_bench:db/db_bench.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/db_bench.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/db_bench_sqlite3:doc/bench/db_bench_sqlite3.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) doc/bench/db_bench_sqlite3.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
+
+$(STATIC_OUTDIR)/db_bench_tree_db:doc/bench/db_bench_tree_db.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) doc/bench/db_bench_tree_db.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
+
+$(STATIC_OUTDIR)/leveldbutil:db/leveldbutil.cc $(STATIC_LIBOBJECTS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/leveldbutil.cc $(STATIC_LIBOBJECTS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/arena_test:util/arena_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/arena_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/autocompact_test:db/autocompact_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/autocompact_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/bloom_test:util/bloom_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/bloom_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/c_test:$(STATIC_OUTDIR)/db/c_test.o $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(STATIC_OUTDIR)/db/c_test.o $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/cache_test:util/cache_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/cache_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/coding_test:util/coding_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/coding_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/corruption_test:db/corruption_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/corruption_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/crc32c_test:util/crc32c_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/crc32c_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/db_test:db/db_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/db_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/dbformat_test:db/dbformat_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/dbformat_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/env_test:util/env_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/env_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/fault_injection_test:db/fault_injection_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/fault_injection_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/filename_test:db/filename_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/filename_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/filter_block_test:table/filter_block_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) table/filter_block_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/hash_test:util/hash_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) util/hash_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/issue178_test:issues/issue178_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) issues/issue178_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/issue200_test:issues/issue200_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) issues/issue200_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/log_test:db/log_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/log_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/recovery_test:db/recovery_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/recovery_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/table_test:table/table_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) table/table_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/skiplist_test:db/skiplist_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/skiplist_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/version_edit_test:db/version_edit_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/version_edit_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/version_set_test:db/version_set_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/version_set_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/write_batch_test:db/write_batch_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) $(CXXFLAGS) db/write_batch_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
+$(STATIC_OUTDIR)/memenv_test:$(STATIC_OUTDIR)/helpers/memenv/memenv_test.o $(STATIC_OUTDIR)/libmemenv.a $(STATIC_OUTDIR)/libleveldb.a $(TESTHARNESS)
+	$(XCRUN) $(CXX) $(LDFLAGS) $(STATIC_OUTDIR)/helpers/memenv/memenv_test.o $(STATIC_OUTDIR)/libmemenv.a $(STATIC_OUTDIR)/libleveldb.a $(TESTHARNESS) -o $@ $(LIBS)
+
+$(SHARED_OUTDIR)/db_bench:$(SHARED_OUTDIR)/db/db_bench.o $(SHARED_LIBS) $(TESTUTIL)
+	$(XCRUN) $(CXX) $(LDFLAGS) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SHARED_OUTDIR)/db/db_bench.o $(TESTUTIL) $(SHARED_OUTDIR)/$(SHARED_LIB3) -o $@ $(LIBS)
+
+.PHONY: run-shared
+run-shared: $(SHARED_OUTDIR)/db_bench
+	LD_LIBRARY_PATH=$(SHARED_OUTDIR) $(SHARED_OUTDIR)/db_bench
+
+$(SIMULATOR_OUTDIR)/%.o: %.cc
+	xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) $(SIMULATOR_CFLAGS) -c $< -o $@
+
+$(DEVICE_OUTDIR)/%.o: %.cc
+	xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) $(DEVICE_CFLAGS) -c $< -o $@
+
+$(SIMULATOR_OUTDIR)/%.o: %.c
+	xcrun -sdk iphonesimulator $(CC) $(CFLAGS) $(SIMULATOR_CFLAGS) -c $< -o $@
+
+$(DEVICE_OUTDIR)/%.o: %.c
+	xcrun -sdk iphoneos $(CC) $(CFLAGS) $(DEVICE_CFLAGS) -c $< -o $@
+
+$(STATIC_OUTDIR)/%.o: %.cc
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-.c.o:
+$(STATIC_OUTDIR)/%.o: %.c
 	$(CC) $(CFLAGS) -c $< -o $@
-endif
+
+$(SHARED_OUTDIR)/%.o: %.cc
+	$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@
+
+$(SHARED_OUTDIR)/%.o: %.c
+	$(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@
diff --git a/README b/README
deleted file mode 100644
index 3618adeeed..0000000000
--- a/README
+++ /dev/null
@@ -1,51 +0,0 @@
-leveldb: A key-value store
-Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
-
-The code under this directory implements a system for maintaining a
-persistent key/value store.
-
-See doc/index.html for more explanation.
-See doc/impl.html for a brief overview of the implementation.
-
-The public interface is in include/*.h.  Callers should not include or
-rely on the details of any other header files in this package.  Those
-internal APIs may be changed without warning.
-
-Guide to header files:
-
-include/db.h
-    Main interface to the DB: Start here
-
-include/options.h
-    Control over the behavior of an entire database, and also
-    control over the behavior of individual reads and writes.
-
-include/comparator.h
-    Abstraction for user-specified comparison function.  If you want
-    just bytewise comparison of keys, you can use the default comparator,
-    but clients can write their own comparator implementations if they
-    want custom ordering (e.g. to handle different character
-    encodings, etc.)
-
-include/iterator.h
-    Interface for iterating over data. You can get an iterator
-    from a DB object.
-
-include/write_batch.h
-    Interface for atomically applying multiple updates to a database.
-
-include/slice.h
-    A simple module for maintaining a pointer and a length into some
-    other byte array.
-
-include/status.h
-    Status is returned from many of the public interfaces and is used
-    to report success and various kinds of errors.
-
-include/env.h
-    Abstraction of the OS environment.  A posix implementation of
-    this interface is in util/env_posix.cc
-
-include/table.h
-include/table_builder.h
-    Lower-level modules that most clients probably won't use directly
diff --git a/README.md b/README.md
index 480affb5ca..c75b185e0e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 **LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.**
 
+[![Build Status](https://travis-ci.org/google/leveldb.svg?branch=master)](https://travis-ci.org/google/leveldb)
+
 Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
 
 # Features
@@ -10,9 +12,11 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
   * Multiple changes can be made in one atomic batch.
   * Users can create a transient snapshot to get a consistent view of data.
   * Forward and backward iteration is supported over the data.
-  * Data is automatically compressed using the [Snappy compression library](http://code.google.com/p/snappy).
+  * Data is automatically compressed using the [Snappy compression library](http://google.github.io/snappy/).
   * External activity (file system operations etc.) is relayed through a virtual interface so users can customize the operating system interactions.
-  * [Detailed documentation](http://htmlpreview.github.io/?https://github.com/google/leveldb/blob/master/doc/index.html) about how to use the library is included with the source code.
+
+# Documentation
+  [LevelDB library documentation](https://rawgit.com/google/leveldb/master/doc/index.html) is online and bundled with the source code.
 
 
 # Limitations
@@ -20,6 +24,37 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
   * Only a single process (possibly multi-threaded) can access a particular database at a time.
   * There is no client-server support builtin to the library.  An application that needs such support will have to wrap their own server around the library.
 
+# Contributing to the leveldb Project
+The leveldb project welcomes contributions. leveldb's primary goal is to be
+a reliable and fast key/value store. Changes that are in line with the
+features/limitations outlined above, and meet the requirements below,
+will be considered.
+
+Contribution requirements:
+
+1. **POSIX only**. We _generally_ will only accept changes that are both
+   compiled, and tested on a POSIX platform - usually Linux. Very small
+   changes will sometimes be accepted, but consider that more of an
+   exception than the rule.
+
+2. **Stable API**. We strive very hard to maintain a stable API. Changes that
+   require changes for projects using leveldb _might_ be rejected without
+   sufficient benefit to the project.
+
+3. **Tests**: All changes must be accompanied by a new (or changed) test, or
+   a sufficient explanation as to why a new (or changed) test is not required.
+
+## Submitting a Pull Request
+Before any pull request will be accepted the author must first sign a
+Contributor License Agreement (CLA) at https://cla.developers.google.com/.
+
+In order to keep the commit timeline linear
+[squash](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Squashing-Commits)
+your changes down to a single commit and [rebase](https://git-scm.com/docs/git-rebase)
+on google/leveldb/master. This keeps the commit timeline linear and more easily sync'ed
+with the internal repository at Google. More information at GitHub's
+[About Git rebase](https://help.github.com/articles/about-git-rebase/) page.
+
 # Performance
 
 Here is a performance report (with explanations) from the run of the
diff --git a/build_detect_platform b/build_detect_platform
index a1101c1bda..d7edab1d87 100755
--- a/build_detect_platform
+++ b/build_detect_platform
@@ -175,7 +175,7 @@ DIRS="$PREFIX/db $PREFIX/util $PREFIX/table"
 set -f # temporarily disable globbing so that our patterns aren't expanded
 PRUNE_TEST="-name *test*.cc -prune"
 PRUNE_BENCH="-name *_bench.cc -prune"
-PRUNE_TOOL="-name leveldb_main.cc -prune"
+PRUNE_TOOL="-name leveldbutil.cc -prune"
 PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o $PRUNE_TOOL -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "`
 
 set +f # re-enable globbing
diff --git a/db/corruption_test.cc b/db/corruption_test.cc
index 96afc68913..37a484d25f 100644
--- a/db/corruption_test.cc
+++ b/db/corruption_test.cc
@@ -36,7 +36,7 @@ class CorruptionTest {
     tiny_cache_ = NewLRUCache(100);
     options_.env = &env_;
     options_.block_cache = tiny_cache_;
-    dbname_ = test::TmpDir() + "/db_test";
+    dbname_ = test::TmpDir() + "/corruption_test";
     DestroyDB(dbname_, options_);
 
     db_ = NULL;
diff --git a/db/db_bench.cc b/db/db_bench.cc
index 705a170aae..7a0f5e08cd 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -33,6 +33,7 @@
 //      readmissing   -- read N missing keys in random order
 //      readhot       -- read N times in random order from 1% section of DB
 //      seekrandom    -- N random seeks
+//      open          -- cost of opening a DB
 //      crc32c        -- repeated crc32c of 4K of data
 //      acquireload   -- load N*1000 times
 //   Meta operations:
@@ -99,6 +100,9 @@ static int FLAGS_bloom_bits = -1;
 // benchmark will fail.
 static bool FLAGS_use_existing_db = false;
 
+// If true, reuse existing log/MANIFEST files when re-opening a database.
+static bool FLAGS_reuse_logs = false;
+
 // Use the db with the following name.
 static const char* FLAGS_db = NULL;
 
@@ -138,6 +142,7 @@ class RandomGenerator {
   }
 };
 
+#if defined(__linux)
 static Slice TrimSpace(Slice s) {
   size_t start = 0;
   while (start < s.size() && isspace(s[start])) {
@@ -149,6 +154,7 @@ static Slice TrimSpace(Slice s) {
   }
   return Slice(s.data() + start, limit - start);
 }
+#endif
 
 static void AppendWithSpace(std::string* str, Slice msg) {
   if (msg.empty()) return;
@@ -442,7 +448,11 @@ class Benchmark {
       bool fresh_db = false;
       int num_threads = FLAGS_threads;
 
-      if (name == Slice("fillseq")) {
+      if (name == Slice("open")) {
+        method = &Benchmark::OpenBench;
+        num_ /= 10000;
+        if (num_ < 1) num_ = 1;
+      } else if (name == Slice("fillseq")) {
         fresh_db = true;
         method = &Benchmark::WriteSeq;
       } else if (name == Slice("fillbatch")) {
@@ -695,6 +705,7 @@ class Benchmark {
     options.write_buffer_size = FLAGS_write_buffer_size;
     options.max_open_files = FLAGS_open_files;
     options.filter_policy = filter_policy_;
+    options.reuse_logs = FLAGS_reuse_logs;
     Status s = DB::Open(options, FLAGS_db, &db_);
     if (!s.ok()) {
       fprintf(stderr, "open error: %s\n", s.ToString().c_str());
@@ -702,6 +713,14 @@ class Benchmark {
     }
   }
 
+  void OpenBench(ThreadState* thread) {
+    for (int i = 0; i < num_; i++) {
+      delete db_;
+      Open();
+      thread->stats.FinishedSingleOp();
+    }
+  }
+
   void WriteSeq(ThreadState* thread) {
     DoWrite(thread, true);
   }
@@ -941,6 +960,9 @@ int main(int argc, char** argv) {
     } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
                (n == 0 || n == 1)) {
       FLAGS_use_existing_db = n;
+    } else if (sscanf(argv[i], "--reuse_logs=%d%c", &n, &junk) == 1 &&
+               (n == 0 || n == 1)) {
+      FLAGS_reuse_logs = n;
     } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
       FLAGS_num = n;
     } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 49b95953b4..60f4e66e55 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -125,7 +125,7 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
       db_lock_(NULL),
       shutting_down_(NULL),
       bg_cv_(&mutex_),
-      mem_(new MemTable(internal_comparator_)),
+      mem_(NULL),
       imm_(NULL),
       logfile_(NULL),
       logfile_number_(0),
@@ -134,7 +134,6 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
       tmp_batch_(new WriteBatch),
       bg_compaction_scheduled_(false),
       manual_compaction_(NULL) {
-  mem_->Ref();
   has_imm_.Release_Store(NULL);
 
   // Reserve ten files or so for other uses and give the rest to TableCache.
@@ -271,7 +270,7 @@ void DBImpl::DeleteObsoleteFiles() {
   }
 }
 
-Status DBImpl::Recover(VersionEdit* edit) {
+Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) {
   mutex_.AssertHeld();
 
   // Ignore error from CreateDir since the creation of the DB is
@@ -301,66 +300,69 @@ Status DBImpl::Recover(VersionEdit* edit) {
     }
   }
 
-  s = versions_->Recover();
-  if (s.ok()) {
-    SequenceNumber max_sequence(0);
-
-    // Recover from all newer log files than the ones named in the
-    // descriptor (new log files may have been added by the previous
-    // incarnation without registering them in the descriptor).
-    //
-    // Note that PrevLogNumber() is no longer used, but we pay
-    // attention to it in case we are recovering a database
-    // produced by an older version of leveldb.
-    const uint64_t min_log = versions_->LogNumber();
-    const uint64_t prev_log = versions_->PrevLogNumber();
-    std::vector<std::string> filenames;
-    s = env_->GetChildren(dbname_, &filenames);
+  s = versions_->Recover(save_manifest);
+  if (!s.ok()) {
+    return s;
+  }
+  SequenceNumber max_sequence(0);
+
+  // Recover from all newer log files than the ones named in the
+  // descriptor (new log files may have been added by the previous
+  // incarnation without registering them in the descriptor).
+  //
+  // Note that PrevLogNumber() is no longer used, but we pay
+  // attention to it in case we are recovering a database
+  // produced by an older version of leveldb.
+  const uint64_t min_log = versions_->LogNumber();
+  const uint64_t prev_log = versions_->PrevLogNumber();
+  std::vector<std::string> filenames;
+  s = env_->GetChildren(dbname_, &filenames);
+  if (!s.ok()) {
+    return s;
+  }
+  std::set<uint64_t> expected;
+  versions_->AddLiveFiles(&expected);
+  uint64_t number;
+  FileType type;
+  std::vector<uint64_t> logs;
+  for (size_t i = 0; i < filenames.size(); i++) {
+    if (ParseFileName(filenames[i], &number, &type)) {
+      expected.erase(number);
+      if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
+        logs.push_back(number);
+    }
+  }
+  if (!expected.empty()) {
+    char buf[50];
+    snprintf(buf, sizeof(buf), "%d missing files; e.g.",
+             static_cast<int>(expected.size()));
+    return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
+  }
+
+  // Recover in the order in which the logs were generated
+  std::sort(logs.begin(), logs.end());
+  for (size_t i = 0; i < logs.size(); i++) {
+    s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit,
+                       &max_sequence);
     if (!s.ok()) {
       return s;
     }
-    std::set<uint64_t> expected;
-    versions_->AddLiveFiles(&expected);
-    uint64_t number;
-    FileType type;
-    std::vector<uint64_t> logs;
-    for (size_t i = 0; i < filenames.size(); i++) {
-      if (ParseFileName(filenames[i], &number, &type)) {
-        expected.erase(number);
-        if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
-          logs.push_back(number);
-      }
-    }
-    if (!expected.empty()) {
-      char buf[50];
-      snprintf(buf, sizeof(buf), "%d missing files; e.g.",
-               static_cast<int>(expected.size()));
-      return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
-    }
-
-    // Recover in the order in which the logs were generated
-    std::sort(logs.begin(), logs.end());
-    for (size_t i = 0; i < logs.size(); i++) {
-      s = RecoverLogFile(logs[i], edit, &max_sequence);
 
-      // The previous incarnation may not have written any MANIFEST
-      // records after allocating this log number.  So we manually
-      // update the file number allocation counter in VersionSet.
-      versions_->MarkFileNumberUsed(logs[i]);
-    }
+    // The previous incarnation may not have written any MANIFEST
+    // records after allocating this log number.  So we manually
+    // update the file number allocation counter in VersionSet.
+    versions_->MarkFileNumberUsed(logs[i]);
+  }
 
-    if (s.ok()) {
-      if (versions_->LastSequence() < max_sequence) {
-        versions_->SetLastSequence(max_sequence);
-      }
-    }
+  if (versions_->LastSequence() < max_sequence) {
+    versions_->SetLastSequence(max_sequence);
   }
 
-  return s;
+  return Status::OK();
 }
 
-Status DBImpl::RecoverLogFile(uint64_t log_number,
-                              VersionEdit* edit,
+Status DBImpl::RecoverLogFile(uint64_t log_number, bool last_log,
+                              bool* save_manifest, VersionEdit* edit,
                               SequenceNumber* max_sequence) {
   struct LogReporter : public log::Reader::Reporter {
     Env* env;
@@ -405,6 +407,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
   std::string scratch;
   Slice record;
   WriteBatch batch;
+  int compactions = 0;
   MemTable* mem = NULL;
   while (reader.ReadRecord(&record, &scratch) &&
          status.ok()) {
@@ -432,25 +435,52 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
     }
 
     if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
+      compactions++;
+      *save_manifest = true;
       status = WriteLevel0Table(mem, edit, NULL);
+      mem->Unref();
+      mem = NULL;
       if (!status.ok()) {
         // Reflect errors immediately so that conditions like full
         // file-systems cause the DB::Open() to fail.
         break;
       }
-      mem->Unref();
-      mem = NULL;
     }
   }
 
-  if (status.ok() && mem != NULL) {
-    status = WriteLevel0Table(mem, edit, NULL);
-    // Reflect errors immediately so that conditions like full
-    // file-systems cause the DB::Open() to fail.
+  delete file;
+
+  // See if we should keep reusing the last log file.
+  if (status.ok() && options_.reuse_logs && last_log && compactions == 0) {
+    assert(logfile_ == NULL);
+    assert(log_ == NULL);
+    assert(mem_ == NULL);
+    uint64_t lfile_size;
+    if (env_->GetFileSize(fname, &lfile_size).ok() &&
+        env_->NewAppendableFile(fname, &logfile_).ok()) {
+      Log(options_.info_log, "Reusing old log %s \n", fname.c_str());
+      log_ = new log::Writer(logfile_, lfile_size);
+      logfile_number_ = log_number;
+      if (mem != NULL) {
+        mem_ = mem;
+        mem = NULL;
+      } else {
+        // mem can be NULL if lognum exists but was empty.
+        mem_ = new MemTable(internal_comparator_);
+        mem_->Ref();
+      }
+    }
+  }
+
+  if (mem != NULL) {
+    // mem did not get reused; compact it.
+    if (status.ok()) {
+      *save_manifest = true;
+      status = WriteLevel0Table(mem, edit, NULL);
+    }
+    mem->Unref();
   }
 
-  if (mem != NULL) mem->Unref();
-  delete file;
   return status;
 }
 
@@ -821,8 +851,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
     delete iter;
     if (s.ok()) {
       Log(options_.info_log,
-          "Generated table #%llu: %lld keys, %lld bytes",
+          "Generated table #%llu@%d: %lld keys, %lld bytes",
           (unsigned long long) output_number,
+          compact->compaction->level(),
           (unsigned long long) current_entries,
           (unsigned long long) current_bytes);
     }
@@ -1395,6 +1426,19 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
   } else if (in == "sstables") {
     *value = versions_->current()->DebugString();
     return true;
+  } else if (in == "approximate-memory-usage") {
+    size_t total_usage = options_.block_cache->TotalCharge();
+    if (mem_) {
+      total_usage += mem_->ApproximateMemoryUsage();
+    }
+    if (imm_) {
+      total_usage += imm_->ApproximateMemoryUsage();
+    }
+    char buf[50];
+    snprintf(buf, sizeof(buf), "%llu",
+             static_cast<unsigned long long>(total_usage));
+    value->append(buf);
+    return true;
   }
 
   return false;
@@ -1449,8 +1493,11 @@ Status DB::Open(const Options& options, const std::string& dbname,
   DBImpl* impl = new DBImpl(options, dbname);
   impl->mutex_.Lock();
   VersionEdit edit;
-  Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists
-  if (s.ok()) {
+  // Recover handles create_if_missing, error_if_exists
+  bool save_manifest = false;
+  Status s = impl->Recover(&edit, &save_manifest);
+  if (s.ok() && impl->mem_ == NULL) {
+    // Create new log and a corresponding memtable.
     uint64_t new_log_number = impl->versions_->NewFileNumber();
     WritableFile* lfile;
     s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
@@ -1460,15 +1507,22 @@ Status DB::Open(const Options& options, const std::string& dbname,
       impl->logfile_ = lfile;
       impl->logfile_number_ = new_log_number;
       impl->log_ = new log::Writer(lfile);
-      s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
-    }
-    if (s.ok()) {
-      impl->DeleteObsoleteFiles();
-      impl->MaybeScheduleCompaction();
+      impl->mem_ = new MemTable(impl->internal_comparator_);
+      impl->mem_->Ref();
     }
   }
+  if (s.ok() && save_manifest) {
+    edit.SetPrevLogNumber(0);  // No older logs needed after recovery.
+    edit.SetLogNumber(impl->logfile_number_);
+    s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
+  }
+  if (s.ok()) {
+    impl->DeleteObsoleteFiles();
+    impl->MaybeScheduleCompaction();
+  }
   impl->mutex_.Unlock();
   if (s.ok()) {
+    assert(impl->mem_ != NULL);
     *dbptr = impl;
   } else {
     delete impl;
diff --git a/db/db_impl.h b/db/db_impl.h
index cfc998164a..8ff323e728 100644
--- a/db/db_impl.h
+++ b/db/db_impl.h
@@ -78,7 +78,8 @@ class DBImpl : public DB {
   // Recover the descriptor from persistent storage.  May do a significant
   // amount of work to recover recently logged updates.  Any changes to
   // be made to the descriptor are added to *edit.
-  Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  Status Recover(VersionEdit* edit, bool* save_manifest)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
   void MaybeIgnoreError(Status* s) const;
 
@@ -90,9 +91,8 @@ class DBImpl : public DB {
   // Errors are recorded in bg_error_.
   void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
-  Status RecoverLogFile(uint64_t log_number,
-                        VersionEdit* edit,
-                        SequenceNumber* max_sequence)
+  Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
+                        VersionEdit* edit, SequenceNumber* max_sequence)
       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
   Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
diff --git a/db/db_test.cc b/db/db_test.cc
index 0fed9137d5..a0b08bc19c 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -193,6 +193,7 @@ class DBTest {
   // Sequence of option configurations to try
   enum OptionConfig {
     kDefault,
+    kReuse,
     kFilter,
     kUncompressed,
     kEnd
@@ -237,7 +238,11 @@ class DBTest {
   // Return the current option configuration.
   Options CurrentOptions() {
     Options options;
+    options.reuse_logs = false;
     switch (option_config_) {
+      case kReuse:
+        options.reuse_logs = true;
+        break;
       case kFilter:
         options.filter_policy = filter_policy_;
         break;
@@ -558,6 +563,17 @@ TEST(DBTest, GetFromVersions) {
   } while (ChangeOptions());
 }
 
+TEST(DBTest, GetMemUsage) {
+  do {
+    ASSERT_OK(Put("foo", "v1"));
+    std::string val;
+    ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
+    int mem_usage = atoi(val.c_str());
+    ASSERT_GT(mem_usage, 0);
+    ASSERT_LT(mem_usage, 5*1024*1024);
+  } while (ChangeOptions());
+}
+
 TEST(DBTest, GetSnapshot) {
   do {
     // Try with both a short key and a long key
@@ -1080,6 +1096,14 @@ TEST(DBTest, ApproximateSizes) {
     // 0 because GetApproximateSizes() does not account for memtable space
     ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
 
+    if (options.reuse_logs) {
+      // Recovery will reuse memtable, and GetApproximateSizes() does not
+      // account for memtable usage;
+      Reopen(&options);
+      ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
+      continue;
+    }
+
     // Check sizes across recovery by reopening a few times
     for (int run = 0; run < 3; run++) {
       Reopen(&options);
@@ -1123,6 +1147,11 @@ TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
     ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
     ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
 
+    if (options.reuse_logs) {
+      // Need to force a memtable compaction since recovery does not do so.
+      ASSERT_OK(dbfull()->TEST_CompactMemTable());
+    }
+
     // Check sizes across recovery by reopening a few times
     for (int run = 0; run < 3; run++) {
       Reopen(&options);
@@ -2084,7 +2113,8 @@ void BM_LogAndApply(int iters, int num_base_files) {
   InternalKeyComparator cmp(BytewiseComparator());
   Options options;
   VersionSet vset(dbname, &options, NULL, &cmp);
-  ASSERT_OK(vset.Recover());
+  bool save_manifest;
+  ASSERT_OK(vset.Recover(&save_manifest));
   VersionEdit vbase;
   uint64_t fnum = 1;
   for (int i = 0; i < num_base_files; i++) {
diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc
new file mode 100644
index 0000000000..875dfe81ee
--- /dev/null
+++ b/db/fault_injection_test.cc
@@ -0,0 +1,554 @@
+// Copyright 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// This test uses a custom Env to keep track of the state of a filesystem as of
+// the last "sync". It then checks for data loss errors by purposely dropping
+// file data (or entire files) not protected by a "sync".
+
+#include "leveldb/db.h"
+
+#include <map>
+#include <set>
+#include "db/db_impl.h"
+#include "db/filename.h"
+#include "db/log_format.h"
+#include "db/version_set.h"
+#include "leveldb/cache.h"
+#include "leveldb/env.h"
+#include "leveldb/table.h"
+#include "leveldb/write_batch.h"
+#include "util/logging.h"
+#include "util/mutexlock.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+static const int kValueSize = 1000;
+static const int kMaxNumValues = 2000;
+static const size_t kNumIterations = 3;
+
+class FaultInjectionTestEnv;
+
+namespace {
+
+// Assume a filename, and not a directory name like "/foo/bar/"
+static std::string GetDirName(const std::string filename) {
+  size_t found = filename.find_last_of("/\\");
+  if (found == std::string::npos) {
+    return "";
+  } else {
+    return filename.substr(0, found);
+  }
+}
+
+Status SyncDir(const std::string& dir) {
+  // As this is a test it isn't required to *actually* sync this directory.
+  return Status::OK();
+}
+
+// A basic file truncation function suitable for this test.
+Status Truncate(const std::string& filename, uint64_t length) {
+  leveldb::Env* env = leveldb::Env::Default();
+
+  SequentialFile* orig_file;
+  Status s = env->NewSequentialFile(filename, &orig_file);
+  if (!s.ok())
+    return s;
+
+  char* scratch = new char[length];
+  leveldb::Slice result;
+  s = orig_file->Read(length, &result, scratch);
+  delete orig_file;
+  if (s.ok()) {
+    std::string tmp_name = GetDirName(filename) + "/truncate.tmp";
+    WritableFile* tmp_file;
+    s = env->NewWritableFile(tmp_name, &tmp_file);
+    if (s.ok()) {
+      s = tmp_file->Append(result);
+      delete tmp_file;
+      if (s.ok()) {
+        s = env->RenameFile(tmp_name, filename);
+      } else {
+        env->DeleteFile(tmp_name);
+      }
+    }
+  }
+
+  delete[] scratch;
+
+  return s;
+}
+
+struct FileState {
+  std::string filename_;
+  ssize_t pos_;
+  ssize_t pos_at_last_sync_;
+  ssize_t pos_at_last_flush_;
+
+  FileState(const std::string& filename)
+      : filename_(filename),
+        pos_(-1),
+        pos_at_last_sync_(-1),
+        pos_at_last_flush_(-1) { }
+
+  FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
+
+  bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; }
+
+  Status DropUnsyncedData() const;
+};
+
+}  // anonymous namespace
+
+// A wrapper around WritableFile which informs another Env whenever this file
+// is written to or sync'ed.
+class TestWritableFile : public WritableFile {
+ public:
+  TestWritableFile(const FileState& state,
+                   WritableFile* f,
+                   FaultInjectionTestEnv* env);
+  virtual ~TestWritableFile();
+  virtual Status Append(const Slice& data);
+  virtual Status Close();
+  virtual Status Flush();
+  virtual Status Sync();
+
+ private:
+  FileState state_;
+  WritableFile* target_;
+  bool writable_file_opened_;
+  FaultInjectionTestEnv* env_;
+
+  Status SyncParent();
+};
+
+class FaultInjectionTestEnv : public EnvWrapper {
+ public:
+  FaultInjectionTestEnv() : EnvWrapper(Env::Default()), filesystem_active_(true) {}
+  virtual ~FaultInjectionTestEnv() { }
+  virtual Status NewWritableFile(const std::string& fname,
+                                 WritableFile** result);
+  virtual Status NewAppendableFile(const std::string& fname,
+                                   WritableFile** result);
+  virtual Status DeleteFile(const std::string& f);
+  virtual Status RenameFile(const std::string& s, const std::string& t);
+
+  void WritableFileClosed(const FileState& state);
+  Status DropUnsyncedFileData();
+  Status DeleteFilesCreatedAfterLastDirSync();
+  void DirWasSynced();
+  bool IsFileCreatedSinceLastDirSync(const std::string& filename);
+  void ResetState();
+  void UntrackFile(const std::string& f);
+  // Setting the filesystem to inactive is the test equivalent to simulating a
+  // system reset. Setting to inactive will freeze our saved filesystem state so
+  // that it will stop being recorded. It can then be reset back to the state at
+  // the time of the reset.
+  bool IsFilesystemActive() const { return filesystem_active_; }
+  void SetFilesystemActive(bool active) { filesystem_active_ = active; }
+
+ private:
+  port::Mutex mutex_;
+  std::map<std::string, FileState> db_file_state_;
+  std::set<std::string> new_files_since_last_dir_sync_;
+  bool filesystem_active_;  // Record flushes, syncs, writes
+};
+
+TestWritableFile::TestWritableFile(const FileState& state,
+                                   WritableFile* f,
+                                   FaultInjectionTestEnv* env)
+    : state_(state),
+      target_(f),
+      writable_file_opened_(true),
+      env_(env) {
+  assert(f != NULL);
+}
+
+TestWritableFile::~TestWritableFile() {
+  if (writable_file_opened_) {
+    Close();
+  }
+  delete target_;
+}
+
+Status TestWritableFile::Append(const Slice& data) {
+  Status s = target_->Append(data);
+  if (s.ok() && env_->IsFilesystemActive()) {
+    state_.pos_ += data.size();
+  }
+  return s;
+}
+
+Status TestWritableFile::Close() {
+  writable_file_opened_ = false;
+  Status s = target_->Close();
+  if (s.ok()) {
+    env_->WritableFileClosed(state_);
+  }
+  return s;
+}
+
+Status TestWritableFile::Flush() {
+  Status s = target_->Flush();
+  if (s.ok() && env_->IsFilesystemActive()) {
+    state_.pos_at_last_flush_ = state_.pos_;
+  }
+  return s;
+}
+
+Status TestWritableFile::SyncParent() {
+  Status s = SyncDir(GetDirName(state_.filename_));
+  if (s.ok()) {
+    env_->DirWasSynced();
+  }
+  return s;
+}
+
+Status TestWritableFile::Sync() {
+  if (!env_->IsFilesystemActive()) {
+    return Status::OK();
+  }
+  // Ensure new files referred to by the manifest are in the filesystem.
+  Status s = target_->Sync();
+  if (s.ok()) {
+    state_.pos_at_last_sync_ = state_.pos_;
+  }
+  if (env_->IsFileCreatedSinceLastDirSync(state_.filename_)) {
+    Status ps = SyncParent();
+    if (s.ok() && !ps.ok()) {
+      s = ps;
+    }
+  }
+  return s;
+}
+
+Status FaultInjectionTestEnv::NewWritableFile(const std::string& fname,
+                                              WritableFile** result) {
+  WritableFile* actual_writable_file;
+  Status s = target()->NewWritableFile(fname, &actual_writable_file);
+  if (s.ok()) {
+    FileState state(fname);
+    state.pos_ = 0;
+    *result = new TestWritableFile(state, actual_writable_file, this);
+    // NewWritableFile doesn't append to files, so if the same file is
+    // opened again then it will be truncated - so forget our saved
+    // state.
+    UntrackFile(fname);
+    MutexLock l(&mutex_);
+    new_files_since_last_dir_sync_.insert(fname);
+  }
+  return s;
+}
+
+Status FaultInjectionTestEnv::NewAppendableFile(const std::string& fname,
+                                                WritableFile** result) {
+  WritableFile* actual_writable_file;
+  Status s = target()->NewAppendableFile(fname, &actual_writable_file);
+  if (s.ok()) {
+    FileState state(fname);
+    state.pos_ = 0;
+    {
+      MutexLock l(&mutex_);
+      if (db_file_state_.count(fname) == 0) {
+        new_files_since_last_dir_sync_.insert(fname);
+      } else {
+        state = db_file_state_[fname];
+      }
+    }
+    *result = new TestWritableFile(state, actual_writable_file, this);
+  }
+  return s;
+}
+
+Status FaultInjectionTestEnv::DropUnsyncedFileData() {
+  Status s;
+  MutexLock l(&mutex_);
+  for (std::map<std::string, FileState>::const_iterator it =
+           db_file_state_.begin();
+       s.ok() && it != db_file_state_.end(); ++it) {
+    const FileState& state = it->second;
+    if (!state.IsFullySynced()) {
+      s = state.DropUnsyncedData();
+    }
+  }
+  return s;
+}
+
+void FaultInjectionTestEnv::DirWasSynced() {
+  MutexLock l(&mutex_);
+  new_files_since_last_dir_sync_.clear();
+}
+
+bool FaultInjectionTestEnv::IsFileCreatedSinceLastDirSync(
+    const std::string& filename) {
+  MutexLock l(&mutex_);
+  return new_files_since_last_dir_sync_.find(filename) !=
+         new_files_since_last_dir_sync_.end();
+}
+
+void FaultInjectionTestEnv::UntrackFile(const std::string& f) {
+  MutexLock l(&mutex_);
+  db_file_state_.erase(f);
+  new_files_since_last_dir_sync_.erase(f);
+}
+
+Status FaultInjectionTestEnv::DeleteFile(const std::string& f) {
+  Status s = EnvWrapper::DeleteFile(f);
+  ASSERT_OK(s);
+  if (s.ok()) {
+    UntrackFile(f);
+  }
+  return s;
+}
+
+Status FaultInjectionTestEnv::RenameFile(const std::string& s,
+                                         const std::string& t) {
+  Status ret = EnvWrapper::RenameFile(s, t);
+
+  if (ret.ok()) {
+    MutexLock l(&mutex_);
+    if (db_file_state_.find(s) != db_file_state_.end()) {
+      db_file_state_[t] = db_file_state_[s];
+      db_file_state_.erase(s);
+    }
+
+    if (new_files_since_last_dir_sync_.erase(s) != 0) {
+      assert(new_files_since_last_dir_sync_.find(t) ==
+             new_files_since_last_dir_sync_.end());
+      new_files_since_last_dir_sync_.insert(t);
+    }
+  }
+
+  return ret;
+}
+
+void FaultInjectionTestEnv::ResetState() {
+  // Since we are not destroying the database, the existing files
+  // should keep their recorded synced/flushed state. Therefore
+  // we do not reset db_file_state_ and new_files_since_last_dir_sync_.
+  MutexLock l(&mutex_);
+  SetFilesystemActive(true);
+}
+
+Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() {
+  // Because DeleteFile access this container make a copy to avoid deadlock
+  mutex_.Lock();
+  std::set<std::string> new_files(new_files_since_last_dir_sync_.begin(),
+                                  new_files_since_last_dir_sync_.end());
+  mutex_.Unlock();
+  Status s;
+  std::set<std::string>::const_iterator it;
+  for (it = new_files.begin(); s.ok() && it != new_files.end(); ++it) {
+    s = DeleteFile(*it);
+  }
+  return s;
+}
+
+void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
+  MutexLock l(&mutex_);
+  db_file_state_[state.filename_] = state;
+}
+
+Status FileState::DropUnsyncedData() const {
+  ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
+  return Truncate(filename_, sync_pos);
+}
+
+class FaultInjectionTest {
+ public:
+  enum ExpectedVerifResult { VAL_EXPECT_NO_ERROR, VAL_EXPECT_ERROR };
+  enum ResetMethod { RESET_DROP_UNSYNCED_DATA, RESET_DELETE_UNSYNCED_FILES };
+
+  FaultInjectionTestEnv* env_;
+  std::string dbname_;
+  Cache* tiny_cache_;
+  Options options_;
+  DB* db_;
+
+  FaultInjectionTest()
+      : env_(new FaultInjectionTestEnv),
+        tiny_cache_(NewLRUCache(100)),
+        db_(NULL) {
+    dbname_ = test::TmpDir() + "/fault_test";
+    DestroyDB(dbname_, Options());  // Destroy any db from earlier run
+    options_.reuse_logs = true;
+    options_.env = env_;
+    options_.paranoid_checks = true;
+    options_.block_cache = tiny_cache_;
+    options_.create_if_missing = true;
+  }
+
+  ~FaultInjectionTest() {
+    CloseDB();
+    DestroyDB(dbname_, Options());
+    delete tiny_cache_;
+    delete env_;
+  }
+
+  void ReuseLogs(bool reuse) {
+    options_.reuse_logs = reuse;
+  }
+
+  void Build(int start_idx, int num_vals) {
+    std::string key_space, value_space;
+    WriteBatch batch;
+    for (int i = start_idx; i < start_idx + num_vals; i++) {
+      Slice key = Key(i, &key_space);
+      batch.Clear();
+      batch.Put(key, Value(i, &value_space));
+      WriteOptions options;
+      ASSERT_OK(db_->Write(options, &batch));
+    }
+  }
+
+  Status ReadValue(int i, std::string* val) const {
+    std::string key_space, value_space;
+    Slice key = Key(i, &key_space);
+    Value(i, &value_space);
+    ReadOptions options;
+    return db_->Get(options, key, val);
+  }
+
+  Status Verify(int start_idx, int num_vals,
+                ExpectedVerifResult expected) const {
+    std::string val;
+    std::string value_space;
+    Status s;
+    for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) {
+      Value(i, &value_space);
+      s = ReadValue(i, &val);
+      if (expected == VAL_EXPECT_NO_ERROR) {
+        if (s.ok()) {
+          ASSERT_EQ(value_space, val);
+        }
+      } else if (s.ok()) {
+        fprintf(stderr, "Expected an error at %d, but was OK\n", i);
+        s = Status::IOError(dbname_, "Expected value error:");
+      } else {
+        s = Status::OK();  // An expected error
+      }
+    }
+    return s;
+  }
+
+  // Return the ith key
+  Slice Key(int i, std::string* storage) const {
+    char buf[100];
+    snprintf(buf, sizeof(buf), "%016d", i);
+    storage->assign(buf, strlen(buf));
+    return Slice(*storage);
+  }
+
+  // Return the value to associate with the specified key
+  Slice Value(int k, std::string* storage) const {
+    Random r(k);
+    return test::RandomString(&r, kValueSize, storage);
+  }
+
+  Status OpenDB() {
+    delete db_;
+    db_ = NULL;
+    env_->ResetState();
+    return DB::Open(options_, dbname_, &db_);
+  }
+
+  void CloseDB() {
+    delete db_;
+    db_ = NULL;
+  }
+
+  void DeleteAllData() {
+    Iterator* iter = db_->NewIterator(ReadOptions());
+    WriteOptions options;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
+    }
+
+    delete iter;
+  }
+
+  void ResetDBState(ResetMethod reset_method) {
+    switch (reset_method) {
+      case RESET_DROP_UNSYNCED_DATA:
+        ASSERT_OK(env_->DropUnsyncedFileData());
+        break;
+      case RESET_DELETE_UNSYNCED_FILES:
+        ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
+        break;
+      default:
+        assert(false);
+    }
+  }
+
+  void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
+    DeleteAllData();
+    Build(0, num_pre_sync);
+    db_->CompactRange(NULL, NULL);
+    Build(num_pre_sync, num_post_sync);
+  }
+
+  void PartialCompactTestReopenWithFault(ResetMethod reset_method,
+                                         int num_pre_sync,
+                                         int num_post_sync) {
+    env_->SetFilesystemActive(false);
+    CloseDB();
+    ResetDBState(reset_method);
+    ASSERT_OK(OpenDB());
+    ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::VAL_EXPECT_NO_ERROR));
+    ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::VAL_EXPECT_ERROR));
+  }
+
+  void NoWriteTestPreFault() {
+  }
+
+  void NoWriteTestReopenWithFault(ResetMethod reset_method) {
+    CloseDB();
+    ResetDBState(reset_method);
+    ASSERT_OK(OpenDB());
+  }
+
+  void DoTest() {
+    Random rnd(0);
+    ASSERT_OK(OpenDB());
+    for (size_t idx = 0; idx < kNumIterations; idx++) {
+      int num_pre_sync = rnd.Uniform(kMaxNumValues);
+      int num_post_sync = rnd.Uniform(kMaxNumValues);
+
+      PartialCompactTestPreFault(num_pre_sync, num_post_sync);
+      PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA,
+                                        num_pre_sync,
+                                        num_post_sync);
+
+      NoWriteTestPreFault();
+      NoWriteTestReopenWithFault(RESET_DROP_UNSYNCED_DATA);
+
+      PartialCompactTestPreFault(num_pre_sync, num_post_sync);
+      // No new files created so we expect all values since no files will be
+      // dropped.
+      PartialCompactTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES,
+                                        num_pre_sync + num_post_sync,
+                                        0);
+
+      NoWriteTestPreFault();
+      NoWriteTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES);
+    }
+  }
+};
+
+TEST(FaultInjectionTest, FaultTestNoLogReuse) {
+  ReuseLogs(false);
+  DoTest();
+}
+
+TEST(FaultInjectionTest, FaultTestWithLogReuse) {
+  ReuseLogs(true);
+  DoTest();
+}
+
+}  // namespace leveldb
+
+int main(int argc, char** argv) {
+  return leveldb::test::RunAllTests();
+}
diff --git a/db/leveldb_main.cc b/db/leveldbutil.cc
index 9f4b7dd70c..9f4b7dd70c 100644
--- a/db/leveldb_main.cc
+++ b/db/leveldbutil.cc
diff --git a/db/log_reader.cc b/db/log_reader.cc
index e44b66c85b..a6d304545d 100644
--- a/db/log_reader.cc
+++ b/db/log_reader.cc
@@ -25,7 +25,8 @@ Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
       eof_(false),
       last_record_offset_(0),
       end_of_buffer_offset_(0),
-      initial_offset_(initial_offset) {
+      initial_offset_(initial_offset),
+      resyncing_(initial_offset > 0) {
 }
 
 Reader::~Reader() {
@@ -72,8 +73,25 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
 
   Slice fragment;
   while (true) {
-    uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
     const unsigned int record_type = ReadPhysicalRecord(&fragment);
+
+    // ReadPhysicalRecord may have only had an empty trailer remaining in its
+    // internal buffer. Calculate the offset of the next physical record now
+    // that it has returned, properly accounting for its header size.
+    uint64_t physical_record_offset =
+        end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();
+
+    if (resyncing_) {
+      if (record_type == kMiddleType) {
+        continue;
+      } else if (record_type == kLastType) {
+        resyncing_ = false;
+        continue;
+      } else {
+        resyncing_ = false;
+      }
+    }
+
     switch (record_type) {
       case kFullType:
         if (in_fragmented_record) {
diff --git a/db/log_reader.h b/db/log_reader.h
index 6aff791716..8389d61f8f 100644
--- a/db/log_reader.h
+++ b/db/log_reader.h
@@ -73,6 +73,11 @@ class Reader {
   // Offset at which to start looking for the first record to return
   uint64_t const initial_offset_;
 
+  // True if we are resynchronizing after a seek (initial_offset_ > 0). In
+  // particular, a run of kMiddleType and kLastType records can be silently
+  // skipped in this mode
+  bool resyncing_;
+
   // Extend record types with the following special values
   enum {
     kEof = kMaxRecordType + 1,
diff --git a/db/log_test.cc b/db/log_test.cc
index dcf0562652..48a5928657 100644
--- a/db/log_test.cc
+++ b/db/log_test.cc
@@ -79,7 +79,7 @@ class LogTest {
     virtual Status Skip(uint64_t n) {
       if (n > contents_.size()) {
         contents_.clear();
-        return Status::NotFound("in-memory file skipepd past end");
+        return Status::NotFound("in-memory file skipped past end");
       }
 
       contents_.remove_prefix(n);
@@ -104,23 +104,34 @@ class LogTest {
   StringSource source_;
   ReportCollector report_;
   bool reading_;
-  Writer writer_;
-  Reader reader_;
+  Writer* writer_;
+  Reader* reader_;
 
   // Record metadata for testing initial offset functionality
   static size_t initial_offset_record_sizes_[];
   static uint64_t initial_offset_last_record_offsets_[];
+  static int num_initial_offset_records_;
 
  public:
   LogTest() : reading_(false),
-              writer_(&dest_),
-              reader_(&source_, &report_, true/*checksum*/,
-                      0/*initial_offset*/) {
+              writer_(new Writer(&dest_)),
+              reader_(new Reader(&source_, &report_, true/*checksum*/,
+                      0/*initial_offset*/)) {
+  }
+
+  ~LogTest() {
+    delete writer_;
+    delete reader_;
+  }
+
+  void ReopenForAppend() {
+    delete writer_;
+    writer_ = new Writer(&dest_, dest_.contents_.size());
   }
 
   void Write(const std::string& msg) {
     ASSERT_TRUE(!reading_) << "Write() after starting to read";
-    writer_.AddRecord(Slice(msg));
+    writer_->AddRecord(Slice(msg));
   }
 
   size_t WrittenBytes() const {
@@ -134,7 +145,7 @@ class LogTest {
     }
     std::string scratch;
     Slice record;
-    if (reader_.ReadRecord(&record, &scratch)) {
+    if (reader_->ReadRecord(&record, &scratch)) {
       return record.ToString();
     } else {
       return "EOF";
@@ -182,13 +193,18 @@ class LogTest {
   }
 
   void WriteInitialOffsetLog() {
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < num_initial_offset_records_; i++) {
       std::string record(initial_offset_record_sizes_[i],
                          static_cast<char>('a' + i));
       Write(record);
     }
   }
 
+  void StartReadingAt(uint64_t initial_offset) {
+    delete reader_;
+    reader_ = new Reader(&source_, &report_, true/*checksum*/, initial_offset);
+  }
+
   void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
     WriteInitialOffsetLog();
     reading_ = true;
@@ -208,32 +224,48 @@ class LogTest {
     source_.contents_ = Slice(dest_.contents_);
     Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
                                        initial_offset);
-    Slice record;
-    std::string scratch;
-    ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
-    ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
-              record.size());
-    ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
-              offset_reader->LastRecordOffset());
-    ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
+
+    // Read all records from expected_record_offset through the last one.
+    ASSERT_LT(expected_record_offset, num_initial_offset_records_);
+    for (; expected_record_offset < num_initial_offset_records_;
+         ++expected_record_offset) {
+      Slice record;
+      std::string scratch;
+      ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
+      ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
+                record.size());
+      ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
+                offset_reader->LastRecordOffset());
+      ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
+    }
     delete offset_reader;
   }
-
 };
 
 size_t LogTest::initial_offset_record_sizes_[] =
     {10000,  // Two sizable records in first block
      10000,
      2 * log::kBlockSize - 1000,  // Span three blocks
-     1};
+     1,
+     13716,  // Consume all but two bytes of block 3.
+     log::kBlockSize - kHeaderSize, // Consume the entirety of block 4.
+    };
 
 uint64_t LogTest::initial_offset_last_record_offsets_[] =
     {0,
      kHeaderSize + 10000,
      2 * (kHeaderSize + 10000),
      2 * (kHeaderSize + 10000) +
-         (2 * log::kBlockSize - 1000) + 3 * kHeaderSize};
+         (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
+     2 * (kHeaderSize + 10000) +
+         (2 * log::kBlockSize - 1000) + 3 * kHeaderSize
+         + kHeaderSize + 1,
+     3 * log::kBlockSize,
+    };
 
+// LogTest::initial_offset_last_record_offsets_ must be defined before this.
+int LogTest::num_initial_offset_records_ =
+    sizeof(LogTest::initial_offset_last_record_offsets_)/sizeof(uint64_t);
 
 TEST(LogTest, Empty) {
   ASSERT_EQ("EOF", Read());
@@ -318,6 +350,15 @@ TEST(LogTest, AlignedEof) {
   ASSERT_EQ("EOF", Read());
 }
 
+TEST(LogTest, OpenForAppend) {
+  Write("hello");
+  ReopenForAppend();
+  Write("world");
+  ASSERT_EQ("hello", Read());
+  ASSERT_EQ("world", Read());
+  ASSERT_EQ("EOF", Read());
+}
+
 TEST(LogTest, RandomRead) {
   const int N = 500;
   Random write_rnd(301);
@@ -445,6 +486,22 @@ TEST(LogTest, PartialLastIsIgnored) {
   ASSERT_EQ(0, DroppedBytes());
 }
 
+TEST(LogTest, SkipIntoMultiRecord) {
+  // Consider a fragmented record:
+  //    first(R1), middle(R1), last(R1), first(R2)
+  // If initial_offset points to a record after first(R1) but before first(R2)
+  // incomplete fragment errors are not actual errors, and must be suppressed
+  // until a new first or full record is encountered.
+  Write(BigString("foo", 3*kBlockSize));
+  Write("correct");
+  StartReadingAt(kBlockSize);
+
+  ASSERT_EQ("correct", Read());
+  ASSERT_EQ("", ReportMessage());
+  ASSERT_EQ(0, DroppedBytes());
+  ASSERT_EQ("EOF", Read());
+}
+
 TEST(LogTest, ErrorJoinsRecords) {
   // Consider two fragmented records:
   //    first(R1) last(R1) first(R2) last(R2)
@@ -514,6 +571,10 @@ TEST(LogTest, ReadFourthStart) {
       3);
 }
 
+TEST(LogTest, ReadInitialOffsetIntoBlockPadding) {
+  CheckInitialOffsetRecord(3 * log::kBlockSize - 3, 5);
+}
+
 TEST(LogTest, ReadEnd) {
   CheckOffsetPastEndReturnsNoRecords(0);
 }
diff --git a/db/log_writer.cc b/db/log_writer.cc
index 2da99ac088..74a03270da 100644
--- a/db/log_writer.cc
+++ b/db/log_writer.cc
@@ -12,15 +12,24 @@
 namespace leveldb {
 namespace log {
 
-Writer::Writer(WritableFile* dest)
-    : dest_(dest),
-      block_offset_(0) {
+static void InitTypeCrc(uint32_t* type_crc) {
   for (int i = 0; i <= kMaxRecordType; i++) {
     char t = static_cast<char>(i);
-    type_crc_[i] = crc32c::Value(&t, 1);
+    type_crc[i] = crc32c::Value(&t, 1);
   }
 }
 
+Writer::Writer(WritableFile* dest)
+    : dest_(dest),
+      block_offset_(0) {
+  InitTypeCrc(type_crc_);
+}
+
+Writer::Writer(WritableFile* dest, uint64_t dest_length)
+    : dest_(dest), block_offset_(dest_length % kBlockSize) {
+  InitTypeCrc(type_crc_);
+}
+
 Writer::~Writer() {
 }
 
diff --git a/db/log_writer.h b/db/log_writer.h
index a3a954d967..9e7cc4705b 100644
--- a/db/log_writer.h
+++ b/db/log_writer.h
@@ -22,6 +22,12 @@ class Writer {
   // "*dest" must be initially empty.
   // "*dest" must remain live while this Writer is in use.
   explicit Writer(WritableFile* dest);
+
+  // Create a writer that will append data to "*dest".
+  // "*dest" must have initial length "dest_length".
+  // "*dest" must remain live while this Writer is in use.
+  Writer(WritableFile* dest, uint64_t dest_length);
+
   ~Writer();
 
   Status AddRecord(const Slice& slice);
diff --git a/db/memtable.h b/db/memtable.h
index 92e90bb099..9f41567cde 100644
--- a/db/memtable.h
+++ b/db/memtable.h
@@ -36,10 +36,7 @@ class MemTable {
   }
 
   // Returns an estimate of the number of bytes of data in use by this
-  // data structure.
-  //
-  // REQUIRES: external synchronization to prevent simultaneous
-  // operations on the same MemTable.
+  // data structure. It is safe to call when MemTable is being modified.
   size_t ApproximateMemoryUsage();
 
   // Return an iterator that yields the contents of the memtable.
diff --git a/db/recovery_test.cc b/db/recovery_test.cc
new file mode 100644
index 0000000000..9596f4288a
--- /dev/null
+++ b/db/recovery_test.cc
@@ -0,0 +1,324 @@
+// Copyright (c) 2014 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_impl.h"
+#include "db/filename.h"
+#include "db/version_set.h"
+#include "db/write_batch_internal.h"
+#include "leveldb/db.h"
+#include "leveldb/env.h"
+#include "leveldb/write_batch.h"
+#include "util/logging.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace leveldb {
+
+class RecoveryTest {
+ public:
+  RecoveryTest() : env_(Env::Default()), db_(NULL) {
+    dbname_ = test::TmpDir() + "/recovery_test";
+    DestroyDB(dbname_, Options());
+    Open();
+  }
+
+  ~RecoveryTest() {
+    Close();
+    DestroyDB(dbname_, Options());
+  }
+
+  DBImpl* dbfull() const { return reinterpret_cast<DBImpl*>(db_); }
+  Env* env() const { return env_; }
+
+  bool CanAppend() {
+    WritableFile* tmp;
+    Status s = env_->NewAppendableFile(CurrentFileName(dbname_), &tmp);
+    delete tmp;
+    if (s.IsNotSupportedError()) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  void Close() {
+    delete db_;
+    db_ = NULL;
+  }
+
+  void Open(Options* options = NULL) {
+    Close();
+    Options opts;
+    if (options != NULL) {
+      opts = *options;
+    } else {
+      opts.reuse_logs = true;  // TODO(sanjay): test both ways
+      opts.create_if_missing = true;
+    }
+    if (opts.env == NULL) {
+      opts.env = env_;
+    }
+    ASSERT_OK(DB::Open(opts, dbname_, &db_));
+    ASSERT_EQ(1, NumLogs());
+  }
+
+  Status Put(const std::string& k, const std::string& v) {
+    return db_->Put(WriteOptions(), k, v);
+  }
+
+  std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
+    std::string result;
+    Status s = db_->Get(ReadOptions(), k, &result);
+    if (s.IsNotFound()) {
+      result = "NOT_FOUND";
+    } else if (!s.ok()) {
+      result = s.ToString();
+    }
+    return result;
+  }
+
+  std::string ManifestFileName() {
+    std::string current;
+    ASSERT_OK(ReadFileToString(env_, CurrentFileName(dbname_), &current));
+    size_t len = current.size();
+    if (len > 0 && current[len-1] == '\n') {
+      current.resize(len - 1);
+    }
+    return dbname_ + "/" + current;
+  }
+
+  std::string LogName(uint64_t number) {
+    return LogFileName(dbname_, number);
+  }
+
+  size_t DeleteLogFiles() {
+    std::vector<uint64_t> logs = GetFiles(kLogFile);
+    for (size_t i = 0; i < logs.size(); i++) {
+      ASSERT_OK(env_->DeleteFile(LogName(logs[i]))) << LogName(logs[i]);
+    }
+    return logs.size();
+  }
+
+  uint64_t FirstLogFile() {
+    return GetFiles(kLogFile)[0];
+  }
+
+  std::vector<uint64_t> GetFiles(FileType t) {
+    std::vector<std::string> filenames;
+    ASSERT_OK(env_->GetChildren(dbname_, &filenames));
+    std::vector<uint64_t> result;
+    for (size_t i = 0; i < filenames.size(); i++) {
+      uint64_t number;
+      FileType type;
+      if (ParseFileName(filenames[i], &number, &type) && type == t) {
+        result.push_back(number);
+      }
+    }
+    return result;
+  }
+
+  int NumLogs() {
+    return GetFiles(kLogFile).size();
+  }
+
+  int NumTables() {
+    return GetFiles(kTableFile).size();
+  }
+
+  uint64_t FileSize(const std::string& fname) {
+    uint64_t result;
+    ASSERT_OK(env_->GetFileSize(fname, &result)) << fname;
+    return result;
+  }
+
+  void CompactMemTable() {
+    dbfull()->TEST_CompactMemTable();
+  }
+
+  // Directly construct a log file that sets key to val.
+  void MakeLogFile(uint64_t lognum, SequenceNumber seq, Slice key, Slice val) {
+    std::string fname = LogFileName(dbname_, lognum);
+    WritableFile* file;
+    ASSERT_OK(env_->NewWritableFile(fname, &file));
+    log::Writer writer(file);
+    WriteBatch batch;
+    batch.Put(key, val);
+    WriteBatchInternal::SetSequence(&batch, seq);
+    ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
+    ASSERT_OK(file->Flush());
+    delete file;
+  }
+
+ private:
+  std::string dbname_;
+  Env* env_;
+  DB* db_;
+};
+
+TEST(RecoveryTest, ManifestReused) {
+  if (!CanAppend()) {
+    fprintf(stderr, "skipping test because env does not support appending\n");
+    return;
+  }
+  ASSERT_OK(Put("foo", "bar"));
+  Close();
+  std::string old_manifest = ManifestFileName();
+  Open();
+  ASSERT_EQ(old_manifest, ManifestFileName());
+  ASSERT_EQ("bar", Get("foo"));
+  Open();
+  ASSERT_EQ(old_manifest, ManifestFileName());
+  ASSERT_EQ("bar", Get("foo"));
+}
+
+TEST(RecoveryTest, LargeManifestCompacted) {
+  if (!CanAppend()) {
+    fprintf(stderr, "skipping test because env does not support appending\n");
+    return;
+  }
+  ASSERT_OK(Put("foo", "bar"));
+  Close();
+  std::string old_manifest = ManifestFileName();
+
+  // Pad with zeroes to make manifest file very big.
+  {
+    uint64_t len = FileSize(old_manifest);
+    WritableFile* file;
+    ASSERT_OK(env()->NewAppendableFile(old_manifest, &file));
+    std::string zeroes(3*1048576 - static_cast<size_t>(len), 0);
+    ASSERT_OK(file->Append(zeroes));
+    ASSERT_OK(file->Flush());
+    delete file;
+  }
+
+  Open();
+  std::string new_manifest = ManifestFileName();
+  ASSERT_NE(old_manifest, new_manifest);
+  ASSERT_GT(10000, FileSize(new_manifest));
+  ASSERT_EQ("bar", Get("foo"));
+
+  Open();
+  ASSERT_EQ(new_manifest, ManifestFileName());
+  ASSERT_EQ("bar", Get("foo"));
+}
+
+TEST(RecoveryTest, NoLogFiles) {
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_EQ(1, DeleteLogFiles());
+  Open();
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  Open();
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+}
+
+TEST(RecoveryTest, LogFileReuse) {
+  if (!CanAppend()) {
+    fprintf(stderr, "skipping test because env does not support appending\n");
+    return;
+  }
+  for (int i = 0; i < 2; i++) {
+    ASSERT_OK(Put("foo", "bar"));
+    if (i == 0) {
+      // Compact to ensure current log is empty
+      CompactMemTable();
+    }
+    Close();
+    ASSERT_EQ(1, NumLogs());
+    uint64_t number = FirstLogFile();
+    if (i == 0) {
+      ASSERT_EQ(0, FileSize(LogName(number)));
+    } else {
+      ASSERT_LT(0, FileSize(LogName(number)));
+    }
+    Open();
+    ASSERT_EQ(1, NumLogs());
+    ASSERT_EQ(number, FirstLogFile()) << "did not reuse log file";
+    ASSERT_EQ("bar", Get("foo"));
+    Open();
+    ASSERT_EQ(1, NumLogs());
+    ASSERT_EQ(number, FirstLogFile()) << "did not reuse log file";
+    ASSERT_EQ("bar", Get("foo"));
+  }
+}
+
+TEST(RecoveryTest, MultipleMemTables) {
+  // Make a large log.
+  const int kNum = 1000;
+  for (int i = 0; i < kNum; i++) {
+    char buf[100];
+    snprintf(buf, sizeof(buf), "%050d", i);
+    ASSERT_OK(Put(buf, buf));
+  }
+  ASSERT_EQ(0, NumTables());
+  Close();
+  ASSERT_EQ(0, NumTables());
+  ASSERT_EQ(1, NumLogs());
+  uint64_t old_log_file = FirstLogFile();
+
+  // Force creation of multiple memtables by reducing the write buffer size.
+  Options opt;
+  opt.reuse_logs = true;
+  opt.write_buffer_size = (kNum*100) / 2;
+  Open(&opt);
+  ASSERT_LE(2, NumTables());
+  ASSERT_EQ(1, NumLogs());
+  ASSERT_NE(old_log_file, FirstLogFile()) << "must not reuse log";
+  for (int i = 0; i < kNum; i++) {
+    char buf[100];
+    snprintf(buf, sizeof(buf), "%050d", i);
+    ASSERT_EQ(buf, Get(buf));
+  }
+}
+
+TEST(RecoveryTest, MultipleLogFiles) {
+  ASSERT_OK(Put("foo", "bar"));
+  Close();
+  ASSERT_EQ(1, NumLogs());
+
+  // Make a bunch of uncompacted log files.
+  uint64_t old_log = FirstLogFile();
+  MakeLogFile(old_log+1, 1000, "hello", "world");
+  MakeLogFile(old_log+2, 1001, "hi", "there");
+  MakeLogFile(old_log+3, 1002, "foo", "bar2");
+
+  // Recover and check that all log files were processed.
+  Open();
+  ASSERT_LE(1, NumTables());
+  ASSERT_EQ(1, NumLogs());
+  uint64_t new_log = FirstLogFile();
+  ASSERT_LE(old_log+3, new_log);
+  ASSERT_EQ("bar2", Get("foo"));
+  ASSERT_EQ("world", Get("hello"));
+  ASSERT_EQ("there", Get("hi"));
+
+  // Test that previous recovery produced recoverable state.
+  Open();
+  ASSERT_LE(1, NumTables());
+  ASSERT_EQ(1, NumLogs());
+  if (CanAppend()) {
+    ASSERT_EQ(new_log, FirstLogFile());
+  }
+  ASSERT_EQ("bar2", Get("foo"));
+  ASSERT_EQ("world", Get("hello"));
+  ASSERT_EQ("there", Get("hi"));
+
+  // Check that introducing an older log file does not cause it to be re-read.
+  Close();
+  MakeLogFile(old_log+1, 2000, "hello", "stale write");
+  Open();
+  ASSERT_LE(1, NumTables());
+  ASSERT_EQ(1, NumLogs());
+  if (CanAppend()) {
+    ASSERT_EQ(new_log, FirstLogFile());
+  }
+  ASSERT_EQ("bar2", Get("foo"));
+  ASSERT_EQ("world", Get("hello"));
+  ASSERT_EQ("there", Get("hi"));
+}
+
+}  // namespace leveldb
+
+int main(int argc, char** argv) {
+  return leveldb::test::RunAllTests();
+}
diff --git a/db/skiplist.h b/db/skiplist.h
index ed8b092203..8bd77764d8 100644
--- a/db/skiplist.h
+++ b/db/skiplist.h
@@ -1,10 +1,10 @@
-#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
-#define STORAGE_LEVELDB_DB_SKIPLIST_H_
-
 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
-//
+
+#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
+#define STORAGE_LEVELDB_DB_SKIPLIST_H_
+
 // Thread safety
 // -------------
 //
diff --git a/db/skiplist_test.cc b/db/skiplist_test.cc
index c78f4b4fb1..aee1461e1b 100644
--- a/db/skiplist_test.cc
+++ b/db/skiplist_test.cc
@@ -250,7 +250,7 @@ class ConcurrentTest {
         // Note that generation 0 is never inserted, so it is ok if
         // <*,0,*> is missing.
         ASSERT_TRUE((gen(pos) == 0) ||
-                    (gen(pos) > initial_state.Get(key(pos)))
+                    (gen(pos) > static_cast<Key>(initial_state.Get(key(pos))))
                     ) << "key: " << key(pos)
                       << "; gen: " << gen(pos)
                       << "; initgen: "
diff --git a/db/snapshot.h b/db/snapshot.h
index e7f8fd2c37..6ed413c42d 100644
--- a/db/snapshot.h
+++ b/db/snapshot.h
@@ -5,6 +5,7 @@
 #ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_
 #define STORAGE_LEVELDB_DB_SNAPSHOT_H_
 
+#include "db/dbformat.h"
 #include "leveldb/db.h"
 
 namespace leveldb {
diff --git a/db/version_set.cc b/db/version_set.cc
index aa83df55e4..a5e0f77a6a 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -893,7 +893,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
   return s;
 }
 
-Status VersionSet::Recover() {
+Status VersionSet::Recover(bool *save_manifest) {
   struct LogReporter : public log::Reader::Reporter {
     Status* status;
     virtual void Corruption(size_t bytes, const Status& s) {
@@ -1003,11 +1003,49 @@ Status VersionSet::Recover() {
     last_sequence_ = last_sequence;
     log_number_ = log_number;
     prev_log_number_ = prev_log_number;
+
+    // See if we can reuse the existing MANIFEST file.
+    if (ReuseManifest(dscname, current)) {
+      // No need to save new manifest
+    } else {
+      *save_manifest = true;
+    }
   }
 
   return s;
 }
 
+bool VersionSet::ReuseManifest(const std::string& dscname,
+                               const std::string& dscbase) {
+  if (!options_->reuse_logs) {
+    return false;
+  }
+  FileType manifest_type;
+  uint64_t manifest_number;
+  uint64_t manifest_size;
+  if (!ParseFileName(dscbase, &manifest_number, &manifest_type) ||
+      manifest_type != kDescriptorFile ||
+      !env_->GetFileSize(dscname, &manifest_size).ok() ||
+      // Make new compacted MANIFEST if old one is too big
+      manifest_size >= kTargetFileSize) {
+    return false;
+  }
+
+  assert(descriptor_file_ == NULL);
+  assert(descriptor_log_ == NULL);
+  Status r = env_->NewAppendableFile(dscname, &descriptor_file_);
+  if (!r.ok()) {
+    Log(options_->info_log, "Reuse MANIFEST: %s\n", r.ToString().c_str());
+    assert(descriptor_file_ == NULL);
+    return false;
+  }
+
+  Log(options_->info_log, "Reusing MANIFEST %s\n", dscname.c_str());
+  descriptor_log_ = new log::Writer(descriptor_file_, manifest_size);
+  manifest_file_number_ = manifest_number;
+  return true;
+}
+
 void VersionSet::MarkFileNumberUsed(uint64_t number) {
   if (next_file_number_ <= number) {
     next_file_number_ = number + 1;
diff --git a/db/version_set.h b/db/version_set.h
index 8dc14b8e01..1dec745673 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -179,7 +179,7 @@ class VersionSet {
       EXCLUSIVE_LOCKS_REQUIRED(mu);
 
   // Recover the last saved descriptor from persistent storage.
-  Status Recover();
+  Status Recover(bool *save_manifest);
 
   // Return the current version.
   Version* current() const { return current_; }
@@ -274,6 +274,8 @@ class VersionSet {
   friend class Compaction;
   friend class Version;
 
+  bool ReuseManifest(const std::string& dscname, const std::string& dscbase);
+
   void Finalize(Version* v);
 
   void GetRange(const std::vector<FileMetaData*>& inputs,
diff --git a/db/write_batch_internal.h b/db/write_batch_internal.h
index 310a3c8912..9448ef7b21 100644
--- a/db/write_batch_internal.h
+++ b/db/write_batch_internal.h
@@ -5,6 +5,7 @@
 #ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
 #define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
 
+#include "db/dbformat.h"
 #include "leveldb/write_batch.h"
 
 namespace leveldb {
diff --git a/doc/index.html b/doc/index.html
index 3ed0ed9d9e..2155192581 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -22,7 +22,7 @@ directory.  The following example shows how to open a database,
 creating it if necessary:
 <p>
 <pre>
-  #include &lt;assert&gt;
+  #include &lt;cassert&gt;
   #include "leveldb/db.h"
 
   leveldb::DB* db;
diff --git a/helpers/memenv/memenv.cc b/helpers/memenv/memenv.cc
index 43ef2e0729..9a98884daf 100644
--- a/helpers/memenv/memenv.cc
+++ b/helpers/memenv/memenv.cc
@@ -277,6 +277,19 @@ class InMemoryEnv : public EnvWrapper {
     return Status::OK();
   }
 
+  virtual Status NewAppendableFile(const std::string& fname,
+                                   WritableFile** result) {
+    MutexLock lock(&mutex_);
+    FileState** sptr = &file_map_[fname];
+    FileState* file = *sptr;
+    if (file == NULL) {
+      file = new FileState();
+      file->Ref();
+    }
+    *result = new WritableFileImpl(file);
+    return Status::OK();
+  }
+
   virtual bool FileExists(const std::string& fname) {
     MutexLock lock(&mutex_);
     return file_map_.find(fname) != file_map_.end();
diff --git a/helpers/memenv/memenv_test.cc b/helpers/memenv/memenv_test.cc
index a44310fed8..5cff77613f 100644
--- a/helpers/memenv/memenv_test.cc
+++ b/helpers/memenv/memenv_test.cc
@@ -40,6 +40,8 @@ TEST(MemEnvTest, Basics) {
 
   // Create a file.
   ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
+  ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
+  ASSERT_EQ(0, file_size);
   delete writable_file;
 
   // Check that the file exists.
@@ -55,9 +57,16 @@ TEST(MemEnvTest, Basics) {
   ASSERT_OK(writable_file->Append("abc"));
   delete writable_file;
 
-  // Check for expected size.
+  // Check that append works.
+  ASSERT_OK(env_->NewAppendableFile("/dir/f", &writable_file));
   ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
   ASSERT_EQ(3, file_size);
+  ASSERT_OK(writable_file->Append("hello"));
+  delete writable_file;
+
+  // Check for expected size.
+  ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
+  ASSERT_EQ(8, file_size);
 
   // Check that renaming works.
   ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok());
@@ -65,7 +74,7 @@ TEST(MemEnvTest, Basics) {
   ASSERT_TRUE(!env_->FileExists("/dir/f"));
   ASSERT_TRUE(env_->FileExists("/dir/g"));
   ASSERT_OK(env_->GetFileSize("/dir/g", &file_size));
-  ASSERT_EQ(3, file_size);
+  ASSERT_EQ(8, file_size);
 
   // Check that opening non-existent file fails.
   SequentialFile* seq_file;
diff --git a/include/leveldb/cache.h b/include/leveldb/cache.h
index 1a201e5e0a..6819d5bc49 100644
--- a/include/leveldb/cache.h
+++ b/include/leveldb/cache.h
@@ -81,6 +81,17 @@ class Cache {
   // its cache keys.
   virtual uint64_t NewId() = 0;
 
+  // Remove all cache entries that are not actively in use.  Memory-constrained
+  // applications may wish to call this method to reduce memory usage.
+  // Default implementation of Prune() does nothing.  Subclasses are strongly
+  // encouraged to override the default implementation.  A future release of
+  // leveldb may change Prune() to a pure abstract method.
+  virtual void Prune() {}
+
+  // Return an estimate of the combined charges of all elements stored in the
+  // cache.
+  virtual size_t TotalCharge() const = 0;
+
  private:
   void LRU_Remove(Handle* e);
   void LRU_Append(Handle* e);
diff --git a/include/leveldb/db.h b/include/leveldb/db.h
index 4c169bf22e..9752cbad51 100644
--- a/include/leveldb/db.h
+++ b/include/leveldb/db.h
@@ -14,7 +14,7 @@ namespace leveldb {
 
 // Update Makefile if you change these
 static const int kMajorVersion = 1;
-static const int kMinorVersion = 18;
+static const int kMinorVersion = 19;
 
 struct Options;
 struct ReadOptions;
@@ -115,6 +115,8 @@ class DB {
   //     about the internal operation of the DB.
   //  "leveldb.sstables" - returns a multi-line string that describes all
   //     of the sstables that make up the db contents.
+  //  "leveldb.approximate-memory-usage" - returns the approximate number of
+  //     bytes of memory in use by the DB.
   virtual bool GetProperty(const Slice& property, std::string* value) = 0;
 
   // For each i in [0,n-1], store in "sizes[i]", the approximate
diff --git a/include/leveldb/env.h b/include/leveldb/env.h
index f709514da6..99b6c21414 100644
--- a/include/leveldb/env.h
+++ b/include/leveldb/env.h
@@ -69,6 +69,21 @@ class Env {
   virtual Status NewWritableFile(const std::string& fname,
                                  WritableFile** result) = 0;
 
+  // Create an object that either appends to an existing file, or
+  // writes to a new file (if the file does not exist to begin with).
+  // On success, stores a pointer to the new file in *result and
+  // returns OK.  On failure stores NULL in *result and returns
+  // non-OK.
+  //
+  // The returned file will only be accessed by one thread at a time.
+  //
+  // May return an IsNotSupportedError error if this Env does
+  // not allow appending to an existing file.  Users of Env (including
+  // the leveldb implementation) must be prepared to deal with
+  // an Env that does not support appending.
+  virtual Status NewAppendableFile(const std::string& fname,
+                                   WritableFile** result);
+
   // Returns true iff the named file exists.
   virtual bool FileExists(const std::string& fname) = 0;
 
@@ -289,6 +304,9 @@ class EnvWrapper : public Env {
   Status NewWritableFile(const std::string& f, WritableFile** r) {
     return target_->NewWritableFile(f, r);
   }
+  Status NewAppendableFile(const std::string& f, WritableFile** r) {
+    return target_->NewAppendableFile(f, r);
+  }
   bool FileExists(const std::string& f) { return target_->FileExists(f); }
   Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
     return target_->GetChildren(dir, r);
diff --git a/include/leveldb/iterator.h b/include/leveldb/iterator.h
index 76aced04bd..da631ed9d8 100644
--- a/include/leveldb/iterator.h
+++ b/include/leveldb/iterator.h
@@ -37,7 +37,7 @@ class Iterator {
   // Valid() after this call iff the source is not empty.
   virtual void SeekToLast() = 0;
 
-  // Position at the first key in the source that at or past target
+  // Position at the first key in the source that is at or past target.
   // The iterator is Valid() after this call iff the source contains
   // an entry that comes at or past target.
   virtual void Seek(const Slice& target) = 0;
diff --git a/include/leveldb/options.h b/include/leveldb/options.h
index 7c9b973454..83a1ef39a4 100644
--- a/include/leveldb/options.h
+++ b/include/leveldb/options.h
@@ -128,6 +128,12 @@ struct Options {
   // efficiently detect that and will switch to uncompressed mode.
   CompressionType compression;
 
+  // EXPERIMENTAL: If true, append to existing MANIFEST and log files
+  // when a database is opened.  This can significantly speed up open.
+  //
+  // Default: currently false, but may become true later.
+  bool reuse_logs;
+
   // If non-NULL, use the specified filter policy to reduce disk reads.
   // Many applications will benefit from passing the result of
   // NewBloomFilterPolicy() here.
diff --git a/include/leveldb/status.h b/include/leveldb/status.h
index 11dbd4b47e..d9575f9753 100644
--- a/include/leveldb/status.h
+++ b/include/leveldb/status.h
@@ -60,6 +60,12 @@ class Status {
   // Returns true iff the status indicates an IOError.
   bool IsIOError() const { return code() == kIOError; }
 
+  // Returns true iff the status indicates a NotSupportedError.
+  bool IsNotSupportedError() const { return code() == kNotSupported; }
+
+  // Returns true iff the status indicates an InvalidArgument.
+  bool IsInvalidArgument() const { return code() == kInvalidArgument; }
+
   // Return a string representation of this status suitable for printing.
   // Returns the string "OK" for success.
   std::string ToString() const;
diff --git a/port/atomic_pointer.h b/port/atomic_pointer.h
index 9bf091f757..1c4c7aafc6 100644
--- a/port/atomic_pointer.h
+++ b/port/atomic_pointer.h
@@ -35,8 +35,12 @@
 #define ARCH_CPU_X86_FAMILY 1
 #elif defined(__ARMEL__)
 #define ARCH_CPU_ARM_FAMILY 1
+#elif defined(__aarch64__)
+#define ARCH_CPU_ARM64_FAMILY 1
 #elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
 #define ARCH_CPU_PPC_FAMILY 1
+#elif defined(__mips__)
+#define ARCH_CPU_MIPS_FAMILY 1
 #endif
 
 namespace leveldb {
@@ -92,6 +96,13 @@ inline void MemoryBarrier() {
 }
 #define LEVELDB_HAVE_MEMORY_BARRIER
 
+// ARM64
+#elif defined(ARCH_CPU_ARM64_FAMILY)
+inline void MemoryBarrier() {
+  asm volatile("dmb sy" : : : "memory");
+}
+#define LEVELDB_HAVE_MEMORY_BARRIER
+
 // PPC
 #elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
 inline void MemoryBarrier() {
@@ -101,6 +112,13 @@ inline void MemoryBarrier() {
 }
 #define LEVELDB_HAVE_MEMORY_BARRIER
 
+// MIPS
+#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(__GNUC__)
+inline void MemoryBarrier() {
+  __asm__ __volatile__("sync" : : : "memory");
+}
+#define LEVELDB_HAVE_MEMORY_BARRIER
+
 #endif
 
 // AtomicPointer built using platform-specific MemoryBarrier()
@@ -215,6 +233,7 @@ class AtomicPointer {
 #undef LEVELDB_HAVE_MEMORY_BARRIER
 #undef ARCH_CPU_X86_FAMILY
 #undef ARCH_CPU_ARM_FAMILY
+#undef ARCH_CPU_ARM64_FAMILY
 #undef ARCH_CPU_PPC_FAMILY
 
 }  // namespace port
diff --git a/port/port_posix.cc b/port/port_posix.cc
index 5ba127a5b9..30e8007ae3 100644
--- a/port/port_posix.cc
+++ b/port/port_posix.cc
@@ -7,7 +7,6 @@
 #include <cstdlib>
 #include <stdio.h>
 #include <string.h>
-#include "util/logging.h"
 
 namespace leveldb {
 namespace port {
diff --git a/table/filter_block.cc b/table/filter_block.cc
index 203e15c8bc..4e78b954f8 100644
--- a/table/filter_block.cc
+++ b/table/filter_block.cc
@@ -68,7 +68,7 @@ void FilterBlockBuilder::GenerateFilter() {
 
   // Generate filter for current set of keys and append to result_.
   filter_offsets_.push_back(result_.size());
-  policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
+  policy_->CreateFilter(&tmp_keys_[0], static_cast<int>(num_keys), &result_);
 
   tmp_keys_.clear();
   keys_.clear();
@@ -97,7 +97,7 @@ bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
   if (index < num_) {
     uint32_t start = DecodeFixed32(offset_ + index*4);
     uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
-    if (start <= limit && limit <= (offset_ - data_)) {
+    if (start <= limit && limit <= static_cast<size_t>(offset_ - data_)) {
       Slice filter = Slice(data_ + start, limit - start);
       return policy_->KeyMayMatch(key, filter);
     } else if (start == limit) {
diff --git a/table/format.cc b/table/format.cc
index aa63144c9e..24e4e02445 100644
--- a/table/format.cc
+++ b/table/format.cc
@@ -30,15 +30,14 @@ Status BlockHandle::DecodeFrom(Slice* input) {
 }
 
 void Footer::EncodeTo(std::string* dst) const {
-#ifndef NDEBUG
   const size_t original_size = dst->size();
-#endif
   metaindex_handle_.EncodeTo(dst);
   index_handle_.EncodeTo(dst);
   dst->resize(2 * BlockHandle::kMaxEncodedLength);  // Padding
   PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
   PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
   assert(dst->size() == original_size + kEncodedLength);
+  (void)original_size;  // Disable unused variable warning.
 }
 
 Status Footer::DecodeFrom(Slice* input) {
diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h
index 9e16b3dbed..f410c3fabe 100644
--- a/table/iterator_wrapper.h
+++ b/table/iterator_wrapper.h
@@ -5,6 +5,9 @@
 #ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
 #define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
 
+#include "leveldb/iterator.h"
+#include "leveldb/slice.h"
+
 namespace leveldb {
 
 // A internal wrapper class with an interface similar to Iterator that
diff --git a/table/table.cc b/table/table.cc
index dff8a82590..decf8082cc 100644
--- a/table/table.cc
+++ b/table/table.cc
@@ -82,7 +82,7 @@ Status Table::Open(const Options& options,
     *table = new Table(rep);
     (*table)->ReadMeta(footer);
   } else {
-    if (index_block) delete index_block;
+    delete index_block;
   }
 
   return s;
diff --git a/table/table_test.cc b/table/table_test.cc
index c723bf84cf..abf6e246ff 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -853,12 +853,20 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
   options.compression = kSnappyCompression;
   c.Finish(options, &keys, &kvmap);
 
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),    2000,   3000));
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),    2000,   3000));
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),    4000,   6000));
+  // Expected upper and lower bounds of space used by compressible strings.
+  static const int kSlop = 1000;  // Compressor effectiveness varies.
+  const int expected = 2500;  // 10000 * compression ratio (0.25)
+  const int min_z = expected - kSlop;
+  const int max_z = expected + kSlop;
+
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, kSlop));
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, kSlop));
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, kSlop));
+  // Have now emitted a large compressible string, so adjust expected offset.
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), min_z, max_z));
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), min_z, max_z));
+  // Have now emitted two large compressible strings, so adjust expected offset.
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 2 * min_z, 2 * max_z));
 }
 
 }  // namespace leveldb
diff --git a/util/arena.cc b/util/arena.cc
index 9367f71492..74078213ee 100644
--- a/util/arena.cc
+++ b/util/arena.cc
@@ -9,8 +9,7 @@ namespace leveldb {
 
 static const int kBlockSize = 4096;
 
-Arena::Arena() {
-  blocks_memory_ = 0;
+Arena::Arena() : memory_usage_(0) {
   alloc_ptr_ = NULL;  // First allocation will allocate a block
   alloc_bytes_remaining_ = 0;
 }
@@ -60,8 +59,9 @@ char* Arena::AllocateAligned(size_t bytes) {
 
 char* Arena::AllocateNewBlock(size_t block_bytes) {
   char* result = new char[block_bytes];
-  blocks_memory_ += block_bytes;
   blocks_.push_back(result);
+  memory_usage_.NoBarrier_Store(
+      reinterpret_cast<void*>(MemoryUsage() + block_bytes + sizeof(char*)));
   return result;
 }
 
diff --git a/util/arena.h b/util/arena.h
index 73bbf1cb9b..48bab33741 100644
--- a/util/arena.h
+++ b/util/arena.h
@@ -9,6 +9,7 @@
 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
+#include "port/port.h"
 
 namespace leveldb {
 
@@ -24,10 +25,9 @@ class Arena {
   char* AllocateAligned(size_t bytes);
 
   // Returns an estimate of the total memory usage of data allocated
-  // by the arena (including space allocated but not yet used for user
-  // allocations).
+  // by the arena.
   size_t MemoryUsage() const {
-    return blocks_memory_ + blocks_.capacity() * sizeof(char*);
+    return reinterpret_cast<uintptr_t>(memory_usage_.NoBarrier_Load());
   }
 
  private:
@@ -41,8 +41,8 @@ class Arena {
   // Array of new[] allocated memory blocks
   std::vector<char*> blocks_;
 
-  // Bytes of memory in blocks allocated so far
-  size_t blocks_memory_;
+  // Total memory usage of the arena.
+  port::AtomicPointer memory_usage_;
 
   // No copying allowed
   Arena(const Arena&);
diff --git a/util/bloom.cc b/util/bloom.cc
index a27a2ace28..bf3e4ca6e9 100644
--- a/util/bloom.cc
+++ b/util/bloom.cc
@@ -47,7 +47,7 @@ class BloomFilterPolicy : public FilterPolicy {
     dst->resize(init_size + bytes, 0);
     dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
     char* array = &(*dst)[init_size];
-    for (size_t i = 0; i < n; i++) {
+    for (int i = 0; i < n; i++) {
       // Use double-hashing to generate a sequence of hash values.
       // See analysis in [Kirsch,Mitzenmacher 2006].
       uint32_t h = BloomHash(keys[i]);
diff --git a/util/bloom_test.cc b/util/bloom_test.cc
index 77fb1b3159..1b87a2be3f 100644
--- a/util/bloom_test.cc
+++ b/util/bloom_test.cc
@@ -46,7 +46,8 @@ class BloomTest {
       key_slices.push_back(Slice(keys_[i]));
     }
     filter_.clear();
-    policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
+    policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
+                          &filter_);
     keys_.clear();
     if (kVerbose >= 2) DumpFilter();
   }
diff --git a/util/cache.cc b/util/cache.cc
index 8b197bc02a..ce46886171 100644
--- a/util/cache.cc
+++ b/util/cache.cc
@@ -19,6 +19,23 @@ Cache::~Cache() {
 namespace {
 
 // LRU cache implementation
+//
+// Cache entries have an "in_cache" boolean indicating whether the cache has a
+// reference on the entry.  The only ways that this can become false without the
+// entry being passed to its "deleter" are via Erase(), via Insert() when
+// an element with a duplicate key is inserted, or on destruction of the cache.
+//
+// The cache keeps two linked lists of items in the cache.  All items in the
+// cache are in one list or the other, and never both.  Items still referenced
+// by clients but erased from the cache are in neither list.  The lists are:
+// - in-use:  contains the items currently referenced by clients, in no
+//   particular order.  (This list is used for invariant checking.  If we
+//   removed the check, elements that would otherwise be on this list could be
+//   left as disconnected singleton lists.)
+// - LRU:  contains the items not currently referenced by clients, in LRU order
+// Elements are moved between these lists by the Ref() and Unref() methods,
+// when they detect an element in the cache acquiring or losing its only
+// external reference.
 
 // An entry is a variable length heap-allocated structure.  Entries
 // are kept in a circular doubly linked list ordered by access time.
@@ -30,7 +47,8 @@ struct LRUHandle {
   LRUHandle* prev;
   size_t charge;      // TODO(opt): Only allow uint32_t?
   size_t key_length;
-  uint32_t refs;
+  bool in_cache;      // Whether entry is in the cache.
+  uint32_t refs;      // References, including cache reference, if present.
   uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
   char key_data[1];   // Beginning of key
 
@@ -147,49 +165,77 @@ class LRUCache {
   Cache::Handle* Lookup(const Slice& key, uint32_t hash);
   void Release(Cache::Handle* handle);
   void Erase(const Slice& key, uint32_t hash);
+  void Prune();
+  size_t TotalCharge() const {
+    MutexLock l(&mutex_);
+    return usage_;
+  }
 
  private:
   void LRU_Remove(LRUHandle* e);
-  void LRU_Append(LRUHandle* e);
+  void LRU_Append(LRUHandle*list, LRUHandle* e);
+  void Ref(LRUHandle* e);
   void Unref(LRUHandle* e);
+  bool FinishErase(LRUHandle* e);
 
   // Initialized before use.
   size_t capacity_;
 
   // mutex_ protects the following state.
-  port::Mutex mutex_;
+  mutable port::Mutex mutex_;
   size_t usage_;
 
   // Dummy head of LRU list.
   // lru.prev is newest entry, lru.next is oldest entry.
+  // Entries have refs==1 and in_cache==true.
   LRUHandle lru_;
 
+  // Dummy head of in-use list.
+  // Entries are in use by clients, and have refs >= 2 and in_cache==true.
+  LRUHandle in_use_;
+
   HandleTable table_;
 };
 
 LRUCache::LRUCache()
     : usage_(0) {
-  // Make empty circular linked list
+  // Make empty circular linked lists.
   lru_.next = &lru_;
   lru_.prev = &lru_;
+  in_use_.next = &in_use_;
+  in_use_.prev = &in_use_;
 }
 
 LRUCache::~LRUCache() {
+  assert(in_use_.next == &in_use_);  // Error if caller has an unreleased handle
   for (LRUHandle* e = lru_.next; e != &lru_; ) {
     LRUHandle* next = e->next;
-    assert(e->refs == 1);  // Error if caller has an unreleased handle
+    assert(e->in_cache);
+    e->in_cache = false;
+    assert(e->refs == 1);  // Invariant of lru_ list.
     Unref(e);
     e = next;
   }
 }
 
+void LRUCache::Ref(LRUHandle* e) {
+  if (e->refs == 1 && e->in_cache) {  // If on lru_ list, move to in_use_ list.
+    LRU_Remove(e);
+    LRU_Append(&in_use_, e);
+  }
+  e->refs++;
+}
+
 void LRUCache::Unref(LRUHandle* e) {
   assert(e->refs > 0);
   e->refs--;
-  if (e->refs <= 0) {
-    usage_ -= e->charge;
+  if (e->refs == 0) { // Deallocate.
+    assert(!e->in_cache);
     (*e->deleter)(e->key(), e->value);
     free(e);
+  } else if (e->in_cache && e->refs == 1) {  // No longer in use; move to lru_ list.
+    LRU_Remove(e);
+    LRU_Append(&lru_, e);
   }
 }
 
@@ -198,10 +244,10 @@ void LRUCache::LRU_Remove(LRUHandle* e) {
   e->prev->next = e->next;
 }
 
-void LRUCache::LRU_Append(LRUHandle* e) {
-  // Make "e" newest entry by inserting just before lru_
-  e->next = &lru_;
-  e->prev = lru_.prev;
+void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
+  // Make "e" newest entry by inserting just before *list
+  e->next = list;
+  e->prev = list->prev;
   e->prev->next = e;
   e->next->prev = e;
 }
@@ -210,9 +256,7 @@ Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
   MutexLock l(&mutex_);
   LRUHandle* e = table_.Lookup(key, hash);
   if (e != NULL) {
-    e->refs++;
-    LRU_Remove(e);
-    LRU_Append(e);
+    Ref(e);
   }
   return reinterpret_cast<Cache::Handle*>(e);
 }
@@ -234,34 +278,58 @@ Cache::Handle* LRUCache::Insert(
   e->charge = charge;
   e->key_length = key.size();
   e->hash = hash;
-  e->refs = 2;  // One from LRUCache, one for the returned handle
+  e->in_cache = false;
+  e->refs = 1;  // for the returned handle.
   memcpy(e->key_data, key.data(), key.size());
-  LRU_Append(e);
-  usage_ += charge;
 
-  LRUHandle* old = table_.Insert(e);
-  if (old != NULL) {
-    LRU_Remove(old);
-    Unref(old);
-  }
+  if (capacity_ > 0) {
+    e->refs++;  // for the cache's reference.
+    e->in_cache = true;
+    LRU_Append(&in_use_, e);
+    usage_ += charge;
+    FinishErase(table_.Insert(e));
+  } // else don't cache.  (Tests use capacity_==0 to turn off caching.)
 
   while (usage_ > capacity_ && lru_.next != &lru_) {
     LRUHandle* old = lru_.next;
-    LRU_Remove(old);
-    table_.Remove(old->key(), old->hash);
-    Unref(old);
+    assert(old->refs == 1);
+    bool erased = FinishErase(table_.Remove(old->key(), old->hash));
+    if (!erased) {  // to avoid unused variable when compiled NDEBUG
+      assert(erased);
+    }
   }
 
   return reinterpret_cast<Cache::Handle*>(e);
 }
 
-void LRUCache::Erase(const Slice& key, uint32_t hash) {
-  MutexLock l(&mutex_);
-  LRUHandle* e = table_.Remove(key, hash);
+// If e != NULL, finish removing *e from the cache; it has already been removed
+// from the hash table.  Return whether e != NULL.  Requires mutex_ held.
+bool LRUCache::FinishErase(LRUHandle* e) {
   if (e != NULL) {
+    assert(e->in_cache);
     LRU_Remove(e);
+    e->in_cache = false;
+    usage_ -= e->charge;
     Unref(e);
   }
+  return e != NULL;
+}
+
+void LRUCache::Erase(const Slice& key, uint32_t hash) {
+  MutexLock l(&mutex_);
+  FinishErase(table_.Remove(key, hash));
+}
+
+void LRUCache::Prune() {
+  MutexLock l(&mutex_);
+  while (lru_.next != &lru_) {
+    LRUHandle* e = lru_.next;
+    assert(e->refs == 1);
+    bool erased = FinishErase(table_.Remove(e->key(), e->hash));
+    if (!erased) {  // to avoid unused variable when compiled NDEBUG
+      assert(erased);
+    }
+  }
 }
 
 static const int kNumShardBits = 4;
@@ -314,6 +382,18 @@ class ShardedLRUCache : public Cache {
     MutexLock l(&id_mutex_);
     return ++(last_id_);
   }
+  virtual void Prune() {
+    for (int s = 0; s < kNumShards; s++) {
+      shard_[s].Prune();
+    }
+  }
+  virtual size_t TotalCharge() const {
+    size_t total = 0;
+    for (int s = 0; s < kNumShards; s++) {
+      total += shard_[s].TotalCharge();
+    }
+    return total;
+  }
 };
 
 }  // end anonymous namespace
diff --git a/util/cache_test.cc b/util/cache_test.cc
index 43716715a8..468f7a6425 100644
--- a/util/cache_test.cc
+++ b/util/cache_test.cc
@@ -59,6 +59,11 @@ class CacheTest {
                                    &CacheTest::Deleter));
   }
 
+  Cache::Handle* InsertAndReturnHandle(int key, int value, int charge = 1) {
+    return cache_->Insert(EncodeKey(key), EncodeValue(value), charge,
+                          &CacheTest::Deleter);
+  }
+
   void Erase(int key) {
     cache_->Erase(EncodeKey(key));
   }
@@ -135,8 +140,11 @@ TEST(CacheTest, EntriesArePinned) {
 TEST(CacheTest, EvictionPolicy) {
   Insert(100, 101);
   Insert(200, 201);
+  Insert(300, 301);
+  Cache::Handle* h = cache_->Lookup(EncodeKey(300));
 
-  // Frequently used entry must be kept around
+  // Frequently used entry must be kept around,
+  // as must things that are still in use.
   for (int i = 0; i < kCacheSize + 100; i++) {
     Insert(1000+i, 2000+i);
     ASSERT_EQ(2000+i, Lookup(1000+i));
@@ -144,6 +152,25 @@ TEST(CacheTest, EvictionPolicy) {
   }
   ASSERT_EQ(101, Lookup(100));
   ASSERT_EQ(-1, Lookup(200));
+  ASSERT_EQ(301, Lookup(300));
+  cache_->Release(h);
+}
+
+TEST(CacheTest, UseExceedsCacheSize) {
+  // Overfill the cache, keeping handles on all inserted entries.
+  std::vector<Cache::Handle*> h;
+  for (int i = 0; i < kCacheSize + 100; i++) {
+    h.push_back(InsertAndReturnHandle(1000+i, 2000+i));
+  }
+
+  // Check that all the entries can be found in the cache.
+  for (int i = 0; i < h.size(); i++) {
+    ASSERT_EQ(2000+i, Lookup(1000+i));
+  }
+
+  for (int i = 0; i < h.size(); i++) {
+    cache_->Release(h[i]);
+  }
 }
 
 TEST(CacheTest, HeavyEntries) {
@@ -179,6 +206,19 @@ TEST(CacheTest, NewId) {
   ASSERT_NE(a, b);
 }
 
+TEST(CacheTest, Prune) {
+  Insert(1, 100);
+  Insert(2, 200);
+
+  Cache::Handle* handle = cache_->Lookup(EncodeKey(1));
+  ASSERT_TRUE(handle);
+  cache_->Prune();
+  cache_->Release(handle);
+
+  ASSERT_EQ(100, Lookup(1));
+  ASSERT_EQ(-1, Lookup(2));
+}
+
 }  // namespace leveldb
 
 int main(int argc, char** argv) {
diff --git a/util/env.cc b/util/env.cc
index c2600e964a..c58a0821ef 100644
--- a/util/env.cc
+++ b/util/env.cc
@@ -9,6 +9,10 @@ namespace leveldb {
 Env::~Env() {
 }
 
+Status Env::NewAppendableFile(const std::string& fname, WritableFile** result) {
+  return Status::NotSupported("NewAppendableFile", fname);
+}
+
 SequentialFile::~SequentialFile() {
 }
 
diff --git a/util/env_posix.cc b/util/env_posix.cc
index ba2667864a..e0fca52f46 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -351,6 +351,19 @@ class PosixEnv : public Env {
     return s;
   }
 
+  virtual Status NewAppendableFile(const std::string& fname,
+                                   WritableFile** result) {
+    Status s;
+    FILE* f = fopen(fname.c_str(), "a");
+    if (f == NULL) {
+      *result = NULL;
+      s = IOError(fname, errno);
+    } else {
+      *result = new PosixWritableFile(fname, f);
+    }
+    return s;
+  }
+
   virtual bool FileExists(const std::string& fname) {
     return access(fname.c_str(), F_OK) == 0;
   }
diff --git a/util/env_win.cc b/util/env_win.cc
index e11a96b791..b074b7579e 100644
--- a/util/env_win.cc
+++ b/util/env_win.cc
@@ -106,7 +106,7 @@ private:
 class Win32WritableFile : public WritableFile
 {
 public:
-    Win32WritableFile(const std::string& fname);
+    Win32WritableFile(const std::string& fname, bool append);
     ~Win32WritableFile();
 
     virtual Status Append(const Slice& data);
@@ -158,6 +158,8 @@ public:
         RandomAccessFile** result);
     virtual Status NewWritableFile(const std::string& fname,
         WritableFile** result);
+    virtual Status NewAppendableFile(const std::string& fname,
+        WritableFile** result);
 
     virtual bool FileExists(const std::string& fname);
 
@@ -423,17 +425,23 @@ void Win32RandomAccessFile::_CleanUp()
     }
 }
 
-Win32WritableFile::Win32WritableFile(const std::string& fname)
+Win32WritableFile::Win32WritableFile(const std::string& fname, bool append)
     : filename_(fname)
 {
     std::wstring path;
     ToWidePath(fname, path);
-    DWORD Flag = PathFileExistsW(path.c_str()) ? OPEN_EXISTING : CREATE_ALWAYS;
+    // NewAppendableFile: append to an existing file, or create a new one
+    //     if none exists - this is OPEN_ALWAYS behavior, with
+    //     FILE_APPEND_DATA to avoid having to manually position the file
+    //     pointer at the end of the file.
+    // NewWritableFile: create a new file, delete if it exists - this is
+    //     CREATE_ALWAYS behavior. This file is used for writing only so
+    //     use GENERIC_WRITE.
     _hFile = CreateFileW(path.c_str(),
-                         GENERIC_READ | GENERIC_WRITE,
+                         append ? FILE_APPEND_DATA : GENERIC_WRITE,
                          FILE_SHARE_READ|FILE_SHARE_DELETE|FILE_SHARE_WRITE,
                          NULL,
-                         Flag,
+                         append ? OPEN_ALWAYS : CREATE_ALWAYS,
                          FILE_ATTRIBUTE_NORMAL,
                          NULL);
     // CreateFileW returns INVALID_HANDLE_VALUE in case of error, always check isEnable() before use
@@ -823,7 +831,9 @@ Status Win32Env::NewLogger( const std::string& fname, Logger** result )
 {
     Status sRet;
     std::string path = fname;
-    Win32WritableFile* pMapFile = new Win32WritableFile(ModifyPath(path));
+    // Logs are opened with write semantics, not with append semantics
+    // (see PosixEnv::NewLogger)
+    Win32WritableFile* pMapFile = new Win32WritableFile(ModifyPath(path), false);
     if(!pMapFile->isEnable()){
         delete pMapFile;
         *result = NULL;
@@ -837,7 +847,20 @@ Status Win32Env::NewWritableFile( const std::string& fname, WritableFile** resul
 {
     Status sRet;
     std::string path = fname;
-    Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path));
+    Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path), false);
+    if(!pFile->isEnable()){
+        *result = NULL;
+        sRet = Status::IOError(fname,Win32::GetLastErrSz());
+    }else
+        *result = pFile;
+    return sRet;
+}
+
+Status Win32Env::NewAppendableFile( const std::string& fname, WritableFile** result )
+{
+    Status sRet;
+    std::string path = fname;
+    Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path), true);
     if(!pFile->isEnable()){
         *result = NULL;
         sRet = Status::IOError(fname,Win32::GetLastErrSz());
diff --git a/util/options.cc b/util/options.cc
index 76af5b9302..8b618fb1ae 100644
--- a/util/options.cc
+++ b/util/options.cc
@@ -22,8 +22,8 @@ Options::Options()
       block_size(4096),
       block_restart_interval(16),
       compression(kSnappyCompression),
+      reuse_logs(false),
       filter_policy(NULL) {
 }
 
-
 }  // namespace leveldb
diff --git a/util/testutil.h b/util/testutil.h
index adad3fc1ea..d7e4583702 100644
--- a/util/testutil.h
+++ b/util/testutil.h
@@ -45,6 +45,16 @@ class ErrorEnv : public EnvWrapper {
     }
     return target()->NewWritableFile(fname, result);
   }
+
+  virtual Status NewAppendableFile(const std::string& fname,
+                                   WritableFile** result) {
+    if (writable_file_error_) {
+      ++num_writable_file_errors_;
+      *result = NULL;
+      return Status::IOError(fname, "fake error");
+    }
+    return target()->NewAppendableFile(fname, result);
+  }
 };
 
 }  // namespace test
author	Pieter Wuille <pieter.wuille@gmail.com>	2016-12-01 16:14:45 -0800
committer	Pieter Wuille <pieter.wuille@gmail.com>	2016-12-01 16:14:45 -0800
commit	634ad517037b319147816f1d112b066528e1724a (patch)
tree	0088401632072effafed7c43d67a204467ffc460
parent	fb9857bfd68c13b52e14bc28dd981bc12501806a (diff)