aboutsummaryrefslogtreecommitdiff
path: root/src/leveldb/util/env_posix.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/leveldb/util/env_posix.cc')
-rw-r--r--src/leveldb/util/env_posix.cc1122
1 files changed, 661 insertions, 461 deletions
diff --git a/src/leveldb/util/env_posix.cc b/src/leveldb/util/env_posix.cc
index f77918313e..9f5863a0f3 100644
--- a/src/leveldb/util/env_posix.cc
+++ b/src/leveldb/util/env_posix.cc
@@ -1,706 +1,906 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
-#if !defined(LEVELDB_PLATFORM_WINDOWS)
#include <dirent.h>
-#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
-#include <time.h>
#include <unistd.h>
-#include <deque>
+
+#include <atomic>
+#include <cerrno>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
#include <limits>
+#include <queue>
#include <set>
+#include <string>
+#include <thread>
+#include <type_traits>
+#include <utility>
+
#include "leveldb/env.h"
#include "leveldb/slice.h"
+#include "leveldb/status.h"
#include "port/port.h"
-#include "util/logging.h"
-#include "util/mutexlock.h"
-#include "util/posix_logger.h"
+#include "port/thread_annotations.h"
#include "util/env_posix_test_helper.h"
+#include "util/posix_logger.h"
namespace leveldb {
namespace {
-static int open_read_only_file_limit = -1;
-static int mmap_limit = -1;
+// Set by EnvPosixTestHelper::SetReadOnlyMMapLimit() and MaxOpenFiles().
+int g_open_read_only_file_limit = -1;
+
+// Up to 4096 mmap regions for 64-bit binaries; none for 32-bit.
+constexpr const int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 4096 : 0;
+
+// Can be set using EnvPosixTestHelper::SetReadOnlyMMapLimit().
+int g_mmap_limit = kDefaultMmapLimit;
-static Status IOError(const std::string& context, int err_number) {
- return Status::IOError(context, strerror(err_number));
+// Common flags defined for all posix open operations
+#if defined(HAVE_O_CLOEXEC)
+constexpr const int kOpenBaseFlags = O_CLOEXEC;
+#else
+constexpr const int kOpenBaseFlags = 0;
+#endif // defined(HAVE_O_CLOEXEC)
+
+constexpr const size_t kWritableFileBufferSize = 65536;
+
+Status PosixError(const std::string& context, int error_number) {
+ if (error_number == ENOENT) {
+ return Status::NotFound(context, std::strerror(error_number));
+ } else {
+ return Status::IOError(context, std::strerror(error_number));
+ }
}
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
-// so that we do not end up running out of file descriptors, virtual memory,
-// or running into kernel performance problems for very large databases.
+// so that we do not run out of file descriptors or virtual memory, or run into
+// kernel performance problems for very large databases.
class Limiter {
public:
- // Limit maximum number of resources to |n|.
- Limiter(intptr_t n) {
- SetAllowed(n);
- }
+ // Limit maximum number of resources to |max_acquires|.
+ Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}
+
+ Limiter(const Limiter&) = delete;
+ Limiter operator=(const Limiter&) = delete;
// If another resource is available, acquire it and return true.
// Else return false.
bool Acquire() {
- if (GetAllowed() <= 0) {
- return false;
- }
- MutexLock l(&mu_);
- intptr_t x = GetAllowed();
- if (x <= 0) {
- return false;
- } else {
- SetAllowed(x - 1);
- return true;
- }
+ int old_acquires_allowed =
+ acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
+
+ if (old_acquires_allowed > 0) return true;
+
+ acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
+ return false;
}
// Release a resource acquired by a previous call to Acquire() that returned
// true.
- void Release() {
- MutexLock l(&mu_);
- SetAllowed(GetAllowed() + 1);
- }
+ void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); }
private:
- port::Mutex mu_;
- port::AtomicPointer allowed_;
-
- intptr_t GetAllowed() const {
- return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
- }
-
- // REQUIRES: mu_ must be held
- void SetAllowed(intptr_t v) {
- allowed_.Release_Store(reinterpret_cast<void*>(v));
- }
-
- Limiter(const Limiter&);
- void operator=(const Limiter&);
+ // The number of available resources.
+ //
+ // This is a counter and is not tied to the invariants of any other class, so
+ // it can be operated on safely using std::memory_order_relaxed.
+ std::atomic<int> acquires_allowed_;
};
-class PosixSequentialFile: public SequentialFile {
- private:
- std::string filename_;
- FILE* file_;
-
+// Implements sequential read access in a file using read().
+//
+// Instances of this class are thread-friendly but not thread-safe, as required
+// by the SequentialFile API.
+class PosixSequentialFile final : public SequentialFile {
public:
- PosixSequentialFile(const std::string& fname, FILE* f)
- : filename_(fname), file_(f) { }
- virtual ~PosixSequentialFile() { fclose(file_); }
-
- virtual Status Read(size_t n, Slice* result, char* scratch) {
- Status s;
- size_t r = fread_unlocked(scratch, 1, n, file_);
- *result = Slice(scratch, r);
- if (r < n) {
- if (feof(file_)) {
- // We leave status as ok if we hit the end of the file
- } else {
- // A partial read with an error: return a non-ok status
- s = IOError(filename_, errno);
+ PosixSequentialFile(std::string filename, int fd)
+ : fd_(fd), filename_(filename) {}
+ ~PosixSequentialFile() override { close(fd_); }
+
+ Status Read(size_t n, Slice* result, char* scratch) override {
+ Status status;
+ while (true) {
+ ::ssize_t read_size = ::read(fd_, scratch, n);
+ if (read_size < 0) { // Read error.
+ if (errno == EINTR) {
+ continue; // Retry
+ }
+ status = PosixError(filename_, errno);
+ break;
}
+ *result = Slice(scratch, read_size);
+ break;
}
- return s;
+ return status;
}
- virtual Status Skip(uint64_t n) {
- if (fseek(file_, n, SEEK_CUR)) {
- return IOError(filename_, errno);
+ Status Skip(uint64_t n) override {
+ if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
+ return PosixError(filename_, errno);
}
return Status::OK();
}
- virtual std::string GetName() const { return filename_; }
-};
+ virtual std::string GetName() const override { return filename_; }
-// pread() based random-access
-class PosixRandomAccessFile: public RandomAccessFile {
private:
- std::string filename_;
- bool temporary_fd_; // If true, fd_ is -1 and we open on every read.
- int fd_;
- Limiter* limiter_;
+ const int fd_;
+ const std::string filename_;
+};
+// Implements random read access in a file using pread().
+//
+// Instances of this class are thread-safe, as required by the RandomAccessFile
+// API. Instances are immutable and Read() only calls thread-safe library
+// functions.
+class PosixRandomAccessFile final : public RandomAccessFile {
public:
- PosixRandomAccessFile(const std::string& fname, int fd, Limiter* limiter)
- : filename_(fname), fd_(fd), limiter_(limiter) {
- temporary_fd_ = !limiter->Acquire();
- if (temporary_fd_) {
- // Open file on every access.
- close(fd_);
- fd_ = -1;
+ // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
+ // instance, and will be used to determine if .
+ PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
+ : has_permanent_fd_(fd_limiter->Acquire()),
+ fd_(has_permanent_fd_ ? fd : -1),
+ fd_limiter_(fd_limiter),
+ filename_(std::move(filename)) {
+ if (!has_permanent_fd_) {
+ assert(fd_ == -1);
+ ::close(fd); // The file will be opened on every read.
}
}
- virtual ~PosixRandomAccessFile() {
- if (!temporary_fd_) {
- close(fd_);
- limiter_->Release();
+ ~PosixRandomAccessFile() override {
+ if (has_permanent_fd_) {
+ assert(fd_ != -1);
+ ::close(fd_);
+ fd_limiter_->Release();
}
}
- virtual Status Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
+ Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const override {
int fd = fd_;
- if (temporary_fd_) {
- fd = open(filename_.c_str(), O_RDONLY);
+ if (!has_permanent_fd_) {
+ fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
if (fd < 0) {
- return IOError(filename_, errno);
+ return PosixError(filename_, errno);
}
}
- Status s;
- ssize_t r = pread(fd, scratch, n, static_cast<off_t>(offset));
- *result = Slice(scratch, (r < 0) ? 0 : r);
- if (r < 0) {
- // An error: return a non-ok status
- s = IOError(filename_, errno);
+ assert(fd != -1);
+
+ Status status;
+ ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
+ *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
+ if (read_size < 0) {
+ // An error: return a non-ok status.
+ status = PosixError(filename_, errno);
}
- if (temporary_fd_) {
+ if (!has_permanent_fd_) {
// Close the temporary file descriptor opened earlier.
- close(fd);
+ assert(fd != fd_);
+ ::close(fd);
}
- return s;
+ return status;
}
- virtual std::string GetName() const { return filename_; }
-};
+ virtual std::string GetName() const override { return filename_; }
-// mmap() based random-access
-class PosixMmapReadableFile: public RandomAccessFile {
private:
- std::string filename_;
- void* mmapped_region_;
- size_t length_;
- Limiter* limiter_;
+ const bool has_permanent_fd_; // If false, the file is opened on every read.
+ const int fd_; // -1 if has_permanent_fd_ is false.
+ Limiter* const fd_limiter_;
+ const std::string filename_;
+};
+// Implements random read access in a file using mmap().
+//
+// Instances of this class are thread-safe, as required by the RandomAccessFile
+// API. Instances are immutable and Read() only calls thread-safe library
+// functions.
+class PosixMmapReadableFile final : public RandomAccessFile {
public:
- // base[0,length-1] contains the mmapped contents of the file.
- PosixMmapReadableFile(const std::string& fname, void* base, size_t length,
- Limiter* limiter)
- : filename_(fname), mmapped_region_(base), length_(length),
- limiter_(limiter) {
- }
-
- virtual ~PosixMmapReadableFile() {
- munmap(mmapped_region_, length_);
- limiter_->Release();
- }
-
- virtual Status Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
- Status s;
+ // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
+ // must be the result of a successful call to mmap(). This instances takes
+ // over the ownership of the region.
+ //
+ // |mmap_limiter| must outlive this instance. The caller must have already
+ // aquired the right to use one mmap region, which will be released when this
+ // instance is destroyed.
+ PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
+ Limiter* mmap_limiter)
+ : mmap_base_(mmap_base),
+ length_(length),
+ mmap_limiter_(mmap_limiter),
+ filename_(std::move(filename)) {}
+
+ ~PosixMmapReadableFile() override {
+ ::munmap(static_cast<void*>(mmap_base_), length_);
+ mmap_limiter_->Release();
+ }
+
+ Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const override {
if (offset + n > length_) {
*result = Slice();
- s = IOError(filename_, EINVAL);
- } else {
- *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
+ return PosixError(filename_, EINVAL);
}
- return s;
+
+ *result = Slice(mmap_base_ + offset, n);
+ return Status::OK();
}
- virtual std::string GetName() const { return filename_; }
-};
+ virtual std::string GetName() const override { return filename_; }
-class PosixWritableFile : public WritableFile {
private:
- std::string filename_;
- FILE* file_;
+ char* const mmap_base_;
+ const size_t length_;
+ Limiter* const mmap_limiter_;
+ const std::string filename_;
+};
+class PosixWritableFile final : public WritableFile {
public:
- PosixWritableFile(const std::string& fname, FILE* f)
- : filename_(fname), file_(f) { }
-
- ~PosixWritableFile() {
- if (file_ != NULL) {
+ PosixWritableFile(std::string filename, int fd)
+ : pos_(0),
+ fd_(fd),
+ is_manifest_(IsManifest(filename)),
+ filename_(std::move(filename)),
+ dirname_(Dirname(filename_)) {}
+
+ ~PosixWritableFile() override {
+ if (fd_ >= 0) {
// Ignoring any potential errors
- fclose(file_);
+ Close();
}
}
- virtual Status Append(const Slice& data) {
- size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_);
- if (r != data.size()) {
- return IOError(filename_, errno);
+ Status Append(const Slice& data) override {
+ size_t write_size = data.size();
+ const char* write_data = data.data();
+
+ // Fit as much as possible into buffer.
+ size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
+ std::memcpy(buf_ + pos_, write_data, copy_size);
+ write_data += copy_size;
+ write_size -= copy_size;
+ pos_ += copy_size;
+ if (write_size == 0) {
+ return Status::OK();
}
- return Status::OK();
+
+ // Can't fit in buffer, so need to do at least one write.
+ Status status = FlushBuffer();
+ if (!status.ok()) {
+ return status;
+ }
+
+ // Small writes go to buffer, large writes are written directly.
+ if (write_size < kWritableFileBufferSize) {
+ std::memcpy(buf_, write_data, write_size);
+ pos_ = write_size;
+ return Status::OK();
+ }
+ return WriteUnbuffered(write_data, write_size);
}
- virtual Status Close() {
- Status result;
- if (fclose(file_) != 0) {
- result = IOError(filename_, errno);
+ Status Close() override {
+ Status status = FlushBuffer();
+ const int close_result = ::close(fd_);
+ if (close_result < 0 && status.ok()) {
+ status = PosixError(filename_, errno);
}
- file_ = NULL;
- return result;
+ fd_ = -1;
+ return status;
}
- virtual Status Flush() {
- if (fflush_unlocked(file_) != 0) {
- return IOError(filename_, errno);
+ Status Flush() override { return FlushBuffer(); }
+
+ Status Sync() override {
+ // Ensure new files referred to by the manifest are in the filesystem.
+ //
+ // This needs to happen before the manifest file is flushed to disk, to
+ // avoid crashing in a state where the manifest refers to files that are not
+ // yet on disk.
+ Status status = SyncDirIfManifest();
+ if (!status.ok()) {
+ return status;
+ }
+
+ status = FlushBuffer();
+ if (!status.ok()) {
+ return status;
+ }
+
+ return SyncFd(fd_, filename_, false);
+ }
+
+ private:
+ Status FlushBuffer() {
+ Status status = WriteUnbuffered(buf_, pos_);
+ pos_ = 0;
+ return status;
+ }
+
+ Status WriteUnbuffered(const char* data, size_t size) {
+ while (size > 0) {
+ ssize_t write_result = ::write(fd_, data, size);
+ if (write_result < 0) {
+ if (errno == EINTR) {
+ continue; // Retry
+ }
+ return PosixError(filename_, errno);
+ }
+ data += write_result;
+ size -= write_result;
}
return Status::OK();
}
Status SyncDirIfManifest() {
- const char* f = filename_.c_str();
- const char* sep = strrchr(f, '/');
- Slice basename;
- std::string dir;
- if (sep == NULL) {
- dir = ".";
- basename = f;
+ Status status;
+ if (!is_manifest_) {
+ return status;
+ }
+
+ int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
+ if (fd < 0) {
+ status = PosixError(dirname_, errno);
} else {
- dir = std::string(f, sep - f);
- basename = sep + 1;
+ status = SyncFd(fd, dirname_, true);
+ ::close(fd);
+ }
+ return status;
+ }
+
+ // Ensures that all the caches associated with the given file descriptor's
+ // data are flushed all the way to durable media, and can withstand power
+ // failures.
+ //
+ // The path argument is only used to populate the description string in the
+ // returned Status if an error occurs.
+ static Status SyncFd(int fd, const std::string& fd_path, bool syncing_dir) {
+#if HAVE_FULLFSYNC
+ // On macOS and iOS, fsync() doesn't guarantee durability past power
+ // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
+ // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
+ // fsync().
+ if (::fcntl(fd, F_FULLFSYNC) == 0) {
+ return Status::OK();
}
- Status s;
- if (basename.starts_with("MANIFEST")) {
- int fd = open(dir.c_str(), O_RDONLY);
- if (fd < 0) {
- s = IOError(dir, errno);
- } else {
- if (fsync(fd) < 0 && errno != EINVAL) {
- s = IOError(dir, errno);
- }
- close(fd);
- }
+#endif // HAVE_FULLFSYNC
+
+#if HAVE_FDATASYNC
+ bool sync_success = ::fdatasync(fd) == 0;
+#else
+ bool sync_success = ::fsync(fd) == 0;
+#endif // HAVE_FDATASYNC
+
+ if (sync_success) {
+ return Status::OK();
}
- return s;
+ // Do not crash if filesystem can't fsync directories
+ // (see https://github.com/bitcoin/bitcoin/pull/10000)
+ if (syncing_dir && errno == EINVAL) {
+ return Status::OK();
+ }
+ return PosixError(fd_path, errno);
}
- virtual Status Sync() {
- // Ensure new files referred to by the manifest are in the filesystem.
- Status s = SyncDirIfManifest();
- if (!s.ok()) {
- return s;
+ // Returns the directory name in a path pointing to a file.
+ //
+ // Returns "." if the path does not contain any directory separator.
+ static std::string Dirname(const std::string& filename) {
+ std::string::size_type separator_pos = filename.rfind('/');
+ if (separator_pos == std::string::npos) {
+ return std::string(".");
}
- if (fflush_unlocked(file_) != 0 ||
- fdatasync(fileno(file_)) != 0) {
- s = Status::IOError(filename_, strerror(errno));
+ // The filename component should not contain a path separator. If it does,
+ // the splitting was done incorrectly.
+ assert(filename.find('/', separator_pos + 1) == std::string::npos);
+
+ return filename.substr(0, separator_pos);
+ }
+
+ // Extracts the file name from a path pointing to a file.
+ //
+ // The returned Slice points to |filename|'s data buffer, so it is only valid
+ // while |filename| is alive and unchanged.
+ static Slice Basename(const std::string& filename) {
+ std::string::size_type separator_pos = filename.rfind('/');
+ if (separator_pos == std::string::npos) {
+ return Slice(filename);
}
- return s;
+ // The filename component should not contain a path separator. If it does,
+ // the splitting was done incorrectly.
+ assert(filename.find('/', separator_pos + 1) == std::string::npos);
+
+ return Slice(filename.data() + separator_pos + 1,
+ filename.length() - separator_pos - 1);
}
- virtual std::string GetName() const { return filename_; }
+ // True if the given file is a manifest file.
+ static bool IsManifest(const std::string& filename) {
+ return Basename(filename).starts_with("MANIFEST");
+ }
+
+ virtual std::string GetName() const override { return filename_; }
+
+ // buf_[0, pos_ - 1] contains data to be written to fd_.
+ char buf_[kWritableFileBufferSize];
+ size_t pos_;
+ int fd_;
+
+ const bool is_manifest_; // True if the file's name starts with MANIFEST.
+ const std::string filename_;
+ const std::string dirname_; // The directory of filename_.
};
-static int LockOrUnlock(int fd, bool lock) {
+int LockOrUnlock(int fd, bool lock) {
errno = 0;
- struct flock f;
- memset(&f, 0, sizeof(f));
- f.l_type = (lock ? F_WRLCK : F_UNLCK);
- f.l_whence = SEEK_SET;
- f.l_start = 0;
- f.l_len = 0; // Lock/unlock entire file
- return fcntl(fd, F_SETLK, &f);
+ struct ::flock file_lock_info;
+ std::memset(&file_lock_info, 0, sizeof(file_lock_info));
+ file_lock_info.l_type = (lock ? F_WRLCK : F_UNLCK);
+ file_lock_info.l_whence = SEEK_SET;
+ file_lock_info.l_start = 0;
+ file_lock_info.l_len = 0; // Lock/unlock entire file.
+ return ::fcntl(fd, F_SETLK, &file_lock_info);
}
+// Instances are thread-safe because they are immutable.
class PosixFileLock : public FileLock {
public:
- int fd_;
- std::string name_;
+ PosixFileLock(int fd, std::string filename)
+ : fd_(fd), filename_(std::move(filename)) {}
+
+ int fd() const { return fd_; }
+ const std::string& filename() const { return filename_; }
+
+ private:
+ const int fd_;
+ const std::string filename_;
};
-// Set of locked files. We keep a separate set instead of just
-// relying on fcntrl(F_SETLK) since fcntl(F_SETLK) does not provide
-// any protection against multiple uses from the same process.
+// Tracks the files locked by PosixEnv::LockFile().
+//
+// We maintain a separate set instead of relying on fcntl(F_SETLK) because
+// fcntl(F_SETLK) does not provide any protection against multiple uses from the
+// same process.
+//
+// Instances are thread-safe because all member data is guarded by a mutex.
class PosixLockTable {
- private:
- port::Mutex mu_;
- std::set<std::string> locked_files_;
public:
- bool Insert(const std::string& fname) {
- MutexLock l(&mu_);
- return locked_files_.insert(fname).second;
- }
- void Remove(const std::string& fname) {
- MutexLock l(&mu_);
+ bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
+ mu_.Lock();
+ bool succeeded = locked_files_.insert(fname).second;
+ mu_.Unlock();
+ return succeeded;
+ }
+ void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
+ mu_.Lock();
locked_files_.erase(fname);
+ mu_.Unlock();
}
+
+ private:
+ port::Mutex mu_;
+ std::set<std::string> locked_files_ GUARDED_BY(mu_);
};
class PosixEnv : public Env {
public:
PosixEnv();
- virtual ~PosixEnv() {
- char msg[] = "Destroying Env::Default()\n";
- fwrite(msg, 1, sizeof(msg), stderr);
- abort();
- }
-
- virtual Status NewSequentialFile(const std::string& fname,
- SequentialFile** result) {
- FILE* f = fopen(fname.c_str(), "r");
- if (f == NULL) {
- *result = NULL;
- return IOError(fname, errno);
- } else {
- *result = new PosixSequentialFile(fname, f);
- return Status::OK();
+ ~PosixEnv() override {
+ static const char msg[] =
+ "PosixEnv singleton destroyed. Unsupported behavior!\n";
+ std::fwrite(msg, 1, sizeof(msg), stderr);
+ std::abort();
+ }
+
+ Status NewSequentialFile(const std::string& filename,
+ SequentialFile** result) override {
+ int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
+ if (fd < 0) {
+ *result = nullptr;
+ return PosixError(filename, errno);
}
+
+ *result = new PosixSequentialFile(filename, fd);
+ return Status::OK();
}
- virtual Status NewRandomAccessFile(const std::string& fname,
- RandomAccessFile** result) {
- *result = NULL;
- Status s;
- int fd = open(fname.c_str(), O_RDONLY);
+ Status NewRandomAccessFile(const std::string& filename,
+ RandomAccessFile** result) override {
+ *result = nullptr;
+ int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
if (fd < 0) {
- s = IOError(fname, errno);
- } else if (mmap_limit_.Acquire()) {
- uint64_t size;
- s = GetFileSize(fname, &size);
- if (s.ok()) {
- void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
- if (base != MAP_FAILED) {
- *result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_);
- } else {
- s = IOError(fname, errno);
- }
- }
- close(fd);
- if (!s.ok()) {
- mmap_limit_.Release();
+ return PosixError(filename, errno);
+ }
+
+ if (!mmap_limiter_.Acquire()) {
+ *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
+ return Status::OK();
+ }
+
+ uint64_t file_size;
+ Status status = GetFileSize(filename, &file_size);
+ if (status.ok()) {
+ void* mmap_base =
+ ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (mmap_base != MAP_FAILED) {
+ *result = new PosixMmapReadableFile(filename,
+ reinterpret_cast<char*>(mmap_base),
+ file_size, &mmap_limiter_);
+ } else {
+ status = PosixError(filename, errno);
}
- } else {
- *result = new PosixRandomAccessFile(fname, fd, &fd_limit_);
}
- return s;
+ ::close(fd);
+ if (!status.ok()) {
+ mmap_limiter_.Release();
+ }
+ return status;
}
- virtual Status NewWritableFile(const std::string& fname,
- WritableFile** result) {
- Status s;
- FILE* f = fopen(fname.c_str(), "w");
- if (f == NULL) {
- *result = NULL;
- s = IOError(fname, errno);
- } else {
- *result = new PosixWritableFile(fname, f);
+ Status NewWritableFile(const std::string& filename,
+ WritableFile** result) override {
+ int fd = ::open(filename.c_str(),
+ O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
+ if (fd < 0) {
+ *result = nullptr;
+ return PosixError(filename, errno);
}
- return s;
+
+ *result = new PosixWritableFile(filename, fd);
+ return Status::OK();
}
- virtual Status NewAppendableFile(const std::string& fname,
- WritableFile** result) {
- Status s;
- FILE* f = fopen(fname.c_str(), "a");
- if (f == NULL) {
- *result = NULL;
- s = IOError(fname, errno);
- } else {
- *result = new PosixWritableFile(fname, f);
+ Status NewAppendableFile(const std::string& filename,
+ WritableFile** result) override {
+ int fd = ::open(filename.c_str(),
+ O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
+ if (fd < 0) {
+ *result = nullptr;
+ return PosixError(filename, errno);
}
- return s;
+
+ *result = new PosixWritableFile(filename, fd);
+ return Status::OK();
}
- virtual bool FileExists(const std::string& fname) {
- return access(fname.c_str(), F_OK) == 0;
+ bool FileExists(const std::string& filename) override {
+ return ::access(filename.c_str(), F_OK) == 0;
}
- virtual Status GetChildren(const std::string& dir,
- std::vector<std::string>* result) {
+ Status GetChildren(const std::string& directory_path,
+ std::vector<std::string>* result) override {
result->clear();
- DIR* d = opendir(dir.c_str());
- if (d == NULL) {
- return IOError(dir, errno);
+ ::DIR* dir = ::opendir(directory_path.c_str());
+ if (dir == nullptr) {
+ return PosixError(directory_path, errno);
}
- struct dirent* entry;
- while ((entry = readdir(d)) != NULL) {
- result->push_back(entry->d_name);
+ struct ::dirent* entry;
+ while ((entry = ::readdir(dir)) != nullptr) {
+ result->emplace_back(entry->d_name);
}
- closedir(d);
+ ::closedir(dir);
return Status::OK();
}
- virtual Status DeleteFile(const std::string& fname) {
- Status result;
- if (unlink(fname.c_str()) != 0) {
- result = IOError(fname, errno);
+ Status DeleteFile(const std::string& filename) override {
+ if (::unlink(filename.c_str()) != 0) {
+ return PosixError(filename, errno);
}
- return result;
+ return Status::OK();
}
- virtual Status CreateDir(const std::string& name) {
- Status result;
- if (mkdir(name.c_str(), 0755) != 0) {
- result = IOError(name, errno);
+ Status CreateDir(const std::string& dirname) override {
+ if (::mkdir(dirname.c_str(), 0755) != 0) {
+ return PosixError(dirname, errno);
}
- return result;
+ return Status::OK();
}
- virtual Status DeleteDir(const std::string& name) {
- Status result;
- if (rmdir(name.c_str()) != 0) {
- result = IOError(name, errno);
+ Status DeleteDir(const std::string& dirname) override {
+ if (::rmdir(dirname.c_str()) != 0) {
+ return PosixError(dirname, errno);
}
- return result;
+ return Status::OK();
}
- virtual Status GetFileSize(const std::string& fname, uint64_t* size) {
- Status s;
- struct stat sbuf;
- if (stat(fname.c_str(), &sbuf) != 0) {
+ Status GetFileSize(const std::string& filename, uint64_t* size) override {
+ struct ::stat file_stat;
+ if (::stat(filename.c_str(), &file_stat) != 0) {
*size = 0;
- s = IOError(fname, errno);
- } else {
- *size = sbuf.st_size;
+ return PosixError(filename, errno);
}
- return s;
+ *size = file_stat.st_size;
+ return Status::OK();
}
- virtual Status RenameFile(const std::string& src, const std::string& target) {
- Status result;
- if (rename(src.c_str(), target.c_str()) != 0) {
- result = IOError(src, errno);
+ Status RenameFile(const std::string& from, const std::string& to) override {
+ if (std::rename(from.c_str(), to.c_str()) != 0) {
+ return PosixError(from, errno);
}
- return result;
+ return Status::OK();
}
- virtual Status LockFile(const std::string& fname, FileLock** lock) {
- *lock = NULL;
- Status result;
- int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
+ Status LockFile(const std::string& filename, FileLock** lock) override {
+ *lock = nullptr;
+
+ int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644);
if (fd < 0) {
- result = IOError(fname, errno);
- } else if (!locks_.Insert(fname)) {
- close(fd);
- result = Status::IOError("lock " + fname, "already held by process");
- } else if (LockOrUnlock(fd, true) == -1) {
- result = IOError("lock " + fname, errno);
- close(fd);
- locks_.Remove(fname);
- } else {
- PosixFileLock* my_lock = new PosixFileLock;
- my_lock->fd_ = fd;
- my_lock->name_ = fname;
- *lock = my_lock;
+ return PosixError(filename, errno);
}
- return result;
+
+ if (!locks_.Insert(filename)) {
+ ::close(fd);
+ return Status::IOError("lock " + filename, "already held by process");
+ }
+
+ if (LockOrUnlock(fd, true) == -1) {
+ int lock_errno = errno;
+ ::close(fd);
+ locks_.Remove(filename);
+ return PosixError("lock " + filename, lock_errno);
+ }
+
+ *lock = new PosixFileLock(fd, filename);
+ return Status::OK();
}
- virtual Status UnlockFile(FileLock* lock) {
- PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
- Status result;
- if (LockOrUnlock(my_lock->fd_, false) == -1) {
- result = IOError("unlock", errno);
+ Status UnlockFile(FileLock* lock) override {
+ PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
+ if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
+ return PosixError("unlock " + posix_file_lock->filename(), errno);
}
- locks_.Remove(my_lock->name_);
- close(my_lock->fd_);
- delete my_lock;
- return result;
+ locks_.Remove(posix_file_lock->filename());
+ ::close(posix_file_lock->fd());
+ delete posix_file_lock;
+ return Status::OK();
}
- virtual void Schedule(void (*function)(void*), void* arg);
+ void Schedule(void (*background_work_function)(void* background_work_arg),
+ void* background_work_arg) override;
- virtual void StartThread(void (*function)(void* arg), void* arg);
+ void StartThread(void (*thread_main)(void* thread_main_arg),
+ void* thread_main_arg) override {
+ std::thread new_thread(thread_main, thread_main_arg);
+ new_thread.detach();
+ }
- virtual Status GetTestDirectory(std::string* result) {
- const char* env = getenv("TEST_TMPDIR");
+ Status GetTestDirectory(std::string* result) override {
+ const char* env = std::getenv("TEST_TMPDIR");
if (env && env[0] != '\0') {
*result = env;
} else {
char buf[100];
- snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", int(geteuid()));
+ std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
+ static_cast<int>(::geteuid()));
*result = buf;
}
- // Directory may already exist
+
+ // The CreateDir status is ignored because the directory may already exist.
CreateDir(*result);
+
return Status::OK();
}
- static uint64_t gettid() {
- pthread_t tid = pthread_self();
- uint64_t thread_id = 0;
- memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid)));
- return thread_id;
- }
+ Status NewLogger(const std::string& filename, Logger** result) override {
+ int fd = ::open(filename.c_str(),
+ O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
+ if (fd < 0) {
+ *result = nullptr;
+ return PosixError(filename, errno);
+ }
- virtual Status NewLogger(const std::string& fname, Logger** result) {
- FILE* f = fopen(fname.c_str(), "w");
- if (f == NULL) {
- *result = NULL;
- return IOError(fname, errno);
+ std::FILE* fp = ::fdopen(fd, "w");
+ if (fp == nullptr) {
+ ::close(fd);
+ *result = nullptr;
+ return PosixError(filename, errno);
} else {
- *result = new PosixLogger(f, &PosixEnv::gettid);
+ *result = new PosixLogger(fp);
return Status::OK();
}
}
- virtual uint64_t NowMicros() {
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+ uint64_t NowMicros() override {
+ static constexpr uint64_t kUsecondsPerSecond = 1000000;
+ struct ::timeval tv;
+ ::gettimeofday(&tv, nullptr);
+ return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
}
- virtual void SleepForMicroseconds(int micros) {
- usleep(micros);
+ void SleepForMicroseconds(int micros) override {
+ std::this_thread::sleep_for(std::chrono::microseconds(micros));
}
private:
- void PthreadCall(const char* label, int result) {
- if (result != 0) {
- fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
- abort();
- }
- }
+ void BackgroundThreadMain();
- // BGThread() is the body of the background thread
- void BGThread();
- static void* BGThreadWrapper(void* arg) {
- reinterpret_cast<PosixEnv*>(arg)->BGThread();
- return NULL;
+ static void BackgroundThreadEntryPoint(PosixEnv* env) {
+ env->BackgroundThreadMain();
}
- pthread_mutex_t mu_;
- pthread_cond_t bgsignal_;
- pthread_t bgthread_;
- bool started_bgthread_;
+ // Stores the work item data in a Schedule() call.
+ //
+ // Instances are constructed on the thread calling Schedule() and used on the
+ // background thread.
+ //
+ // This structure is thread-safe beacuse it is immutable.
+ struct BackgroundWorkItem {
+ explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
+ : function(function), arg(arg) {}
+
+ void (*const function)(void*);
+ void* const arg;
+ };
- // Entry per Schedule() call
- struct BGItem { void* arg; void (*function)(void*); };
- typedef std::deque<BGItem> BGQueue;
- BGQueue queue_;
+ port::Mutex background_work_mutex_;
+ port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
+ bool started_background_thread_ GUARDED_BY(background_work_mutex_);
- PosixLockTable locks_;
- Limiter mmap_limit_;
- Limiter fd_limit_;
+ std::queue<BackgroundWorkItem> background_work_queue_
+ GUARDED_BY(background_work_mutex_);
+
+ PosixLockTable locks_; // Thread-safe.
+ Limiter mmap_limiter_; // Thread-safe.
+ Limiter fd_limiter_; // Thread-safe.
};
// Return the maximum number of concurrent mmaps.
-static int MaxMmaps() {
- if (mmap_limit >= 0) {
- return mmap_limit;
- }
- // Up to 4096 mmaps for 64-bit binaries; none for smaller pointer sizes.
- mmap_limit = sizeof(void*) >= 8 ? 4096 : 0;
- return mmap_limit;
-}
+int MaxMmaps() { return g_mmap_limit; }
// Return the maximum number of read-only files to keep open.
-static intptr_t MaxOpenFiles() {
- if (open_read_only_file_limit >= 0) {
- return open_read_only_file_limit;
+int MaxOpenFiles() {
+ if (g_open_read_only_file_limit >= 0) {
+ return g_open_read_only_file_limit;
}
- struct rlimit rlim;
- if (getrlimit(RLIMIT_NOFILE, &rlim)) {
+ struct ::rlimit rlim;
+ if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
// getrlimit failed, fallback to hard-coded default.
- open_read_only_file_limit = 50;
+ g_open_read_only_file_limit = 50;
} else if (rlim.rlim_cur == RLIM_INFINITY) {
- open_read_only_file_limit = std::numeric_limits<int>::max();
+ g_open_read_only_file_limit = std::numeric_limits<int>::max();
} else {
// Allow use of 20% of available file descriptors for read-only files.
- open_read_only_file_limit = rlim.rlim_cur / 5;
+ g_open_read_only_file_limit = rlim.rlim_cur / 5;
}
- return open_read_only_file_limit;
+ return g_open_read_only_file_limit;
}
+} // namespace
+
PosixEnv::PosixEnv()
- : started_bgthread_(false),
- mmap_limit_(MaxMmaps()),
- fd_limit_(MaxOpenFiles()) {
- PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
- PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
-}
+ : background_work_cv_(&background_work_mutex_),
+ started_background_thread_(false),
+ mmap_limiter_(MaxMmaps()),
+ fd_limiter_(MaxOpenFiles()) {}
-void PosixEnv::Schedule(void (*function)(void*), void* arg) {
- PthreadCall("lock", pthread_mutex_lock(&mu_));
+void PosixEnv::Schedule(
+ void (*background_work_function)(void* background_work_arg),
+ void* background_work_arg) {
+ background_work_mutex_.Lock();
- // Start background thread if necessary
- if (!started_bgthread_) {
- started_bgthread_ = true;
- PthreadCall(
- "create thread",
- pthread_create(&bgthread_, NULL, &PosixEnv::BGThreadWrapper, this));
+ // Start the background thread, if we haven't done so already.
+ if (!started_background_thread_) {
+ started_background_thread_ = true;
+ std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
+ background_thread.detach();
}
- // If the queue is currently empty, the background thread may currently be
- // waiting.
- if (queue_.empty()) {
- PthreadCall("signal", pthread_cond_signal(&bgsignal_));
+ // If the queue is empty, the background thread may be waiting for work.
+ if (background_work_queue_.empty()) {
+ background_work_cv_.Signal();
}
- // Add to priority queue
- queue_.push_back(BGItem());
- queue_.back().function = function;
- queue_.back().arg = arg;
-
- PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+ background_work_queue_.emplace(background_work_function, background_work_arg);
+ background_work_mutex_.Unlock();
}
-void PosixEnv::BGThread() {
+void PosixEnv::BackgroundThreadMain() {
while (true) {
- // Wait until there is an item that is ready to run
- PthreadCall("lock", pthread_mutex_lock(&mu_));
- while (queue_.empty()) {
- PthreadCall("wait", pthread_cond_wait(&bgsignal_, &mu_));
+ background_work_mutex_.Lock();
+
+ // Wait until there is work to be done.
+ while (background_work_queue_.empty()) {
+ background_work_cv_.Wait();
}
- void (*function)(void*) = queue_.front().function;
- void* arg = queue_.front().arg;
- queue_.pop_front();
+ assert(!background_work_queue_.empty());
+ auto background_work_function = background_work_queue_.front().function;
+ void* background_work_arg = background_work_queue_.front().arg;
+ background_work_queue_.pop();
- PthreadCall("unlock", pthread_mutex_unlock(&mu_));
- (*function)(arg);
+ background_work_mutex_.Unlock();
+ background_work_function(background_work_arg);
}
}
namespace {
-struct StartThreadState {
- void (*user_function)(void*);
- void* arg;
+
+// Wraps an Env instance whose destructor is never created.
+//
+// Intended usage:
+// using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
+// void ConfigurePosixEnv(int param) {
+// PlatformSingletonEnv::AssertEnvNotInitialized();
+// // set global configuration flags.
+// }
+// Env* Env::Default() {
+// static PlatformSingletonEnv default_env;
+// return default_env.env();
+// }
+template <typename EnvType>
+class SingletonEnv {
+ public:
+ SingletonEnv() {
+#if !defined(NDEBUG)
+ env_initialized_.store(true, std::memory_order::memory_order_relaxed);
+#endif // !defined(NDEBUG)
+ static_assert(sizeof(env_storage_) >= sizeof(EnvType),
+ "env_storage_ will not fit the Env");
+ static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType),
+ "env_storage_ does not meet the Env's alignment needs");
+ new (&env_storage_) EnvType();
+ }
+ ~SingletonEnv() = default;
+
+ SingletonEnv(const SingletonEnv&) = delete;
+ SingletonEnv& operator=(const SingletonEnv&) = delete;
+
+ Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
+
+ static void AssertEnvNotInitialized() {
+#if !defined(NDEBUG)
+ assert(!env_initialized_.load(std::memory_order::memory_order_relaxed));
+#endif // !defined(NDEBUG)
+ }
+
+ private:
+ typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type
+ env_storage_;
+#if !defined(NDEBUG)
+ static std::atomic<bool> env_initialized_;
+#endif // !defined(NDEBUG)
};
-}
-static void* StartThreadWrapper(void* arg) {
- StartThreadState* state = reinterpret_cast<StartThreadState*>(arg);
- state->user_function(state->arg);
- delete state;
- return NULL;
-}
-void PosixEnv::StartThread(void (*function)(void* arg), void* arg) {
- pthread_t t;
- StartThreadState* state = new StartThreadState;
- state->user_function = function;
- state->arg = arg;
- PthreadCall("start thread",
- pthread_create(&t, NULL, &StartThreadWrapper, state));
-}
+#if !defined(NDEBUG)
+template <typename EnvType>
+std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
+#endif // !defined(NDEBUG)
-} // namespace
+using PosixDefaultEnv = SingletonEnv<PosixEnv>;
-static pthread_once_t once = PTHREAD_ONCE_INIT;
-static Env* default_env;
-static void InitDefaultEnv() { default_env = new PosixEnv; }
+} // namespace
void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
- assert(default_env == NULL);
- open_read_only_file_limit = limit;
+ PosixDefaultEnv::AssertEnvNotInitialized();
+ g_open_read_only_file_limit = limit;
}
void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
- assert(default_env == NULL);
- mmap_limit = limit;
+ PosixDefaultEnv::AssertEnvNotInitialized();
+ g_mmap_limit = limit;
}
Env* Env::Default() {
- pthread_once(&once, InitDefaultEnv);
- return default_env;
+ static PosixDefaultEnv env_container;
+ return env_container.env();
}
} // namespace leveldb
-
-#endif