aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAva Chow <github@achow101.com>2024-09-16 23:09:16 -0400
committerAva Chow <github@achow101.com>2024-09-16 23:09:16 -0400
commit9f1aa88d4d9596aeb5220fa30ee3972294b62793 (patch)
tree19f6cc5b1db80d5593ed519766e2ad445b095aea
parent06329eb13488640159057c897978b3f0f2e33ab5 (diff)
parenta240e150e837b5a95ed19765a2e8b7c5b6013f35 (diff)
Merge bitcoin/bitcoin#30884: streams: cache file position within AutoFile
a240e150e837b5a95ed19765a2e8b7c5b6013f35 streams: remove AutoFile::Get() entirely (Pieter Wuille) e624a9bef16b6335fd119c10698352b59bf2930a streams: cache file position within AutoFile (Pieter Wuille) Pull request description: Fixes #30833. Instead of relying on frequent `ftell` calls (which appear to cause a significant slowdown on some systems) in XOR-enabled `AutoFile`s, cache the file position within `AutoFile` itself. ACKs for top commit: achow101: ACK a240e150e837b5a95ed19765a2e8b7c5b6013f35 davidgumberg: untested reACK https://github.com/bitcoin/bitcoin/pull/30884/commits/a240e150e837b5a95ed19765a2e8b7c5b6013f35 theStack: Code-review ACK a240e150e837b5a95ed19765a2e8b7c5b6013f35 Tree-SHA512: fd3681edc018afaf955dc7a41a0c953ca80d46c1129e3c5b306c87c95aae93b2fe7b900794eb8b6f10491f9211645e7939918a28838295e6873eb226fca7006f
-rw-r--r--src/addrdb.cpp2
-rw-r--r--src/bench/streams_findbyte.cpp2
-rw-r--r--src/index/blockfilterindex.cpp6
-rw-r--r--src/index/txindex.cpp5
-rw-r--r--src/node/blockstorage.cpp4
-rw-r--r--src/node/mempool_persist.cpp4
-rw-r--r--src/node/utxo_snapshot.cpp6
-rw-r--r--src/streams.cpp69
-rw-r--r--src/streams.h13
-rw-r--r--src/test/fuzz/autofile.cpp1
-rw-r--r--src/test/streams_tests.cpp6
-rw-r--r--src/util/asmap.cpp6
12 files changed, 75 insertions, 49 deletions
diff --git a/src/addrdb.cpp b/src/addrdb.cpp
index e9838d7222..b89141c88e 100644
--- a/src/addrdb.cpp
+++ b/src/addrdb.cpp
@@ -73,7 +73,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
remove(pathTmp);
return false;
}
- if (!FileCommit(fileout.Get())) {
+ if (!fileout.Commit()) {
fileout.fclose();
remove(pathTmp);
LogError("%s: Failed to flush file %s\n", __func__, fs::PathToString(pathTmp));
diff --git a/src/bench/streams_findbyte.cpp b/src/bench/streams_findbyte.cpp
index 5098262e9a..004bf8ffc9 100644
--- a/src/bench/streams_findbyte.cpp
+++ b/src/bench/streams_findbyte.cpp
@@ -19,7 +19,7 @@ static void FindByte(benchmark::Bench& bench)
uint8_t data[file_size] = {0};
data[file_size-1] = 1;
file << data;
- std::rewind(file.Get());
+ file.seek(0, SEEK_SET);
BufferedFile bf{file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size};
bench.run([&] {
diff --git a/src/index/blockfilterindex.cpp b/src/index/blockfilterindex.cpp
index 41bdca9df5..26de7eee32 100644
--- a/src/index/blockfilterindex.cpp
+++ b/src/index/blockfilterindex.cpp
@@ -151,7 +151,7 @@ bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
LogError("%s: Failed to open filter file %d\n", __func__, pos.nFile);
return false;
}
- if (!FileCommit(file.Get())) {
+ if (!file.Commit()) {
LogError("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
return false;
}
@@ -201,11 +201,11 @@ size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter&
LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
return 0;
}
- if (!TruncateFile(last_file.Get(), pos.nPos)) {
+ if (!last_file.Truncate(pos.nPos)) {
LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile);
return 0;
}
- if (!FileCommit(last_file.Get())) {
+ if (!last_file.Commit()) {
LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
return 0;
}
diff --git a/src/index/txindex.cpp b/src/index/txindex.cpp
index 80f615ed0e..425a7f00a0 100644
--- a/src/index/txindex.cpp
+++ b/src/index/txindex.cpp
@@ -87,10 +87,7 @@ bool TxIndex::FindTx(const uint256& tx_hash, uint256& block_hash, CTransactionRe
CBlockHeader header;
try {
file >> header;
- if (fseek(file.Get(), postx.nTxOffset, SEEK_CUR)) {
- LogError("%s: fseek(...) failed\n", __func__);
- return false;
- }
+ file.seek(postx.nTxOffset, SEEK_CUR);
file >> TX_WITH_WITNESS(tx);
} catch (const std::exception& e) {
LogError("%s: Deserialize or I/O error - %s\n", __func__, e.what());
diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index b40ca0260b..44c2808c3b 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -683,7 +683,7 @@ bool BlockManager::UndoWriteToDisk(const CBlockUndo& blockundo, FlatFilePos& pos
fileout << GetParams().MessageStart() << nSize;
// Write undo data
- long fileOutPos = ftell(fileout.Get());
+ long fileOutPos = fileout.tell();
if (fileOutPos < 0) {
LogError("%s: ftell failed\n", __func__);
return false;
@@ -981,7 +981,7 @@ bool BlockManager::WriteBlockToDisk(const CBlock& block, FlatFilePos& pos) const
fileout << GetParams().MessageStart() << nSize;
// Write block
- long fileOutPos = ftell(fileout.Get());
+ long fileOutPos = fileout.tell();
if (fileOutPos < 0) {
LogError("%s: ftell failed\n", __func__);
return false;
diff --git a/src/node/mempool_persist.cpp b/src/node/mempool_persist.cpp
index a265c2e12d..ff7de8c64a 100644
--- a/src/node/mempool_persist.cpp
+++ b/src/node/mempool_persist.cpp
@@ -199,8 +199,8 @@ bool DumpMempool(const CTxMemPool& pool, const fs::path& dump_path, FopenFn mock
LogInfo("Writing %d unbroadcast transactions to file.\n", unbroadcast_txids.size());
file << unbroadcast_txids;
- if (!skip_file_commit && !FileCommit(file.Get()))
- throw std::runtime_error("FileCommit failed");
+ if (!skip_file_commit && !file.Commit())
+ throw std::runtime_error("Commit failed");
file.fclose();
if (!RenameOver(dump_path + ".new", dump_path)) {
throw std::runtime_error("Rename failed");
diff --git a/src/node/utxo_snapshot.cpp b/src/node/utxo_snapshot.cpp
index 976421e455..7d589c886b 100644
--- a/src/node/utxo_snapshot.cpp
+++ b/src/node/utxo_snapshot.cpp
@@ -73,9 +73,11 @@ std::optional<uint256> ReadSnapshotBaseBlockhash(fs::path chaindir)
}
afile >> base_blockhash;
- if (std::fgetc(afile.Get()) != EOF) {
+ int64_t position = afile.tell();
+ afile.seek(0, SEEK_END);
+ if (position != afile.tell()) {
LogPrintf("[snapshot] warning: unexpected trailing data in %s\n", read_from_str);
- } else if (std::ferror(afile.Get())) {
+ } else if (afile.IsError()) {
LogPrintf("[snapshot] warning: i/o error reading %s\n", read_from_str);
}
return base_blockhash;
diff --git a/src/streams.cpp b/src/streams.cpp
index cdd36a86fe..5f7baf92b9 100644
--- a/src/streams.cpp
+++ b/src/streams.cpp
@@ -4,21 +4,29 @@
#include <span.h>
#include <streams.h>
+#include <util/fs_helpers.h>
#include <array>
+AutoFile::AutoFile(std::FILE* file, std::vector<std::byte> data_xor)
+ : m_file{file}, m_xor{std::move(data_xor)}
+{
+ if (!IsNull()) {
+ auto pos{std::ftell(m_file)};
+ if (pos >= 0) m_position = pos;
+ }
+}
+
std::size_t AutoFile::detail_fread(Span<std::byte> dst)
{
if (!m_file) throw std::ios_base::failure("AutoFile::read: file handle is nullptr");
- if (m_xor.empty()) {
- return std::fread(dst.data(), 1, dst.size(), m_file);
- } else {
- const auto init_pos{std::ftell(m_file)};
- if (init_pos < 0) throw std::ios_base::failure("AutoFile::read: ftell failed");
- std::size_t ret{std::fread(dst.data(), 1, dst.size(), m_file)};
- util::Xor(dst.subspan(0, ret), m_xor, init_pos);
- return ret;
+ size_t ret = std::fread(dst.data(), 1, dst.size(), m_file);
+ if (!m_xor.empty()) {
+ if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::read: position unknown");
+ util::Xor(dst.subspan(0, ret), m_xor, *m_position);
}
+ if (m_position.has_value()) *m_position += ret;
+ return ret;
}
void AutoFile::seek(int64_t offset, int origin)
@@ -29,18 +37,23 @@ void AutoFile::seek(int64_t offset, int origin)
if (std::fseek(m_file, offset, origin) != 0) {
throw std::ios_base::failure(feof() ? "AutoFile::seek: end of file" : "AutoFile::seek: fseek failed");
}
+ if (origin == SEEK_SET) {
+ m_position = offset;
+ } else if (origin == SEEK_CUR && m_position.has_value()) {
+ *m_position += offset;
+ } else {
+ int64_t r{std::ftell(m_file)};
+ if (r < 0) {
+ throw std::ios_base::failure("AutoFile::seek: ftell failed");
+ }
+ m_position = r;
+ }
}
int64_t AutoFile::tell()
{
- if (IsNull()) {
- throw std::ios_base::failure("AutoFile::tell: file handle is nullptr");
- }
- int64_t r{std::ftell(m_file)};
- if (r < 0) {
- throw std::ios_base::failure("AutoFile::tell: ftell failed");
- }
- return r;
+ if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::tell: position unknown");
+ return *m_position;
}
void AutoFile::read(Span<std::byte> dst)
@@ -60,6 +73,7 @@ void AutoFile::ignore(size_t nSize)
throw std::ios_base::failure(feof() ? "AutoFile::ignore: end of file" : "AutoFile::ignore: fread failed");
}
nSize -= nNow;
+ if (m_position.has_value()) *m_position += nNow;
}
}
@@ -70,19 +84,34 @@ void AutoFile::write(Span<const std::byte> src)
if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) {
throw std::ios_base::failure("AutoFile::write: write failed");
}
+ if (m_position.has_value()) *m_position += src.size();
} else {
- auto current_pos{std::ftell(m_file)};
- if (current_pos < 0) throw std::ios_base::failure("AutoFile::write: ftell failed");
+ if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::write: position unknown");
std::array<std::byte, 4096> buf;
while (src.size() > 0) {
auto buf_now{Span{buf}.first(std::min<size_t>(src.size(), buf.size()))};
std::copy(src.begin(), src.begin() + buf_now.size(), buf_now.begin());
- util::Xor(buf_now, m_xor, current_pos);
+ util::Xor(buf_now, m_xor, *m_position);
if (std::fwrite(buf_now.data(), 1, buf_now.size(), m_file) != buf_now.size()) {
throw std::ios_base::failure{"XorFile::write: failed"};
}
src = src.subspan(buf_now.size());
- current_pos += buf_now.size();
+ *m_position += buf_now.size();
}
}
}
+
+bool AutoFile::Commit()
+{
+ return ::FileCommit(m_file);
+}
+
+bool AutoFile::IsError()
+{
+ return ferror(m_file);
+}
+
+bool AutoFile::Truncate(unsigned size)
+{
+ return ::TruncateFile(m_file, size);
+}
diff --git a/src/streams.h b/src/streams.h
index c2a9dea287..431a4d77c6 100644
--- a/src/streams.h
+++ b/src/streams.h
@@ -390,9 +390,10 @@ class AutoFile
protected:
std::FILE* m_file;
std::vector<std::byte> m_xor;
+ std::optional<int64_t> m_position;
public:
- explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={}) : m_file{file}, m_xor{std::move(data_xor)} {}
+ explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={});
~AutoFile() { fclose(); }
@@ -419,12 +420,6 @@ public:
return ret;
}
- /** Get wrapped FILE* without transfer of ownership.
- * @note Ownership of the FILE* will remain with this class. Use this only if the scope of the
- * AutoFile outlives use of the passed pointer.
- */
- std::FILE* Get() const { return m_file; }
-
/** Return true if the wrapped FILE* is nullptr, false otherwise.
*/
bool IsNull() const { return m_file == nullptr; }
@@ -458,6 +453,10 @@ public:
::Unserialize(*this, obj);
return *this;
}
+
+ bool Commit();
+ bool IsError();
+ bool Truncate(unsigned size);
};
/** Wrapper around an AutoFile& that implements a ring buffer to
diff --git a/src/test/fuzz/autofile.cpp b/src/test/fuzz/autofile.cpp
index 45316b6b21..81761c7bf9 100644
--- a/src/test/fuzz/autofile.cpp
+++ b/src/test/fuzz/autofile.cpp
@@ -56,7 +56,6 @@ FUZZ_TARGET(autofile)
WriteToStream(fuzzed_data_provider, auto_file);
});
}
- (void)auto_file.Get();
(void)auto_file.IsNull();
if (fuzzed_data_provider.ConsumeBool()) {
FILE* f = auto_file.release();
diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index 9217f05945..777122df6d 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -261,7 +261,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
for (uint8_t j = 0; j < 40; ++j) {
file << j;
}
- std::rewind(file.Get());
+ file.seek(0, SEEK_SET);
// The buffer size (second arg) must be greater than the rewind
// amount (third arg).
@@ -391,7 +391,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_skip)
for (uint8_t j = 0; j < 40; ++j) {
file << j;
}
- std::rewind(file.Get());
+ file.seek(0, SEEK_SET);
// The buffer is 25 bytes, allow rewinding 10 bytes.
BufferedFile bf{file, 25, 10};
@@ -444,7 +444,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
for (uint8_t i = 0; i < fileSize; ++i) {
file << i;
}
- std::rewind(file.Get());
+ file.seek(0, SEEK_SET);
size_t bufSize = m_rng.randrange(300) + 1;
size_t rewindSize = m_rng.randrange(bufSize);
diff --git a/src/util/asmap.cpp b/src/util/asmap.cpp
index f50cd8a28c..04b0673c49 100644
--- a/src/util/asmap.cpp
+++ b/src/util/asmap.cpp
@@ -203,10 +203,10 @@ std::vector<bool> DecodeAsmap(fs::path path)
LogPrintf("Failed to open asmap file from disk\n");
return bits;
}
- fseek(filestr, 0, SEEK_END);
- int length = ftell(filestr);
+ file.seek(0, SEEK_END);
+ int length = file.tell();
LogPrintf("Opened asmap file %s (%d bytes) from disk\n", fs::quoted(fs::PathToString(path)), length);
- fseek(filestr, 0, SEEK_SET);
+ file.seek(0, SEEK_SET);
uint8_t cur_byte;
for (int i = 0; i < length; ++i) {
file >> cur_byte;