aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Chow <github@achow101.com>2023-05-10 17:40:42 -0400
committerAndrew Chow <github@achow101.com>2023-05-10 17:50:42 -0400
commit3ff67f77831474e342502e6a24be8a2ce5688e04 (patch)
treebce3b70ba020fc71b0cddc3a974b78896364684e
parente0a70c5b4f2c691e8d6b507c8ce879f0b0424254 (diff)
parent72efc26439da9a1344a19569fb0cab01f82ae7d1 (diff)
Merge bitcoin/bitcoin#19690: util: improve FindByte() performance
72efc26439da9a1344a19569fb0cab01f82ae7d1 util: improve streams.h:FindByte() performance (Larry Ruane) 604df63f6c70b9692b067777ddb38d946ac0b2fc [bench] add streams findbyte (gzhao408) Pull request description: This PR is strictly a performance improvement; there is no functional change. The `CBufferedFile::FindByte()` method searches for the next occurrence of the given byte in the file. Currently, this is done by explicitly inspecting each byte in turn. This PR takes advantage of `std::find()` to do the same more efficiently, improving its CPU runtime by a factor of about 25 in typical use. ACKs for top commit: achow101: re-ACK 72efc26439da9a1344a19569fb0cab01f82ae7d1 stickies-v: re-ACK 72efc26439da9a1344a19569fb0cab01f82ae7d1 Tree-SHA512: ddf0bff335cc8aa34f911aa4e0558fa77ce35d963d602e4ab1c63090b4a386faf074548daf06ee829c7f2c760d06eed0125cf4c34e981c6129cea1804eb3b719
-rw-r--r--src/Makefile.bench.include1
-rw-r--r--src/bench/streams_findbyte.cpp31
-rw-r--r--src/streams.h20
-rw-r--r--src/test/fuzz/buffered_file.cpp2
-rw-r--r--src/test/streams_tests.cpp2
-rw-r--r--src/validation.cpp2
6 files changed, 50 insertions, 8 deletions
diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include
index c230728a1c..c8e510b482 100644
--- a/src/Makefile.bench.include
+++ b/src/Makefile.bench.include
@@ -47,6 +47,7 @@ bench_bench_bitcoin_SOURCES = \
bench/rollingbloom.cpp \
bench/rpc_blockchain.cpp \
bench/rpc_mempool.cpp \
+ bench/streams_findbyte.cpp \
bench/strencodings.cpp \
bench/util_time.cpp \
bench/verify_script.cpp
diff --git a/src/bench/streams_findbyte.cpp b/src/bench/streams_findbyte.cpp
new file mode 100644
index 0000000000..77f5940926
--- /dev/null
+++ b/src/bench/streams_findbyte.cpp
@@ -0,0 +1,31 @@
+// Copyright (c) 2023 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <bench/bench.h>
+
+#include <util/fs.h>
+#include <streams.h>
+
+static void FindByte(benchmark::Bench& bench)
+{
+ // Setup
+ FILE* file = fsbridge::fopen("streams_tmp", "w+b");
+ const size_t file_size = 200;
+ uint8_t data[file_size] = {0};
+ data[file_size-1] = 1;
+ fwrite(&data, sizeof(uint8_t), file_size, file);
+ rewind(file);
+ CBufferedFile bf(file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size, 0, 0);
+
+ bench.run([&] {
+ bf.SetPos(0);
+ bf.FindByte(std::byte(1));
+ });
+
+ // Cleanup
+ bf.fclose();
+ fs::remove("streams_tmp");
+}
+
+BENCHMARK(FindByte, benchmark::PriorityLevel::HIGH);
diff --git a/src/streams.h b/src/streams.h
index 8788343809..e346aa0a3f 100644
--- a/src/streams.h
+++ b/src/streams.h
@@ -756,15 +756,25 @@ public:
}
//! search for a given byte in the stream, and remain positioned on it
- void FindByte(uint8_t ch)
+ void FindByte(std::byte byte)
{
+ // For best performance, avoid mod operation within the loop.
+ size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))};
while (true) {
- if (m_read_pos == nSrcPos)
+ if (m_read_pos == nSrcPos) {
+ // No more bytes available; read from the file into the buffer,
+ // setting nSrcPos to one beyond the end of the new data.
+ // Throws exception if end-of-file reached.
Fill();
- if (vchBuf[m_read_pos % vchBuf.size()] == std::byte{ch}) {
- break;
}
- m_read_pos++;
+ const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)};
+ const auto it_start{vchBuf.begin() + buf_offset};
+ const auto it_find{std::find(it_start, it_start + len, byte)};
+ const size_t inc{size_t(std::distance(it_start, it_find))};
+ m_read_pos += inc;
+ if (inc < len) break;
+ buf_offset += inc;
+ if (buf_offset >= vchBuf.size()) buf_offset = 0;
}
}
};
diff --git a/src/test/fuzz/buffered_file.cpp b/src/test/fuzz/buffered_file.cpp
index 67cac8fa4e..2f7ce60c7f 100644
--- a/src/test/fuzz/buffered_file.cpp
+++ b/src/test/fuzz/buffered_file.cpp
@@ -53,7 +53,7 @@ FUZZ_TARGET(buffered_file)
return;
}
try {
- opt_buffered_file->FindByte(fuzzed_data_provider.ConsumeIntegral<uint8_t>());
+ opt_buffered_file->FindByte(std::byte(fuzzed_data_provider.ConsumeIntegral<uint8_t>()));
} catch (const std::ios_base::failure&) {
}
},
diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index a0392570f1..55e4f200b1 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -463,7 +463,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
size_t find = currentPos + InsecureRandRange(8);
if (find >= fileSize)
find = fileSize - 1;
- bf.FindByte(uint8_t(find));
+ bf.FindByte(std::byte(find));
// The value at each offset is the offset.
BOOST_CHECK_EQUAL(bf.GetPos(), find);
currentPos = find;
diff --git a/src/validation.cpp b/src/validation.cpp
index a24211909d..f929e77416 100644
--- a/src/validation.cpp
+++ b/src/validation.cpp
@@ -4565,7 +4565,7 @@ void Chainstate::LoadExternalBlockFile(
try {
// locate a header
unsigned char buf[CMessageHeader::MESSAGE_START_SIZE];
- blkdat.FindByte(params.MessageStart()[0]);
+ blkdat.FindByte(std::byte(params.MessageStart()[0]));
nRewind = blkdat.GetPos() + 1;
blkdat >> buf;
if (memcmp(buf, params.MessageStart(), CMessageHeader::MESSAGE_START_SIZE)) {