aboutsummaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
author Andrew Chow <github@achow101.com> 2022-11-15 19:07:35 -0500
committer Andrew Chow <github@achow101.com> 2022-11-15 19:23:39 -0500
commit 5602cc7ccf4a51ad52dadc495b732f54b43ceb99 (patch)
tree e62a474379f733ceead58695502dda8b16a4c97c /src/test
parent 547a96362888d2fa02d496c192dcdb7ea7d72813 (diff)
parent db929893ef0bc86ea2708cdbcf41152240cd7c73 (diff)
download bitcoin-5602cc7ccf4a51ad52dadc495b732f54b43ceb99.tar.xz
Merge bitcoin/bitcoin#16981: Improve runtime performance of --reindex
db929893ef0bc86ea2708cdbcf41152240cd7c73 Faster -reindex by initially deserializing only headers (Larry Ruane) c72de9990ae8f1744006d9c852023b882d5ed80c util: add CBufferedFile::SkipTo() to move ahead in the stream (Larry Ruane) 48a68908ba3d5e077cda7bd1e908b923fbead824 Add LoadExternalBlockFile() benchmark (Larry Ruane) Pull request description: ### Background During the first part of reindexing, `LoadExternalBlockFile()` sequentially reads raw blocks from the `blocks/blk00nnn.dat` files (rather than receiving them from peers, as with initial block download) and eventually adds all of them to the block index. When an individual block is initially read, it can't be immediately added unless all its ancestors have been added, which is rare (only about 8% of the time), because the blocks are not sorted by height. When the block can't be immediately added to the block index, its disk location is saved in a map so it can be added later. When its parent is later added to the block index, `LoadExternalBlockFile()` reads and deserializes the block from disk a second time and adds it to the block index. Most blocks (92%) get deserialized twice. ### This PR During the initial read, it's rarely useful to deserialize the entire block; only the header is needed to determine if the block can be added to the block index immediately. This change to `LoadExternalBlockFile()` initially deserializes only a block's header, then deserializes the entire block only if it can be added immediately. This reduces reindex time on mainnet by 7 hours on a Raspberry Pi, which translates to around a 25% reduction in the first part of reindexing (adding blocks to the index), and about a 6% reduction in overall reindex time. Summary: The performance gain is the result of deserializing each block only once, except its header which is deserialized twice, but the header is only 80 bytes. 
ACKs for top commit: andrewtoth: ACK db929893ef0bc86ea2708cdbcf41152240cd7c73 achow101: ACK db929893ef0bc86ea2708cdbcf41152240cd7c73 aureleoules: ACK db929893ef0bc86ea2708cdbcf41152240cd7c73 - minor changes and new benchmark since last review theStack: re-ACK db929893ef0bc86ea2708cdbcf41152240cd7c73 stickies-v: re-ACK db929893e Tree-SHA512: 5a5377192c11edb5b662e18f511c9beb8f250bc88aeadf2f404c92c3232a7617bade50477ebf16c0602b9bd3b68306d3ee7615de58acfd8cae664d28bb7b0136
Diffstat (limited to 'src/test')
-rw-r--r-- src/test/streams_tests.cpp 67
1 files changed, 63 insertions, 4 deletions
diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index 0925e2e9ee..b1b262eade 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -253,7 +253,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
BOOST_CHECK(false);
} catch (const std::exception& e) {
BOOST_CHECK(strstr(e.what(),
- "Read attempted past buffer limit") != nullptr);
+ "Attempt to position past buffer limit") != nullptr);
}
// The default argument removes the limit completely.
BOOST_CHECK(bf.SetLimit());
@@ -322,7 +322,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
BOOST_CHECK(!bf.SetPos(0));
// But we should now be positioned at least as far back as allowed
// by the rewind window (relative to our farthest read position, 40).
- BOOST_CHECK(bf.GetPos() <= 30);
+ BOOST_CHECK(bf.GetPos() <= 30U);
// We can explicitly close the file, or the destructor will do it.
bf.fclose();
@@ -330,6 +330,55 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
fs::remove(streams_test_filename);
}
+BOOST_AUTO_TEST_CASE(streams_buffered_file_skip) // Exercises CBufferedFile::SkipTo() on a 40-byte temporary file.
+{
+ fs::path streams_test_filename = m_args.GetDataDirBase() / "streams_test_tmp";
+ FILE* file = fsbridge::fopen(streams_test_filename, "w+b");
+ // Fill the file so the value at each offset equals that offset (e.g. offset 1 holds 0x01); any byte read back then identifies the position it came from.
+ for (uint8_t j = 0; j < 40; ++j) {
+ fwrite(&j, 1, 1, file);
+ }
+ rewind(file); // back to offset 0 so bf starts reading at the beginning of the file
+
+ // The buffer is 25 bytes, allow rewinding 10 bytes. NOTE(review): 222 and 333 are presumably arbitrary stream type/version values - confirm against the constructor.
+ CBufferedFile bf(file, 25, 10, 222, 333);
+
+ uint8_t i;
+ // SkipTo(7) is like bf >> (7-byte-variable): it causes data to be read
+ // from the file into memory, but that data is not copied out to us.
+ bf.SkipTo(7);
+ BOOST_CHECK_EQUAL(bf.GetPos(), 7U);
+ bf >> i; // the next byte read must be the one stored at offset 7
+ BOOST_CHECK_EQUAL(i, 7);
+
+ // The skipped bytes (offsets 0 through 7) are still valid in the buffer, so we can seek back to 0 and read them.
+ BOOST_CHECK(bf.SetPos(0));
+ bf >> i;
+ BOOST_CHECK_EQUAL(i, 0);
+ bf >> i;
+ BOOST_CHECK_EQUAL(i, 1);
+
+ bf.SkipTo(11); // skip forward again, this time from position 2
+ bf >> i;
+ BOOST_CHECK_EQUAL(i, 11);
+
+ // SkipTo() honors the transfer limit: with the limit set to 13, positioning to 14 must throw.
+ bf.SetLimit(13);
+ try {
+ bf.SkipTo(14);
+ BOOST_CHECK(false); // reached only if SkipTo(14) did not throw
+ } catch (const std::exception& e) {
+ BOOST_CHECK(strstr(e.what(), "Attempt to position past buffer limit") != nullptr);
+ }
+
+ // Positioning exactly to the transfer limit (13) is allowed and does not throw.
+ bf.SkipTo(13);
+ BOOST_CHECK_EQUAL(bf.GetPos(), 13U);
+
+ bf.fclose(); // close explicitly before deleting the temporary file
+ fs::remove(streams_test_filename);
+}
+
BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
{
// Make this test deterministic.
@@ -361,7 +410,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
// sizes; the boundaries of the objects can interact arbitrarily
// with the CBufferedFile's internal buffer. These first three
// cases simulate objects of various sizes (1, 2, 5 bytes).
- switch (InsecureRandRange(5)) {
+ switch (InsecureRandRange(6)) {
case 0: {
uint8_t a[1];
if (currentPos + 1 > fileSize)
@@ -399,6 +448,16 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
break;
}
case 3: {
+ // SkipTo is similar to the "read" cases above, except
+ // we don't receive the data.
+ size_t skip_length{static_cast<size_t>(InsecureRandRange(5))};
+ if (currentPos + skip_length > fileSize) continue;
+ bf.SetLimit(currentPos + skip_length);
+ bf.SkipTo(currentPos + skip_length);
+ currentPos += skip_length;
+ break;
+ }
+ case 4: {
// Find a byte value (that is at or ahead of the current position).
size_t find = currentPos + InsecureRandRange(8);
if (find >= fileSize)
@@ -415,7 +474,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
currentPos++;
break;
}
- case 4: {
+ case 5: {
size_t requestPos = InsecureRandRange(maxPos + 4);
bool okay = bf.SetPos(requestPos);
// The new position may differ from the requested position