6 files changed, 238 insertions, 37 deletions
diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include
index 0a3f9df463..f1e4e706a1 100644
--- a/src/Makefile.bench.include
+++ b/src/Makefile.bench.include
@@ -32,6 +32,7 @@ bench_bench_bitcoin_SOURCES = \
   bench/examples.cpp \
   bench/gcs_filter.cpp \
   bench/hashpadding.cpp \
+  bench/load_external.cpp \
   bench/lockedpool.cpp \
   bench/logging.cpp \
   bench/mempool_eviction.cpp \
diff --git a/src/bench/load_external.cpp b/src/bench/load_external.cpp
new file mode 100644
index 0000000000..be01b2a483
--- /dev/null
+++ b/src/bench/load_external.cpp
@@ -0,0 +1,63 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or https://www.opensource.org/licenses/mit-license.php.
+
+#include <bench/bench.h>
+#include <bench/data.h>
+#include <chainparams.h>
+#include <test/util/setup_common.h>
+#include <validation.h>
+
+/**
+ * The LoadExternalBlockFile() function is used during -reindex and -loadblock.
+ *
+ * Create a test file that's similar to a datadir/blocks/blk?????.dat file,
+ * It contains around 134 copies of the same block (typical size of real block files).
+ * For each block in the file, LoadExternalBlockFile() won't find its parent,
+ * and so will skip the block. (In the real system, it will re-read the block
+ * from disk later when it encounters its parent.)
+ *
+ * This benchmark measures the performance of deserializing the block (or just
+ * its header, beginning with PR 16981).
+ */
+static void LoadExternalBlockFile(benchmark::Bench& bench)
+{
+    const auto testing_setup{MakeNoLogFileContext<const TestingSetup>(CBaseChainParams::MAIN)};
+
+    // Create a single block as in the blocks files (magic bytes, block size,
+    // block data) as a stream object.
+    const fs::path blkfile{testing_setup.get()->m_path_root / "blk.dat"};
+    CDataStream ss(SER_DISK, 0);
+    auto params{testing_setup->m_node.chainman->GetParams()};
+    ss << params.MessageStart();
+    ss << static_cast<uint32_t>(benchmark::data::block413567.size());
+    // We can't use the streaming serialization (ss << benchmark::data::block413567)
+    // because that first writes a compact size.
+    ss.write(MakeByteSpan(benchmark::data::block413567));
+
+    // Create the test file.
+    {
+        // "wb+" is "binary, O_RDWR | O_CREAT | O_TRUNC".
+        FILE* file{fsbridge::fopen(blkfile, "wb+")};
+        // Make the test block file about 128 MB in length.
+        for (size_t i = 0; i < node::MAX_BLOCKFILE_SIZE / ss.size(); ++i) {
+            if (fwrite(ss.data(), 1, ss.size(), file) != ss.size()) {
+                throw std::runtime_error("write to test file failed\n");
+            }
+        }
+        fclose(file);
+    }
+
+    Chainstate& chainstate{testing_setup->m_node.chainman->ActiveChainstate()};
+    std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
+    FlatFilePos pos;
+    bench.run([&] {
+        // "rb" is "binary, O_RDONLY", positioned to the start of the file.
+        // The file will be closed by LoadExternalBlockFile().
+        FILE* file{fsbridge::fopen(blkfile, "rb")};
+        chainstate.LoadExternalBlockFile(file, &pos, &blocks_with_unknown_parent);
+    });
+    fs::remove(blkfile);
+}
+
+BENCHMARK(LoadExternalBlockFile, benchmark::PriorityLevel::HIGH);
diff --git a/src/streams.h b/src/streams.h
index 0178df1c49..84b12f65aa 100644
--- a/src/streams.h
+++ b/src/streams.h
@@ -612,7 +612,6 @@ private:
     uint64_t nRewind;     //!< how many bytes we guarantee to rewind
     std::vector<std::byte> vchBuf; //!< the buffer
 
-protected:
     //! read data from the source to fill the buffer
     bool Fill() {
         unsigned int pos = nSrcPos % vchBuf.size();
@@ -630,6 +629,28 @@ protected:
         return true;
     }
 
+    //! Advance the stream's read pointer (m_read_pos) by up to 'length' bytes,
+    //! filling the buffer from the file so that at least one byte is available.
+    //! Return a pointer to the available buffer data and the number of bytes
+    //! (which may be less than the requested length) that may be accessed
+    //! beginning at that pointer.
+    std::pair<std::byte*, size_t> AdvanceStream(size_t length)
+    {
+        assert(m_read_pos <= nSrcPos);
+        if (m_read_pos + length > nReadLimit) {
+            throw std::ios_base::failure("Attempt to position past buffer limit");
+        }
+        // If there are no bytes available, read from the file.
+        if (m_read_pos == nSrcPos && length > 0) Fill();
+
+        size_t buffer_offset{static_cast<size_t>(m_read_pos % vchBuf.size())};
+        size_t buffer_available{static_cast<size_t>(vchBuf.size() - buffer_offset)};
+        size_t bytes_until_source_pos{static_cast<size_t>(nSrcPos - m_read_pos)};
+        size_t advance{std::min({length, buffer_available, bytes_until_source_pos})};
+        m_read_pos += advance;
+        return std::make_pair(&vchBuf[buffer_offset], advance);
+    }
+
 public:
     CBufferedFile(FILE* fileIn, uint64_t nBufSize, uint64_t nRewindIn, int nTypeIn, int nVersionIn)
         : nType(nTypeIn), nVersion(nVersionIn), nSrcPos(0), m_read_pos(0), nReadLimit(std::numeric_limits<uint64_t>::max()), nRewind(nRewindIn), vchBuf(nBufSize, std::byte{0})
@@ -667,24 +688,21 @@ public:
     //! read a number of bytes
     void read(Span<std::byte> dst)
     {
-        if (dst.size() + m_read_pos > nReadLimit) {
-            throw std::ios_base::failure("Read attempted past buffer limit");
-        }
         while (dst.size() > 0) {
-            if (m_read_pos == nSrcPos)
-                Fill();
-            unsigned int pos = m_read_pos % vchBuf.size();
-            size_t nNow = dst.size();
-            if (nNow + pos > vchBuf.size())
-                nNow = vchBuf.size() - pos;
-            if (nNow + m_read_pos > nSrcPos)
-                nNow = nSrcPos - m_read_pos;
-            memcpy(dst.data(), &vchBuf[pos], nNow);
-            m_read_pos += nNow;
-            dst = dst.subspan(nNow);
+            auto [buffer_pointer, length]{AdvanceStream(dst.size())};
+            memcpy(dst.data(), buffer_pointer, length);
+            dst = dst.subspan(length);
         }
     }
 
+    //! Move the read position ahead in the stream to the given position.
+    //! Use SetPos() to back up in the stream, not SkipTo().
+    void SkipTo(const uint64_t file_pos)
+    {
+        assert(file_pos >= m_read_pos);
+        while (m_read_pos < file_pos) AdvanceStream(file_pos - m_read_pos);
+    }
+
     //! return the current reading position
     uint64_t GetPos() const {
         return m_read_pos;
diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index 0925e2e9ee..b1b262eade 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -253,7 +253,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
         BOOST_CHECK(false);
     } catch (const std::exception& e) {
         BOOST_CHECK(strstr(e.what(),
-                        "Read attempted past buffer limit") != nullptr);
+                           "Attempt to position past buffer limit") != nullptr);
     }
     // The default argument removes the limit completely.
     BOOST_CHECK(bf.SetLimit());
@@ -322,7 +322,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
     BOOST_CHECK(!bf.SetPos(0));
     // But we should now be positioned at least as far back as allowed
     // by the rewind window (relative to our farthest read position, 40).
-    BOOST_CHECK(bf.GetPos() <= 30);
+    BOOST_CHECK(bf.GetPos() <= 30U);
 
     // We can explicitly close the file, or the destructor will do it.
     bf.fclose();
@@ -330,6 +330,55 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
     fs::remove(streams_test_filename);
 }
 
+BOOST_AUTO_TEST_CASE(streams_buffered_file_skip)
+{
+    fs::path streams_test_filename = m_args.GetDataDirBase() / "streams_test_tmp";
+    FILE* file = fsbridge::fopen(streams_test_filename, "w+b");
+    // The value at each offset is the byte offset (e.g. byte 1 in the file has the value 0x01).
+    for (uint8_t j = 0; j < 40; ++j) {
+        fwrite(&j, 1, 1, file);
+    }
+    rewind(file);
+
+    // The buffer is 25 bytes, allow rewinding 10 bytes.
+    CBufferedFile bf(file, 25, 10, 222, 333);
+
+    uint8_t i;
+    // This is like bf >> (7-byte-variable), in that it will cause data
+    // to be read from the file into memory, but it's not copied to us.
+    bf.SkipTo(7);
+    BOOST_CHECK_EQUAL(bf.GetPos(), 7U);
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 7);
+
+    // The bytes in the buffer up to offset 7 are valid and can be read.
+    BOOST_CHECK(bf.SetPos(0));
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 0);
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 1);
+
+    bf.SkipTo(11);
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 11);
+
+    // SkipTo() honors the transfer limit; we can't position beyond the limit.
+    bf.SetLimit(13);
+    try {
+        bf.SkipTo(14);
+        BOOST_CHECK(false);
+    } catch (const std::exception& e) {
+        BOOST_CHECK(strstr(e.what(), "Attempt to position past buffer limit") != nullptr);
+    }
+
+    // We can position exactly to the transfer limit.
+    bf.SkipTo(13);
+    BOOST_CHECK_EQUAL(bf.GetPos(), 13U);
+
+    bf.fclose();
+    fs::remove(streams_test_filename);
+}
+
 BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
 {
     // Make this test deterministic.
@@ -361,7 +410,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
             // sizes; the boundaries of the objects can interact arbitrarily
             // with the CBufferFile's internal buffer. These first three
             // cases simulate objects of various sizes (1, 2, 5 bytes).
-            switch (InsecureRandRange(5)) {
+            switch (InsecureRandRange(6)) {
             case 0: {
                 uint8_t a[1];
                 if (currentPos + 1 > fileSize)
@@ -399,6 +448,16 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
                 break;
             }
             case 3: {
+                // SkipTo is similar to the "read" cases above, except
+                // we don't receive the data.
+                size_t skip_length{static_cast<size_t>(InsecureRandRange(5))};
+                if (currentPos + skip_length > fileSize) continue;
+                bf.SetLimit(currentPos + skip_length);
+                bf.SkipTo(currentPos + skip_length);
+                currentPos += skip_length;
+                break;
+            }
+            case 4: {
                 // Find a byte value (that is at or ahead of the current position).
                 size_t find = currentPos + InsecureRandRange(8);
                 if (find >= fileSize)
@@ -415,7 +474,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
                 currentPos++;
                 break;
             }
-            case 4: {
+            case 5: {
                 size_t requestPos = InsecureRandRange(maxPos + 4);
                 bool okay = bf.SetPos(requestPos);
                 // The new position may differ from the requested position
diff --git a/src/validation.cpp b/src/validation.cpp
index debdc2ae74..4692626545 100644
--- a/src/validation.cpp
+++ b/src/validation.cpp
@@ -4389,6 +4389,8 @@ void Chainstate::LoadExternalBlockFile(
     try {
         // This takes over fileIn and calls fclose() on it in the CBufferedFile destructor
         CBufferedFile blkdat(fileIn, 2*MAX_BLOCK_SERIALIZED_SIZE, MAX_BLOCK_SERIALIZED_SIZE+8, SER_DISK, CLIENT_VERSION);
+        // nRewind indicates where to resume scanning in case something goes wrong,
+        // such as a block fails to deserialize.
         uint64_t nRewind = blkdat.GetPos();
         while (!blkdat.eof()) {
             if (ShutdownRequested()) return;
@@ -4412,28 +4414,30 @@ void Chainstate::LoadExternalBlockFile(
                     continue;
             } catch (const std::exception&) {
                 // no valid block header found; don't complain
+                // (this happens at the end of every blk.dat file)
                 break;
             }
             try {
-                // read block
-                uint64_t nBlockPos = blkdat.GetPos();
+                // read block header
+                const uint64_t nBlockPos{blkdat.GetPos()};
                 if (dbp)
                     dbp->nPos = nBlockPos;
                 blkdat.SetLimit(nBlockPos + nSize);
-                std::shared_ptr<CBlock> pblock = std::make_shared<CBlock>();
-                CBlock& block = *pblock;
-                blkdat >> block;
-                nRewind = blkdat.GetPos();
-
-                uint256 hash = block.GetHash();
+                CBlockHeader header;
+                blkdat >> header;
+                const uint256 hash{header.GetHash()};
+                // Skip the rest of this block (this may read from disk into memory); position to the marker before the
+                // next block, but it's still possible to rewind to the start of the current block (without a disk read).
+                nRewind = nBlockPos + nSize;
+                blkdat.SkipTo(nRewind);
                 {
                     LOCK(cs_main);
                     // detect out of order blocks, and store them for later
-                    if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(block.hashPrevBlock)) {
+                    if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(header.hashPrevBlock)) {
                         LogPrint(BCLog::REINDEX, "%s: Out of order block %s, parent %s not known\n", __func__, hash.ToString(),
-                                block.hashPrevBlock.ToString());
+                                 header.hashPrevBlock.ToString());
                         if (dbp && blocks_with_unknown_parent) {
-                            blocks_with_unknown_parent->emplace(block.hashPrevBlock, *dbp);
+                            blocks_with_unknown_parent->emplace(header.hashPrevBlock, *dbp);
                         }
                         continue;
                     }
@@ -4441,13 +4445,19 @@ void Chainstate::LoadExternalBlockFile(
                     // process in case the block isn't known yet
                     const CBlockIndex* pindex = m_blockman.LookupBlockIndex(hash);
                     if (!pindex || (pindex->nStatus & BLOCK_HAVE_DATA) == 0) {
-                      BlockValidationState state;
-                      if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
-                          nLoaded++;
-                      }
-                      if (state.IsError()) {
-                          break;
-                      }
+                        // This block can be processed immediately; rewind to its start, read and deserialize it.
+                        blkdat.SetPos(nBlockPos);
+                        std::shared_ptr<CBlock> pblock{std::make_shared<CBlock>()};
+                        blkdat >> *pblock;
+                        nRewind = blkdat.GetPos();
+
+                        BlockValidationState state;
+                        if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
+                            nLoaded++;
+                        }
+                        if (state.IsError()) {
+                            break;
+                        }
                     } else if (hash != params.GetConsensus().hashGenesisBlock && pindex->nHeight % 1000 == 0) {
                         LogPrint(BCLog::REINDEX, "Block Import: already had block %s at height %d\n", hash.ToString(), pindex->nHeight);
                     }
diff --git a/test/functional/feature_reindex.py b/test/functional/feature_reindex.py
index 44040f426f..0f6a8fd0d2 100755
--- a/test/functional/feature_reindex.py
+++ b/test/functional/feature_reindex.py
@@ -7,9 +7,12 @@
 - Start a single node and generate 3 blocks.
 - Stop the node and restart it with -reindex. Verify that the node has reindexed up to block 3.
 - Stop the node and restart it with -reindex-chainstate. Verify that the node has reindexed up to block 3.
+- Verify that out-of-order blocks are correctly processed, see LoadExternalBlockFile()
 """
 
+import os
 from test_framework.test_framework import BitcoinTestFramework
+from test_framework.p2p import MAGIC_BYTES
 from test_framework.util import assert_equal
 
 
@@ -27,11 +30,58 @@ class ReindexTest(BitcoinTestFramework):
         assert_equal(self.nodes[0].getblockcount(), blockcount)  # start_node is blocking on reindex
         self.log.info("Success")
 
+    # Check that blocks can be processed out of order
+    def out_of_order(self):
+        # The previous test created 12 blocks
+        assert_equal(self.nodes[0].getblockcount(), 12)
+        self.stop_nodes()
+
+        # In this test environment, blocks will always be in order (since
+        # we're generating them rather than getting them from peers), so to
+        # test out-of-order handling, swap blocks 1 and 2 on disk.
+        blk0 = os.path.join(self.nodes[0].datadir, self.nodes[0].chain, 'blocks', 'blk00000.dat')
+        with open(blk0, 'r+b') as bf:
+            # Read at least the first few blocks (including genesis)
+            b = bf.read(2000)
+
+            # Find the offsets of blocks 2, 3, and 4 (the first 3 blocks beyond genesis)
+            # by searching for the regtest marker bytes (see pchMessageStart).
+            def find_block(b, start):
+                return b.find(MAGIC_BYTES["regtest"], start)+4
+
+            genesis_start = find_block(b, 0)
+            assert_equal(genesis_start, 4)
+            b2_start = find_block(b, genesis_start)
+            b3_start = find_block(b, b2_start)
+            b4_start = find_block(b, b3_start)
+
+            # Blocks 2 and 3 should be the same size.
+            assert_equal(b3_start-b2_start, b4_start-b3_start)
+
+            # Swap the second and third blocks (don't disturb the genesis block).
+            bf.seek(b2_start)
+            bf.write(b[b3_start:b4_start])
+            bf.write(b[b2_start:b3_start])
+
+        # The reindexing code should detect and accommodate out of order blocks.
+        with self.nodes[0].assert_debug_log([
+            'LoadExternalBlockFile: Out of order block',
+            'LoadExternalBlockFile: Processing out of order child',
+        ]):
+            extra_args = [["-reindex"]]
+            self.start_nodes(extra_args)
+
+        # All blocks should be accepted and processed.
+        assert_equal(self.nodes[0].getblockcount(), 12)
+
     def run_test(self):
         self.reindex(False)
         self.reindex(True)
         self.reindex(False)
         self.reindex(True)
 
+        self.out_of_order()
+
+
 if __name__ == '__main__':
     ReindexTest().main()