diff options
author | Pieter Wuille <pieter@wuille.net> | 2024-07-10 11:08:42 -0400 |
---|---|---|
committer | Pieter Wuille <pieter@wuille.net> | 2024-07-25 10:16:40 -0400 |
commit | 97d98718b005adc0bdf513d724874601d8aa13ad (patch) | |
tree | 53376441726177c6e1040361ab788966caa87c9d | |
parent | d5918dc3c6d9480c8a5e295db0f4d4892b0138f6 (diff) |
clusterlin: add LinearizationChunking class
It encapsulates a given linearization in chunked form, permitting arbitrary
subsets of transactions to be removed from the linearization. Its purpose
is adding the Intersect function, which is a crucial operation that will
be used in a further commit to make Linearize improve existing linearizations.
-rw-r--r-- | src/cluster_linearize.h | 109 | ||||
-rw-r--r-- | src/test/fuzz/cluster_linearize.cpp | 117 |
2 files changed, 226 insertions, 0 deletions
diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index f689e7e33a..4238295630 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -13,6 +13,7 @@ #include <utility> #include <random.h> +#include <span.h> #include <util/feefrac.h> #include <util/vecdeque.h> @@ -256,6 +257,114 @@ std::vector<FeeFrac> ChunkLinearization(const DepGraph<SetType>& depgraph, Span< return ret; } +/** Data structure encapsulating the chunking of a linearization, permitting removal of subsets. */ +template<typename SetType> +class LinearizationChunking +{ + /** The depgraph this linearization is for. */ + const DepGraph<SetType>& m_depgraph; + + /** The linearization we started from. */ + Span<const ClusterIndex> m_linearization; + + /** Chunk sets and their feerates, of what remains of the linearization. */ + std::vector<SetInfo<SetType>> m_chunks; + + /** Which transactions remain in the linearization. */ + SetType m_todo; + + /** Fill the m_chunks variable. */ + void BuildChunks() noexcept + { + // Caller must clear m_chunks. + Assume(m_chunks.empty()); + + // Iterate over the entries in m_linearization. This is effectively the same + // algorithm as ChunkLinearization, but supports skipping parts of the linearization and + // keeps track of the sets themselves instead of just their feerates. + for (auto idx : m_linearization) { + if (!m_todo[idx]) continue; + // Start with an initial chunk containing just element idx. + SetInfo add(m_depgraph, idx); + // Absorb existing final chunks into add while they have lower feerate. + while (!m_chunks.empty() && add.feerate >> m_chunks.back().feerate) { + add |= m_chunks.back(); + m_chunks.pop_back(); + } + // Remember new chunk. + m_chunks.push_back(std::move(add)); + } + } + +public: + /** Initialize a LinearizationSubset object for a given length of linearization. */ + explicit LinearizationChunking(const DepGraph<SetType>& depgraph LIFETIMEBOUND, Span<const ClusterIndex> lin LIFETIMEBOUND) noexcept : + m_depgraph(depgraph), m_linearization(lin) + { + // Mark everything in lin as todo still. + for (auto i : m_linearization) m_todo.Set(i); + // Compute the initial chunking. + m_chunks.reserve(depgraph.TxCount()); + BuildChunks(); + } + + /** Determine how many chunks remain in the linearization. */ + ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size(); } + + /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */ + const SetInfo<SetType>& GetChunk(ClusterIndex n) const noexcept + { + Assume(n < m_chunks.size()); + return m_chunks[n]; + } + + /** Remove some subset of transactions from the linearization. */ + void MarkDone(SetType subset) noexcept + { + Assume(subset.Any()); + Assume(subset.IsSubsetOf(m_todo)); + m_todo -= subset; + // Rechunk what remains of m_linearization. + m_chunks.clear(); + BuildChunks(); + } + + /** Find the shortest intersection between subset and the prefixes of remaining chunks + * of the linearization that has a feerate not below subset's. + * + * This is a crucial operation in guaranteeing improvements to linearizations. If subset has + * a feerate not below GetChunk(0)'s, then moving Intersect(subset) to the front of (what + * remains of) the linearization is guaranteed not to make it worse at any point. + * + * See https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 for background. + */ + SetInfo<SetType> Intersect(const SetInfo<SetType>& subset) const noexcept + { + Assume(subset.transactions.IsSubsetOf(m_todo)); + SetInfo<SetType> accumulator; + // Iterate over all chunks of the remaining linearization. + for (ClusterIndex i = 0; i < NumChunksLeft(); ++i) { + // Find what (if any) intersection the chunk has with subset. + const SetType to_add = GetChunk(i).transactions & subset.transactions; + if (to_add.Any()) { + // If adding that to accumulator makes us hit all of subset, we are done as no + // shorter intersection with higher/equal feerate exists. + accumulator.transactions |= to_add; + if (accumulator.transactions == subset.transactions) break; + // Otherwise update the accumulator feerate. + accumulator.feerate += m_depgraph.FeeRate(to_add); + // If that does result in something better, or something with the same feerate but + // smaller, return that. Even if a longer, higher-feerate intersection exists, it + // does not hurt to return the shorter one (the remainder of the longer intersection + // will generally be found in the next call to Intersect, but even if not, it is not + // required for the improvement guarantee this function makes). + if (!(accumulator.feerate << subset.feerate)) return accumulator; + } + } + return subset; + } +}; + /** Class encapsulating the state needed to find the best remaining ancestor set. * * It is initialized for an entire DepGraph, and parts of the graph can be dropped by calling diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 6157291364..5c8b9f5905 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -484,6 +484,123 @@ FUZZ_TARGET(clusterlin_search_finder) assert(anc_finder.AllDone()); } +FUZZ_TARGET(clusterlin_linearization_chunking) +{ + // Verify the behavior of LinearizationChunking. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph<TestBitSet> depgraph; + try { + reader >> Using<DepGraphFormatter>(depgraph); + } catch (const std::ios_base::failure&) {} + + // Retrieve a topologically-valid subset of depgraph. + auto todo = TestBitSet::Fill(depgraph.TxCount()); + auto subset = SetInfo(depgraph, ReadTopologicalSet(depgraph, todo, reader)); + + // Retrieve a valid linearization for depgraph. + auto linearization = ReadLinearization(depgraph, reader); + + // Construct a LinearizationChunking object, initially for the whole linearization. + LinearizationChunking chunking(depgraph, linearization); + + // Incrementally remove transactions from the chunking object, and check various properties at + // every step. + while (todo.Any()) { + assert(chunking.NumChunksLeft() > 0); + + // Construct linearization with just todo. + std::vector<ClusterIndex> linearization_left; + for (auto i : linearization) { + if (todo[i]) linearization_left.push_back(i); + } + + // Compute the chunking for linearization_left. + auto chunking_left = ChunkLinearization(depgraph, linearization_left); + + // Verify that it matches the feerates of the chunks of chunking. + assert(chunking.NumChunksLeft() == chunking_left.size()); + for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + assert(chunking.GetChunk(i).feerate == chunking_left[i]); + } + + // Check consistency of chunking. + TestBitSet combined; + for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + const auto& chunk_info = chunking.GetChunk(i); + // Chunks must be non-empty. + assert(chunk_info.transactions.Any()); + // Chunk feerates must be monotonically non-increasing. + if (i > 0) assert(!(chunk_info.feerate >> chunking.GetChunk(i - 1).feerate)); + // Chunks must be a subset of what is left of the linearization. + assert(chunk_info.transactions.IsSubsetOf(todo)); + // Chunks' claimed feerates must match their transactions' aggregate feerate. + assert(depgraph.FeeRate(chunk_info.transactions) == chunk_info.feerate); + // Chunks must be the highest-feerate remaining prefix. + SetInfo<TestBitSet> accumulator, best; + for (auto j : linearization) { + if (todo[j] && !combined[j]) { + accumulator |= SetInfo(depgraph, j); + if (best.feerate.IsEmpty() || accumulator.feerate > best.feerate) { + best = accumulator; + } + } + } + assert(best.transactions == chunk_info.transactions); + assert(best.feerate == chunk_info.feerate); + // Chunks cannot overlap. + assert(!chunk_info.transactions.Overlaps(combined)); + combined |= chunk_info.transactions; + // Chunks must be topological. + for (auto idx : chunk_info.transactions) { + assert((depgraph.Ancestors(idx) & todo).IsSubsetOf(combined)); + } + } + assert(combined == todo); + + // Verify the expected properties of LinearizationChunking::Intersect: + auto intersect = chunking.Intersect(subset); + // - Intersecting again doesn't change the result. + assert(chunking.Intersect(intersect) == intersect); + // - The intersection is topological. + TestBitSet intersect_anc; + for (auto idx : intersect.transactions) { + intersect_anc |= (depgraph.Ancestors(idx) & todo); + } + assert(intersect.transactions == intersect_anc); + // - The claimed intersection feerate matches its transactions. + assert(intersect.feerate == depgraph.FeeRate(intersect.transactions)); + // - The intersection may only be empty if its input is empty. + assert(intersect.transactions.Any() == subset.transactions.Any()); + // - The intersection feerate must be as high as the input. + assert(intersect.feerate >= subset.feerate); + // - No non-empty intersection between the intersection and a prefix of the chunks of the + // remainder of the linearization may be better than the intersection. + TestBitSet prefix; + for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + prefix |= chunking.GetChunk(i).transactions; + auto reintersect = SetInfo(depgraph, prefix & intersect.transactions); + if (!reintersect.feerate.IsEmpty()) { + assert(reintersect.feerate <= intersect.feerate); + } + } + + // Find a subset to remove from linearization. + auto done = ReadTopologicalSet(depgraph, todo, reader); + if (done.None()) { + // We need to remove a non-empty subset, so fall back to the unlinearized ancestors of + // the first transaction in todo if done is empty. + done = depgraph.Ancestors(todo.First()) & todo; + } + todo -= done; + chunking.MarkDone(done); + subset = SetInfo(depgraph, subset.transactions - done); + } + + assert(chunking.NumChunksLeft() == 0); +} + FUZZ_TARGET(clusterlin_linearize) { // Verify the behavior of Linearize(). |