diff options
author | Pieter Wuille <pieter@wuille.net> | 2024-05-08 17:28:39 -0400 |
---|---|---|
committer | Pieter Wuille <pieter@wuille.net> | 2024-07-25 10:16:37 -0400 |
commit | 46aad9b09986feb1d54fc4229a0d224da94fb80a (patch) | |
tree | 09cf5719b71eaf835a67a5cf332d317230507a74 /src | |
parent | ee0ddfe4f626bfb4b58927db89d317cb3531813f (diff) |
clusterlin: add Linearize function
This adds a first version of the overall linearization interface, which given
a DepGraph constructs a good linearization, by incrementally including good
candidate sets (found using AncestorCandidateFinder and SearchCandidateFinder).
Diffstat (limited to 'src')
-rw-r--r-- | src/cluster_linearize.h | 67 | ||||
-rw-r--r-- | src/test/fuzz/cluster_linearize.cpp | 89 | ||||
-rw-r--r-- | src/test/util/cluster_linearize.h | 17 |
3 files changed, 173 insertions, 0 deletions
diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index ba60c49ed0..cb6187eeca 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -167,6 +167,22 @@ public: for (auto pos : elems) ret += entries[pos].feerate; return ret; } + + /** Append the entries of select to list in a topologically valid order. + * + * Complexity: O(select.Count() * log(select.Count())). + */ + void AppendTopo(std::vector<ClusterIndex>& list, const SetType& select) const noexcept + { + ClusterIndex old_len = list.size(); + for (auto i : select) list.push_back(i); + std::sort(list.begin() + old_len, list.end(), [&](ClusterIndex a, ClusterIndex b) noexcept { + const auto a_anc_count = entries[a].ancestors.Count(); + const auto b_anc_count = entries[b].ancestors.Count(); + if (a_anc_count != b_anc_count) return a_anc_count < b_anc_count; + return a < b; + }); + } }; /** A set of transactions together with their aggregate feerate. */ @@ -486,6 +502,57 @@ public: } }; +/** Find a linearization for a cluster. + * + * @param[in] depgraph Dependency graph of the cluster to be linearized. + * @param[in] max_iterations Upper bound on the number of optimization steps that will be done. + * @return A pair of: + * - The resulting linearization. + * - A boolean indicating whether the result is guaranteed to be + * optimal. + * + * Complexity: O(N * min(max_iterations + N, 2^N)) where N=depgraph.TxCount(). + */ +template<typename SetType> +std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations) noexcept +{ + if (depgraph.TxCount() == 0) return {{}, true}; + + uint64_t iterations_left = max_iterations; + std::vector<ClusterIndex> linearization; + + AncestorCandidateFinder anc_finder(depgraph); + SearchCandidateFinder src_finder(depgraph); + linearization.reserve(depgraph.TxCount()); + bool optimal = true; + + while (true) { + // Initialize best as the best remaining ancestor set. + auto best = anc_finder.FindCandidateSet(); + + // Invoke bounded search to update best, with up to half of our remaining iterations as + // limit. + uint64_t max_iterations_now = (iterations_left + 1) / 2; + uint64_t iterations_done_now = 0; + std::tie(best, iterations_done_now) = src_finder.FindCandidateSet(max_iterations_now, best); + iterations_left -= iterations_done_now; + + if (iterations_done_now == max_iterations_now) { + optimal = false; + } + + // Add to output in topological order. + depgraph.AppendTopo(linearization, best.transactions); + + // Update state to reflect best is no longer to be linearized. + anc_finder.MarkDone(best.transactions); + if (anc_finder.AllDone()) break; + src_finder.MarkDone(best.transactions); + } + + return {std::move(linearization), optimal}; +} + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index f93eb9c8e3..e4d9a59b8d 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -11,6 +11,7 @@ #include <util/bitset.h> #include <util/feefrac.h> +#include <algorithm> #include <stdint.h> #include <vector> #include <utility> @@ -140,6 +141,29 @@ public: } }; +/** A simple linearization algorithm. + * + * This matches Linearize() in interface and behavior, though with fewer optimizations, and using + * just SimpleCandidateFinder rather than AncestorCandidateFinder and SearchCandidateFinder. + */ +template<typename SetType> +std::pair<std::vector<ClusterIndex>, bool> SimpleLinearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations) +{ + std::vector<ClusterIndex> linearization; + SimpleCandidateFinder finder(depgraph); + SetType todo = SetType::Fill(depgraph.TxCount()); + bool optimal = true; + while (todo.Any()) { + auto [candidate, iterations_done] = finder.FindCandidateSet(max_iterations); + if (iterations_done == max_iterations) optimal = false; + depgraph.AppendTopo(linearization, candidate.transactions); + todo -= candidate.transactions; + finder.MarkDone(candidate.transactions); + max_iterations -= iterations_done; + } + return {std::move(linearization), optimal}; +} + /** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */ template<typename SetType> SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader) @@ -458,3 +482,68 @@ FUZZ_TARGET(clusterlin_search_finder) assert(exh_finder.AllDone()); assert(anc_finder.AllDone()); } + +FUZZ_TARGET(clusterlin_linearize) +{ + // Verify the behavior of Linearize(). + + // Retrieve an iteration count, and a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph<TestBitSet> depgraph; + uint64_t iter_count{0}; + try { + reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph); + } catch (const std::ios_base::failure&) {} + + // Invoke Linearize(). + iter_count &= 0x7ffff; + auto [linearization, optimal] = Linearize(depgraph, iter_count); + SanityCheck(depgraph, linearization); + auto chunking = ChunkLinearization(depgraph, linearization); + + // If the iteration count is sufficiently high, an optimal linearization must be found. + // Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is + // 2 * (2^n - 1) + const uint64_t n = depgraph.TxCount(); + if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) { + assert(optimal); + } + + // If Linearize claims optimal result, run quality tests. + if (optimal) { + // It must be as good as SimpleLinearize. + auto [simple_linearization, simple_optimal] = SimpleLinearize(depgraph, MAX_SIMPLE_ITERATIONS); + SanityCheck(depgraph, simple_linearization); + auto simple_chunking = ChunkLinearization(depgraph, simple_linearization); + auto cmp = CompareChunks(chunking, simple_chunking); + assert(cmp >= 0); + // If SimpleLinearize finds the optimal result too, they must be equal (if not, + // SimpleLinearize is broken). + if (simple_optimal) assert(cmp == 0); + + // Only for very small clusters, test every topologically-valid permutation. + if (depgraph.TxCount() <= 7) { + std::vector<ClusterIndex> perm_linearization(depgraph.TxCount()); + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) perm_linearization[i] = i; + // Iterate over all valid permutations. + do { + // Determine whether perm_linearization is topological. + TestBitSet perm_done; + bool perm_is_topo{true}; + for (auto i : perm_linearization) { + perm_done.Set(i); + if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) { + perm_is_topo = false; + break; + } + } + // If so, verify that the obtained linearization is as good as the permutation. + if (perm_is_topo) { + auto perm_chunking = ChunkLinearization(depgraph, perm_linearization); + auto cmp = CompareChunks(chunking, perm_chunking); + assert(cmp >= 0); + } + } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end())); + } + } +} diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 771fbd543a..508a08133c 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -7,6 +7,7 @@ #include <cluster_linearize.h> #include <serialize.h> +#include <span.h> #include <streams.h> #include <util/bitset.h> #include <util/feefrac.h> @@ -331,6 +332,22 @@ void VerifyDepGraphFromCluster(const Cluster<SetType>& cluster, const DepGraph<S } } +/** Perform a sanity check on a linearization. */ +template<typename SetType> +void SanityCheck(const DepGraph<SetType>& depgraph, Span<const ClusterIndex> linearization) +{ + // Check completeness. + assert(linearization.size() == depgraph.TxCount()); + TestBitSet done; + for (auto i : linearization) { + // Check transaction position is in range. + assert(i < depgraph.TxCount()); + // Check topology and lack of duplicates. + assert((depgraph.Ancestors(i) - done) == TestBitSet::Singleton(i)); + done.Set(i); + } +} + } // namespace #endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H |