From b8401c3281978beed6198b2f9782b6a8dd35cbd7 Mon Sep 17 00:00:00 2001 From: Martin Leitner-Ankerl Date: Sat, 11 Jun 2022 09:23:51 +0200 Subject: Add pool based memory resource & allocator A memory resource similar to std::pmr::unsynchronized_pool_resource, but optimized for node-based containers. Co-Authored-By: Pieter Wuille --- src/Makefile.am | 1 + src/Makefile.bench.include | 1 + src/Makefile.test.include | 1 + src/Makefile.test_util.include | 1 + src/bench/pool.cpp | 50 ++++++ src/support/allocators/pool.h | 349 +++++++++++++++++++++++++++++++++++++ src/test/pool_tests.cpp | 156 +++++++++++++++++ src/test/util/poolresourcetester.h | 129 ++++++++++++++ 8 files changed, 688 insertions(+) create mode 100644 src/bench/pool.cpp create mode 100644 src/support/allocators/pool.h create mode 100644 src/test/pool_tests.cpp create mode 100644 src/test/util/poolresourcetester.h diff --git a/src/Makefile.am b/src/Makefile.am index 53c809c901..985a8e8915 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -261,6 +261,7 @@ BITCOIN_CORE_H = \ shutdown.h \ signet.h \ streams.h \ + support/allocators/pool.h \ support/allocators/secure.h \ support/allocators/zeroafterfree.h \ support/cleanse.h \ diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include index f1e4e706a1..c230728a1c 100644 --- a/src/Makefile.bench.include +++ b/src/Makefile.bench.include @@ -42,6 +42,7 @@ bench_bench_bitcoin_SOURCES = \ bench/nanobench.h \ bench/peer_eviction.cpp \ bench/poly1305.cpp \ + bench/pool.cpp \ bench/prevector.cpp \ bench/rollingbloom.cpp \ bench/rpc_blockchain.cpp \ diff --git a/src/Makefile.test.include b/src/Makefile.test.include index a39b0abd9d..98ed3ba4ad 100644 --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -116,6 +116,7 @@ BITCOIN_TESTS =\ test/pmt_tests.cpp \ test/policy_fee_tests.cpp \ test/policyestimator_tests.cpp \ + test/pool_tests.cpp \ test/pow_tests.cpp \ test/prevector_tests.cpp \ test/raii_event_tests.cpp \ diff --git a/src/Makefile.test_util.include b/src/Makefile.test_util.include index aefefe789a..11b93ad13e 100644 --- a/src/Makefile.test_util.include +++ b/src/Makefile.test_util.include @@ -15,6 +15,7 @@ TEST_UTIL_H = \ test/util/logging.h \ test/util/mining.h \ test/util/net.h \ + test/util/poolresourcetester.h \ test/util/random.h \ test/util/script.h \ test/util/setup_common.h \ diff --git a/src/bench/pool.cpp b/src/bench/pool.cpp new file mode 100644 index 0000000000..b3e54d85a2 --- /dev/null +++ b/src/bench/pool.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2022 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include + +#include + +template +void BenchFillClearMap(benchmark::Bench& bench, Map& map) +{ + size_t batch_size = 5000; + + // make sure each iteration of the benchmark contains exactly 5000 inserts and one clear. 
+    // do this at least 10 times so we get reasonably accurate results
+
+    bench.batch(batch_size).minEpochIterations(10).run([&] {
+        auto rng = ankerl::nanobench::Rng(1234);
+        for (size_t i = 0; i < batch_size; ++i) {
+            map[rng()];
+        }
+        map.clear();
+    });
+}
+
+static void PoolAllocator_StdUnorderedMap(benchmark::Bench& bench)
+{
+    auto map = std::unordered_map<uint64_t, uint64_t>();
+    BenchFillClearMap(bench, map);
+}
+
+static void PoolAllocator_StdUnorderedMapWithPoolResource(benchmark::Bench& bench)
+{
+    using Map = std::unordered_map<uint64_t,
+                                   uint64_t,
+                                   std::hash<uint64_t>,
+                                   std::equal_to<uint64_t>,
+                                   PoolAllocator<std::pair<const uint64_t, uint64_t>,
+                                                 sizeof(std::pair<const uint64_t, uint64_t>) + 4 * sizeof(void*),
+                                                 alignof(void*)>>;
+
+    // make sure the resource supports large enough pools to hold the node. We do this by adding the size of a few pointers to it.
+    auto pool_resource = Map::allocator_type::ResourceType();
+    auto map = Map{0, std::hash<uint64_t>{}, std::equal_to<uint64_t>{}, &pool_resource};
+    BenchFillClearMap(bench, map);
+}
+
+BENCHMARK(PoolAllocator_StdUnorderedMap, benchmark::PriorityLevel::HIGH);
+BENCHMARK(PoolAllocator_StdUnorderedMapWithPoolResource, benchmark::PriorityLevel::HIGH);
diff --git a/src/support/allocators/pool.h b/src/support/allocators/pool.h
new file mode 100644
index 0000000000..c8e70ebacf
--- /dev/null
+++ b/src/support/allocators/pool.h
@@ -0,0 +1,349 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_SUPPORT_ALLOCATORS_POOL_H
+#define BITCOIN_SUPPORT_ALLOCATORS_POOL_H
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <list>
+#include <memory>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+/**
+ * A memory resource similar to std::pmr::unsynchronized_pool_resource, but
+ * optimized for node-based containers. It has the following properties:
+ *
+ * * Owns the allocated memory and frees it on destruction, even when deallocate
+ *   has not been called on the allocated blocks.
+ *
+ * * Consists of a number of pools, each one for a different block size.
+ *   Each pool holds blocks of uniform size in a freelist.
+ *
+ * * Exhausting memory in a freelist causes a new allocation of a fixed size chunk.
+ *   This chunk is used to carve out blocks.
+ *
+ * * Block sizes or alignments that cannot be served by the pools are allocated
+ *   and deallocated by operator new().
+ *
+ * PoolResource is not thread-safe. It is intended to be used by PoolAllocator.
+ *
+ * @tparam MAX_BLOCK_SIZE_BYTES Maximum size to allocate with the pool. If larger
+ *         sizes are requested, allocation falls back to new().
+ *
+ * @tparam ALIGN_BYTES Required alignment for the allocations.
+ *
+ * An example: If you create a PoolResource<128, 8>(262144) and perform a bunch of
+ * allocations and deallocate 2 blocks with size 8 bytes, and 3 blocks with size 16,
+ * the members will look like this:
+ *
+ *     m_free_lists                        m_allocated_chunks
+ *        ┌───┐                              ┌───┐  ┌────────────-------──────┐
+ *        │   │  blocks                      │   ├─►│    262144 B             │
+ *        │   │  ┌─────┐  ┌─────┐            └─┬─┘  └────────────-------──────┘
+ *        │ 1 ├─►│ 8 B ├─►│ 8 B │              │
+ *        │   │  └─────┘  └─────┘              :
+ *        │   │                                │
+ *        │   │  ┌─────┐  ┌─────┐  ┌─────┐     ▼
+ *        │ 2 ├─►│16 B ├─►│16 B ├─►│16 B │   ┌───┐  ┌─────────────────────────┐
+ *        │   │  └─────┘  └─────┘  └─────┘   │   ├─►│          ▲              │ ▲
+ *        │   │                              └───┘  └──────────┬──────────────┘ │
+ *        │ . │                                                │                │
+ *        │ . │                                                │  m_available_memory_end
+ *        │ . │                           m_available_memory_it
+ *        │   │
+ *        │   │
+ *        │16 │
+ *        └───┘
+ *
+ * Here m_free_lists[1] holds the 2 blocks of size 8 bytes, and m_free_lists[2] holds the 3 blocks of size 16.
+ * The blocks came from the data stored in the m_allocated_chunks list. Each chunk
+ * is 262144 bytes large. The last chunk still has some memory available for new
+ * blocks, and when m_available_memory_it reaches m_available_memory_end, a new
+ * chunk is allocated and added to the list.
+ */
+template <std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+class PoolResource final
+{
+    static_assert(ALIGN_BYTES > 0, "ALIGN_BYTES must be nonzero");
+    static_assert((ALIGN_BYTES & (ALIGN_BYTES - 1)) == 0, "ALIGN_BYTES must be a power of two");
+
+    /**
+     * In-place linked list of the allocations, used for the freelist.
+     */
+    struct ListNode {
+        ListNode* m_next;
+
+        explicit ListNode(ListNode* next) : m_next(next) {}
+    };
+    static_assert(std::is_trivially_destructible_v<ListNode>, "Make sure we don't need to manually call a destructor");
+
+    /**
+     * Internal alignment value. The larger of the requested ALIGN_BYTES and alignof(ListNode).
+     */
+    static constexpr std::size_t ELEM_ALIGN_BYTES = std::max(alignof(ListNode), ALIGN_BYTES);
+    static_assert((ELEM_ALIGN_BYTES & (ELEM_ALIGN_BYTES - 1)) == 0, "ELEM_ALIGN_BYTES must be a power of two");
+    static_assert(sizeof(ListNode) <= ELEM_ALIGN_BYTES, "Units of size ELEM_ALIGN_BYTES need to be able to store a ListNode");
+    static_assert((MAX_BLOCK_SIZE_BYTES & (ELEM_ALIGN_BYTES - 1)) == 0, "MAX_BLOCK_SIZE_BYTES needs to be a multiple of the alignment.");
+
+    /**
+     * Size in bytes to allocate per chunk
+     */
+    const size_t m_chunk_size_bytes;
+
+    /**
+     * Contains all allocated pools of memory, used to free the data in the destructor.
+     */
+    std::list<std::byte*> m_allocated_chunks{};
+
+    /**
+     * Singly linked lists of all data that came from deallocating.
+     * m_free_lists[n] will serve blocks of size n*ELEM_ALIGN_BYTES.
+     */
+    std::array<ListNode*, MAX_BLOCK_SIZE_BYTES / ELEM_ALIGN_BYTES + 1> m_free_lists{};
+
+    /**
+     * Points to the beginning of available memory for carving out allocations.
+     */
+    std::byte* m_available_memory_it = nullptr;
+
+    /**
+     * Points to the end of available memory for carving out allocations.
+     *
+     * This member is redundant: whenever it is accessed it equals
+     * `m_allocated_chunks.back() + m_chunk_size_bytes`, but `m_available_memory_end`
+     * caches that value for clarity and efficiency.
+     */
+    std::byte* m_available_memory_end = nullptr;
+
+    /**
+     * How many multiples of ELEM_ALIGN_BYTES are necessary to fit bytes. We use that result directly as an index
+     * into m_free_lists. Round up for the special case when bytes==0.
+     */
+    [[nodiscard]] static constexpr std::size_t NumElemAlignBytes(std::size_t bytes)
+    {
+        return (bytes + ELEM_ALIGN_BYTES - 1) / ELEM_ALIGN_BYTES + (bytes == 0);
+    }
+
+    /**
+     * True when it is possible to make use of the freelist
+     */
+    [[nodiscard]] static constexpr bool IsFreeListUsable(std::size_t bytes, std::size_t alignment)
+    {
+        return alignment <= ELEM_ALIGN_BYTES && bytes <= MAX_BLOCK_SIZE_BYTES;
+    }
+
+    /**
+     * Replaces node with a placement-constructed ListNode that points to the previous node
+     */
+    void PlacementAddToList(void* p, ListNode*& node)
+    {
+        node = new (p) ListNode{node};
+    }
+
+    /**
+     * Allocate one full memory chunk which will be used to carve out allocations.
+     * Also puts any leftover bytes into the freelist.
+     *
+     * Precondition: leftover bytes are either 0 or few enough to fit into a place in the freelist
+     */
+    void AllocateChunk()
+    {
+        // if there is still any available memory left, put it into the freelist.
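+        // The precondition guarantees that the leftover is a multiple of ELEM_ALIGN_BYTES
+        // and strictly smaller than MAX_BLOCK_SIZE_BYTES, so the division below always
+        // yields a valid index into m_free_lists.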
+ size_t remaining_available_bytes = std::distance(m_available_memory_it, m_available_memory_end); + if (0 != remaining_available_bytes) { + PlacementAddToList(m_available_memory_it, m_free_lists[remaining_available_bytes / ELEM_ALIGN_BYTES]); + } + + void* storage = ::operator new (m_chunk_size_bytes, std::align_val_t{ELEM_ALIGN_BYTES}); + m_available_memory_it = new (storage) std::byte[m_chunk_size_bytes]; + m_available_memory_end = m_available_memory_it + m_chunk_size_bytes; + m_allocated_chunks.emplace_back(m_available_memory_it); + } + + /** + * Access to internals for testing purpose only + */ + friend class PoolResourceTester; + +public: + /** + * Construct a new PoolResource object which allocates the first chunk. + * chunk_size_bytes will be rounded up to next multiple of ELEM_ALIGN_BYTES. + */ + explicit PoolResource(std::size_t chunk_size_bytes) + : m_chunk_size_bytes(NumElemAlignBytes(chunk_size_bytes) * ELEM_ALIGN_BYTES) + { + assert(m_chunk_size_bytes >= MAX_BLOCK_SIZE_BYTES); + AllocateChunk(); + } + + /** + * Construct a new Pool Resource object, defaults to 2^18=262144 chunk size. + */ + PoolResource() : PoolResource(262144) {} + + /** + * Disable copy & move semantics, these are not supported for the resource. + */ + PoolResource(const PoolResource&) = delete; + PoolResource& operator=(const PoolResource&) = delete; + PoolResource(PoolResource&&) = delete; + PoolResource& operator=(PoolResource&&) = delete; + + /** + * Deallocates all memory allocated associated with the memory resource. + */ + ~PoolResource() + { + for (std::byte* chunk : m_allocated_chunks) { + std::destroy(chunk, chunk + m_chunk_size_bytes); + ::operator delete ((void*)chunk, std::align_val_t{ELEM_ALIGN_BYTES}); + } + } + + /** + * Allocates a block of bytes. If possible the freelist is used, otherwise allocation + * is forwarded to ::operator new(). + */ + void* Allocate(std::size_t bytes, std::size_t alignment) + { + if (IsFreeListUsable(bytes, alignment)) { + const std::size_t num_alignments = NumElemAlignBytes(bytes); + if (nullptr != m_free_lists[num_alignments]) { + // we've already got data in the pool's freelist, unlink one element and return the pointer + // to the unlinked memory. Since FreeList is trivially destructible we can just treat it as + // uninitialized memory. + return std::exchange(m_free_lists[num_alignments], m_free_lists[num_alignments]->m_next); + } + + // freelist is empty: get one allocation from allocated chunk memory. + const std::ptrdiff_t round_bytes = static_cast(num_alignments * ELEM_ALIGN_BYTES); + if (round_bytes > m_available_memory_end - m_available_memory_it) { + // slow path, only happens when a new chunk needs to be allocated + AllocateChunk(); + } + + // Make sure we use the right amount of bytes for that freelist (might be rounded up), + return std::exchange(m_available_memory_it, m_available_memory_it + round_bytes); + } + + // Can't use the pool => use operator new() + return ::operator new (bytes, std::align_val_t{alignment}); + } + + /** + * Returns a block to the freelists, or deletes the block when it did not come from the chunks. + */ + void Deallocate(void* p, std::size_t bytes, std::size_t alignment) noexcept + { + if (IsFreeListUsable(bytes, alignment)) { + const std::size_t num_alignments = NumElemAlignBytes(bytes); + // put the memory block into the linked list. We can placement construct the FreeList + // into the memory since we can be sure the alignment is correct. 
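+            // No destructor has to run before the block is reused: the caller has already
+            // destroyed its object, and ListNode is trivially destructible, so the memory
+            // can be treated as raw storage.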
+            PlacementAddToList(p, m_free_lists[num_alignments]);
+        } else {
+            // Can't use the pool => forward deallocation to ::operator delete().
+            ::operator delete (p, std::align_val_t{alignment});
+        }
+    }
+
+    /**
+     * Number of allocated chunks
+     */
+    [[nodiscard]] std::size_t NumAllocatedChunks() const
+    {
+        return m_allocated_chunks.size();
+    }
+
+    /**
+     * Size in bytes to allocate per chunk, set once at construction.
+     */
+    [[nodiscard]] size_t ChunkSizeBytes() const
+    {
+        return m_chunk_size_bytes;
+    }
+};
+
+
+/**
+ * Forwards all allocations/deallocations to the PoolResource.
+ */
+template <class T, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES = alignof(T)>
+class PoolAllocator
+{
+    PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>* m_resource;
+
+    template <typename U, std::size_t M, std::size_t A>
+    friend class PoolAllocator;
+
+public:
+    using value_type = T;
+    using ResourceType = PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>;
+
+    /**
+     * Not explicit so we can easily construct it with the correct resource
+     */
+    PoolAllocator(ResourceType* resource) noexcept
+        : m_resource(resource)
+    {
+    }
+
+    PoolAllocator(const PoolAllocator& other) noexcept = default;
+    PoolAllocator& operator=(const PoolAllocator& other) noexcept = default;
+
+    template <class U>
+    PoolAllocator(const PoolAllocator<U, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& other) noexcept
+        : m_resource(other.resource())
+    {
+    }
+
+    /**
+     * The rebind struct here is mandatory because we use non-type template arguments for
+     * PoolAllocator. See https://en.cppreference.com/w/cpp/named_req/Allocator#cite_note-2
+     */
+    template <typename U>
+    struct rebind {
+        using other = PoolAllocator<U, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>;
+    };
+
+    /**
+     * Forwards each call to the resource.
+     */
+    T* allocate(size_t n)
+    {
+        return static_cast<T*>(m_resource->Allocate(n * sizeof(T), alignof(T)));
+    }
+
+    /**
+     * Forwards each call to the resource.
+     */
+    void deallocate(T* p, size_t n) noexcept
+    {
+        m_resource->Deallocate(p, n * sizeof(T), alignof(T));
+    }
+
+    ResourceType* resource() const noexcept
+    {
+        return m_resource;
+    }
+};
+
+template <class T1, class T2, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+bool operator==(const PoolAllocator<T1, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& a,
+                const PoolAllocator<T2, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& b) noexcept
+{
+    return a.resource() == b.resource();
+}
+
+template <class T1, class T2, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+bool operator!=(const PoolAllocator<T1, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& a,
+                const PoolAllocator<T2, MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& b) noexcept
+{
+    return !(a == b);
+}
+
+#endif // BITCOIN_SUPPORT_ALLOCATORS_POOL_H
diff --git a/src/test/pool_tests.cpp b/src/test/pool_tests.cpp
new file mode 100644
index 0000000000..dfe857d05b
--- /dev/null
+++ b/src/test/pool_tests.cpp
@@ -0,0 +1,156 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <span.h>
+#include <support/allocators/pool.h>
+#include <test/util/poolresourcetester.h>
+#include <test/util/setup_common.h>
+
+#include <boost/test/unit_test.hpp>
+
+#include <cstddef>
+#include <cstdint>
+#include <unordered_map>
+#include <vector>
+
+BOOST_FIXTURE_TEST_SUITE(pool_tests, BasicTestingSetup)
+
+BOOST_AUTO_TEST_CASE(basic_allocating)
+{
+    auto resource = PoolResource<8, 8>();
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+
+    // first chunk is already allocated
+    size_t expected_bytes_available = resource.ChunkSizeBytes();
+    BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource));
+
+    // chunk is used, no more allocation
+    void* block = resource.Allocate(8, 8);
+    expected_bytes_available -= 8;
+    BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource));
+
+    BOOST_TEST(0 == PoolResourceTester::FreeListSizes(resource)[1]);
+    resource.Deallocate(block, 8, 8);
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+    BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]);
+
+    // alignment is too small, but the best fitting freelist is used. Nothing is allocated.
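+    // (With PoolResource<8, 8>, NumElemAlignBytes(8) == 1, so an 8-byte request with
+    // alignment 1 maps to the same freelist as the 8-byte/8-aligned block above.)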
+ void* b = resource.Allocate(8, 1); + BOOST_TEST(b == block); // we got the same block of memory as before + BOOST_TEST(0 == PoolResourceTester::FreeListSizes(resource)[1]); + BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource)); + + resource.Deallocate(block, 8, 1); + PoolResourceTester::CheckAllDataAccountedFor(resource); + BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]); + BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource)); + + // can't use resource because alignment is too big, allocate system memory + b = resource.Allocate(8, 16); + BOOST_TEST(b != block); + block = b; + PoolResourceTester::CheckAllDataAccountedFor(resource); + BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]); + BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource)); + + resource.Deallocate(block, 8, 16); + PoolResourceTester::CheckAllDataAccountedFor(resource); + BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]); + BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource)); + + // can't use chunk because size is too big + block = resource.Allocate(16, 8); + PoolResourceTester::CheckAllDataAccountedFor(resource); + BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]); + BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource)); + + resource.Deallocate(block, 16, 8); + PoolResourceTester::CheckAllDataAccountedFor(resource); + BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]); + BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource)); + + // it's possible that 0 bytes are allocated, make sure this works. 
+    // In that case the allocation is served by the pool, not forwarded to operator new():
+    // a 0-byte request rounds up and consumes one entry from the first freelist.
+    void* p = resource.Allocate(0, 1);
+    BOOST_TEST(0 == PoolResourceTester::FreeListSizes(resource)[1]);
+    BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource));
+
+    resource.Deallocate(p, 0, 1);
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+    BOOST_TEST(1 == PoolResourceTester::FreeListSizes(resource)[1]);
+    BOOST_TEST(expected_bytes_available == PoolResourceTester::AvailableMemoryFromChunk(resource));
+}
+
+// Allocates 0 to n bytes, where n exceeds the PoolResource's MAX_BLOCK_SIZE_BYTES; each allocation should work
+BOOST_AUTO_TEST_CASE(allocate_any_byte)
+{
+    auto resource = PoolResource<128, 8>(1024);
+
+    uint8_t num_allocs = 200;
+
+    auto data = std::vector<Span<uint8_t>>();
+
+    // allocate an increasing number of bytes
+    for (uint8_t num_bytes = 0; num_bytes < num_allocs; ++num_bytes) {
+        uint8_t* bytes = new (resource.Allocate(num_bytes, 1)) uint8_t[num_bytes];
+        BOOST_TEST(bytes != nullptr);
+        data.emplace_back(bytes, num_bytes);
+
+        // set each byte to num_bytes
+        std::fill(bytes, bytes + num_bytes, num_bytes);
+    }
+
+    // now that everything is allocated, check that all spans still hold the correct values, then give everything back to the allocator
+    uint8_t val = 0;
+    for (auto const& span : data) {
+        for (auto x : span) {
+            BOOST_TEST(val == x);
+        }
+        std::destroy(span.data(), span.data() + span.size());
+        resource.Deallocate(span.data(), span.size(), 1);
+        ++val;
+    }
+
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+}
+
+BOOST_AUTO_TEST_CASE(random_allocations)
+{
+    struct PtrSizeAlignment {
+        void* ptr;
+        size_t bytes;
+        size_t alignment;
+    };
+
+    // makes a bunch of random allocations and gives all of them back in random order.
+    auto resource = PoolResource<128, 8>(65536);
+    std::vector<PtrSizeAlignment> ptr_size_alignment{};
+    for (size_t i = 0; i < 1000; ++i) {
+        // make it a bit more likely to allocate than deallocate
+        if (ptr_size_alignment.empty() || 0 != InsecureRandRange(4)) {
+            // allocate a random item
+            std::size_t alignment = std::size_t{1} << InsecureRandRange(8);          // 1, 2, ..., 128
+            std::size_t size = (InsecureRandRange(200) / alignment + 1) * alignment; // multiple of alignment
+            void* ptr = resource.Allocate(size, alignment);
+            BOOST_TEST(ptr != nullptr);
+            BOOST_TEST((reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) == 0);
+            ptr_size_alignment.push_back({ptr, size, alignment});
+        } else {
+            // deallocate a random item
+            auto& x = ptr_size_alignment[InsecureRandRange(ptr_size_alignment.size())];
+            resource.Deallocate(x.ptr, x.bytes, x.alignment);
+            x = ptr_size_alignment.back();
+            ptr_size_alignment.pop_back();
+        }
+    }
+
+    // deallocate all the rest
+    for (auto const& x : ptr_size_alignment) {
+        resource.Deallocate(x.ptr, x.bytes, x.alignment);
+    }
+
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/test/util/poolresourcetester.h b/src/test/util/poolresourcetester.h
new file mode 100644
index 0000000000..93f62eb2a9
--- /dev/null
+++ b/src/test/util/poolresourcetester.h
@@ -0,0 +1,129 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_TEST_UTIL_POOLRESOURCETESTER_H
+#define BITCOIN_TEST_UTIL_POOLRESOURCETESTER_H
+
+#include <support/allocators/pool.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+/**
+ * Helper to get access to private parts of PoolResource.
+ * Used in unit tests and in the fuzzer.
+ */
+class PoolResourceTester
+{
+    struct PtrAndBytes {
+        uintptr_t ptr;
+        std::size_t size;
+
+        PtrAndBytes(const void* p, std::size_t s)
+            : ptr(reinterpret_cast<uintptr_t>(p)), size(s)
+        {
+        }
+
+        /**
+         * defines a sort ordering by the pointer value
+         */
+        friend bool operator<(PtrAndBytes const& a, PtrAndBytes const& b)
+        {
+            return a.ptr < b.ptr;
+        }
+    };
+
+public:
+    /**
+     * Extracts the number of elements per freelist
+     */
+    template <std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+    static std::vector<std::size_t> FreeListSizes(const PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& resource)
+    {
+        auto sizes = std::vector<std::size_t>();
+        for (const auto* ptr : resource.m_free_lists) {
+            size_t size = 0;
+            while (ptr != nullptr) {
+                ++size;
+                ptr = ptr->m_next;
+            }
+            sizes.push_back(size);
+        }
+        return sizes;
+    }
+
+    /**
+     * How many bytes are still available from the last allocated chunk
+     */
+    template <std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+    static std::size_t AvailableMemoryFromChunk(const PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& resource)
+    {
+        return resource.m_available_memory_end - resource.m_available_memory_it;
+    }
+
+    /**
+     * Once all blocks are given back to the resource, tests that the freelists are consistent:
+     *
+     * * All data in the freelists must come from the chunks
+     * * Memory doesn't overlap
+     * * Each byte in the chunks can be accounted for in either the freelist or as available bytes.
+     */
+    template <std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+    static void CheckAllDataAccountedFor(const PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES>& resource)
+    {
+        // collect all free blocks by iterating all freelists
+        std::vector<PtrAndBytes> free_blocks;
+        for (std::size_t freelist_idx = 0; freelist_idx < resource.m_free_lists.size(); ++freelist_idx) {
+            std::size_t bytes = freelist_idx * resource.ELEM_ALIGN_BYTES;
+            auto* ptr = resource.m_free_lists[freelist_idx];
+            while (ptr != nullptr) {
+                free_blocks.emplace_back(ptr, bytes);
+                ptr = ptr->m_next;
+            }
+        }
+        // also add whatever has not yet been used for blocks
+        auto num_available_bytes = resource.m_available_memory_end - resource.m_available_memory_it;
+        if (num_available_bytes > 0) {
+            free_blocks.emplace_back(resource.m_available_memory_it, num_available_bytes);
+        }
+
+        // collect all chunks
+        std::vector<PtrAndBytes> chunks;
+        for (const std::byte* ptr : resource.m_allocated_chunks) {
+            chunks.emplace_back(ptr, resource.ChunkSizeBytes());
+        }
+
+        // now we have all the data from all freelists on the one hand, and all chunks on the other.
+        // To check if they match, sort both by address and iterate.
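+        // The walk below advances through the sorted chunks in lockstep with the sorted
+        // free blocks: every free block has to start exactly at the next unaccounted-for
+        // byte of the current chunk, so any gap, overlap or stray pointer trips an assert.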
+        std::sort(free_blocks.begin(), free_blocks.end());
+        std::sort(chunks.begin(), chunks.end());
+
+        auto chunk_it = chunks.begin();
+        auto chunk_ptr_remaining = chunk_it->ptr;
+        auto chunk_size_remaining = chunk_it->size;
+        for (const auto& free_block : free_blocks) {
+            if (chunk_size_remaining == 0) {
+                assert(chunk_it != chunks.end());
+                ++chunk_it;
+                assert(chunk_it != chunks.end());
+                chunk_ptr_remaining = chunk_it->ptr;
+                chunk_size_remaining = chunk_it->size;
+            }
+            assert(free_block.ptr == chunk_ptr_remaining);                   // ensure addresses match
+            assert(free_block.size <= chunk_size_remaining);                 // ensure no overflow
+            assert((free_block.ptr & (resource.ELEM_ALIGN_BYTES - 1)) == 0); // ensure correct alignment
+            chunk_ptr_remaining += free_block.size;
+            chunk_size_remaining -= free_block.size;
+        }
+        // ensure we are at the end of the chunks
+        assert(chunk_ptr_remaining == chunk_it->ptr + chunk_it->size);
+        ++chunk_it;
+        assert(chunk_it == chunks.end());
+        assert(chunk_size_remaining == 0);
+    }
+};
+
+#endif // BITCOIN_TEST_UTIL_POOLRESOURCETESTER_H
--
cgit v1.2.3


From e19943f049ed8aa4f32a1d8440a9fbf160367f0f Mon Sep 17 00:00:00 2001
From: Martin Leitner-Ankerl
Date: Sat, 11 Jun 2022 09:28:13 +0200
Subject: Calculate memory usage correctly for unordered_maps that use
 PoolAllocator

Extracts the resource from a PoolAllocator and uses it for
calculation of the node's memory usage.
---
 src/memusage.h          | 20 ++++++++++++++++++++
 src/test/pool_tests.cpp | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/src/memusage.h b/src/memusage.h
index 9755be0ff5..bb39066a7d 100644
--- a/src/memusage.h
+++ b/src/memusage.h
@@ -7,6 +7,7 @@
 
 #include <indirectmap.h>
 #include <prevector.h>
+#include <support/allocators/pool.h>
 
 #include <cassert>
 #include <cstdlib>
@@ -166,6 +167,25 @@ static inline size_t DynamicUsage(const std::unordered_map<X, Y, Z>& m)
     return MallocUsage(sizeof(unordered_node<std::pair<const X, Y> >)) * m.size() + MallocUsage(sizeof(void*) * m.bucket_count());
 }
 
+template <typename X, typename Y, std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+static inline size_t DynamicUsage(const std::unordered_map<X,
+                                                           Y,
+                                                           std::hash<X>,
+                                                           std::equal_to<X>,
+                                                           PoolAllocator<std::pair<const X, Y>,
+                                                                         MAX_BLOCK_SIZE_BYTES,
+                                                                         ALIGN_BYTES>>& m)
+{
+    auto* pool_resource = m.get_allocator().resource();
+
+    // The allocated chunks are stored in a std::list. Size per node should
+    // therefore be 3 pointers: next, previous, and a pointer to the chunk.
+    size_t estimated_list_node_size = MallocUsage(sizeof(void*) * 3);
+    size_t usage_resource = estimated_list_node_size * pool_resource->NumAllocatedChunks();
+    size_t usage_chunks = MallocUsage(pool_resource->ChunkSizeBytes()) * pool_resource->NumAllocatedChunks();
+    return usage_resource + usage_chunks + MallocUsage(sizeof(void*) * m.bucket_count());
+}
+
 } // namespace memusage
 
 #endif // BITCOIN_MEMUSAGE_H
diff --git a/src/test/pool_tests.cpp b/src/test/pool_tests.cpp
index dfe857d05b..8a07e09a44 100644
--- a/src/test/pool_tests.cpp
+++ b/src/test/pool_tests.cpp
@@ -2,6 +2,7 @@
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#include <memusage.h>
 #include <span.h>
 #include <support/allocators/pool.h>
 #include <test/util/poolresourcetester.h>
@@ -153,4 +154,37 @@ BOOST_AUTO_TEST_CASE(random_allocations)
     PoolResourceTester::CheckAllDataAccountedFor(resource);
 }
 
+BOOST_AUTO_TEST_CASE(memusage_test)
+{
+    auto std_map = std::unordered_map<int64_t, int64_t>{};
+
+    using Map = std::unordered_map<int64_t,
+                                   int64_t,
+                                   std::hash<int64_t>,
+                                   std::equal_to<int64_t>,
+                                   PoolAllocator<std::pair<const int64_t, int64_t>,
+                                                 sizeof(std::pair<const int64_t, int64_t>) + sizeof(void*) * 4,
+                                                 alignof(void*)>>;
+    auto resource = Map::allocator_type::ResourceType(1024);
+
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+
+    {
+        auto resource_map = Map{0, std::hash<int64_t>{}, std::equal_to<int64_t>{}, &resource};
+
+        // can't have the same resource usage
+        BOOST_TEST(memusage::DynamicUsage(std_map) != memusage::DynamicUsage(resource_map));
+
+        for (size_t i = 0; i < 10000; ++i) {
+            std_map[i];
+            resource_map[i];
+        }
+
+        // Eventually the resource_map should have a much lower memory usage because it has less malloc overhead
+        BOOST_TEST(memusage::DynamicUsage(resource_map) <= memusage::DynamicUsage(std_map) * 90 / 100);
+    }
+
+    PoolResourceTester::CheckAllDataAccountedFor(resource);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
--
cgit v1.2.3


From 1afca6b663bb54022afff193fd9d83856606b189 Mon Sep 17 00:00:00 2001
From: Martin Leitner-Ankerl
Date: Sat, 11 Jun 2022 10:48:35 +0200
Subject: Add PoolResource fuzzer

Fuzzes PoolResource with random allocations/deallocations, and
multiple asserts.

Co-Authored-By: Pieter Wuille
---
 src/Makefile.test.include      |   1 +
 src/test/fuzz/poolresource.cpp | 174 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 src/test/fuzz/poolresource.cpp

diff --git a/src/Makefile.test.include b/src/Makefile.test.include
index 98ed3ba4ad..fc49c1fd40 100644
--- a/src/Makefile.test.include
+++ b/src/Makefile.test.include
@@ -301,6 +301,7 @@ test_fuzz_fuzz_SOURCES = \
 	test/fuzz/partially_downloaded_block.cpp \
 	test/fuzz/policy_estimator.cpp \
 	test/fuzz/policy_estimator_io.cpp \
+	test/fuzz/poolresource.cpp \
 	test/fuzz/pow.cpp \
 	test/fuzz/prevector.cpp \
 	test/fuzz/primitives_transaction.cpp \
diff --git a/src/test/fuzz/poolresource.cpp b/src/test/fuzz/poolresource.cpp
new file mode 100644
index 0000000000..ce64ef6472
--- /dev/null
+++ b/src/test/fuzz/poolresource.cpp
@@ -0,0 +1,174 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <span.h>
+#include <support/allocators/pool.h>
+#include <test/fuzz/FuzzedDataProvider.h>
+#include <test/fuzz/fuzz.h>
+#include <test/fuzz/util.h>
+#include <test/util/poolresourcetester.h>
+#include <test/util/xoroshiro128plusplus.h>
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+namespace {
+
+template <std::size_t MAX_BLOCK_SIZE_BYTES, std::size_t ALIGN_BYTES>
+class PoolResourceFuzzer
+{
+    FuzzedDataProvider& m_provider;
+    PoolResource<MAX_BLOCK_SIZE_BYTES, ALIGN_BYTES> m_test_resource;
+    uint64_t m_sequence{0};
+    size_t m_total_allocated{};
+
+    struct Entry {
+        Span<std::byte> span;
+        size_t alignment;
+        uint64_t seed;
+
+        Entry(Span<std::byte> s, size_t a, uint64_t se) : span(s), alignment(a), seed(se) {}
+    };
+
+    std::vector<Entry> m_entries;
+
+public:
+    PoolResourceFuzzer(FuzzedDataProvider& provider)
+        : m_provider{provider},
+          m_test_resource{provider.ConsumeIntegralInRange<size_t>(MAX_BLOCK_SIZE_BYTES, 262144)}
+    {
+    }
+
+    void Allocate(size_t size, size_t alignment)
+    {
+        assert(size > 0);                           // Must allocate at least 1 byte.
+        assert(alignment > 0);                      // Alignment must be at least 1.
+        assert((alignment & (alignment - 1)) == 0); // Alignment must be power of 2.
+        assert((size & (alignment - 1)) == 0);      // Size must be a multiple of alignment.
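+
+        // Each allocation is filled with a deterministic byte stream keyed by its own seed
+        // (see RandomContentFill below), so Deallocate() can use RandomContentCheck() to
+        // verify that no other allocation has scribbled over it in the meantime.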
+
+        auto span = Span(static_cast<std::byte*>(m_test_resource.Allocate(size, alignment)), size);
+        m_total_allocated += size;
+
+        auto ptr_val = reinterpret_cast<uintptr_t>(span.data());
+        assert((ptr_val & (alignment - 1)) == 0);
+
+        uint64_t seed = m_sequence++;
+        RandomContentFill(m_entries.emplace_back(span, alignment, seed));
+    }
+
+    void Allocate()
+    {
+        if (m_total_allocated > 0x1000000) return;
+        size_t alignment_bits = m_provider.ConsumeIntegralInRange<size_t>(0, 7);
+        size_t alignment = 1 << alignment_bits;
+        size_t size_bits = m_provider.ConsumeIntegralInRange<size_t>(0, 16 - alignment_bits);
+        size_t size = m_provider.ConsumeIntegralInRange<size_t>(1U << size_bits, (1U << (size_bits + 1)) - 1U) << alignment_bits;
+        Allocate(size, alignment);
+    }
+
+    void RandomContentFill(Entry& entry)
+    {
+        XoRoShiRo128PlusPlus rng(entry.seed);
+        auto ptr = entry.span.data();
+        auto size = entry.span.size();
+
+        while (size >= 8) {
+            auto r = rng();
+            std::memcpy(ptr, &r, 8);
+            size -= 8;
+            ptr += 8;
+        }
+        if (size > 0) {
+            auto r = rng();
+            std::memcpy(ptr, &r, size);
+        }
+    }
+
+    void RandomContentCheck(const Entry& entry)
+    {
+        XoRoShiRo128PlusPlus rng(entry.seed);
+        auto ptr = entry.span.data();
+        auto size = entry.span.size();
+
+        std::byte buf[8];
+        while (size >= 8) {
+            auto r = rng();
+            std::memcpy(buf, &r, 8);
+            assert(std::memcmp(buf, ptr, 8) == 0);
+            size -= 8;
+            ptr += 8;
+        }
+        if (size > 0) {
+            auto r = rng();
+            std::memcpy(buf, &r, size);
+            assert(std::memcmp(buf, ptr, size) == 0);
+        }
+    }
+
+    void Deallocate(const Entry& entry)
+    {
+        auto ptr_val = reinterpret_cast<uintptr_t>(entry.span.data());
+        assert((ptr_val & (entry.alignment - 1)) == 0);
+        RandomContentCheck(entry);
+        m_total_allocated -= entry.span.size();
+        m_test_resource.Deallocate(entry.span.data(), entry.span.size(), entry.alignment);
+    }
+
+    void Deallocate()
+    {
+        if (m_entries.empty()) {
+            return;
+        }
+
+        size_t idx = m_provider.ConsumeIntegralInRange<size_t>(0, m_entries.size() - 1);
+        Deallocate(m_entries[idx]);
+        if (idx != m_entries.size() - 1) {
+            m_entries[idx] = std::move(m_entries.back());
+        }
+        m_entries.pop_back();
+    }
+
+    void Clear()
+    {
+        while (!m_entries.empty()) {
+            Deallocate();
+        }
+
+        PoolResourceTester::CheckAllDataAccountedFor(m_test_resource);
+    }
+
+    void Fuzz()
+    {
+        LIMITED_WHILE(m_provider.ConsumeBool(), 10000)
+        {
+            CallOneOf(
+                m_provider,
+                [&] { Allocate(); },
+                [&] { Deallocate(); });
+        }
+        Clear();
+    }
+};
+
+
+} // namespace
+
+FUZZ_TARGET(pool_resource)
+{
+    FuzzedDataProvider provider(buffer.data(), buffer.size());
+    CallOneOf(
+        provider,
+        [&] { PoolResourceFuzzer<128, 1>{provider}.Fuzz(); },
+        [&] { PoolResourceFuzzer<128, 2>{provider}.Fuzz(); },
+        [&] { PoolResourceFuzzer<128, 4>{provider}.Fuzz(); },
+        [&] { PoolResourceFuzzer<128, 8>{provider}.Fuzz(); },
+
+        [&] { PoolResourceFuzzer<8, 8>{provider}.Fuzz(); },
+        [&] { PoolResourceFuzzer<16, 16>{provider}.Fuzz(); },
+
+        [&] { PoolResourceFuzzer<256, alignof(max_align_t)>{provider}.Fuzz(); },
+        [&] { PoolResourceFuzzer<256, 64>{provider}.Fuzz(); });
+}
--
cgit v1.2.3


From 5e4ac5abf54f8e6d6330df0c73119aa0cca4c103 Mon Sep 17 00:00:00 2001
From: Martin Leitner-Ankerl
Date: Sat, 11 Jun 2022 11:00:53 +0200
Subject: Call ReallocateCache() on each Flush()

This frees up all memory associated with the map, not only the nodes.
This is necessary in preparation for using the PoolAllocator for
CCoinsMap, which does not actually free any memory on clear().
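A minimal sketch of the behavior this prepares for (illustrative only, not part of
this diff; assumes <support/allocators/pool.h> and <unordered_map>, and the map type
is made up here):

    using Map = std::unordered_map<uint64_t, uint64_t,
                                   std::hash<uint64_t>, std::equal_to<uint64_t>,
                                   PoolAllocator<std::pair<const uint64_t, uint64_t>, 144, 8>>;
    Map::allocator_type::ResourceType resource;
    Map map{0, Map::hasher{}, Map::key_equal{}, &resource};
    map[1] = 2;
    map.clear(); // the node goes back onto the resource's freelist; nothing returns to the OS

Only destroying the map together with its resource releases the chunks, which is
what ReallocateCache() will do once CCoinsMap uses the pool.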
--- src/coins.cpp | 9 ++++++--- src/test/validation_flush_tests.cpp | 5 ++--- src/validation.cpp | 1 - 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coins.cpp b/src/coins.cpp index 5a6ae525a7..f55932f302 100644 --- a/src/coins.cpp +++ b/src/coins.cpp @@ -253,9 +253,12 @@ bool CCoinsViewCache::BatchWrite(CCoinsMap &mapCoins, const uint256 &hashBlockIn bool CCoinsViewCache::Flush() { bool fOk = base->BatchWrite(cacheCoins, hashBlock, /*erase=*/true); - if (fOk && !cacheCoins.empty()) { - /* BatchWrite must erase all cacheCoins elements when erase=true. */ - throw std::logic_error("Not all cached coins were erased"); + if (fOk) { + if (!cacheCoins.empty()) { + /* BatchWrite must erase all cacheCoins elements when erase=true. */ + throw std::logic_error("Not all cached coins were erased"); + } + ReallocateCache(); } cachedCoinsUsage = 0; return fOk; diff --git a/src/test/validation_flush_tests.cpp b/src/test/validation_flush_tests.cpp index 26c48eb0e0..205164b94c 100644 --- a/src/test/validation_flush_tests.cpp +++ b/src/test/validation_flush_tests.cpp @@ -131,8 +131,7 @@ BOOST_AUTO_TEST_CASE(getcoinscachesizestate) CoinsCacheSizeState::OK); } - // Flushing the view doesn't take us back to OK because cacheCoins has - // preallocated memory that doesn't get reclaimed even after flush. + // Flushing the view does take us back to OK because ReallocateCache() is called BOOST_CHECK_EQUAL( chainstate.GetCoinsCacheSizeState(MAX_COINS_CACHE_BYTES, 0), @@ -144,7 +143,7 @@ BOOST_AUTO_TEST_CASE(getcoinscachesizestate) BOOST_CHECK_EQUAL( chainstate.GetCoinsCacheSizeState(MAX_COINS_CACHE_BYTES, 0), - CoinsCacheSizeState::CRITICAL); + CoinsCacheSizeState::OK); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/validation.cpp b/src/validation.cpp index e82fead89e..cf7688ea9f 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -4930,7 +4930,6 @@ bool Chainstate::ResizeCoinsCaches(size_t coinstip_size, size_t coinsdb_size) } else { // Otherwise, flush state to disk and deallocate the in-memory coins map. ret = FlushStateToDisk(state, FlushStateMode::ALWAYS); - CoinsTip().ReallocateCache(); } return ret; } -- cgit v1.2.3 From 9f947fc3d4b779f017332135323b34e8f216f613 Mon Sep 17 00:00:00 2001 From: Martin Leitner-Ankerl Date: Sat, 11 Jun 2022 11:27:38 +0200 Subject: Use PoolAllocator for CCoinsMap In my benchmarks, using this pool allocator for CCoinsMap gives about 20% faster `-reindex-chainstate` with -dbcache=5000 with practically the same memory usage. The change in max RSS changed was 0.3%. The `validation_flush_tests` tests need to be updated because memory allocation is now done in large pools instead of one node at a time, so the limits need to be updated accordingly. 
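Rough accounting for where the savings come from (a sketch based on the DynamicUsage()
overload added earlier in this series; exact numbers depend on the platform's malloc):

    // per-node (before): MallocUsage(node_size) * map.size()        -> one malloc per node
    // pooled (after):    (MallocUsage(ChunkSizeBytes())
    //                     + MallocUsage(3 * sizeof(void*)))         -> std::list bookkeeping
    //                    * NumAllocatedChunks()
    //                    + MallocUsage(sizeof(void*) * map.bucket_count())
    // i.e. one allocation per ~256 KiB chunk instead of one per node, so per-allocation
    // malloc overhead is amortized across hundreds of nodes.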
---
 src/coins.cpp                       |  6 ++++--
 src/coins.h                         | 20 ++++++++++++++++++-
 src/test/coins_tests.cpp            | 39 ++++++++++++++++++++++++++++++++++---
 src/test/fuzz/coins_view.cpp        |  3 ++-
 src/test/validation_flush_tests.cpp | 28 ++++++++++++++++----------
 5 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/src/coins.cpp b/src/coins.cpp
index f55932f302..0fe642e46b 100644
--- a/src/coins.cpp
+++ b/src/coins.cpp
@@ -34,7 +34,7 @@ size_t CCoinsViewBacked::EstimateSize() const { return base->EstimateSize(); }
 
 CCoinsViewCache::CCoinsViewCache(CCoinsView* baseIn, bool deterministic) :
     CCoinsViewBacked(baseIn), m_deterministic(deterministic),
-    cacheCoins(0, SaltedOutpointHasher(/*deterministic=*/deterministic))
+    cacheCoins(0, SaltedOutpointHasher(/*deterministic=*/deterministic), CCoinsMap::key_equal{}, &m_cache_coins_memory_resource)
 {}
 
 size_t CCoinsViewCache::DynamicMemoryUsage() const {
@@ -317,7 +317,9 @@ void CCoinsViewCache::ReallocateCache()
     // Cache should be empty when we're calling this.
     assert(cacheCoins.size() == 0);
     cacheCoins.~CCoinsMap();
-    ::new (&cacheCoins) CCoinsMap(0, SaltedOutpointHasher(/*deterministic=*/m_deterministic));
+    m_cache_coins_memory_resource.~CCoinsMapMemoryResource();
+    ::new (&m_cache_coins_memory_resource) CCoinsMapMemoryResource{};
+    ::new (&cacheCoins) CCoinsMap{0, SaltedOutpointHasher{/*deterministic=*/m_deterministic}, CCoinsMap::key_equal{}, &m_cache_coins_memory_resource};
 }
 
 void CCoinsViewCache::SanityCheck() const
diff --git a/src/coins.h b/src/coins.h
index dd336b210a..039a07054d 100644
--- a/src/coins.h
+++ b/src/coins.h
@@ -11,6 +11,7 @@
 #include <memusage.h>
 #include <primitives/transaction.h>
 #include <serialize.h>
+#include <support/allocators/pool.h>
 #include <uint256.h>
 #include <util/hasher.h>
 
@@ -131,7 +132,23 @@ struct CCoinsCacheEntry
     CCoinsCacheEntry(Coin&& coin_, unsigned char flag) : coin(std::move(coin_)), flags(flag) {}
 };
 
-typedef std::unordered_map<COutPoint, CCoinsCacheEntry, SaltedOutpointHasher> CCoinsMap;
+/**
+ * PoolAllocator's MAX_BLOCK_SIZE_BYTES parameter here uses sizeof the data, and adds the size
+ * of 4 pointers. We do not know the exact node size used in the std::unordered_map implementation
+ * because it is implementation defined. Most implementations have an overhead of 1 or 2 pointers,
+ * so nodes can be connected in a linked list, and in some cases the hash value is stored as well.
+ * Using an additional sizeof(void*)*4 for MAX_BLOCK_SIZE_BYTES should thus be sufficient so that
+ * all implementations can allocate the nodes from the PoolAllocator.
+ */
+using CCoinsMap = std::unordered_map<COutPoint,
+                                     CCoinsCacheEntry,
+                                     SaltedOutpointHasher,
+                                     std::equal_to<COutPoint>,
+                                     PoolAllocator<std::pair<const COutPoint, CCoinsCacheEntry>,
+                                                   sizeof(std::pair<const COutPoint, CCoinsCacheEntry>) + sizeof(void*) * 4,
+                                                   alignof(void*)>>;
+
+using CCoinsMapMemoryResource = CCoinsMap::allocator_type::ResourceType;
 
 /** Cursor for iterating over CoinsView state */
 class CCoinsViewCursor
@@ -220,6 +237,7 @@ protected:
      * declared as "const".
      */
     mutable uint256 hashBlock;
+    mutable CCoinsMapMemoryResource m_cache_coins_memory_resource{};
    mutable CCoinsMap cacheCoins;
 
     /* Cached dynamic memory usage for the inner Coin objects. */
diff --git a/src/test/coins_tests.cpp b/src/test/coins_tests.cpp
index e082800fc3..853dc6dc1e 100644
--- a/src/test/coins_tests.cpp
+++ b/src/test/coins_tests.cpp
@@ -6,6 +6,7 @@
 #include <clientversion.h>
 #include <coins.h>