From 0e5f4a681f4ff3efa71cdfd71bc90829c6890d99 Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot
Date: Wed, 17 Apr 2024 12:52:47 -0700
Subject: [PATCH] Put cleanup nodes on separate chunked list.

Previously, cleanup nodes were carved out of the tail (limit_) of the
arena's memory blocks, interleaved with ordinary allocations. They now
live in a dedicated cleanup::ChunkList per SerialArena: a singly linked
list of chunks that double in size (from 64 bytes up to 4 KiB) as nodes
are added. This removes the cleanup_nodes bookkeeping from ArenaBlock,
the backwards-prefetch path, and the per-allocation reservation of
cleanup space.

PiperOrigin-RevId: 625777108
---
 src/google/protobuf/arena.cc            | 192 ++++++++++++++----------
 src/google/protobuf/arena_cleanup.h     |  79 ++++++----
 src/google/protobuf/arena_unittest.cc   |  18 ++-
 src/google/protobuf/serial_arena.h      |  74 ++-------
 src/google/protobuf/thread_safe_arena.h |   4 +-
 5 files changed, 193 insertions(+), 174 deletions(-)

diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc
index e4fa0cea45c04..fc3d19d7c5e50 100644
--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@@ -60,29 +60,46 @@ ArenaBlock* SentryArenaBlock() {
 }
 #endif
 
-SizedPtr AllocateMemory(const AllocationPolicy* policy_ptr, size_t last_size,
-                        size_t min_bytes) {
-  AllocationPolicy policy;  // default policy
-  if (policy_ptr) policy = *policy_ptr;
+size_t AllocationSize(size_t last_size, size_t min_size, size_t max_size) {
   size_t size;
   if (last_size != 0) {
     // Double the current block size, up to a limit.
-    auto max_size = policy.max_block_size;
     size = std::min(2 * last_size, max_size);
   } else {
-    size = policy.start_block_size;
+    size = min_size;
  }
-  // Verify that min_bytes + kBlockHeaderSize won't overflow.
-  ABSL_CHECK_LE(min_bytes, std::numeric_limits<size_t>::max() -
-                               SerialArena::kBlockHeaderSize);
-  size = std::max(size, SerialArena::kBlockHeaderSize + min_bytes);
+  return size;
+}
 
+SizedPtr AllocateMemory(const AllocationPolicy& policy, size_t size) {
   if (policy.block_alloc == nullptr) {
     return AllocateAtLeast(size);
   }
   return {policy.block_alloc(size), size};
 }
 
+SizedPtr AllocateBlock(const AllocationPolicy* policy_ptr, size_t last_size,
+                       size_t min_bytes) {
+  AllocationPolicy policy;  // default policy
+  if (policy_ptr) policy = *policy_ptr;
+  size_t size = AllocationSize(last_size, policy.start_block_size,
+                               policy.max_block_size);
+  // Verify that min_bytes + kBlockHeaderSize won't overflow.
+  ABSL_CHECK_LE(min_bytes, std::numeric_limits<size_t>::max() -
+                               SerialArena::kBlockHeaderSize);
+  size = std::max(size, SerialArena::kBlockHeaderSize + min_bytes);
+
+  return AllocateMemory(policy, size);
+}
+
+SizedPtr AllocateCleanupChunk(const AllocationPolicy* policy_ptr,
+                              size_t last_size) {
+  AllocationPolicy policy;  // default policy
+  if (policy_ptr) policy = *policy_ptr;
+  const size_t size =
+      AllocationSize(last_size, /*min_size=*/64, /*max_size=*/4 << 10);
+  return AllocateMemory(policy, size);
+}
+
 class GetDeallocator {
  public:
   GetDeallocator(const AllocationPolicy* policy, size_t* space_allocated)
@@ -107,6 +124,82 @@ class GetDeallocator {
 
 }  // namespace
 
+namespace cleanup {
+struct ChunkList::Chunk {
+  CleanupNode* First() { return reinterpret_cast<CleanupNode*>(this + 1); }
+  CleanupNode* Last() { return First() + Capacity() - 1; }
+  static size_t Capacity(size_t size) {
+    return (size - sizeof(Chunk)) / sizeof(CleanupNode);
+  }
+  size_t Capacity() const { return Capacity(size); }
+
+  Chunk* next;
+  size_t size;
+  // Cleanup nodes follow.
+};
+
+void ChunkList::AddFallback(const ThreadSafeArena& arena, void* elem,
+                            void (*destructor)(void*)) {
+  ABSL_DCHECK_EQ(next_, limit_);
+  SizedPtr mem = AllocateCleanupChunk(arena.AllocPolicy(),
+                                      head_ == nullptr ? 0 : head_->size);
+  head_ = new (mem.p) Chunk{head_, mem.n};
+  next_ = head_->First();
+  limit_ = next_ + Chunk::Capacity(mem.n);
+  AddFromExisting(elem, destructor);
+}
+
+void ChunkList::Cleanup(const ThreadSafeArena& arena, size_t* space_allocated) {
+  Chunk* c = head_;
+  if (c == nullptr) return;
+  GetDeallocator deallocator(arena.AllocPolicy(), space_allocated);
+
+  // Iterate backwards in order to destroy in the right order.
+  CleanupNode* it = next_ - 1;
+  head_ = nullptr;
+  next_ = limit_ = nullptr;
+
+  while (true) {
+    CleanupNode* first = c->First();
+    // A prefetch distance of 8 here was chosen arbitrarily.
+    CleanupNode* prefetch = it;
+    int prefetch_dist = 8;
+    for (; prefetch >= first && --prefetch_dist; --prefetch) {
+      prefetch->Prefetch();
+    }
+    for (; prefetch >= first; --it, --prefetch) {
+      it->Destroy();
+      prefetch->Prefetch();
+    }
+    absl::PrefetchToLocalCacheNta(c->next);
+    for (; it >= first; --it) {
+      it->Destroy();
+    }
+    Chunk* next = c->next;
+    deallocator({c, c->size});
+    if (next == nullptr) return;
+    c = next;
+    it = c->Last();
+  };
+}
+
+std::vector<void*> ChunkList::Peek() {
+  std::vector<void*> ret;
+  Chunk* c = head_;
+  if (c == nullptr) return ret;
+  // Iterate backwards to match destruction order.
+  CleanupNode* it = next_ - 1;
+  while (true) {
+    CleanupNode* first = c->First();
+    for (; it >= first; --it) {
+      ret.push_back(it->elem);
+    }
+    c = c->next;
+    if (c == nullptr) return ret;
+    it = c->Last();
+  };
+}
+
+}  // namespace cleanup
+
 // It is guaranteed that this is constructed in `b`. IOW, this is not the first
 // arena and `b` cannot be sentry.
 SerialArena::SerialArena(ArenaBlock* b, ThreadSafeArena& parent)
@@ -114,7 +207,6 @@ SerialArena::SerialArena(ArenaBlock* b, ThreadSafeArena& parent)
       limit_{b->Limit()},
       prefetch_ptr_(
           b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)),
-      prefetch_limit_(b->Limit()),
       head_{b},
       space_allocated_{b->size},
       parent_{parent} {
@@ -135,22 +227,7 @@ SerialArena::SerialArena(FirstSerialArena, ArenaBlock* b,
 }
 
 std::vector<void*> SerialArena::PeekCleanupListForTesting() {
-  std::vector<void*> res;
-
-  ArenaBlock* b = head();
-  if (b->IsSentry()) return res;
-
-  const auto peek_list = [&](char* pos, char* end) {
-    for (; pos != end; pos += cleanup::Size()) {
-      cleanup::PeekNode(pos, res);
-    }
-  };
-
-  peek_list(limit_, b->Limit());
-  for (b = b->next; b; b = b->next) {
-    peek_list(reinterpret_cast<char*>(b->cleanup_nodes), b->Limit());
-  }
-  return res;
+  return cleanup_list_.Peek();
 }
 
 std::vector<void*> ThreadSafeArena::PeekCleanupListForTesting() {
@@ -228,25 +305,16 @@ void* SerialArena::AllocateFromStringBlockFallback() {
 
 PROTOBUF_NOINLINE
 void* SerialArena::AllocateAlignedWithCleanupFallback(
     size_t n, size_t align, void (*destructor)(void*)) {
-  size_t required = AlignUpTo(n, align) + cleanup::Size();
+  size_t required = AlignUpTo(n, align);
   AllocateNewBlock(required);
   return AllocateAlignedWithCleanup(n, align, destructor);
 }
 
-PROTOBUF_NOINLINE
-void SerialArena::AddCleanupFallback(void* elem, void (*destructor)(void*)) {
-  AllocateNewBlock(cleanup::Size());
-  AddCleanupFromExisting(elem, destructor);
-}
-
 void SerialArena::AllocateNewBlock(size_t n) {
   size_t used = 0;
   size_t wasted = 0;
   ArenaBlock* old_head = head();
   if (!old_head->IsSentry()) {
-    // Sync limit to block
-    old_head->cleanup_nodes = limit_;
-
     // Record how much used in this block.
     used = static_cast<size_t>(ptr() - old_head->Pointer(kBlockHeaderSize));
     wasted = old_head->size - used - kBlockHeaderSize;
@@ -258,7 +326,7 @@ void SerialArena::AllocateNewBlock(size_t n) {
   // but with a CPU regression. The regression might have been an artifact of
   // the microbenchmark.
 
-  auto mem = AllocateMemory(parent_.AllocPolicy(), old_head->size, n);
+  auto mem = AllocateBlock(parent_.AllocPolicy(), old_head->size, n);
   AddSpaceAllocated(mem.n);
   ThreadSafeArenaStats::RecordAllocateStats(parent_.arena_stats_.MutableStats(),
                                             /*used=*/used,
@@ -319,34 +387,6 @@ size_t SerialArena::FreeStringBlocks(StringBlock* string_block,
   return deallocated;
 }
 
-void SerialArena::CleanupList() {
-  ArenaBlock* b = head();
-  if (b->IsSentry()) return;
-
-  b->cleanup_nodes = limit_;
-  do {
-    char* limit = b->Limit();
-    char* it = reinterpret_cast<char*>(b->cleanup_nodes);
-    ABSL_DCHECK(!b->IsSentry() || it == limit);
-    // A prefetch distance of 8 here was chosen arbitrarily.
-    char* prefetch = it;
-    int prefetch_dist = 8;
-    for (; prefetch < limit && --prefetch_dist; prefetch += cleanup::Size()) {
-      cleanup::PrefetchNode(prefetch);
-    }
-    for (; prefetch < limit;
-         it += cleanup::Size(), prefetch += cleanup::Size()) {
-      cleanup::DestroyNode(it);
-      cleanup::PrefetchNode(prefetch);
-    }
-    absl::PrefetchToLocalCacheNta(b->next);
-    for (; it < limit; it += cleanup::Size()) {
-      cleanup::DestroyNode(it);
-    }
-    b = b->next;
-  } while (b);
-}
-
 // Stores arrays of void* and SerialArena* instead of linked list of
 // SerialArena* to speed up traversing all SerialArena. The cost of walk is non
 // trivial when there are many nodes. Separately storing "ids" minimizes cache
@@ -549,7 +589,7 @@ ArenaBlock* ThreadSafeArena::FirstBlock(void* buf, size_t size,
 
   SizedPtr mem;
   if (buf == nullptr || size < kBlockHeaderSize + kAllocPolicySize) {
-    mem = AllocateMemory(&policy, 0, kAllocPolicySize);
+    mem = AllocateBlock(&policy, 0, kAllocPolicySize);
   } else {
     mem = {buf, size};
     // Record user-owned block.
@@ -688,11 +728,11 @@ void ThreadSafeArena::Init() {
 }
 
 ThreadSafeArena::~ThreadSafeArena() {
+  size_t space_allocated = 0;
   // Have to do this in a first pass, because some of the destructors might
   // refer to memory in other blocks.
-  CleanupList();
+  CleanupList(&space_allocated);
 
-  size_t space_allocated = 0;
   auto mem = Free(&space_allocated);
   if (alloc_policy_.is_user_owned_initial_block()) {
     // Unpoison the initial block, now that it's going back to the user.
@@ -730,13 +770,13 @@ SizedPtr ThreadSafeArena::Free(size_t* space_allocated) {
 }
 
 uint64_t ThreadSafeArena::Reset() {
+  size_t space_allocated = 0;
   // Have to do this in a first pass, because some of the destructors might
   // refer to memory in other blocks.
-  CleanupList();
+  CleanupList(&space_allocated);
 
   // Discard all blocks except the first one. Whether it is user-provided or
   // allocated, always reuse the first block for the first arena.
-  size_t space_allocated = 0;
   auto mem = Free(&space_allocated);
   space_allocated += mem.n;
 
@@ -868,12 +908,12 @@ template void* ThreadSafeArena::AllocateAlignedFallback<
 template void*
 ThreadSafeArena::AllocateAlignedFallback<AllocationClient::kArray>(size_t);
 
-void ThreadSafeArena::CleanupList() {
+void ThreadSafeArena::CleanupList(size_t* space_allocated) {
 #ifdef PROTOBUF_ASAN
   UnpoisonAllArenaBlocks();
 #endif
 
-  WalkSerialArenaChunk([](SerialArenaChunk* chunk) {
+  WalkSerialArenaChunk([space_allocated](SerialArenaChunk* chunk) {
     absl::Span<std::atomic<SerialArena*>> span = chunk->arenas();
     // Walks arenas backward to handle the first serial arena the last.
     // Destroying in reverse-order to the construction is often assumed by users
@@ -881,11 +921,11 @@ void ThreadSafeArena::CleanupList() {
     for (auto it = span.rbegin(); it != span.rend(); ++it) {
      SerialArena* serial = it->load(std::memory_order_relaxed);
       ABSL_DCHECK_NE(serial, nullptr);
-      serial->CleanupList();
+      serial->CleanupList(space_allocated);
     }
   });
 
   // First arena must be cleaned up last. (b/247560530)
-  first_arena_.CleanupList();
+  first_arena_.CleanupList(space_allocated);
 }
 
 PROTOBUF_NOINLINE
@@ -914,7 +954,7 @@ SerialArena* ThreadSafeArena::GetSerialArenaFallback(size_t n) {
     // have any blocks yet. So we'll allocate its first block now. It must be
     // big enough to host SerialArena and the pending request.
     serial = SerialArena::New(
-        AllocateMemory(alloc_policy_.get(), 0, n + kSerialArenaSize), *this);
+        AllocateBlock(alloc_policy_.get(), 0, n + kSerialArenaSize), *this);
 
     AddSerialArena(id, serial);
   }
diff --git a/src/google/protobuf/arena_cleanup.h b/src/google/protobuf/arena_cleanup.h
index abd2b73349bd8..d78619c647168 100644
--- a/src/google/protobuf/arena_cleanup.h
+++ b/src/google/protobuf/arena_cleanup.h
@@ -21,6 +21,9 @@
 namespace google {
 namespace protobuf {
 namespace internal {
+
+class ThreadSafeArena;
+
 namespace cleanup {
 
 // Helper function invoking the destructor of `object`
@@ -33,44 +36,56 @@ void arena_destruct_object(void* object) {
 // destroyed, and the function to destroy it (`destructor`)
 // elem must be aligned at minimum on a 4 byte boundary.
 struct CleanupNode {
+  // Optimization: performs a prefetch on the elem for the cleanup node. We
+  // explicitly use NTA prefetch here to avoid polluting remote caches: we are
+  // destroying these instances, there is no purpose for these cache lines to
+  // linger around in remote caches.
+  ABSL_ATTRIBUTE_ALWAYS_INLINE void Prefetch() {
+    absl::PrefetchToLocalCacheNta(elem);
+  }
+
+  // Destroys the object referenced by the cleanup node.
+  ABSL_ATTRIBUTE_ALWAYS_INLINE void Destroy() { destructor(elem); }
+
   void* elem;
   void (*destructor)(void*);
 };
 
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE CleanupNode* ToCleanup(void* pos) {
-  return reinterpret_cast<CleanupNode*>(pos);
-}
-
-// Adds a cleanup entry at memory location `pos`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(void* pos, void* elem,
+// Manages the list of cleanup nodes in a chunked linked list. Chunks grow by
+// factors of two. Trivially destructible, but Cleanup() must be called before
+// destruction.
+class ChunkList {
+ public:
+  void Add(const ThreadSafeArena& arena, void* elem,
+           void (*destructor)(void*)) {
+    if (PROTOBUF_PREDICT_TRUE(next_ < limit_)) {
+      AddFromExisting(elem, destructor);
+      return;
+    }
+    AddFallback(arena, elem, destructor);
+  }
+
+  // Runs all inserted cleanups and frees allocated chunks. Must be called
+  // before destruction.
+  void Cleanup(const ThreadSafeArena& arena, size_t* space_allocated);
+
+  // Returns the pointers to the to-be-cleaned objects. For testing.
+  std::vector<void*> Peek();
+
+ private:
+  struct Chunk;
+
+  void AddFallback(const ThreadSafeArena& arena, void* elem,
+                   void (*destructor)(void*));
+
+  ABSL_ATTRIBUTE_ALWAYS_INLINE void AddFromExisting(void* elem,
                                                     void (*destructor)(void*)) {
-  CleanupNode n = {elem, destructor};
-  memcpy(pos, &n, sizeof(n));
-}
+    new (next_++) CleanupNode{elem, destructor};
+  }
 
-// Optimization: performs a prefetch on the elem for the cleanup node at `pos`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void PrefetchNode(void* pos) {
-  // We explicitly use NTA prefetch here to avoid polluting remote caches: we
-  // are destroying these instances, there is no purpose for these cache lines
-  // to linger around in remote caches.
-  absl::PrefetchToLocalCacheNta(ToCleanup(pos)->elem);
-}
-
-// Destroys the object referenced by the cleanup node.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void DestroyNode(void* pos) {
-  CleanupNode* cleanup = ToCleanup(pos);
-  cleanup->destructor(cleanup->elem);
-}
-
-// Append in `out` the pointer to the to-be-cleaned object in `pos`.
-inline void PeekNode(void* pos, std::vector<void*>& out) {
-  out.push_back(ToCleanup(pos)->elem);
-}
-
-// Returns the required size for a cleanup node.
-constexpr ABSL_ATTRIBUTE_ALWAYS_INLINE size_t Size() {
-  return sizeof(CleanupNode);
-}
+
+  Chunk* head_ = nullptr;
+  CleanupNode* next_ = nullptr;
+  CleanupNode* limit_ = nullptr;
+};
 
 }  // namespace cleanup
 }  // namespace internal
diff --git a/src/google/protobuf/arena_unittest.cc b/src/google/protobuf/arena_unittest.cc
index 0752735a4836d..1662088ee08b0 100644
--- a/src/google/protobuf/arena_unittest.cc
+++ b/src/google/protobuf/arena_unittest.cc
@@ -1406,12 +1406,12 @@ TEST(ArenaTest, RepeatedFieldOnArena) {
   // Preallocate an initial arena block to avoid mallocs during hooked region.
   std::vector<char> arena_block(1024 * 1024);
   Arena arena(arena_block.data(), arena_block.size());
+  const size_t initial_allocated_size = arena.SpaceAllocated();
 
   {
-    internal::NoHeapChecker no_heap;
-
-    // Fill some repeated fields on the arena to test for leaks. Also verify no
-    // memory allocations.
+    // Fill some repeated fields on the arena to test for leaks. Also verify
+    // that the newly allocated memory is approximately the size of the
+    // cleanups for the repeated messages.
     RepeatedField<int32_t> repeated_int32(&arena);
     RepeatedPtrField<TestAllTypes> repeated_message(&arena);
     for (int i = 0; i < 100; i++) {
@@ -1432,10 +1432,13 @@ TEST(ArenaTest, RepeatedFieldOnArena) {
     repeated_message.UnsafeArenaExtractSubrange(0, 5, extracted_messages);
     EXPECT_EQ(&arena, repeated_message.Get(0).GetArena());
     EXPECT_EQ(5, repeated_message.size());
+    const size_t upperbound_cleanup_size =
+        2 * 110 * sizeof(internal::cleanup::CleanupNode);
+    EXPECT_GT(initial_allocated_size + upperbound_cleanup_size,
+              arena.SpaceAllocated());
   }
 
-  // Now, outside the scope of the NoHeapChecker, test ExtractSubrange's copying
-  // semantics.
+  // Now test ExtractSubrange's copying semantics.
   {
     RepeatedPtrField<TestAllTypes> repeated_message(&arena);
     for (int i = 0; i < 100; i++) {
@@ -1610,8 +1613,9 @@ TEST(ArenaTest, MessageLiteOnArena) {
   initial_message.SerializeToString(&serialized);
 
   {
-    MessageLite* generic_message = prototype->New(&arena);
+    MessageLite* generic_message = prototype->New(&arena);
+
     EXPECT_TRUE(generic_message != nullptr);
     EXPECT_EQ(&arena, generic_message->GetArena());
     EXPECT_TRUE(generic_message->ParseFromString(serialized));
diff --git a/src/google/protobuf/serial_arena.h b/src/google/protobuf/serial_arena.h
index 0ccc410a768cf..6b809c2754fe1 100644
--- a/src/google/protobuf/serial_arena.h
+++ b/src/google/protobuf/serial_arena.h
@@ -37,13 +37,10 @@ namespace internal {
 
 // Arena blocks are variable length malloc-ed objects. The following structure
 // describes the common header for all blocks.
 struct ArenaBlock {
-  // For the sentry block with zero-size where ptr_, limit_, cleanup_nodes all
-  // point to "this".
-  constexpr ArenaBlock()
-      : next(nullptr), cleanup_nodes(this), size(0) {}
+  // For the sentry block with zero-size where ptr_/limit_ both point to `this`.
+  constexpr ArenaBlock() : next(nullptr), size(0) {}
 
-  ArenaBlock(ArenaBlock* next, size_t size)
-      : next(next), cleanup_nodes(nullptr), size(size) {
+  ArenaBlock(ArenaBlock* next, size_t size) : next(next), size(size) {
     ABSL_DCHECK_GT(size, sizeof(ArenaBlock));
   }
 
@@ -56,7 +53,6 @@ struct ArenaBlock {
   bool IsSentry() const { return size == 0; }
 
   ArenaBlock* const next;
-  void* cleanup_nodes;
   const size_t size;
   // data follows
 };
@@ -86,7 +82,9 @@ class PROTOBUF_EXPORT SerialArena {
   static constexpr size_t kBlockHeaderSize =
       ArenaAlignDefault::Ceil(sizeof(ArenaBlock));
 
-  void CleanupList();
+  void CleanupList(size_t* space_allocated) {
+    cleanup_list_.Cleanup(parent_, space_allocated);
+  }
   uint64_t SpaceAllocated() const {
     return space_allocated_.load(std::memory_order_relaxed);
   }
@@ -236,15 +234,14 @@ class PROTOBUF_EXPORT SerialArena {
     n = ArenaAlignDefault::Ceil(n);
     char* ret = ArenaAlignAs(align).CeilDefaultAligned(ptr());
     // See the comment in MaybeAllocateAligned re uintptr_t.
-    if (PROTOBUF_PREDICT_FALSE(reinterpret_cast<uintptr_t>(ret) + n +
-                               cleanup::Size() >
+    if (PROTOBUF_PREDICT_FALSE(reinterpret_cast<uintptr_t>(ret) + n >
                                reinterpret_cast<uintptr_t>(limit_))) {
       return AllocateAlignedWithCleanupFallback(n, align, destructor);
     }
     PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
     char* next = ret + n;
     set_ptr(next);
-    AddCleanupFromExisting(ret, destructor);
+    AddCleanup(ret, destructor);
     ABSL_DCHECK_GE(limit_, ptr());
     MaybePrefetchForwards(next);
     return ret;
@@ -252,11 +249,7 @@ class PROTOBUF_EXPORT SerialArena {
 
   PROTOBUF_ALWAYS_INLINE
   void AddCleanup(void* elem, void (*destructor)(void*)) {
-    size_t has = static_cast<size_t>(limit_ - ptr());
-    if (PROTOBUF_PREDICT_FALSE(cleanup::Size() > has)) {
-      return AddCleanupFallback(elem, destructor);
-    }
-    AddCleanupFromExisting(elem, destructor);
+    cleanup_list_.Add(parent_, elem, destructor);
   }
 
   ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlock();
@@ -273,7 +266,6 @@ class PROTOBUF_EXPORT SerialArena {
   };
 
   static constexpr ptrdiff_t kPrefetchForwardsDegree = ABSL_CACHELINE_SIZE * 16;
-  static constexpr ptrdiff_t kPrefetchBackwardsDegree = ABSL_CACHELINE_SIZE * 6;
 
   // Constructor is private as only New() should be used.
   inline SerialArena(ArenaBlock* b, ThreadSafeArena& parent);
@@ -285,19 +277,8 @@ class PROTOBUF_EXPORT SerialArena {
   bool MaybeAllocateString(void*& p);
   ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlockFallback();
 
-  PROTOBUF_ALWAYS_INLINE
-  void AddCleanupFromExisting(void* elem, void (*destructor)(void*)) {
-    const size_t cleanup_size = cleanup::Size();
-
-    PROTOBUF_UNPOISON_MEMORY_REGION(limit_ - cleanup_size, cleanup_size);
-    limit_ -= cleanup_size;
-    MaybePrefetchBackwards(limit_);
-    ABSL_DCHECK_GE(limit_, ptr());
-    cleanup::CreateNode(limit_, elem, destructor);
-  }
-
   // Prefetch the next kPrefetchForwardsDegree bytes after `prefetch_ptr_` and
-  // up to `prefetch_limit_`, if `next` is within kPrefetchForwardsDegree bytes
+  // up to `limit_`, if `next` is within kPrefetchForwardsDegree bytes
   // of `prefetch_ptr_`.
   PROTOBUF_ALWAYS_INLINE
   void MaybePrefetchForwards(const char* next) {
@@ -305,11 +286,11 @@ class PROTOBUF_EXPORT SerialArena {
                 static_cast<const void*>(prefetch_ptr_) >= head());
     if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ - next > kPrefetchForwardsDegree))
       return;
-    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ < prefetch_limit_)) {
+    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ < limit_)) {
       const char* prefetch_ptr = std::max(next, prefetch_ptr_);
       ABSL_DCHECK(prefetch_ptr != nullptr);
       const char* end =
-          std::min(prefetch_limit_, prefetch_ptr + ABSL_CACHELINE_SIZE * 16);
+          std::min(limit_, prefetch_ptr + kPrefetchForwardsDegree);
       for (; prefetch_ptr < end; prefetch_ptr += ABSL_CACHELINE_SIZE) {
         absl::PrefetchToLocalCacheForWrite(prefetch_ptr);
       }
@@ -317,29 +298,6 @@ class PROTOBUF_EXPORT SerialArena {
     }
   }
 
-  PROTOBUF_ALWAYS_INLINE
-  // Prefetch up to kPrefetchBackwardsDegree before `prefetch_limit_` and after
-  // `prefetch_ptr_`, if `limit` is within kPrefetchBackwardsDegree of
-  // `prefetch_limit_`.
-  void MaybePrefetchBackwards(const char* limit) {
-    ABSL_DCHECK(prefetch_limit_ == nullptr ||
-                static_cast<const void*>(prefetch_limit_) <=
-                    static_cast<const void*>(head()->Limit()));
-    if (PROTOBUF_PREDICT_TRUE(limit - prefetch_limit_ >
-                              kPrefetchBackwardsDegree))
-      return;
-    if (PROTOBUF_PREDICT_TRUE(prefetch_limit_ > prefetch_ptr_)) {
-      const char* prefetch_limit = std::min(limit, prefetch_limit_);
-      ABSL_DCHECK_NE(prefetch_limit, nullptr);
-      const char* end =
-          std::max(prefetch_ptr_, prefetch_limit - kPrefetchBackwardsDegree);
-      for (; prefetch_limit > end; prefetch_limit -= ABSL_CACHELINE_SIZE) {
-        absl::PrefetchToLocalCacheForWrite(prefetch_limit);
-      }
-      prefetch_limit_ = prefetch_limit;
-    }
-  }
-
   // Creates a new SerialArena inside mem using the remaining memory as for
   // future allocations.
   // The `parent` arena must outlive the serial arena, which is guaranteed
@@ -385,7 +343,6 @@ class PROTOBUF_EXPORT SerialArena {
     set_ptr(ptr);
     prefetch_ptr_ = ptr;
     limit_ = limit;
-    prefetch_limit_ = limit;
   }
 
   void* AllocateAlignedFallback(size_t n);
@@ -405,10 +362,11 @@ class PROTOBUF_EXPORT SerialArena {
   // Limiting address up to which memory can be allocated from the head block.
   char* limit_ = nullptr;
 
   // Current prefetch positions. Data from `ptr_` up to but not including
-  // `prefetch_ptr_` is software prefetched. Similarly, data from `limit_` down
-  // to but not including `prefetch_limit_` is software prefetched.
+  // `prefetch_ptr_` is software prefetched.
   const char* prefetch_ptr_ = nullptr;
-  const char* prefetch_limit_ = nullptr;
+
+  // Chunked linked list for managing cleanup for arena elements.
+  cleanup::ChunkList cleanup_list_;
 
   // The active string block.
   std::atomic<StringBlock*> string_block_{nullptr};
 
diff --git a/src/google/protobuf/thread_safe_arena.h b/src/google/protobuf/thread_safe_arena.h
index 93dc6a7cfade7..3e2222e93fe1e 100644
--- a/src/google/protobuf/thread_safe_arena.h
+++ b/src/google/protobuf/thread_safe_arena.h
@@ -20,6 +20,7 @@
 #include "absl/synchronization/mutex.h"
 #include "google/protobuf/arena_align.h"
 #include "google/protobuf/arena_allocation_policy.h"
+#include "google/protobuf/arena_cleanup.h"
 #include "google/protobuf/arenaz_sampler.h"
 #include "google/protobuf/port.h"
 #include "google/protobuf/serial_arena.h"
@@ -109,6 +110,7 @@ class PROTOBUF_EXPORT ThreadSafeArena {
   friend class TcParser;
   friend class SerialArena;
   friend struct SerialArenaChunkHeader;
+  friend class cleanup::ChunkList;
   static uint64_t GetNextLifeCycleId();
 
   class SerialArenaChunk;
@@ -161,7 +163,7 @@ class PROTOBUF_EXPORT ThreadSafeArena {
   void Init();
 
   // Delete or Destruct all objects owned by the arena.
-  void CleanupList();
+  void CleanupList(size_t* space_allocated);
 
   inline void CacheSerialArena(SerialArena* serial) {
     thread_cache().last_serial_arena = serial;
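-- 

The following standalone sketch is illustrative only and not part of the
patch. It mimics the chunked cleanup-list pattern the commit introduces:
Add() bump-appends {elem, destructor} pairs into the head chunk until it
fills, a fallback then links in a roughly doubled chunk (the patch grows
chunks from 64 bytes up to 4 KiB), and Cleanup() walks nodes last-to-first
so objects are destroyed in reverse registration order while freeing each
chunk. All names here (MiniChunkList, Node, Probe) are hypothetical, and
malloc stands in for the arena's block allocator; prefetching is omitted.

  // mini_chunk_list.cc - standalone illustration of the chunked cleanup list.
  #include <algorithm>
  #include <cstddef>
  #include <cstdio>
  #include <cstdlib>
  #include <new>

  struct Node {
    void* elem;
    void (*destructor)(void*);
  };

  class MiniChunkList {
   public:
    void Add(void* elem, void (*destructor)(void*)) {
      if (next_ == limit_) Grow();        // slow path: allocate a bigger chunk
      *next_++ = Node{elem, destructor};  // fast path: bump-pointer append
    }

    // Destroys registered objects in reverse insertion order, then frees the
    // chunks themselves (mirrors ChunkList::Cleanup, minus prefetching).
    void Cleanup() {
      Node* stop = next_;  // the head chunk is only partially filled
      for (Chunk* c = head_; c != nullptr;) {
        Node* first = c->nodes();
        Node* it = (c == head_) ? stop : first + c->capacity;
        while (it-- > first) it->destructor(it->elem);
        Chunk* dead = c;
        c = c->next;
        std::free(dead);
      }
      head_ = nullptr;
      next_ = limit_ = nullptr;
    }

   private:
    struct Chunk {
      Chunk* next;
      size_t capacity;  // number of Node slots following this header
      Node* nodes() { return reinterpret_cast<Node*>(this + 1); }
    };

    void Grow() {
      // Geometric growth with a cap, analogous to AllocateCleanupChunk's
      // 64-byte start and 4 KiB ceiling (counted here in slots, not bytes).
      size_t cap = head_ ? std::min(head_->capacity * 2, size_t{256})
                         : size_t{4};
      Chunk* c = static_cast<Chunk*>(
          std::malloc(sizeof(Chunk) + cap * sizeof(Node)));
      c->next = head_;
      c->capacity = cap;
      head_ = c;
      next_ = c->nodes();
      limit_ = next_ + cap;
    }

    Chunk* head_ = nullptr;  // most recently allocated chunk
    Node* next_ = nullptr;   // next free slot in head_
    Node* limit_ = nullptr;  // one past the last slot of head_
  };

  int main() {
    MiniChunkList list;
    struct Probe { int id; };
    void (*destroy)(void*) = [](void* p) {
      std::printf("destroying %d\n", static_cast<Probe*>(p)->id);
      delete static_cast<Probe*>(p);
    };
    for (int i = 0; i < 10; ++i) list.Add(new Probe{i}, destroy);
    list.Cleanup();  // prints ids 9..0, i.e. reverse insertion order
  }

Keeping cleanup records out of the data blocks is what lets the patch drop
ArenaBlock::cleanup_nodes, the AddCleanupFallback path, and the backwards
prefetch machinery, at the cost of a separate small allocation stream that
Cleanup() now reports back through space_allocated.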