From a123879666e9122039b0e3bc3daff821c8fcf4ab Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot
Date: Thu, 26 Dec 2024 17:47:39 -0800
Subject: [PATCH] Remove atomics from linked list of blocks

We no longer need to traverse the linked list of blocks to check allocated
space, which means we also no longer need atomics in the linked list or even
its head. This is especially beneficial because the previous implementation
contained a race in which we could dereference uninitialized memory: since the
stores to the `next` pointers did not use release semantics and
`SpaceAllocated` read them with relaxed order, there was no guarantee that
`size` had actually been initialized; worse, *there was no guarantee that
`next` had been either!*

Simplified:

```
AddBlock:
1 ptr = malloc();
2 ptr->size = 123;
3 ptr->next = ai->blocks;
4 ai->blocks = ptr (release order);
```

```
SpaceAllocated:
5 block = ai->blocks (relaxed order)
6 block->size (acquire, but probably by accident)
7 block = block->next (relaxed order)
```

So I think a second thread calling SpaceAllocated could see the order
1, 4, 5, 6, 7, 2, 3 and read uninitialized memory: there is no data dependency
or happens-before edge that this order violates, so it would be valid for a
compiler and hardware to produce.

In reality, operation 4 will produce an `stlr` on ARM (forcing an order of
1, 2, 3 before 4), and `block->next` has a data dependency on `ai->blocks`,
which would force an ordering in the hardware between 5->6 and 5->7 even for
regular `ldr` instructions.

Also delete `_upb_Arena_Contains`; it is private, and its only user is its own
test.

PiperOrigin-RevId: 709918443
---
 upb/mem/BUILD            |  1 +
 upb/mem/arena.c          | 64 ++++++++++++++++----------------
 upb/mem/arena_test.cc    | 67 +++++++++++++++++-----------------
 upb/mem/internal/arena.h |  7 +----
 4 files changed, 57 insertions(+), 82 deletions(-)

diff --git a/upb/mem/BUILD b/upb/mem/BUILD
index 2afa190b0285..c468348a8928 100644
--- a/upb/mem/BUILD
+++ b/upb/mem/BUILD
@@ -47,6 +47,7 @@ cc_test(
         ":mem",
         "//upb:port",
         "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/cleanup",
         "@com_google_absl//absl/random",
         "@com_google_absl//absl/random:distributions",
         "@com_google_absl//absl/synchronization",
diff --git a/upb/mem/arena.c b/upb/mem/arena.c
index 79f42d8f9690..d233e2e96d51 100644
--- a/upb/mem/arena.c
+++ b/upb/mem/arena.c
@@ -28,8 +28,7 @@ void upb_Arena_SetMaxBlockSize(size_t max) {
 }
 
 typedef struct upb_MemBlock {
-  // Atomic only for the benefit of SpaceAllocated().
-  UPB_ATOMIC(struct upb_MemBlock*) next;
+  struct upb_MemBlock* next;
   uint32_t size;
   // Data follows.
 } upb_MemBlock;
@@ -62,9 +61,11 @@ typedef struct upb_ArenaInternal {
   // == self when no other list members.
   UPB_ATOMIC(struct upb_ArenaInternal*) tail;
 
-  // Linked list of blocks to free/cleanup. Atomic only for the benefit of
-  // upb_Arena_SpaceAllocated().
-  UPB_ATOMIC(upb_MemBlock*) blocks;
+  // Linked list of blocks to free/cleanup.
+  upb_MemBlock* blocks;
+
+  // Total space allocated in blocks; atomic only for SpaceAllocated().
+  UPB_ATOMIC(size_t) space_allocated;
 } upb_ArenaInternal;
 
 // All public + private state for an arena.
@@ -207,11 +208,7 @@ size_t upb_Arena_SpaceAllocated(upb_Arena* arena, size_t* fused_count) {
   size_t local_fused_count = 0;
   while (ai != NULL) {
-    upb_MemBlock* block = upb_Atomic_Load(&ai->blocks, memory_order_relaxed);
-    while (block != NULL) {
-      memsize += sizeof(upb_MemBlock) + block->size;
-      block = upb_Atomic_Load(&block->next, memory_order_relaxed);
-    }
+    memsize += upb_Atomic_Load(&ai->space_allocated, memory_order_relaxed);
     ai = upb_Atomic_Load(&ai->next, memory_order_relaxed);
     local_fused_count++;
   }
 
@@ -220,21 +217,6 @@ size_t upb_Arena_SpaceAllocated(upb_Arena* arena, size_t* fused_count) {
   return memsize;
 }
 
-bool UPB_PRIVATE(_upb_Arena_Contains)(const upb_Arena* a, void* ptr) {
-  upb_ArenaInternal* ai = upb_Arena_Internal(a);
-  UPB_ASSERT(ai);
-
-  upb_MemBlock* block = upb_Atomic_Load(&ai->blocks, memory_order_relaxed);
-  while (block) {
-    uintptr_t beg = (uintptr_t)block;
-    uintptr_t end = beg + block->size;
-    if ((uintptr_t)ptr >= beg && (uintptr_t)ptr < end) return true;
-    block = upb_Atomic_Load(&block->next, memory_order_relaxed);
-  }
-
-  return false;
-}
-
 uint32_t upb_Arena_DebugRefCount(upb_Arena* a) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
   // These loads could probably be relaxed, but given that this is debug-only,
@@ -251,10 +233,10 @@ static void _upb_Arena_AddBlock(upb_Arena* a, void* ptr, size_t size) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
   upb_MemBlock* block = ptr;
 
-  // Insert into linked list.
   block->size = (uint32_t)size;
-  upb_Atomic_Init(&block->next, ai->blocks);
-  upb_Atomic_Store(&ai->blocks, block, memory_order_release);
+  // Insert into linked list.
+  block->next = ai->blocks;
+  ai->blocks = block;
 
   a->UPB_PRIVATE(ptr) = UPB_PTR_AT(block, kUpb_MemblockReserve, char);
   a->UPB_PRIVATE(end) = UPB_PTR_AT(block, size, char);
@@ -267,7 +249,7 @@ static bool _upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
   if (!ai->block_alloc) return false;
   size_t last_size = 128;
-  upb_MemBlock* last_block = upb_Atomic_Load(&ai->blocks, memory_order_relaxed);
+  upb_MemBlock* last_block = ai->blocks;
   if (last_block) {
     last_size = a->UPB_PRIVATE(end) - (char*)last_block;
   }
@@ -288,6 +270,13 @@ static bool _upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
   if (!block) return false;
 
   _upb_Arena_AddBlock(a, block, block_size);
+  // An atomic add is not required here, as threads won't race to allocate
+  // blocks, and atomic fetch-add is slower than load/add/store on ARM devices
+  // compiled targeting pre-v8.1.
+  size_t old_space_allocated =
+      upb_Atomic_Load(&ai->space_allocated, memory_order_relaxed);
+  upb_Atomic_Store(&ai->space_allocated, old_space_allocated + block_size,
+                   memory_order_relaxed);
   UPB_ASSERT(UPB_PRIVATE(_upb_ArenaHas)(a) >= size);
   return true;
 }
@@ -316,7 +305,8 @@ static upb_Arena* _upb_Arena_InitSlow(upb_alloc* alloc) {
   upb_Atomic_Init(&a->body.parent_or_count, _upb_Arena_TaggedFromRefcount(1));
   upb_Atomic_Init(&a->body.next, NULL);
   upb_Atomic_Init(&a->body.tail, &a->body);
-  upb_Atomic_Init(&a->body.blocks, NULL);
+  upb_Atomic_Init(&a->body.space_allocated, n);
+  a->body.blocks = NULL;
   a->body.upb_alloc_cleanup = NULL;
 
   _upb_Arena_AddBlock(&a->head, mem, n);
@@ -355,7 +345,8 @@ upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
   upb_Atomic_Init(&a->body.parent_or_count, _upb_Arena_TaggedFromRefcount(1));
   upb_Atomic_Init(&a->body.next, NULL);
   upb_Atomic_Init(&a->body.tail, &a->body);
-  upb_Atomic_Init(&a->body.blocks, NULL);
+  upb_Atomic_Init(&a->body.space_allocated, 0);
+  a->body.blocks = NULL;
   a->body.upb_alloc_cleanup = NULL;
   a->body.block_alloc = _upb_Arena_MakeBlockAlloc(alloc, 1);
 
@@ -374,12 +365,11 @@ static void _upb_Arena_DoFree(upb_ArenaInternal* ai) {
     upb_ArenaInternal* next_arena =
         (upb_ArenaInternal*)upb_Atomic_Load(&ai->next, memory_order_acquire);
     upb_alloc* block_alloc = _upb_ArenaInternal_BlockAlloc(ai);
-    upb_MemBlock* block = upb_Atomic_Load(&ai->blocks, memory_order_acquire);
+    upb_MemBlock* block = ai->blocks;
     upb_AllocCleanupFunc* alloc_cleanup = *ai->upb_alloc_cleanup;
     while (block != NULL) {
       // Load first since we are deleting block.
-      upb_MemBlock* next_block =
-          upb_Atomic_Load(&block->next, memory_order_acquire);
+      upb_MemBlock* next_block = block->next;
       upb_free(block_alloc, block);
       block = next_block;
     }
@@ -601,8 +591,7 @@ void UPB_PRIVATE(_upb_Arena_SwapIn)(upb_Arena* des, const upb_Arena* src) {
 
   *des = *src;
   desi->block_alloc = srci->block_alloc;
-  upb_MemBlock* blocks = upb_Atomic_Load(&srci->blocks, memory_order_relaxed);
-  upb_Atomic_Init(&desi->blocks, blocks);
+  desi->blocks = srci->blocks;
 }
 
 void UPB_PRIVATE(_upb_Arena_SwapOut)(upb_Arena* des, const upb_Arena* src) {
@@ -610,6 +599,5 @@ void UPB_PRIVATE(_upb_Arena_SwapOut)(upb_Arena* des, const upb_Arena* src) {
   upb_ArenaInternal* srci = upb_Arena_Internal(src);
 
   *des = *src;
-  upb_MemBlock* blocks = upb_Atomic_Load(&srci->blocks, memory_order_relaxed);
-  upb_Atomic_Store(&desi->blocks, blocks, memory_order_relaxed);
+  desi->blocks = srci->blocks;
 }
diff --git a/upb/mem/arena_test.cc b/upb/mem/arena_test.cc
index 292dcf340ca9..8c2af9f676ba 100644
--- a/upb/mem/arena_test.cc
+++ b/upb/mem/arena_test.cc
@@ -17,6 +17,7 @@
 #include <thread>
 #include <vector>
 #include "absl/base/thread_annotations.h"
+#include "absl/cleanup/cleanup.h"
 #include "absl/random/distributions.h"
 #include "absl/random/random.h"
 #include "absl/synchronization/mutex.h"
@@ -172,44 +173,6 @@ TEST(ArenaTest, FuzzSingleThreaded) {
   }
 }
 
-TEST(ArenaTest, Contains) {
-  upb_Arena* arena1 = upb_Arena_New();
-  upb_Arena* arena2 = upb_Arena_New();
-  void* ptr1a = upb_Arena_Malloc(arena1, 8);
-  void* ptr2a = upb_Arena_Malloc(arena2, 8);
-
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1a));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2a));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2a));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1a));
-
-  void* ptr1b = upb_Arena_Malloc(arena1, 1000000);
-  void* ptr2b = upb_Arena_Malloc(arena2, 1000000);
-
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1a));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1b));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2a));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2b));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2a));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2b));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1a));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1b));
-
-  upb_Arena_Fuse(arena1, arena2);
-
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1a));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1b));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2a));
-  EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2b));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2a));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2b));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1a));
-  EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1b));
-
-  upb_Arena_Free(arena1);
-  upb_Arena_Free(arena2);
-}
-
 TEST(ArenaTest, LargeAlloc) {
   // Tests an allocation larger than the max block size.
   upb_Arena* arena = upb_Arena_New();
@@ -284,6 +247,34 @@ TEST(ArenaTest, FuzzFuseFuseRace) {
   for (auto& t : threads) t.join();
 }
 
+TEST(ArenaTest, FuzzAllocSpaceAllocatedRace) {
+  Environment env;
+  upb_Arena_SetMaxBlockSize(512);
+  absl::Cleanup reset_max_block_size = [] {
+    upb_Arena_SetMaxBlockSize(32 << 10);
+  };
+  upb_Arena* arena = upb_Arena_New();
+  absl::Notification done;
+  std::vector<std::thread> threads;
+  for (int i = 0; i < 10; ++i) {
+    threads.emplace_back([&]() {
+      while (!done.HasBeenNotified()) {
+        size_t count;
+        upb_Arena_SpaceAllocated(arena, &count);
+      }
+    });
+  }
+
+  absl::BitGen gen;
+  auto end = absl::Now() + absl::Seconds(2);
+  while (absl::Now() < end) {
+    upb_Arena_Malloc(arena, 256);
+  }
+  done.Notify();
+  for (auto& t : threads) t.join();
+  upb_Arena_Free(arena);
+}
+
 TEST(ArenaTest, ArenaIncRef) {
   upb_Arena* arena1 = upb_Arena_New();
   EXPECT_EQ(upb_Arena_DebugRefCount(arena1), 1);
diff --git a/upb/mem/internal/arena.h b/upb/mem/internal/arena.h
index 08aacde1eac8..f69c621c7556 100644
--- a/upb/mem/internal/arena.h
+++ b/upb/mem/internal/arena.h
@@ -20,7 +20,7 @@
 //
 // We need this because the decoder inlines a upb_Arena for performance but
 // the full struct is not visible outside of arena.c. Yes, I know, it's awful.
-#define UPB_ARENA_SIZE_HACK 8
+#define UPB_ARENA_SIZE_HACK 9
 
 // LINT.IfChange(upb_Arena)
 
@@ -40,11 +40,6 @@ void UPB_PRIVATE(_upb_Arena_SwapIn)(struct upb_Arena* des,
 void UPB_PRIVATE(_upb_Arena_SwapOut)(struct upb_Arena* des,
                                      const struct upb_Arena* src);
 
-// Returns whether |ptr| was allocated directly by |a| (so care must be used
-// with fused arenas).
-UPB_API bool UPB_ONLYBITS(_upb_Arena_Contains)(const struct upb_Arena* a,
-                                               void* ptr);
-
 UPB_INLINE size_t UPB_PRIVATE(_upb_ArenaHas)(const struct upb_Arena* a) {
   return (size_t)(a->UPB_ONLYBITS(end) - a->UPB_ONLYBITS(ptr));
 }
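For illustration, here is a minimal, standalone C11 sketch (not part of the patch) of the scheme this change adopts: the block list becomes a plain pointer chain owned by the allocating thread, and concurrent readers only ever touch a single relaxed atomic counter. The `demo_*` names are hypothetical stand-ins rather than upb APIs, and C11 `<threads.h>` support is assumed; the real implementation is in `upb/mem/arena.c` above.

```
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <threads.h>

typedef struct demo_block {
  struct demo_block* next;  // Plain pointer: only the owning thread walks it.
  size_t size;
} demo_block;

typedef struct {
  demo_block* blocks;              // Touched only by the allocating thread.
  _Atomic size_t space_allocated;  // The only field readers may touch.
} demo_arena;

static void demo_add_block(demo_arena* a, size_t size) {
  demo_block* b = malloc(sizeof(demo_block));  // Sketch: no OOM handling.
  b->size = size;
  b->next = a->blocks;  // No atomics: no other thread ever reads the list.
  a->blocks = b;
  // Single writer, so a relaxed load/add/store suffices; a fetch-add (a
  // read-modify-write, slower on pre-v8.1 ARM) is not needed.
  size_t old = atomic_load_explicit(&a->space_allocated, memory_order_relaxed);
  atomic_store_explicit(&a->space_allocated, old + size, memory_order_relaxed);
}

static size_t demo_space_allocated(demo_arena* a) {
  // Readers never chase block pointers, so they can never observe an
  // uninitialized `next` or `size`; they read one always-valid counter.
  return atomic_load_explicit(&a->space_allocated, memory_order_relaxed);
}

static int reader(void* arg) {
  demo_arena* a = arg;
  for (int i = 0; i < 100000; i++) {
    (void)demo_space_allocated(a);  // Data-race-free, possibly slightly stale.
  }
  return 0;
}

int main(void) {
  demo_arena a;
  a.blocks = NULL;
  atomic_init(&a.space_allocated, 0);

  thrd_t t;
  thrd_create(&t, reader, &a);
  for (size_t i = 0; i < 1000; i++) demo_add_block(&a, 128);
  thrd_join(t, NULL);
  printf("total: %zu\n", demo_space_allocated(&a));

  // Cleanup happens on the owning thread, as in _upb_Arena_DoFree.
  while (a.blocks) {
    demo_block* next = a.blocks->next;
    free(a.blocks);
    a.blocks = next;
  }
  return 0;
}
```

Because both sides use relaxed ordering, a reader may report a slightly stale total, which is acceptable for a statistics query like `upb_Arena_SpaceAllocated`. The property the patch actually needs is that readers no longer dereference block memory whose initialization they cannot order against.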