Remove atomics from linked list of blocks
We no longer need to traverse the linked list of blocks to check allocated space, which means we also no longer need atomics in the linked list, or even in its head pointer. This is especially beneficial because the previous implementation contained a race in which we could dereference uninitialized memory: the stores to the `next` pointers did not use release semantics, and `SpaceAllocated` read them with relaxed order, so there was no guarantee that `size` had actually been initialized; worse, *there was also no guarantee that `next` had been!* Simplified:
```
AddBlock:
1 ptr = malloc();
2 ptr->size = 123;
3 ptr->next = ai->blocks;
4 ai->blocks = ptr (release order);
```
```
SpaceAllocated:
5 block = ai->blocks (relaxed order)
6 block->size (acquire, but probably by accident)
7 block = block->next (relaxed order)
```

So I think a second thread calling `SpaceAllocated` could see the order 1, 4, 5, 6, 7, 2, 3 and read uninitialized memory: there is no data-dependency relationship or happens-before edge that this order violates, so it would be valid for a compiler plus hardware to produce.
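
To make the window concrete, here is a minimal standalone C11 sketch of the old pattern, assuming illustrative names (`node` and `head` are hypothetical stand-ins for `upb_MemBlock` and `ai->blocks`, not upb's actual code):

```
// Sketch of the racy pattern described above. The writer's release store
// on `head` (step 4) is never paired with an acquire load in the reader
// (step 5), so the reader has no happens-before edge to the writer's
// initialization of `size` (step 2) and `next` (step 3).
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct node {
  _Atomic(struct node*) next;
  uint32_t size;
} node;

static _Atomic(node*) head;

void add_block(uint32_t size) {
  node* n = malloc(sizeof(node));                                 // 1
  n->size = size;                                                 // 2
  atomic_init(&n->next,
              atomic_load_explicit(&head, memory_order_relaxed)); // 3
  atomic_store_explicit(&head, n, memory_order_release);          // 4
}

size_t space_allocated(void) {
  size_t total = 0;
  node* b = atomic_load_explicit(&head, memory_order_relaxed);    // 5
  while (b != NULL) {
    total += b->size;                                             // 6: may be uninitialized
    b = atomic_load_explicit(&b->next, memory_order_relaxed);     // 7: may be uninitialized
  }
  return total;
}
```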

In reality, operation 4 will produce an `stlr` on ARM (forcing 1, 2, and 3 to be ordered before 4), and `block->next` has a data dependency on `ai->blocks`, which would force an ordering in the hardware between 5->6 and 5->7 even for regular `ldr` instructions.
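
For contrast, a sketch (same hypothetical types as above) of the acquire load that would have created the missing edge; this is not what the commit does, since it removes the concurrent traversal altogether:

```
size_t space_allocated_fixed(void) {
  size_t total = 0;
  // The acquire pairs with the release store in step 4. Assuming a single
  // allocating thread per arena (true for upb), this makes steps 1-3 for
  // this block and for every older block visible before we traverse, so
  // the relaxed `next` loads below are then safe.
  node* b = atomic_load_explicit(&head, memory_order_acquire);
  while (b != NULL) {
    total += b->size;
    b = atomic_load_explicit(&b->next, memory_order_relaxed);
  }
  return total;
}
```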

Also delete `_upb_Arena_Contains`: it is private, and its only user was its own test.

PiperOrigin-RevId: 709918443
protobuf-github-bot authored and copybara-github committed Dec 27, 2024
1 parent 374bc8c commit a123879
Showing 4 changed files with 57 additions and 82 deletions.
1 change: 1 addition & 0 deletions upb/mem/BUILD
@@ -47,6 +47,7 @@ cc_test(
":mem",
"//upb:port",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/cleanup",
"@com_google_absl//absl/random",
"@com_google_absl//absl/random:distributions",
"@com_google_absl//absl/synchronization",
64 changes: 26 additions & 38 deletions upb/mem/arena.c
@@ -28,8 +28,7 @@ void upb_Arena_SetMaxBlockSize(size_t max) {
}

typedef struct upb_MemBlock {
// Atomic only for the benefit of SpaceAllocated().
UPB_ATOMIC(struct upb_MemBlock*) next;
struct upb_MemBlock* next;
uint32_t size;
// Data follows.
} upb_MemBlock;
@@ -62,9 +61,11 @@ typedef struct upb_ArenaInternal {
// == self when no other list members.
UPB_ATOMIC(struct upb_ArenaInternal*) tail;

// Linked list of blocks to free/cleanup. Atomic only for the benefit of
// upb_Arena_SpaceAllocated().
UPB_ATOMIC(upb_MemBlock*) blocks;
// Linked list of blocks to free/cleanup.
upb_MemBlock* blocks;

// Total space allocated in blocks, atomic only for SpaceAllocated
UPB_ATOMIC(size_t) space_allocated;
} upb_ArenaInternal;

// All public + private state for an arena.
@@ -207,11 +208,7 @@ size_t upb_Arena_SpaceAllocated(upb_Arena* arena, size_t* fused_count) {
size_t local_fused_count = 0;

while (ai != NULL) {
upb_MemBlock* block = upb_Atomic_Load(&ai->blocks, memory_order_relaxed);
while (block != NULL) {
memsize += sizeof(upb_MemBlock) + block->size;
block = upb_Atomic_Load(&block->next, memory_order_relaxed);
}
memsize += upb_Atomic_Load(&ai->space_allocated, memory_order_relaxed);
ai = upb_Atomic_Load(&ai->next, memory_order_relaxed);
local_fused_count++;
}
@@ -220,21 +217,6 @@ size_t upb_Arena_SpaceAllocated(upb_Arena* arena, size_t* fused_count) {
return memsize;
}

bool UPB_PRIVATE(_upb_Arena_Contains)(const upb_Arena* a, void* ptr) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
UPB_ASSERT(ai);

upb_MemBlock* block = upb_Atomic_Load(&ai->blocks, memory_order_relaxed);
while (block) {
uintptr_t beg = (uintptr_t)block;
uintptr_t end = beg + block->size;
if ((uintptr_t)ptr >= beg && (uintptr_t)ptr < end) return true;
block = upb_Atomic_Load(&block->next, memory_order_relaxed);
}

return false;
}

uint32_t upb_Arena_DebugRefCount(upb_Arena* a) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
// These loads could probably be relaxed, but given that this is debug-only,
@@ -251,10 +233,10 @@ static void _upb_Arena_AddBlock(upb_Arena* a, void* ptr, size_t size) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
upb_MemBlock* block = ptr;

// Insert into linked list.
block->size = (uint32_t)size;
upb_Atomic_Init(&block->next, ai->blocks);
upb_Atomic_Store(&ai->blocks, block, memory_order_release);
// Insert into linked list.
block->next = ai->blocks;
ai->blocks = block;

a->UPB_PRIVATE(ptr) = UPB_PTR_AT(block, kUpb_MemblockReserve, char);
a->UPB_PRIVATE(end) = UPB_PTR_AT(block, size, char);
@@ -267,7 +249,7 @@ static bool _upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
if (!ai->block_alloc) return false;
size_t last_size = 128;
upb_MemBlock* last_block = upb_Atomic_Load(&ai->blocks, memory_order_relaxed);
upb_MemBlock* last_block = ai->blocks;
if (last_block) {
last_size = a->UPB_PRIVATE(end) - (char*)last_block;
}
@@ -288,6 +270,13 @@

if (!block) return false;
_upb_Arena_AddBlock(a, block, block_size);
// Atomic add not required here, as threads won't race allocating blocks, plus
// atomic fetch-add is slower than load/add/store on arm devices compiled
targeting pre-v8.1.
size_t old_space_allocated =
upb_Atomic_Load(&ai->space_allocated, memory_order_relaxed);
upb_Atomic_Store(&ai->space_allocated, old_space_allocated + block_size,
memory_order_relaxed);
UPB_ASSERT(UPB_PRIVATE(_upb_ArenaHas)(a) >= size);
return true;
}
@@ -316,7 +305,8 @@ static upb_Arena* _upb_Arena_InitSlow(upb_alloc* alloc) {
upb_Atomic_Init(&a->body.parent_or_count, _upb_Arena_TaggedFromRefcount(1));
upb_Atomic_Init(&a->body.next, NULL);
upb_Atomic_Init(&a->body.tail, &a->body);
upb_Atomic_Init(&a->body.blocks, NULL);
upb_Atomic_Init(&a->body.space_allocated, n);
a->body.blocks = NULL;
a->body.upb_alloc_cleanup = NULL;

_upb_Arena_AddBlock(&a->head, mem, n);
@@ -355,7 +345,8 @@ upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
upb_Atomic_Init(&a->body.parent_or_count, _upb_Arena_TaggedFromRefcount(1));
upb_Atomic_Init(&a->body.next, NULL);
upb_Atomic_Init(&a->body.tail, &a->body);
upb_Atomic_Init(&a->body.blocks, NULL);
upb_Atomic_Init(&a->body.space_allocated, 0);
a->body.blocks = NULL;
a->body.upb_alloc_cleanup = NULL;

a->body.block_alloc = _upb_Arena_MakeBlockAlloc(alloc, 1);
@@ -374,12 +365,11 @@ static void _upb_Arena_DoFree(upb_ArenaInternal* ai) {
upb_ArenaInternal* next_arena =
(upb_ArenaInternal*)upb_Atomic_Load(&ai->next, memory_order_acquire);
upb_alloc* block_alloc = _upb_ArenaInternal_BlockAlloc(ai);
upb_MemBlock* block = upb_Atomic_Load(&ai->blocks, memory_order_acquire);
upb_MemBlock* block = ai->blocks;
upb_AllocCleanupFunc* alloc_cleanup = *ai->upb_alloc_cleanup;
while (block != NULL) {
// Load first since we are deleting block.
upb_MemBlock* next_block =
upb_Atomic_Load(&block->next, memory_order_acquire);
upb_MemBlock* next_block = block->next;
upb_free(block_alloc, block);
block = next_block;
}
@@ -601,15 +591,13 @@ void UPB_PRIVATE(_upb_Arena_SwapIn)(upb_Arena* des, const upb_Arena* src) {

*des = *src;
desi->block_alloc = srci->block_alloc;
upb_MemBlock* blocks = upb_Atomic_Load(&srci->blocks, memory_order_relaxed);
upb_Atomic_Init(&desi->blocks, blocks);
desi->blocks = srci->blocks;
}

void UPB_PRIVATE(_upb_Arena_SwapOut)(upb_Arena* des, const upb_Arena* src) {
upb_ArenaInternal* desi = upb_Arena_Internal(des);
upb_ArenaInternal* srci = upb_Arena_Internal(src);

*des = *src;
upb_MemBlock* blocks = upb_Atomic_Load(&srci->blocks, memory_order_relaxed);
upb_Atomic_Store(&desi->blocks, blocks, memory_order_relaxed);
desi->blocks = srci->blocks;
}
67 changes: 29 additions & 38 deletions upb/mem/arena_test.cc
@@ -17,6 +17,7 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/base/thread_annotations.h"
#include "absl/cleanup/cleanup.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/synchronization/mutex.h"
@@ -172,44 +173,6 @@ TEST(ArenaTest, FuzzSingleThreaded) {
}
}

TEST(ArenaTest, Contains) {
upb_Arena* arena1 = upb_Arena_New();
upb_Arena* arena2 = upb_Arena_New();
void* ptr1a = upb_Arena_Malloc(arena1, 8);
void* ptr2a = upb_Arena_Malloc(arena2, 8);

EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1a));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2a));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2a));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1a));

void* ptr1b = upb_Arena_Malloc(arena1, 1000000);
void* ptr2b = upb_Arena_Malloc(arena2, 1000000);

EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1a));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1b));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2a));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2b));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2a));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2b));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1a));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1b));

upb_Arena_Fuse(arena1, arena2);

EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1a));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr1b));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2a));
EXPECT_TRUE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr2b));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2a));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena1, ptr2b));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1a));
EXPECT_FALSE(UPB_PRIVATE(_upb_Arena_Contains)(arena2, ptr1b));

upb_Arena_Free(arena1);
upb_Arena_Free(arena2);
}

TEST(ArenaTest, LargeAlloc) {
// Tests an allocation larger than the max block size.
upb_Arena* arena = upb_Arena_New();
@@ -284,6 +247,34 @@ TEST(ArenaTest, FuzzFuseFuseRace) {
for (auto& t : threads) t.join();
}

TEST(ArenaTest, FuzzAllocSpaceAllocatedRace) {
Environment env;
upb_Arena_SetMaxBlockSize(512);
absl::Cleanup reset_max_block_size = [] {
upb_Arena_SetMaxBlockSize(32 << 10);
};
upb_Arena* arena = upb_Arena_New();
absl::Notification done;
std::vector<std::thread> threads;
for (int i = 0; i < 10; ++i) {
threads.emplace_back([&]() {
while (!done.HasBeenNotified()) {
size_t count;
upb_Arena_SpaceAllocated(arena, &count);
}
});
}

absl::BitGen gen;
auto end = absl::Now() + absl::Seconds(2);
while (absl::Now() < end) {
upb_Arena_Malloc(arena, 256);
}
done.Notify();
for (auto& t : threads) t.join();
upb_Arena_Free(arena);
}

TEST(ArenaTest, ArenaIncRef) {
upb_Arena* arena1 = upb_Arena_New();
EXPECT_EQ(upb_Arena_DebugRefCount(arena1), 1);
7 changes: 1 addition & 6 deletions upb/mem/internal/arena.h
@@ -20,7 +20,7 @@
//
// We need this because the decoder inlines a upb_Arena for performance but
// the full struct is not visible outside of arena.c. Yes, I know, it's awful.
#define UPB_ARENA_SIZE_HACK 8
#define UPB_ARENA_SIZE_HACK 9

// LINT.IfChange(upb_Arena)

@@ -40,11 +40,6 @@ void UPB_PRIVATE(_upb_Arena_SwapIn)(struct upb_Arena* des,
void UPB_PRIVATE(_upb_Arena_SwapOut)(struct upb_Arena* des,
const struct upb_Arena* src);

// Returns whether |ptr| was allocated directly by |a| (so care must be used
// with fused arenas).
UPB_API bool UPB_ONLYBITS(_upb_Arena_Contains)(const struct upb_Arena* a,
void* ptr);

UPB_INLINE size_t UPB_PRIVATE(_upb_ArenaHas)(const struct upb_Arena* a) {
return (size_t)(a->UPB_ONLYBITS(end) - a->UPB_ONLYBITS(ptr));
}