-
Notifications
You must be signed in to change notification settings - Fork 0
/
dlb_hash.h
355 lines (306 loc) · 9.92 KB
/
dlb_hash.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#ifndef DLB_HASH_H
#define DLB_HASH_H
//------------------------------------------------------------------------------
// Copyright 2018 Dan Bechard
//------------------------------------------------------------------------------
//-- header --------------------------------------------------------------------
#include "dlb_types.h"
#include "dlb_murmur3.h"
#include <stdio.h>
typedef enum
{
DLB_HASH_STRING,
DLB_HASH_INT
} dlb_hash_type;
typedef struct
{
const void *key;
size_t klen;
void *value;
} dlb_hash_entry;
typedef struct
{
dlb_hash_type type;
const char *name;
size_t size;
dlb_hash_entry *buckets;
FILE *debug;
} dlb_hash;
#if DEBUG
// NOTE: For easier casting in debugger, make strings readable. Could use union instead, but wutevs for now.
typedef struct
{
const char *key;
size_t klen;
void *value;
} dlb_hash_entry_str;
typedef struct
{
dlb_hash_type type;
const char *name;
size_t size;
dlb_hash_entry_str *buckets;
FILE *debug;
} dlb_hash_str;
#endif
void dlb_hash_init(dlb_hash *table, dlb_hash_type type, const char *name,
size_t size_pow2);
void dlb_hash_free(dlb_hash *table);
void dlb_hash_insert(dlb_hash *table, const void *key, size_t klen, void *value);
void *dlb_hash_search(dlb_hash *table, const void *key, size_t klen, int *found);
void dlb_hash_delete(dlb_hash *table, const void *key, size_t klen);
void dlb_hash_test();
#endif
//-- end of header -------------------------------------------------------------
#ifdef __INTELLISENSE__
/* This makes MSVC intellisense work. */
#define DLB_HASH_IMPLEMENTATION
#endif
//-- implementation ------------------------------------------------------------
#ifdef DLB_HASH_IMPLEMENTATION
#ifndef DLB_HASH_IMPL_INTERNAL
#define DLB_HASH_IMPL_INTERNAL
#include "dlb_memory.h"
#include <string.h>
#include <stdio.h>
const void *_DLB_HASH_FREED = (void *)INT32_MIN;
static size_t _dlb_hash_pow2(size_t x)
{
return (x & (x - 1)) == 0;
}
void dlb_hash_init(dlb_hash *table, dlb_hash_type type, const char *name,
size_t size_pow2)
{
DLB_ASSERT(_dlb_hash_pow2(size_pow2));
table->type = type;
table->name = name;
table->size = size_pow2;
table->buckets = (dlb_hash_entry *)dlb_calloc(table->size,
sizeof(table->buckets[0]));
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][init] %s\n", table->name);
}
#endif
}
void dlb_hash_free(dlb_hash *table)
{
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][free] %s\n", table->name);
}
#endif
dlb_free(table->buckets);
}
static dlb_hash_entry *_dlb_hash_find(dlb_hash *table, const void *key, size_t klen, dlb_hash_entry **first_freed,
u8 return_first)
{
u32 hash = 0;
switch (table->type) {
case DLB_HASH_STRING:
hash = dlb_murmur3(key, klen);
break;
case DLB_HASH_INT:
hash = dlb_murmur3(&key, klen);
break;
default: DLB_ASSERT(0);
}
u32 index = hash & (table->size - 1);
u32 i = 0;
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][find] finding slot for %.*s, hash %u, starting at %u\n", (int)klen, (char *)key,
hash, index);
}
#endif
dlb_hash_entry *entry = 0;
for (;;) {
entry = table->buckets + index;
// Empty slot
if (!entry->klen) {
if (entry->key == _DLB_HASH_FREED) {
if (return_first) {
break;
} else if (first_freed && !*first_freed) {
*first_freed = entry;
}
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][find] %u is empty (FREED)\n", index);
}
#endif
} else {
#if _DEBUG
if (table->debug && !return_first) {
fprintf(table->debug, "[hash][find] %u is empty\n", index);
}
#endif
break;
}
}
// Match
int match = 0;
if (entry->klen == klen) {
switch (table->type) {
case DLB_HASH_STRING: {
match = !strncmp((const char *)entry->key, (const char *)key, klen);
break;
} case DLB_HASH_INT: {
match = entry->key == key;
break;
} default: {
DLB_ASSERT(0);
}
}
}
if (match) {
break;
}
// Next slot
i++;
index += (i * (i + 1)) >> 1; // Same as (0.5f)i + (0.5f)i^2
index &= (table->size - 1);
// NOTE(perf): This check will check entire hash table if power-of-two
// size. This would be a good place to enforce max probes.
// End of probe; not found
if (i == table->size) {
entry = 0;
break;
}
}
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][find] found slot at %u\n", index);
}
#endif
return entry;
}
void dlb_hash_insert(dlb_hash *table, const void *key, size_t klen, void *value)
{
DLB_ASSERT(key);
DLB_ASSERT(klen);
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][insert] inserting value %p for key %.*s\n", value, (int)klen, (char *)key);
}
#endif
dlb_hash_entry *first_freed = 0;
dlb_hash_entry *entry =
_dlb_hash_find(table, key, klen, &first_freed, 1);
if (entry) {
entry->key = key;
entry->klen = klen;
entry->value = value;
} else {
// Out of memory
// NOTE: In the power-of-two, triangle number case this won't happen
// until table is 100% full. We should realloc much sooner.
DLB_ASSERT(!"DLB_HASH: Out of memory"); // TODO: Realloc hash table
}
}
void *dlb_hash_search(dlb_hash *table, const void *key, size_t klen, int *found)
{
#if _DEBUG
if (table->debug) {
fprintf(table->debug, "[hash][search_start] searching for key %.*s\n", (int)klen, (char *)key);
}
#endif
//DLB_ASSERT(key);
DLB_ASSERT(klen);
void *value = NULL;
int value_found = 0;
dlb_hash_entry *first_freed = 0;
dlb_hash_entry *entry = _dlb_hash_find(table, key, klen, &first_freed, 0);
if (entry && entry->klen) {
value = entry->value;
value_found = 1;
// Optimize by pushing entry into first free slot
if (first_freed) {
first_freed->key = entry->key;
first_freed->klen = entry->klen;
first_freed->value = entry->value;
entry->key = _DLB_HASH_FREED;
entry->klen = 0;
entry->value = 0;
}
}
#if _DEBUG
if (table->debug) {
if (value_found) {
fprintf(table->debug, "[hash][search_end] found value %p\n", value);
} else {
fprintf(table->debug, "[hash][search_end] not found\n");
}
}
#endif
if (found) *found = value_found;
return value;
}
// Possible improvements:
// https://en.wikipedia.org/wiki/Lazy_deletion
// https://attractivechaos.wordpress.com/2018/10/01/advanced-techniques-to-implement-fast-hash-tables/
void dlb_hash_delete(dlb_hash *table, const void *key, size_t klen)
{
DLB_ASSERT(key);
DLB_ASSERT(klen);
dlb_hash_entry *entry = _dlb_hash_find(table, key, klen, 0, 0);
if (entry && entry->klen) {
entry->key = _DLB_HASH_FREED;
entry->klen = 0;
entry->value = 0;
} else {
DLB_ASSERT(0); // Error: Tried to delete non-existent key
}
}
#endif
#endif
//-- end of implementation -----------------------------------------------------
//-- tests ---------------------------------------------------------------------
#ifdef DLB_HASH_TEST
void dlb_hash_test() {
dlb_hash table_ = { 0 };
dlb_hash *table = &table_;
dlb_hash_init(table, DLB_HASH_STRING, "hashtable", 4);
const char key_1[] = "test key 1";
const char key_2[] = "test key 2";
const char key_3[] = "test key 3";
const char val_1[] = "1st value";
const char val_2[] = "2nd value";
const char val_3[] = "3rd value";
dlb_hash_insert(table, CSTR(key_1), (void *)val_1);
dlb_hash_insert(table, CSTR(key_2), (void *)val_2);
dlb_hash_insert(table, CSTR(key_3), (void *)val_3);
const char *search_1;
const char *search_2;
const char *search_3;
search_1 = dlb_hash_search(table, CSTR(key_1), 0);
search_2 = dlb_hash_search(table, CSTR(key_2), 0);
search_3 = dlb_hash_search(table, CSTR(key_3), 0);
DLB_ASSERT(search_1 == val_1);
DLB_ASSERT(search_2 == val_2);
DLB_ASSERT(search_3 == val_3);
dlb_hash_delete(table, CSTR(key_1));
search_1 = dlb_hash_search(table, CSTR(key_1), 0);
search_2 = dlb_hash_search(table, CSTR(key_2), 0);
search_3 = dlb_hash_search(table, CSTR(key_3), 0);
DLB_ASSERT(search_1 == 0);
DLB_ASSERT(search_2 == val_2);
DLB_ASSERT(search_3 == val_3);
dlb_hash_delete(table, CSTR(key_2));
search_1 = dlb_hash_search(table, CSTR(key_1), 0);
search_2 = dlb_hash_search(table, CSTR(key_2), 0);
search_3 = dlb_hash_search(table, CSTR(key_3), 0);
DLB_ASSERT(search_1 == 0);
DLB_ASSERT(search_2 == 0);
DLB_ASSERT(search_3 == val_3);
dlb_hash_delete(table, CSTR(key_3));
search_1 = dlb_hash_search(table, CSTR(key_1), 0);
search_2 = dlb_hash_search(table, CSTR(key_2), 0);
search_3 = dlb_hash_search(table, CSTR(key_3), 0);
DLB_ASSERT(search_1 == 0);
DLB_ASSERT(search_2 == 0);
DLB_ASSERT(search_3 == 0);
dlb_hash_free(table);
}
#endif
//-- end of tests --------------------------------------------------------------