-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeneric_khm.c
170 lines (154 loc) · 5.34 KB
/
generic_khm.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// Copyright (C) 2020-2024 Andy Kurnia.
// kurnia hashmap
// usage:
// define Vec for Bool, U64, K, T.
// implement uint64_t hashFunc(KHM_K_T*).
// implement bool eqlFunc(KHM_K_T*, KHM_K_T*).
// define KHM_K_NAME Bool
// define KHM_K_T bool
// define KHM_K_HASHFUNC hshBool
// define KHM_K_EQLFUNC eqlBool
// define KHM_V_NAME U64
// define KHM_V_T uint64_t
// include
// undef KHM_V_T
// undef KHM_V_NAME
// undef KHM_K_EQLFUNC
// undef KHM_K_HASHFUNC
// undef KHM_K_T
// undef KHM_K_NAME
#define GENERIC_CONCAT_(a, b) a##b
#define GENERIC_CONCAT(a, b) GENERIC_CONCAT_(a, b)
#define KHM_T GENERIC_CONCAT(GENERIC_CONCAT(Khm, KHM_K_NAME), KHM_V_NAME)
#define KHM_F(suffix) GENERIC_CONCAT(GENERIC_CONCAT(GENERIC_CONCAT(GENERIC_CONCAT(khm, KHM_K_NAME), KHM_V_NAME), _), suffix)
#define KHM_VEC_K_T GENERIC_CONCAT(Vec, KHM_K_NAME)
#define KHM_VEC_K_F(suffix) GENERIC_CONCAT(GENERIC_CONCAT(GENERIC_CONCAT(vec, KHM_K_NAME), _), suffix)
#define KHM_VEC_V_T GENERIC_CONCAT(Vec, KHM_V_NAME)
#define KHM_VEC_V_F(suffix) GENERIC_CONCAT(GENERIC_CONCAT(GENERIC_CONCAT(vec, KHM_V_NAME), _), suffix)
typedef struct {
VecBool occupieds;
VecU64 hashes;
KHM_VEC_K_T keys;
KHM_VEC_V_T values;
size_t len;
size_t last_probe; // note: this would be useful if caller needs to strdup()
} KHM_T;
// KhmKV h = khmKV_new_cap(cap);
static inline KHM_T KHM_F(new_cap)(size_t cap) {
if (cap != (cap & -cap) || cap < 16) { // power of two of at least 16.
fprintf(stderr, "invalid cap=%zu\n", cap);
abort();
}
KHM_T ret = {
.occupieds = vecBool_new(),
.hashes = vecU64_new(),
.keys = KHM_VEC_K_F(new)(),
.values = KHM_VEC_V_F(new)(),
.len = 0,
.last_probe = (size_t)-1,
};
vecBool_ensure_cap_exact(&ret.occupieds, cap);
vecU64_ensure_cap_exact(&ret.hashes, cap);
KHM_VEC_K_F(ensure_cap_exact)(&ret.keys, cap);
KHM_VEC_V_F(ensure_cap_exact)(&ret.values, cap);
memset(ret.occupieds.ptr, 0, cap * sizeof(bool));
ret.occupieds.len = cap;
ret.hashes.len = cap;
ret.keys.len = cap;
ret.values.len = cap;
return ret;
}
// KhmKV h = khmKV_new();
static inline KHM_T KHM_F(new)(void) {
return KHM_F(new_cap)(16);
}
// khmKV_free(&khm);
static inline void KHM_F(free)(KHM_T self[static 1]) {
KHM_VEC_V_F(free)(&self->values);
KHM_VEC_K_F(free)(&self->keys);
vecU64_free(&self->hashes);
vecBool_free(&self->occupieds);
self->len = 0;
}
// size_t idx = khmKV_locate(&khm, &k, hshk(pk));
// returns index where the value is or would end up, or (size_t)-1 if full.
static inline size_t KHM_F(locate)(KHM_T self[static 1], KHM_K_T *pk, uint64_t hsh) {
size_t mask = self->occupieds.len - 1; // always power of two.
size_t bucket = hsh & mask;
size_t probe = bucket;
do {
if (!self->occupieds.ptr[probe]) return probe;
#ifndef __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
if (self->hashes.ptr[probe] == hsh && KHM_K_EQLFUNC(&self->keys.ptr[probe], pk)) return probe;
#ifndef __clang__
#pragma GCC diagnostic pop
#endif
probe = (probe + 1) & mask;
} while (probe != bucket);
return (size_t)-1;
}
// V *v = khmKV_get(&khm, &k);
static inline KHM_V_T *KHM_F(get)(KHM_T self[static 1], KHM_K_T *pk) {
uint64_t hsh = KHM_K_HASHFUNC(pk);
size_t probe = KHM_F(locate)(self, pk, hsh);
self->last_probe = probe;
if (probe == (size_t)-1) return NULL;
if (self->occupieds.ptr[probe]) return self->values.ptr + probe;
return NULL;
}
// bool inserted = khmKV_set(&khm, &k, &v);
static inline bool KHM_F(set)(KHM_T self[static 1], KHM_K_T *pk, KHM_V_T *pv) {
uint64_t hsh = KHM_K_HASHFUNC(pk);
size_t probe = self->len + self->len / 3 >= self->occupieds.len ? (size_t)-1 : KHM_F(locate)(self, pk, hsh);
if (probe == (size_t)-1) {
// no space. grow.
size_t next_cap = self->occupieds.len << 1; // ignore overflow.
KHM_T old = *self;
// rehash.
self->occupieds.ptr = NULL;
self->hashes.ptr = NULL;
self->keys.ptr = NULL;
self->values.ptr = NULL;
vecBool_ensure_cap_exact(&self->occupieds, next_cap);
vecU64_ensure_cap_exact(&self->hashes, next_cap);
KHM_VEC_K_F(ensure_cap_exact)(&self->keys, next_cap);
KHM_VEC_V_F(ensure_cap_exact)(&self->values, next_cap);
memset(self->occupieds.ptr, 0, next_cap * sizeof(bool));
self->occupieds.len = next_cap;
self->hashes.len = next_cap;
self->keys.len = next_cap;
self->values.len = next_cap;
for (size_t i = 0; i < old.occupieds.len; ++i) {
if (old.occupieds.ptr[i]) {
uint64_t old_hash = old.hashes.ptr[i];
probe = KHM_F(locate)(self, old.keys.ptr + i, old_hash);
self->occupieds.ptr[probe] = true;
self->hashes.ptr[probe] = old_hash;
memcpy(self->keys.ptr + probe, old.keys.ptr + i, sizeof(KHM_K_T));
memcpy(self->values.ptr + probe, old.values.ptr + i, sizeof(KHM_V_T));
}
}
KHM_F(free)(&old);
probe = KHM_F(locate)(self, pk, hsh);
}
self->last_probe = probe;
if (self->occupieds.ptr[probe]) {
memcpy(self->values.ptr + probe, pv, sizeof(KHM_V_T));
return false; // replaced, old value is gone.
}
self->occupieds.ptr[probe] = true;
self->hashes.ptr[probe] = hsh;
memcpy(self->keys.ptr + probe, pk, sizeof(KHM_K_T));
memcpy(self->values.ptr + probe, pv, sizeof(KHM_V_T));
++self->len;
return true; // inserted.
}
#undef KHM_VEC_V_F
#undef KHM_VEC_V_T
#undef KHM_VEC_K_F
#undef KHM_VEC_K_T
#undef KHM_F
#undef KHM_T