Improve hashmap performance

This commit is contained in:
Hugo Mårdbrink 2025-04-18 00:46:05 +02:00
parent 50a450e7c3
commit 2abff63fcd
3 changed files with 131 additions and 54 deletions

View file

@ -18,7 +18,7 @@ Use shorter more concise names for common C types.
### Data structures ### Data structures
- `Dynamic array`: A dynamic array that can grow and shrink in size. - `Dynamic array`: A dynamic array that can grow and shrink in size.
- `Hash map`: A hash map that uses linear probing for collision resolution and Murmur3 for hashing. - `Hash map`: A hash map that uses murmur3, open addressing (double hashing) and tombstone deletion.
## Building ## Building

View file

@ -4,15 +4,11 @@
#include <htd/primitives/primitives.h> #include <htd/primitives/primitives.h>
typedef struct { typedef struct {
void* key; void* table;
void* val;
} HashMapEntry;
typedef struct {
HashMapEntry* table;
usize len; usize len;
usize capacity; usize capacity;
usize prime_idx;
usize key_size; usize key_size;
usize val_size; usize val_size;
} HashMap; } HashMap;
@ -21,8 +17,12 @@ void hmap_init(HashMap* hmap, usize key_size, usize val_size);
void hmap_put(HashMap* hmap, const void* key, const void* val); void hmap_put(HashMap* hmap, const void* key, const void* val);
void hmap_remove(HashMap* hmap, const void* key);
void* hmap_get(HashMap* hmap, const void* key); void* hmap_get(HashMap* hmap, const void* key);
bool hmap_contains(HashMap* hmap, const void* key);
void hmap_free(HashMap* hmap); void hmap_free(HashMap* hmap);
#endif // HTD_HASH_MAP_H #endif // HTD_HASH_MAP_H

View file

@ -3,11 +3,22 @@
#include <stdlib.h> #include <stdlib.h>
#include <memory.h> #include <memory.h>
#include <assert.h>
const usize START_LEN = 16; const f32 LOAD_FACTOR = 0.7f;
const usize GROWTH_FACTOR = 2; const usize PRIME_CAPACITIES[] = {
17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209,
16411, 32771, 65537, 131101, 262147, 524309, 1048583,
2097169, 4194301, 8388617, 16777259, 33554467, 67108879,
};
usize hash(const u8* key, usize len) { typedef struct {
void* key;
void* val;
bool is_deleted;
} HashMapEntry;
usize hash1(const void* key, usize len) {
const u8 *data = (const u8*)key; const u8 *data = (const u8*)key;
u32 h = 0x811c9dc5; u32 h = 0x811c9dc5;
const u32 c1 = 0xcc9e2d51; const u32 c1 = 0xcc9e2d51;
@ -15,7 +26,8 @@ usize hash(const u8* key, usize len) {
const usize nblocks = len / 4; const usize nblocks = len / 4;
for (usize i = 0; i < nblocks; i++) { for (usize i = 0; i < nblocks; i++) {
u32 k = *((u32*)data); u32 k;
memcpy(&k, data, sizeof(u32));
data += 4; data += 4;
k *= c1; k *= c1;
@ -30,7 +42,7 @@ usize hash(const u8* key, usize len) {
const usize tail_size = len & 3; const usize tail_size = len & 3;
u32 k1 = 0; u32 k1 = 0;
if (tail_size > 0) { if (tail_size > 0) {
for (usize i = 0; i < tail_size; ++i) { for (usize i = 0; i < tail_size; i++) {
k1 ^= data[i] << (i * 8); k1 ^= data[i] << (i * 8);
} }
@ -50,38 +62,51 @@ usize hash(const u8* key, usize len) {
return h; return h;
} }
// Secondary hash used as the probe step for double hashing.
//
// Reduces the primary hash of `key` (`len` bytes) modulo `capacity - 1` and
// adds one, so the returned step always lies in [1, capacity - 1] and is
// never zero.
// NOTE(review): presumably relies on the table capacity being prime so that
// every step visits all slots - confirm against PRIME_CAPACITIES.
usize hash2(HashMap* hmap, const void* key, usize len) {
    const usize modulus = hmap->capacity - 1;
    const usize primary = hash1(key, len);
    return (primary % modulus) + 1;
}
void hmap_init(HashMap* hmap, usize key_size, usize val_size) { void hmap_init(HashMap* hmap, usize key_size, usize val_size) {
hmap->len = 0; hmap->len = 0;
hmap->capacity = START_LEN; hmap->capacity = PRIME_CAPACITIES[0];
hmap->prime_idx = 0;
hmap->key_size = key_size; hmap->key_size = key_size;
hmap->val_size = val_size; hmap->val_size = val_size;
hmap->table = (HashMapEntry*)malloc(sizeof(HashMapEntry) * hmap->capacity); hmap->table = (HashMapEntry*) malloc(sizeof(HashMapEntry) * hmap->capacity);
for (usize i = 0; i < hmap->capacity; ++i) { HashMapEntry* table = hmap->table;
hmap->table[i].key = NULL; for (usize i = 0; i < hmap->capacity; i++) {
hmap->table[i].val = NULL; table[i].key = NULL;
table[i].val = NULL;
table[i].is_deleted = false;
} }
} }
void resize(HashMap* hmap) { void resize(HashMap* hmap) {
usize old_capacity = hmap->capacity; const usize old_capacity = hmap->capacity;
HashMapEntry* old_table = (HashMapEntry*)malloc(sizeof(HashMapEntry) * old_capacity); hmap->prime_idx++;
hmap->capacity = PRIME_CAPACITIES[hmap->prime_idx];
HashMapEntry* old_table = (HashMapEntry*) malloc(sizeof(HashMapEntry) * old_capacity);
memcpy(old_table, hmap->table, sizeof(HashMapEntry) * old_capacity); memcpy(old_table, hmap->table, sizeof(HashMapEntry) * old_capacity);
hmap->capacity *= GROWTH_FACTOR; free(hmap->table);
hmap->table = (HashMapEntry*)realloc(hmap->table, sizeof(HashMapEntry) * hmap->capacity); hmap->table = malloc(sizeof(HashMapEntry) * hmap->capacity);
HashMapEntry* table = hmap->table;
for (usize i = 0; i < hmap->capacity; i++) { for (usize i = 0; i < hmap->capacity; i++) {
hmap->table[i].key = NULL; table[i].key = NULL;
hmap->table[i].val = NULL; table[i].val = NULL;
table[i].is_deleted = false;
} }
// Reset length, re-adding // Rehash and re-add entries
hmap->len = 0; hmap->len = 0;
for (usize i = 0; i < old_capacity; i++) { for (usize i = 0; i < old_capacity; i++) {
if (old_table[i].key != NULL) { if (old_table[i].key != NULL && !old_table[i].is_deleted) {
hmap_put(hmap, old_table[i].key, old_table[i].val); hmap_put(hmap, old_table[i].key, old_table[i].val);
free(old_table[i].key); free(old_table[i].key);
free(old_table[i].val); free(old_table[i].val);
} }
@ -91,52 +116,104 @@ void resize(HashMap* hmap) {
} }
void hmap_put(HashMap* hmap, const void* key, const void* val) { void hmap_put(HashMap* hmap, const void* key, const void* val) {
if (hmap->len >= hmap->capacity) { if (hmap->len >= (hmap->capacity * LOAD_FACTOR)) {
resize(hmap); resize(hmap);
} }
usize idx = hash(key, hmap->key_size) % hmap->capacity; usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
usize h2 = hash2(hmap, key, hmap->key_size);
while (hmap->table[idx].key != NULL) { HashMapEntry* table = hmap->table;
if (memcmp(hmap->table[idx].key, key, hmap->key_size) == 0) {
free(hmap->table[idx].val); for (usize i = 0; i < hmap->capacity; i++) {
hmap->table[idx].val = malloc(hmap->val_size); usize idx = (h1 + i * h2) % hmap->capacity;
memcpy(hmap->table[idx].val, val, hmap->val_size);
if (table[idx].key == NULL) {
table[idx].key = malloc(hmap->key_size);
memcpy(table[idx].key, key, hmap->key_size);
table[idx].val = malloc(hmap->val_size);
memcpy(table[idx].val, val, hmap->val_size);
hmap->len++;
return;
} else if (memcmp(table[idx].key, key, hmap->key_size) == 0) {
free(table[idx].val);
table[idx].val = malloc(hmap->val_size);
memcpy(table[idx].val, val, hmap->val_size);
return; return;
} }
idx = (idx + 1) % hmap->capacity;
} }
hmap->table[idx].key = malloc(hmap->key_size); assert(false && "Error: HashMap is full, memory allocation probably failed");
memcpy(hmap->table[idx].key, key, hmap->key_size); }
hmap->table[idx].val = malloc(hmap->val_size);
memcpy(hmap->table[idx].val, val, hmap->val_size); void hmap_remove(HashMap* hmap, const void* key) {
usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
usize h2 = hash2(hmap, key, hmap->key_size);
HashMapEntry* table = hmap->table;
for (usize i = 0; i < hmap->capacity; i++) {
usize idx = (h1 + i * h2) % hmap->capacity;
if (table[idx].key != NULL && !table[idx].is_deleted &&
memcmp(table[idx].key, key, hmap->key_size) == 0) {
free(table[idx].key);
free(table[idx].val);
table[idx].key = NULL;
table[idx].val = NULL;
table[idx].is_deleted = true;
hmap->len--;
return;
}
}
hmap->len++;
} }
void* hmap_get(HashMap* hmap, const void* key) { void* hmap_get(HashMap* hmap, const void* key) {
usize idx = hash(key, hmap->key_size) % hmap->capacity; usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
usize h2 = hash2(hmap, key, hmap->key_size);
HashMapEntry* table = hmap->table;
while (hmap->table[idx].key != NULL) { for (usize i = 0; i < hmap->capacity; i++) {
if (memcmp(hmap->table[idx].key, key, hmap->key_size) == 0) { usize idx = (h1 + i * h2) % hmap->capacity;
return hmap->table[idx].val; if (table[idx].key != NULL && !table[idx].is_deleted && memcmp(table[idx].key, key, hmap->key_size) == 0) {
return table[idx].val;
} }
idx = (idx + 1) % hmap->capacity;
} }
return NULL; return NULL;
} }
// Reports whether `key` (hmap->key_size bytes) is currently stored in the map.
//
// Walks the double-hashing probe sequence for `key`:
//   - a slot with no key that is not a tombstone ends the search (not found);
//   - a tombstone (is_deleted) is skipped so keys inserted past it stay
//     reachable;
//   - a live slot whose key bytes compare equal means the key is present.
// Returns false if the whole table was probed without a match, or if the map
// has no storage (capacity 0 / NULL table, the state hmap_free leaves behind).
bool hmap_contains(HashMap* hmap, const void* key) {
    // Without this guard the modulo below would divide by zero on a freed map.
    if (hmap->capacity == 0 || hmap->table == NULL) {
        return false;
    }

    const usize capacity = hmap->capacity;
    const HashMapEntry* table = hmap->table;
    const usize step = hash2(hmap, key, hmap->key_size);
    usize idx = hash1(key, hmap->key_size) % capacity;

    for (usize probes = 0; probes < capacity; probes++) {
        const HashMapEntry* entry = &table[idx];

        if (entry->key == NULL) {
            if (!entry->is_deleted) {
                return false;
            }
        } else if (!entry->is_deleted &&
                   memcmp(entry->key, key, hmap->key_size) == 0) {
            return true;
        }

        // Advance incrementally: idx and step are both < capacity, so this
        // cannot wrap the way `h1 + i * h2` can on a narrow usize.
        idx = (idx + step) % capacity;
    }

    return false;
}
void hmap_free(HashMap* hmap) { void hmap_free(HashMap* hmap) {
for (usize i = 0; i < hmap->capacity; ++i) { HashMapEntry* table = hmap->table;
if (hmap->table[i].key != NULL) {
free(hmap->table[i].key); for (usize i = 0; i < hmap->capacity; i++) {
free(hmap->table[i].val); if (table[i].key != NULL) {
free(table[i].key);
free(table[i].val);
} }
} }
free(hmap->table); free(hmap->table);
hmap->table = NULL; hmap->table = NULL;
hmap->capacity = 0; hmap->capacity = 0;
hmap->key_size = 0;
hmap->val_size = 0;
hmap->prime_idx = 0;
hmap->len = 0; hmap->len = 0;
} }