Add better hmap compacting
This commit is contained in:
parent
187883883b
commit
274d35ea5d
4 changed files with 103 additions and 50 deletions
|
|
@ -17,7 +17,10 @@ add_library(htd STATIC ${HTD_SOURCES})
|
||||||
target_include_directories(htd PUBLIC
|
target_include_directories(htd PUBLIC
|
||||||
${PROJECT_SOURCE_DIR}/include
|
${PROJECT_SOURCE_DIR}/include
|
||||||
)
|
)
|
||||||
target_compile_options(htd PRIVATE -Wall -Wextra -Werror)
|
target_compile_options(htd PRIVATE -Wall -Wextra -Werror -pedantic)
|
||||||
|
|
||||||
|
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -fsanitize=address")
|
||||||
|
#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
|
||||||
|
|
||||||
install(TARGETS htd
|
install(TARGETS htd
|
||||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ typedef struct {
|
||||||
usize prime_idx;
|
usize prime_idx;
|
||||||
usize key_size;
|
usize key_size;
|
||||||
usize val_size;
|
usize val_size;
|
||||||
|
|
||||||
|
usize tombstone_count;
|
||||||
} HashMap;
|
} HashMap;
|
||||||
|
|
||||||
void hmap_init(HashMap* hmap, usize key_size, usize val_size);
|
void hmap_init(HashMap* hmap, usize key_size, usize val_size);
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
const f32 LOAD_FACTOR = 0.7f;
|
const f32 LOAD_FACTOR = 0.7f;
|
||||||
|
const f32 TOMBSTONE_FACTOR = 0.2f;
|
||||||
const usize PRIME_CAPACITIES[] = {
|
const usize PRIME_CAPACITIES[] = {
|
||||||
17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209,
|
17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209,
|
||||||
16411, 32771, 65537, 131101, 262147, 524309, 1048583,
|
16411, 32771, 65537, 131101, 262147, 524309, 1048583,
|
||||||
|
|
@ -74,41 +75,40 @@ void hmap_init(HashMap* hmap, usize key_size, usize val_size) {
|
||||||
hmap->prime_idx = 0;
|
hmap->prime_idx = 0;
|
||||||
hmap->key_size = key_size;
|
hmap->key_size = key_size;
|
||||||
hmap->val_size = val_size;
|
hmap->val_size = val_size;
|
||||||
|
hmap->tombstone_count = 0;
|
||||||
|
|
||||||
hmap->table = (HashMapEntry*) malloc(sizeof(HashMapEntry) * hmap->capacity);
|
hmap->table = (HashMapEntry*) calloc(hmap->capacity, sizeof(HashMapEntry));
|
||||||
HashMapEntry* table = hmap->table;
|
|
||||||
for (usize i = 0; i < hmap->capacity; i++) {
|
|
||||||
table[i].key = NULL;
|
|
||||||
table[i].val = NULL;
|
|
||||||
table[i].is_deleted = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void resize(HashMap* hmap) {
|
void rehash(HashMap* hmap, usize old_capacity) {
|
||||||
const usize old_capacity = hmap->capacity;
|
|
||||||
hmap->prime_idx++;
|
|
||||||
hmap->capacity = PRIME_CAPACITIES[hmap->prime_idx];
|
|
||||||
|
|
||||||
HashMapEntry* old_table = (HashMapEntry*) malloc(sizeof(HashMapEntry) * old_capacity);
|
|
||||||
memcpy(old_table, hmap->table, sizeof(HashMapEntry) * old_capacity);
|
|
||||||
|
|
||||||
free(hmap->table);
|
|
||||||
hmap->table = malloc(sizeof(HashMapEntry) * hmap->capacity);
|
|
||||||
HashMapEntry* table = hmap->table;
|
HashMapEntry* table = hmap->table;
|
||||||
|
if (hmap->len == 0) {
|
||||||
for (usize i = 0; i < hmap->capacity; i++) {
|
for (usize i = 0; i < old_capacity; i++) {
|
||||||
table[i].key = NULL;
|
if (table[i].key != NULL) {
|
||||||
table[i].val = NULL;
|
free(table[i].key);
|
||||||
table[i].is_deleted = false;
|
table[i].key = NULL;
|
||||||
|
table[i].val = NULL;
|
||||||
|
table[i].is_deleted = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hmap->tombstone_count = 0;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rehash and re-add entries
|
HashMapEntry* old_table = hmap->table;
|
||||||
|
hmap->table = (HashMapEntry*) calloc(hmap->capacity, sizeof(HashMapEntry));
|
||||||
hmap->len = 0;
|
hmap->len = 0;
|
||||||
|
hmap->tombstone_count = 0;
|
||||||
|
// TODO: capacity is too big for old table when increasing
|
||||||
for (usize i = 0; i < old_capacity; i++) {
|
for (usize i = 0; i < old_capacity; i++) {
|
||||||
if (old_table[i].key != NULL && !old_table[i].is_deleted) {
|
if(old_table[i].key != NULL) {
|
||||||
hmap_put(hmap, old_table[i].key, old_table[i].val);
|
if (old_table[i].is_deleted) {
|
||||||
free(old_table[i].key);
|
free(old_table[i].key);
|
||||||
free(old_table[i].val);
|
} else {
|
||||||
|
hmap_put(hmap, old_table[i].key, old_table[i].val);
|
||||||
|
free(old_table[i].key);
|
||||||
|
free(old_table[i].val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -117,54 +117,85 @@ void resize(HashMap* hmap) {
|
||||||
|
|
||||||
void hmap_put(HashMap* hmap, const void* key, const void* val) {
|
void hmap_put(HashMap* hmap, const void* key, const void* val) {
|
||||||
if (hmap->len >= (hmap->capacity * LOAD_FACTOR)) {
|
if (hmap->len >= (hmap->capacity * LOAD_FACTOR)) {
|
||||||
resize(hmap);
|
hmap->prime_idx++;
|
||||||
|
hmap->capacity = PRIME_CAPACITIES[hmap->prime_idx];
|
||||||
|
rehash(hmap, PRIME_CAPACITIES[hmap->prime_idx - 1]);
|
||||||
|
} else if (hmap->tombstone_count >= (hmap->capacity * TOMBSTONE_FACTOR)) {
|
||||||
|
rehash(hmap, hmap->capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
|
usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
|
||||||
usize h2 = hash2(hmap, key, hmap->key_size);
|
usize h2 = hash2(hmap, key, hmap->key_size);
|
||||||
|
|
||||||
HashMapEntry* table = hmap->table;
|
HashMapEntry* table = hmap->table;
|
||||||
|
isize fst_tombstone = -1;
|
||||||
|
|
||||||
for (usize i = 0; i < hmap->capacity; i++) {
|
for (usize i = 0; i < hmap->capacity; i++) {
|
||||||
usize idx = (h1 + i * h2) % hmap->capacity;
|
usize idx = (h1 + i * h2) % hmap->capacity;
|
||||||
|
|
||||||
if (table[idx].key == NULL) {
|
if (table[idx].is_deleted && fst_tombstone == -1) {
|
||||||
table[idx].key = malloc(hmap->key_size);
|
fst_tombstone = idx;
|
||||||
memcpy(table[idx].key, key, hmap->key_size);
|
}
|
||||||
table[idx].val = malloc(hmap->val_size);
|
|
||||||
memcpy(table[idx].val, val, hmap->val_size);
|
if (table[idx].key != NULL && !table[idx].is_deleted &&
|
||||||
hmap->len++;
|
memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
||||||
return;
|
|
||||||
} else if (memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
|
||||||
free(table[idx].val);
|
free(table[idx].val);
|
||||||
table[idx].val = malloc(hmap->val_size);
|
table[idx].val = malloc(hmap->val_size);
|
||||||
memcpy(table[idx].val, val, hmap->val_size);
|
memcpy(table[idx].val, val, hmap->val_size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (table[idx].key == NULL) {
|
||||||
|
if (fst_tombstone != -1) {
|
||||||
|
idx = fst_tombstone;
|
||||||
|
hmap->tombstone_count--;
|
||||||
|
} else {
|
||||||
|
table[idx].key = malloc(hmap->key_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
table[idx].val = malloc(hmap->val_size);
|
||||||
|
memcpy(table[idx].key, key, hmap->key_size);
|
||||||
|
memcpy(table[idx].val, val, hmap->val_size);
|
||||||
|
table[idx].is_deleted = false;
|
||||||
|
hmap->len++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(false && "Error: HashMap is full, memory allocation probably failed");
|
assert(false && "Error: HashMap is full, memory allocation probably failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void hmap_remove(HashMap* hmap, const void* key) {
|
void hmap_remove(HashMap* hmap, const void* key) {
|
||||||
|
if (hmap->tombstone_count >= (hmap->capacity * TOMBSTONE_FACTOR)) {
|
||||||
|
rehash(hmap, hmap->capacity);
|
||||||
|
}
|
||||||
|
|
||||||
usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
|
usize h1 = hash1(key, hmap->key_size) % hmap->capacity;
|
||||||
usize h2 = hash2(hmap, key, hmap->key_size);
|
usize h2 = hash2(hmap, key, hmap->key_size);
|
||||||
HashMapEntry* table = hmap->table;
|
HashMapEntry* table = hmap->table;
|
||||||
|
|
||||||
for (usize i = 0; i < hmap->capacity; i++) {
|
for (usize i = 0; i < hmap->capacity; i++) {
|
||||||
usize idx = (h1 + i * h2) % hmap->capacity;
|
usize idx = (h1 + i * h2) % hmap->capacity;
|
||||||
|
|
||||||
|
if (table[idx].key == NULL && !table[idx].is_deleted) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (table[idx].key != NULL && !table[idx].is_deleted &&
|
if (table[idx].key != NULL && !table[idx].is_deleted &&
|
||||||
memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
||||||
free(table[idx].key);
|
|
||||||
free(table[idx].val);
|
|
||||||
table[idx].key = NULL;
|
|
||||||
table[idx].val = NULL;
|
|
||||||
table[idx].is_deleted = true;
|
table[idx].is_deleted = true;
|
||||||
|
hmap->tombstone_count++;
|
||||||
|
|
||||||
|
free(table[idx].val);
|
||||||
|
table[idx].val = NULL;
|
||||||
hmap->len--;
|
hmap->len--;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void* hmap_get(HashMap* hmap, const void* key) {
|
void* hmap_get(HashMap* hmap, const void* key) {
|
||||||
|
|
@ -174,10 +205,15 @@ void* hmap_get(HashMap* hmap, const void* key) {
|
||||||
|
|
||||||
for (usize i = 0; i < hmap->capacity; i++) {
|
for (usize i = 0; i < hmap->capacity; i++) {
|
||||||
usize idx = (h1 + i * h2) % hmap->capacity;
|
usize idx = (h1 + i * h2) % hmap->capacity;
|
||||||
if (table[idx].key != NULL && !table[idx].is_deleted && memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
|
||||||
return table[idx].val;
|
if (table[idx].key == NULL && !table[idx].is_deleted) {
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (table[idx].key != NULL && !table[idx].is_deleted &&
|
||||||
|
memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
||||||
|
return table[idx].val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
@ -192,7 +228,7 @@ bool hmap_contains(HashMap* hmap, const void* key) {
|
||||||
usize idx = (h1 + i * h2) % hmap->capacity;
|
usize idx = (h1 + i * h2) % hmap->capacity;
|
||||||
if (table[idx].key == NULL && !table[idx].is_deleted) {
|
if (table[idx].key == NULL && !table[idx].is_deleted) {
|
||||||
return false;
|
return false;
|
||||||
} else if (table[idx].key != NULL && memcmp(table[idx].key, key, hmap->key_size) == 0 && !table[idx].is_deleted) {
|
} else if (table[idx].key != NULL && !table[idx].is_deleted && memcmp(table[idx].key, key, hmap->key_size) == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,18 @@ int main() {
|
||||||
assert(retrieved_value != NULL && *retrieved_value == expected_value);
|
assert(retrieved_value != NULL && *retrieved_value == expected_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i32 i = 0; i < 100; i+=2) {
|
||||||
|
hmap_remove(&i32_i64_hmap, &i);
|
||||||
|
i64* retrieved_value = (i64*)hmap_get(&i32_i64_hmap, &i);
|
||||||
|
assert(retrieved_value == NULL);
|
||||||
|
}
|
||||||
|
for (i32 i = 1; i < 100; i+=2) {
|
||||||
|
i64 expected_value = (i64)i * 2;
|
||||||
|
i64* retrieved_value = (i64*)hmap_get(&i32_i64_hmap, &i);
|
||||||
|
|
||||||
|
assert(retrieved_value != NULL && *retrieved_value == expected_value);
|
||||||
|
}
|
||||||
|
|
||||||
hmap_free(&i32_i64_hmap);
|
hmap_free(&i32_i64_hmap);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue