From 8946fa4b77fef3002d78bb1c5a19cc05c97a71c3 Mon Sep 17 00:00:00 2001 From: Kimplul Date: Sat, 22 Mar 2025 15:07:31 +0200 Subject: make map faster --- include/conts/map.h | 208 +++++++++++++++++++++++++++++++++++++++++++--------- tests/map.c | 5 +- 2 files changed, 178 insertions(+), 35 deletions(-) diff --git a/include/conts/map.h b/include/conts/map.h index f7b9bfd..31334a0 100644 --- a/include/conts/map.h +++ b/include/conts/map.h @@ -10,96 +10,236 @@ #error "Need map cmp" #endif +#ifndef MAP_HASH +#error "Need map hash" +#endif + #ifndef MAP_NAME #error "Need map name" #endif +#include +#include +#include + #include "conts.h" #define MAP(a) CONTS_JOIN(MAP_NAME, a) +#define MAP_TUPLE MAP(tuple) #define MAP_NODE MAP(node) +#define MAP_BUCKET MAP(bucket) #define MAP_ROOT MAP_NAME -struct MAP_NODE { - MAP_KEY key; - MAP_TYPE data; -}; +#ifndef CONTS_MAP_H +#define CONTS_MAP_H + +static inline size_t conts_map_generic_hash(const char *s, size_t l) +{ + /* djb2 */ + size_t hash = 5381; + for (size_t i = 0; i < l; ++i) + hash = ((hash << 5) + hash) + s[i]; + + return hash; +} + +#define CONTS_MAP_NO_HASH(a) (a) +#define CONTS_MAP_STR_HASH(a) conts_map_generic_hash(a, strlen(a)) + +/* fast modulo pow2 */ +#define CONTS_BUCKET_IDX(hash, pow2, bucket) \ + ((hash) & (((pow2) << bucket) - 1)) -static inline int MAP(cmp)(struct MAP_NODE a, struct MAP_NODE b) +#endif /* CONTS_MAP_H */ + +static inline size_t MAP(generic_hash)(MAP_KEY *key) { - return MAP_CMP(a.key, b.key); + return conts_map_generic_hash((const char *)key, sizeof(*key)); } -#define BASE(a) CONTS_JOIN(MAP(map_base), a) +struct MAP_TUPLE { + MAP_KEY key; + MAP_TYPE data; +}; -#define SPTREE_TYPE struct MAP_NODE -#define SPTREE_CMP MAP(cmp) -#define SPTREE_NAME MAP(map_base) -#include "sptree.h" +struct MAP_NODE { + struct MAP_BUCKET *bucket; + size_t hash; + struct MAP_TUPLE t; +}; + +struct MAP_BUCKET { + size_t size; /* bucket size */ + struct MAP_BUCKET *next; + struct MAP_NODE nodes[]; +}; struct MAP_ROOT { - struct MAP(map_base) root; + size_t len; /* number of items */ + size_t count; /* bucket count */ + size_t pow2; /* how many nodes in smallest bucket */ + struct MAP_BUCKET **buckets; }; -static inline struct MAP_ROOT MAP(create)() +static inline struct MAP_ROOT MAP(create)(size_t count) { - return (struct MAP_ROOT){.root = BASE(create)()}; + size_t pow2 = 1; + while (pow2 < count) + pow2 <<= 1; + + return (struct MAP_ROOT){ + .len = 0, + .count = 0, + .pow2 = pow2, + .buckets = NULL + }; } static inline void MAP(destroy)(struct MAP_ROOT *root) { - BASE(destroy)(&root->root); + for (size_t i = 0; i < root->count; ++i) + free(root->buckets[i]); + + free(root->buckets); } static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data) { - struct MAP_NODE node = {.key = key, .data = data}; - struct MAP_NODE *res = BASE(insert)(&root->root, node); - if (!res) - return NULL; - - return &res->data; + size_t hash = MAP_HASH(key); + /* look through buckets in order */ + for (size_t b = 0; b < root->count; ++b) { + struct MAP_BUCKET *bucket = root->buckets[b]; + size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b); + + struct MAP_NODE *node = &bucket->nodes[idx]; + /* free to use this slot */ + if (!node->bucket) { + node->bucket = bucket; + node->hash = hash; + node->t.data = data; + node->t.key = key; + root->len++; + return &node->t.data; + } + + /* there already exists a node like this */ + if (node->hash == hash && MAP_CMP(&node->t.key, &key) == 0) + return &node->t.data; + } + + /* no bucket available, create new one */ + size_t size = root->pow2 << root->count; + size_t bytes = sizeof(struct MAP_BUCKET) + sizeof(struct MAP_NODE) * size; + struct MAP_BUCKET *bucket = calloc(1, bytes); + bucket->size = size; + + size_t buckets_bytes = sizeof(struct MAP_BUCKET *) * (root->count + 1); + root->buckets = realloc(root->buckets, buckets_bytes); + if (root->count != 0) + root->buckets[root->count - 1]->next = bucket; + + root->buckets[root->count] = bucket; + + /* populate node */ + size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, root->count); + struct MAP_NODE *node = &bucket->nodes[idx]; + node->bucket = bucket; + node->hash = hash; + node->t.key = key; + node->t.data = data; + root->count++; + root->len++; + return &node->t.data; } static inline MAP_TYPE *MAP(find)(struct MAP_ROOT *root, MAP_KEY key) { - struct MAP_NODE node = {.key = key}; - struct MAP_NODE *res = BASE(find)(&root->root, node); - if (!res) + if (root->len == 0) return NULL; - return &res->data; + size_t hash = MAP_HASH(key); + for (size_t b = 0; b < root->count; ++b) { + struct MAP_BUCKET *bucket = root->buckets[b]; + size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b); + + struct MAP_NODE *node = &bucket->nodes[idx]; + if (node->hash != hash) + continue; + + if (MAP_CMP(node->t.key, key) != 0) + continue; + + return &node->t.data; + } + + return NULL; +} + +static inline void MAP(remove_found)(struct MAP_ROOT *root, MAP_TYPE *data) +{ + struct MAP_TUPLE *tuple = CONTAINER_OF(data, struct MAP_TUPLE, data); + struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t); + node->bucket = NULL; + root->len--; } static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key) { - struct MAP_NODE node = {.key = key}; - BASE(remove)(&root->root, node); + MAP_TYPE *found = MAP(find)(root, key); + if (!found) + return; + + MAP(remove_found)(root, found); } -static inline struct MAP_NODE *MAP(begin)(struct MAP_ROOT *root) +static inline struct MAP_TUPLE *MAP(find_next)(struct MAP_BUCKET *bucket, struct MAP_TUPLE *tuple) { - return BASE(begin)(&root->root); + struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t); + size_t idx = node - bucket->nodes; + for (idx += 1; idx < bucket->size; ++idx) { + struct MAP_NODE *candidate = &bucket->nodes[idx]; + if (candidate->bucket) + return &candidate->t; + } + + struct MAP_BUCKET *next = bucket->next; + if (next) + return NULL; + + return MAP(find_next)(next, &next->nodes[0].t); +} + +static inline struct MAP_TUPLE *MAP(begin)(struct MAP_ROOT *root) +{ + if (root->len == 0) + return NULL; + + struct MAP_BUCKET *bucket = root->buckets[0]; + return MAP(find_next)(bucket, &bucket->nodes[0].t); } -static inline struct MAP_NODE *MAP(next)(struct MAP_NODE *n) +static inline struct MAP_TUPLE *MAP(next)(struct MAP_TUPLE *t) { - return BASE(next)(n); + struct MAP_NODE *node = CONTAINER_OF(t, struct MAP_NODE, t); + return MAP(find_next)(node->bucket, t); } -static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_NODE *n) +static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_TUPLE *t) { - return BASE(end)(&root->root, n); + (void)root; + return t == NULL; } static inline size_t MAP(len)(struct MAP_ROOT *root) { - return BASE(len)(&root->root); + return root->len; } #undef MAP #undef MAP_NODE +#undef MAP_TUPLE +#undef MAP_BUCKET #undef MAP_ROOT #undef MAP_KEY #undef MAP_TYPE diff --git a/tests/map.c b/tests/map.c index e9362ab..08f831e 100644 --- a/tests/map.c +++ b/tests/map.c @@ -3,13 +3,16 @@ #define MAP_KEY int #define MAP_TYPE int +#define MAP_HASH(a) CONTS_MAP_NO_HASH(a) #define MAP_CMP(a, b) ((a) - (b)) #define MAP_NAME ints #include int main() { - struct ints ints = ints_create(); + /* heuristic, but if we know how many elements we'll need, we should + * give it to the create function. */ + struct ints ints = ints_create(0); for (int i = 0; i < 1000000; ++i) { ints_insert(&ints, i, i); } -- cgit v1.2.3