diff options
| author | Kimplul <kimi.h.kuparinen@gmail.com> | 2025-10-16 17:29:07 +0300 |
|---|---|---|
| committer | Kimplul <kimi.h.kuparinen@gmail.com> | 2025-10-16 17:29:07 +0300 |
| commit | 3f85d1168a9cfa10e7592ebf8870cc658ea70879 (patch) | |
| tree | b8e3518106b479816a3127f5e769d0fd7be20f7f /include | |
| parent | a2944822ba192e6bc00fdc591284ce194847c731 (diff) | |
| download | conts-3f85d1168a9cfa10e7592ebf8870cc658ea70879.tar.gz conts-3f85d1168a9cfa10e7592ebf8870cc658ea70879.zip | |
reimplement maps to use new spvecs
+ Performance is roughly the same as before, but memory usage is now
*way* down, and shouldn't explode out of control if a bas hash is
used.
Diffstat (limited to 'include')
| -rw-r--r-- | include/conts/map.h | 273 |
1 files changed, 138 insertions, 135 deletions
diff --git a/include/conts/map.h b/include/conts/map.h index 9824b2a..2060658 100644 --- a/include/conts/map.h +++ b/include/conts/map.h @@ -45,14 +45,12 @@ #define CONTS_MAP_H #include <stddef.h> +#include <stdint.h> #include <stdlib.h> #include <stdbool.h> #include "conts.h" -/* how many times to retry placing an element in a bucket */ -#define CONTS_MAP_PACKING 4 - static inline size_t conts_map_generic_hash(const char *s, size_t l) { /* djb2 */ @@ -66,9 +64,11 @@ static inline size_t conts_map_generic_hash(const char *s, size_t l) #define CONTS_MAP_NO_HASH(a) (a) #define CONTS_MAP_STR_HASH(a) conts_map_generic_hash(a, strlen(a)) -/* fast modulo pow2 */ -#define CONTS_BUCKET_IDX(hash, pow2, bucket) \ - ((hash) & (((pow2) << bucket) - 1)) +enum conts_map_state { + CONTS_MAP_FREE = 0, + CONTS_MAP_USED = 1, + CONTS_MAP_GRAVE = 2 +}; #endif /* CONTS_MAP_H */ @@ -82,142 +82,140 @@ struct MAP_TUPLE { MAP_TYPE data; }; -typedef struct MAP_TUPLE *MAP(iter); +#define MAP_SPVEC(x) CONTS_JOIN(MAP(spvec), x) +#define MAP_ITER MAP(iter) + +typedef struct { + size_t i; + struct MAP_TUPLE *t; +} MAP_ITER; struct MAP_NODE { - struct MAP_BUCKET *bucket; - size_t hash; + uint32_t used : 2; + uint32_t hash : 30; struct MAP_TUPLE t; }; -struct MAP_BUCKET { - size_t size; /* bucket size */ - struct MAP_BUCKET *next; - struct MAP_NODE nodes[]; -}; +/* use spvec as internal buffer representation */ +#define SPVEC_TYPE struct MAP_NODE +#define SPVEC_NAME MAP(spvec) + +#define SPVEC_MALLOC MAP_MALLOC +#define SPVEC_CALLOC MAP_CALLOC +#define SPVEC_REALLOC MAP_REALLOC +#define SPVEC_FREE MAP_FREE + +#include "spvec.h" struct MAP_ROOT { - size_t len; /* number of items */ - size_t count; /* bucket count */ - size_t pow2; /* how many nodes in smallest bucket */ - struct MAP_BUCKET **buckets; + size_t n; /* number of items */ + struct MAP(spvec) buf; }; static inline struct MAP_ROOT MAP(create)(size_t count) { - size_t pow2 = 1; - while (pow2 < count) - pow2 <<= 1; - + /* ignore count for now */ + (void)count; return (struct MAP_ROOT){ - .len = 0, - .count = 0, - .pow2 = pow2, - .buckets = NULL + .n = 0, + .buf = MAP_SPVEC(create)() }; } static inline void MAP(destroy)(struct MAP_ROOT *root) { - for (size_t i = 0; i < root->count; ++i) - MAP_FREE(root->buckets[i]); - - MAP_FREE(root->buckets); + MAP_SPVEC(destroy)(&root->buf); } -static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data) +static inline MAP_TYPE *MAP(find_hash)(struct MAP_ROOT *root, MAP_KEY key, uint32_t hash) { - size_t hash = MAP_HASH(key); - /* look through buckets in order */ - for (size_t b = 0; b < root->count; ++b) { - struct MAP_BUCKET *bucket = root->buckets[b]; - size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b); - - for (size_t p = 0; p < CONTS_MAP_PACKING; ++p) { - size_t n = idx + p; - if (n >= bucket->size) - break; - struct MAP_NODE *node = &bucket->nodes[n]; - /* free to use this slot */ - if (!node->bucket) { - node->bucket = bucket; - node->hash = hash; - node->t.data = data; - node->t.key = key; - root->len++; - return &node->t.data; - } - - /* there already exists a node like this */ - if (node->hash == hash && MAP_CMP(node->t.key, key) == 0) - return &node->t.data; - } - } + for (size_t s = MAP_SPVEC(len)(&root->buf); s >= conts_spvec_size(0); s /= 2) { + bool base = s == conts_spvec_size(0); + size_t range = base ? s : s / 2; + size_t offst = base ? 0 : s / 2; - /* no bucket available, create new one */ - size_t size = root->pow2 << root->count; - size_t bytes = sizeof(struct MAP_BUCKET) + sizeof(struct MAP_NODE) * size; - struct MAP_BUCKET *bucket = MAP_CALLOC(1, bytes); - if (!bucket) - return NULL; + /* linear probing for now */ + for (size_t i = 0; i < range; ++i) { + assert(__builtin_popcountll(range) == 1); + size_t idx = (hash + i) & (range - 1); - bucket->size = size; + struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, idx + offst); - size_t buckets_bytes = sizeof(struct MAP_BUCKET *) * (root->count + 1); - struct MAP_BUCKET **new_buckets = MAP_REALLOC(root->buckets, buckets_bytes); - if (!new_buckets) { - free(bucket); - return NULL; + /* unused node means chain must not exist in this range */ + if (n->used == CONTS_MAP_FREE) + break; + + /* not the node we're looking for but we should + * keep following the chain*/ + if (n->hash != hash) + continue; + + /* found a match */ + if (MAP_CMP(n->t.key, key) == 0) + return &n->t.data; + } } - root->buckets = new_buckets; - if (root->count != 0) - root->buckets[root->count - 1]->next = bucket; - - root->buckets[root->count] = bucket; - - /* populate node */ - size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, root->count); - struct MAP_NODE *node = &bucket->nodes[idx]; - node->bucket = bucket; - node->hash = hash; - node->t.key = key; - node->t.data = data; - root->count++; - root->len++; - return &node->t.data; + return NULL; } static inline MAP_TYPE *MAP(find)(struct MAP_ROOT *root, MAP_KEY key) { - if (root->len == 0) - return NULL; - - size_t hash = MAP_HASH(key); - for (size_t b = 0; b < root->count; ++b) { - struct MAP_BUCKET *bucket = root->buckets[b]; - size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b); - - for (size_t p = 0; p < CONTS_MAP_PACKING; ++p) { - size_t n = idx + p; - if (n >= bucket->size) - break; - - struct MAP_NODE *node = &bucket->nodes[n]; + uint32_t hash = MAP_HASH(key) & 0x3fffffff; + return MAP(find_hash)(root, key, hash); +} - /* note no gravestones */ +static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data) +{ + /* mask top bit away to get 31 bit hash value as used by the nodes */ + uint32_t hash = MAP_HASH(key) & 0x3fffffff; + MAP_TYPE *exists = MAP(find_hash)(root, key, hash); + if (exists) + return exists; + + /* initialize */ + if (root->n == 0) { + if (!MAP_SPVEC(reserve)(&root->buf, conts_spvec_size(0))) + return NULL; + } - if (node->hash != hash) - continue; + /* grow as needed */ + size_t s = MAP_SPVEC(len)(&root->buf); + if (2 * root->n >= s) { + s *= 2; + if (!MAP_SPVEC(reserve)(&root->buf, s)) + return NULL; - if (MAP_CMP(node->t.key, key) != 0) - continue; + /* reserve should zero out all memory so no need to initialize + * any elements */ + } - return &node->t.data; - } + /* there must always be at least one free spot */ + root->n++; + bool base = s == conts_spvec_size(0); + size_t range = base ? s : s / 2; + size_t offst = base ? 0 : s / 2; + + /* linear probing for now */ + for (size_t i = 0; i < range; ++i) { + assert(__builtin_popcountll(range) == 1); + size_t idx = (hash + i) & (range - 1); + + struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, idx + offst); + if (n->used == CONTS_MAP_USED) + continue; + + /* grave or free, either way we're taking it */ + n->used = CONTS_MAP_USED; + n->hash = hash; + n->t.key = key; + n->t.data = data; + return &n->t.data; } + /** @todo use __builtin_unreachable or C attributes? */ + assert(false && "should be unreachable"); return NULL; } @@ -225,8 +223,8 @@ static inline void MAP(remove_found)(struct MAP_ROOT *root, MAP_TYPE *data) { struct MAP_TUPLE *tuple = CONTAINER_OF(data, struct MAP_TUPLE, data); struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t); - node->bucket = NULL; - root->len--; + node->used = CONTS_MAP_GRAVE; + root->n--; } static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key) @@ -238,48 +236,52 @@ static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key) MAP(remove_found)(root, found); } -static inline struct MAP_TUPLE *MAP(find_next)(struct MAP_BUCKET *bucket, struct MAP_TUPLE *tuple) +static inline MAP_ITER MAP(next)(struct MAP_ROOT *root, MAP_ITER t) { - struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t); - size_t idx = node - bucket->nodes; - for (; idx < bucket->size; ++idx) { - struct MAP_NODE *candidate = &bucket->nodes[idx]; - if (candidate->bucket) - return &candidate->t; + for (size_t i = t.i + 1; i < MAP_SPVEC(len)(&root->buf); ++i) { + struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, i); + if (n->used == CONTS_MAP_USED) { + return (MAP_ITER){ + .i = i, + .t = &n->t + }; + } } - struct MAP_BUCKET *next = bucket->next; - if (!next) - return NULL; - - return MAP(find_next)(next, &next->nodes[0].t); + /* no more elements to search through */ + return (MAP_ITER){ + .i = MAP_SPVEC(len)(&root->buf), + .t = NULL + }; } -static inline struct MAP_TUPLE *MAP(begin)(struct MAP_ROOT *root) +static inline MAP_ITER MAP(begin)(struct MAP_ROOT *root) { - if (root->len == 0) - return NULL; + if (root->n == 0) { + return (MAP_ITER){ + .i = MAP_SPVEC(len)(&root->buf), + .t = NULL + }; + } - struct MAP_BUCKET *bucket = root->buckets[0]; - return MAP(find_next)(bucket, &bucket->nodes[0].t); -} + struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, 0); + assert(n); -static inline struct MAP_TUPLE *MAP(next)(struct MAP_ROOT *root, struct MAP_TUPLE *t) -{ - (void)root; - struct MAP_NODE *node = CONTAINER_OF(t, struct MAP_NODE, t); - return MAP(find_next)(node->bucket, &(node + 1)->t); + MAP_ITER t = {.i = 0, .t = &n->t}; + if (n->used == CONTS_MAP_USED) + return t; + + return MAP(next)(root, t); } -static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_TUPLE *t) +static inline bool MAP(end)(struct MAP_ROOT *root, MAP_ITER t) { - (void)root; - return t == NULL; + return t.i >= MAP_SPVEC(len)(&root->buf); } static inline size_t MAP(len)(struct MAP_ROOT *root) { - return root->len; + return root->n; } #undef MAP @@ -292,6 +294,7 @@ static inline size_t MAP(len)(struct MAP_ROOT *root) #undef MAP_CMP #undef MAP_HASH #undef MAP_NAME +#undef MAP_ITER #undef MAP_MALLOC #undef MAP_CALLOC |
