summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2025-10-16 17:29:07 +0300
committerKimplul <kimi.h.kuparinen@gmail.com>2025-10-16 17:29:07 +0300
commit3f85d1168a9cfa10e7592ebf8870cc658ea70879 (patch)
treeb8e3518106b479816a3127f5e769d0fd7be20f7f /include
parenta2944822ba192e6bc00fdc591284ce194847c731 (diff)
downloadconts-3f85d1168a9cfa10e7592ebf8870cc658ea70879.tar.gz
conts-3f85d1168a9cfa10e7592ebf8870cc658ea70879.zip
reimplement maps to use new spvecs
+ Performance is roughly the same as before, but memory usage is now *way* down, and shouldn't explode out of control if a bas hash is used.
Diffstat (limited to 'include')
-rw-r--r--include/conts/map.h273
1 files changed, 138 insertions, 135 deletions
diff --git a/include/conts/map.h b/include/conts/map.h
index 9824b2a..2060658 100644
--- a/include/conts/map.h
+++ b/include/conts/map.h
@@ -45,14 +45,12 @@
#define CONTS_MAP_H
#include <stddef.h>
+#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include "conts.h"
-/* how many times to retry placing an element in a bucket */
-#define CONTS_MAP_PACKING 4
-
static inline size_t conts_map_generic_hash(const char *s, size_t l)
{
/* djb2 */
@@ -66,9 +64,11 @@ static inline size_t conts_map_generic_hash(const char *s, size_t l)
#define CONTS_MAP_NO_HASH(a) (a)
#define CONTS_MAP_STR_HASH(a) conts_map_generic_hash(a, strlen(a))
-/* fast modulo pow2 */
-#define CONTS_BUCKET_IDX(hash, pow2, bucket) \
- ((hash) & (((pow2) << bucket) - 1))
+enum conts_map_state {
+ CONTS_MAP_FREE = 0,
+ CONTS_MAP_USED = 1,
+ CONTS_MAP_GRAVE = 2
+};
#endif /* CONTS_MAP_H */
@@ -82,142 +82,140 @@ struct MAP_TUPLE {
MAP_TYPE data;
};
-typedef struct MAP_TUPLE *MAP(iter);
+#define MAP_SPVEC(x) CONTS_JOIN(MAP(spvec), x)
+#define MAP_ITER MAP(iter)
+
+typedef struct {
+ size_t i;
+ struct MAP_TUPLE *t;
+} MAP_ITER;
struct MAP_NODE {
- struct MAP_BUCKET *bucket;
- size_t hash;
+ uint32_t used : 2;
+ uint32_t hash : 30;
struct MAP_TUPLE t;
};
-struct MAP_BUCKET {
- size_t size; /* bucket size */
- struct MAP_BUCKET *next;
- struct MAP_NODE nodes[];
-};
+/* use spvec as internal buffer representation */
+#define SPVEC_TYPE struct MAP_NODE
+#define SPVEC_NAME MAP(spvec)
+
+#define SPVEC_MALLOC MAP_MALLOC
+#define SPVEC_CALLOC MAP_CALLOC
+#define SPVEC_REALLOC MAP_REALLOC
+#define SPVEC_FREE MAP_FREE
+
+#include "spvec.h"
struct MAP_ROOT {
- size_t len; /* number of items */
- size_t count; /* bucket count */
- size_t pow2; /* how many nodes in smallest bucket */
- struct MAP_BUCKET **buckets;
+ size_t n; /* number of items */
+ struct MAP(spvec) buf;
};
static inline struct MAP_ROOT MAP(create)(size_t count)
{
- size_t pow2 = 1;
- while (pow2 < count)
- pow2 <<= 1;
-
+ /* ignore count for now */
+ (void)count;
return (struct MAP_ROOT){
- .len = 0,
- .count = 0,
- .pow2 = pow2,
- .buckets = NULL
+ .n = 0,
+ .buf = MAP_SPVEC(create)()
};
}
static inline void MAP(destroy)(struct MAP_ROOT *root)
{
- for (size_t i = 0; i < root->count; ++i)
- MAP_FREE(root->buckets[i]);
-
- MAP_FREE(root->buckets);
+ MAP_SPVEC(destroy)(&root->buf);
}
-static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data)
+static inline MAP_TYPE *MAP(find_hash)(struct MAP_ROOT *root, MAP_KEY key, uint32_t hash)
{
- size_t hash = MAP_HASH(key);
- /* look through buckets in order */
- for (size_t b = 0; b < root->count; ++b) {
- struct MAP_BUCKET *bucket = root->buckets[b];
- size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b);
-
- for (size_t p = 0; p < CONTS_MAP_PACKING; ++p) {
- size_t n = idx + p;
- if (n >= bucket->size)
- break;
- struct MAP_NODE *node = &bucket->nodes[n];
- /* free to use this slot */
- if (!node->bucket) {
- node->bucket = bucket;
- node->hash = hash;
- node->t.data = data;
- node->t.key = key;
- root->len++;
- return &node->t.data;
- }
-
- /* there already exists a node like this */
- if (node->hash == hash && MAP_CMP(node->t.key, key) == 0)
- return &node->t.data;
- }
- }
+ for (size_t s = MAP_SPVEC(len)(&root->buf); s >= conts_spvec_size(0); s /= 2) {
+ bool base = s == conts_spvec_size(0);
+ size_t range = base ? s : s / 2;
+ size_t offst = base ? 0 : s / 2;
- /* no bucket available, create new one */
- size_t size = root->pow2 << root->count;
- size_t bytes = sizeof(struct MAP_BUCKET) + sizeof(struct MAP_NODE) * size;
- struct MAP_BUCKET *bucket = MAP_CALLOC(1, bytes);
- if (!bucket)
- return NULL;
+ /* linear probing for now */
+ for (size_t i = 0; i < range; ++i) {
+ assert(__builtin_popcountll(range) == 1);
+ size_t idx = (hash + i) & (range - 1);
- bucket->size = size;
+ struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, idx + offst);
- size_t buckets_bytes = sizeof(struct MAP_BUCKET *) * (root->count + 1);
- struct MAP_BUCKET **new_buckets = MAP_REALLOC(root->buckets, buckets_bytes);
- if (!new_buckets) {
- free(bucket);
- return NULL;
+ /* unused node means chain must not exist in this range */
+ if (n->used == CONTS_MAP_FREE)
+ break;
+
+ /* not the node we're looking for but we should
+ * keep following the chain*/
+ if (n->hash != hash)
+ continue;
+
+ /* found a match */
+ if (MAP_CMP(n->t.key, key) == 0)
+ return &n->t.data;
+ }
}
- root->buckets = new_buckets;
- if (root->count != 0)
- root->buckets[root->count - 1]->next = bucket;
-
- root->buckets[root->count] = bucket;
-
- /* populate node */
- size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, root->count);
- struct MAP_NODE *node = &bucket->nodes[idx];
- node->bucket = bucket;
- node->hash = hash;
- node->t.key = key;
- node->t.data = data;
- root->count++;
- root->len++;
- return &node->t.data;
+ return NULL;
}
static inline MAP_TYPE *MAP(find)(struct MAP_ROOT *root, MAP_KEY key)
{
- if (root->len == 0)
- return NULL;
-
- size_t hash = MAP_HASH(key);
- for (size_t b = 0; b < root->count; ++b) {
- struct MAP_BUCKET *bucket = root->buckets[b];
- size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b);
-
- for (size_t p = 0; p < CONTS_MAP_PACKING; ++p) {
- size_t n = idx + p;
- if (n >= bucket->size)
- break;
-
- struct MAP_NODE *node = &bucket->nodes[n];
+ uint32_t hash = MAP_HASH(key) & 0x3fffffff;
+ return MAP(find_hash)(root, key, hash);
+}
- /* note no gravestones */
+static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data)
+{
+ /* mask top bit away to get 31 bit hash value as used by the nodes */
+ uint32_t hash = MAP_HASH(key) & 0x3fffffff;
+ MAP_TYPE *exists = MAP(find_hash)(root, key, hash);
+ if (exists)
+ return exists;
+
+ /* initialize */
+ if (root->n == 0) {
+ if (!MAP_SPVEC(reserve)(&root->buf, conts_spvec_size(0)))
+ return NULL;
+ }
- if (node->hash != hash)
- continue;
+ /* grow as needed */
+ size_t s = MAP_SPVEC(len)(&root->buf);
+ if (2 * root->n >= s) {
+ s *= 2;
+ if (!MAP_SPVEC(reserve)(&root->buf, s))
+ return NULL;
- if (MAP_CMP(node->t.key, key) != 0)
- continue;
+ /* reserve should zero out all memory so no need to initialize
+ * any elements */
+ }
- return &node->t.data;
- }
+ /* there must always be at least one free spot */
+ root->n++;
+ bool base = s == conts_spvec_size(0);
+ size_t range = base ? s : s / 2;
+ size_t offst = base ? 0 : s / 2;
+
+ /* linear probing for now */
+ for (size_t i = 0; i < range; ++i) {
+ assert(__builtin_popcountll(range) == 1);
+ size_t idx = (hash + i) & (range - 1);
+
+ struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, idx + offst);
+ if (n->used == CONTS_MAP_USED)
+ continue;
+
+ /* grave or free, either way we're taking it */
+ n->used = CONTS_MAP_USED;
+ n->hash = hash;
+ n->t.key = key;
+ n->t.data = data;
+ return &n->t.data;
}
+ /** @todo use __builtin_unreachable or C attributes? */
+ assert(false && "should be unreachable");
return NULL;
}
@@ -225,8 +223,8 @@ static inline void MAP(remove_found)(struct MAP_ROOT *root, MAP_TYPE *data)
{
struct MAP_TUPLE *tuple = CONTAINER_OF(data, struct MAP_TUPLE, data);
struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t);
- node->bucket = NULL;
- root->len--;
+ node->used = CONTS_MAP_GRAVE;
+ root->n--;
}
static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key)
@@ -238,48 +236,52 @@ static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key)
MAP(remove_found)(root, found);
}
-static inline struct MAP_TUPLE *MAP(find_next)(struct MAP_BUCKET *bucket, struct MAP_TUPLE *tuple)
+static inline MAP_ITER MAP(next)(struct MAP_ROOT *root, MAP_ITER t)
{
- struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t);
- size_t idx = node - bucket->nodes;
- for (; idx < bucket->size; ++idx) {
- struct MAP_NODE *candidate = &bucket->nodes[idx];
- if (candidate->bucket)
- return &candidate->t;
+ for (size_t i = t.i + 1; i < MAP_SPVEC(len)(&root->buf); ++i) {
+ struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, i);
+ if (n->used == CONTS_MAP_USED) {
+ return (MAP_ITER){
+ .i = i,
+ .t = &n->t
+ };
+ }
}
- struct MAP_BUCKET *next = bucket->next;
- if (!next)
- return NULL;
-
- return MAP(find_next)(next, &next->nodes[0].t);
+ /* no more elements to search through */
+ return (MAP_ITER){
+ .i = MAP_SPVEC(len)(&root->buf),
+ .t = NULL
+ };
}
-static inline struct MAP_TUPLE *MAP(begin)(struct MAP_ROOT *root)
+static inline MAP_ITER MAP(begin)(struct MAP_ROOT *root)
{
- if (root->len == 0)
- return NULL;
+ if (root->n == 0) {
+ return (MAP_ITER){
+ .i = MAP_SPVEC(len)(&root->buf),
+ .t = NULL
+ };
+ }
- struct MAP_BUCKET *bucket = root->buckets[0];
- return MAP(find_next)(bucket, &bucket->nodes[0].t);
-}
+ struct MAP_NODE *n = MAP_SPVEC(at)(&root->buf, 0);
+ assert(n);
-static inline struct MAP_TUPLE *MAP(next)(struct MAP_ROOT *root, struct MAP_TUPLE *t)
-{
- (void)root;
- struct MAP_NODE *node = CONTAINER_OF(t, struct MAP_NODE, t);
- return MAP(find_next)(node->bucket, &(node + 1)->t);
+ MAP_ITER t = {.i = 0, .t = &n->t};
+ if (n->used == CONTS_MAP_USED)
+ return t;
+
+ return MAP(next)(root, t);
}
-static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_TUPLE *t)
+static inline bool MAP(end)(struct MAP_ROOT *root, MAP_ITER t)
{
- (void)root;
- return t == NULL;
+ return t.i >= MAP_SPVEC(len)(&root->buf);
}
static inline size_t MAP(len)(struct MAP_ROOT *root)
{
- return root->len;
+ return root->n;
}
#undef MAP
@@ -292,6 +294,7 @@ static inline size_t MAP(len)(struct MAP_ROOT *root)
#undef MAP_CMP
#undef MAP_HASH
#undef MAP_NAME
+#undef MAP_ITER
#undef MAP_MALLOC
#undef MAP_CALLOC