aboutsummaryrefslogtreecommitdiff
path: root/example_hashmap
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2025-08-30 00:01:15 +0300
committerKimplul <kimi.h.kuparinen@gmail.com>2025-08-30 00:01:15 +0300
commit20ce6fa81cd9f55dca3212699d5949f8ebe7d95b (patch)
treefdb89a8b290668ef61cc4747ac5236b817fdd686 /example_hashmap
parent9a31a10d18ff5de50b4532a7f461ec97c5880d11 (diff)
downloadngc-20ce6fa81cd9f55dca3212699d5949f8ebe7d95b.tar.gz
ngc-20ce6fa81cd9f55dca3212699d5949f8ebe7d95b.zip
add hashmap example
+ Doesn't quite work yet because <type> expansion isn't implemented yet
Diffstat (limited to 'example_hashmap')
-rw-r--r--example_hashmap/new/main.c62
-rw-r--r--example_hashmap/new/map.h209
-rw-r--r--example_hashmap/old/conts.h14
-rw-r--r--example_hashmap/old/main.c40
-rw-r--r--example_hashmap/old/map.h250
5 files changed, 575 insertions, 0 deletions
diff --git a/example_hashmap/new/main.c b/example_hashmap/new/main.c
new file mode 100644
index 0000000..4846ab5
--- /dev/null
+++ b/example_hashmap/new/main.c
@@ -0,0 +1,62 @@
+#include <assert.h>
+#include <stdio.h>
+
+#include "map.h"
+
+#define foreach(name, i, s) \
+ for (name##_iter i = name##_begin(s); !name##_end(s, i); i = name##_next(i))
+
+/* int wrapper, provides cmp/hash for a generic integer type
+ * (here a trait like is_integer would be ideal but oh well) */
+typedef intw[any I]() {
+ typedef I <>;
+
+ size_t <>_hash(<> *a)
+ {
+ /* identity hash */
+ return *a;
+ }
+
+ int <>_cmp(<> *a, <> *b)
+ {
+ return *a - *b;
+ }
+};
+
+/* create wrapper for ints. */
+typedef intw = intw[int]();
+
+/* create hashmap of ints, using the intw wrapper we just created. Kind of
+ * boilerplatey, admittedly. */
+typedef ints = map[intw, intw]();
+
+int main()
+{
+ /* heuristic, but if we know how many elements we'll need, we should
+ * give it to the create function. */
+ ints ints = ints_create(0);
+ for (int i = 0; i < 1000000; ++i) {
+ ints_insert(&ints, i, i);
+ }
+ assert(ints_len(&ints) == 1000000);
+
+ for (int i = 0; i < 1000000; ++i) {
+ int *v = ints_find(&ints, i);
+ assert(v && *v == i);
+ }
+
+ size_t count = 0;
+ foreach(ints, iter, &ints) {
+ assert(iter->key == iter->data);
+ count++;
+ }
+
+ assert(count == 1000000);
+
+ for (int i = 0; i < 1000000; ++i) {
+ ints_remove(&ints, i);
+ }
+
+ assert(ints_len(&ints) == 0);
+ ints_destroy(&ints);
+}
diff --git a/example_hashmap/new/map.h b/example_hashmap/new/map.h
new file mode 100644
index 0000000..e993139
--- /dev/null
+++ b/example_hashmap/new/map.h
@@ -0,0 +1,209 @@
+#ifndef MAP_H
+#define MAP_H
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#define NGC_BUCKET_IDX(hash, pow2, bucket) \
+ ((hash) & (((pow2) << (bucket)) - 1))
+
+#ifndef NGC_CONTAINER_OF
+#define NGC_CONTAINER_OF(ptr, type, member) \
+ (type *)((char *)(ptr) - offsetof(type, member))
+#endif
+
+/* trait would be something like is_hashable + is_comparable, but that's not yet
+ * supported so just go with this for now */
+typedef map[any K, any D]() {
+ struct <>_tuple {
+ K key;
+ D data;
+ };
+
+ typedef struct <>_tuple *<>_iter;
+
+ struct <>_node {
+ size_t hash;
+ struct <>_bucket *bucket;
+ struct <>_tuple t;
+ };
+
+ struct <>_bucket {
+ size_t size;
+ struct <>_bucket *next;
+ struct <>_node nodes[];
+ };
+
+ typedef struct <>_root {
+ size_t len;
+ size_t count;
+ size_t pow2;
+ struct <>_bucket **buckets;
+ } <>;
+
+ <> <>_create(size_t count)
+ {
+ size_t pow2 = 1;
+ while (pow2 < count)
+ pow2 <<= 1;
+
+ return (<>) {
+ .len = 0,
+ .count = 0,
+ .pow2 = pow2,
+ .buckets = NULL
+ };
+ }
+
+ void <>_destroy(<> *root)
+ {
+ for (size_t i = 0; i < root->count; ++i)
+ free(root->buckets[i]);
+
+ free(root->buckets);
+ }
+
+ D *<>_insert(<>* root, K key, D data)
+ {
+ size_t hash = <key>_hash(&key);
+ /* look through buckets in order */
+ for (size_t b = 0; b < root->count; ++b) {
+ struct <>_bucket *bucket = root->buckets[b];
+ size_t idx = NGC_BUCKET_IDX(hash, root->pow2, b);
+
+ struct <>_node *node = &bucket->nodes[idx];
+ /* free to use this slot */
+ if (!node->bucket) {
+ node->bucket = bucket;
+ node->hash = hash;
+ node->t.data = data;
+ node->t.key = key;
+ root->len++;
+ return &node->t.data;
+ }
+
+ /* there already exists a node like this */
+ if (node->hash == hash && <key>_cmp(&node->t.key, &key) == 0)
+ return &node->t.data;
+ }
+
+ /* no bucket available, create new one */
+ size_t size = root->pow2 << root->count;
+ size_t bytes = sizeof(struct <>_bucket) + sizeof(struct <>_node) * size;
+ struct <>_bucket *bucket = calloc(1, bytes);
+ if (!bucket)
+ return NULL;
+
+ bucket->size = size;
+
+ size_t buckets_bytes = sizeof(struct <>_bucket *) * (root->count + 1);
+ struct <>_bucket **new_buckets = realloc(root->buckets, buckets_bytes);
+ if (!new_buckets) {
+ free(bucket);
+ return NULL;
+ }
+
+ root->buckets = new_buckets;
+ if (root->count != 0)
+ root->buckets[root->count - 1]->next = bucket;
+
+ root->buckets[root->count] = bucket;
+
+ /* populate node */
+ size_t idx = NGC_BUCKET_IDX(hash, root->pow2, root->count);
+ struct <>_node *node = &bucket->nodes[idx];
+ node->bucket = bucket;
+ node->hash = hash;
+ node->t.key = key;
+ node->t.data = data;
+ root->count++;
+ root->len++;
+ return &node->t.data;
+ }
+
+ D *<>_find(<> *root, K key)
+ {
+ if (root->len == 0)
+ return NULL;
+
+ size_t hash = <key>_hash(&key);
+ for (size_t b = 0; b < root->count; ++b) {
+ struct <>_bucket *bucket = root->buckets[b];
+ size_t idx = NGC_BUCKET_IDX(hash, root->pow2, b);
+
+ struct <>_node *node = &bucket->nodes[idx];
+ if (node->hash != hash)
+ continue;
+
+ if (<key>_cmp(&node->t.key, key) != 0)
+ continue;
+
+ return &node->t.data;
+ }
+
+ return NULL;
+ }
+
+ void <>_remove_found(<> *root, D *data)
+ {
+ struct <>_tuple *tuple = NGC_CONTAINER_OF(data, struct <>_tuple, data);
+ struct <>_node *node = NGC_CONTAINER_OF(tuple, struct <>_node, t);
+ node->bucket = NULL;
+ root->len--;
+ }
+
+ void <>_remove(<> *root, K key)
+ {
+ D *found = <>_find(root, key);
+ if (!found)
+ return;
+
+ <>_remove_found(root, found);
+ }
+
+ struct <>_tuple *<>_find_next(struct <>_bucket *bucket, struct <>_tuple *tuple)
+ {
+ struct <>_node *node = NGC_CONTAINER_OF(tuple, struct <>_node, t);
+ size_t idx = node - bucket->nodes;
+ for (; idx < bucket->size; ++idx) {
+ struct <>_node *candidate = &bucket->nodes[idx];
+ if (candidate->bucket)
+ return &candidate->t;
+ }
+
+ struct <>_bucket *next = bucket->next;
+ if (!next)
+ return NULL;
+
+ return <>_find_next(next, &next->nodes[0].t);
+ }
+
+ struct <>_tuple *<>_begin(<> *root)
+ {
+ if (root->len == 0)
+ return NULL;
+
+ struct <>_bucket *bucket = root->buckets[0];
+ return <>_find_next(bucket, &bucket->nodes[0].t);
+ }
+
+ struct <>_tuple *<>_next(struct <>_tuple *t)
+ {
+ struct <>_node *node = NGC_CONTAINER_OF(t, struct <>_node, t);
+ return <>_find_next(node->bucket, &(node + 1)->t);
+ }
+
+ bool <>_end(<> *root, struct <>_tuple *t)
+ {
+ (void)root;
+ return t == NULL;
+ }
+
+ size_t <>_len(<> *root)
+ {
+ return root->len;
+ }
+};
+
+#endif /* MAP_H */
diff --git a/example_hashmap/old/conts.h b/example_hashmap/old/conts.h
new file mode 100644
index 0000000..fb0094b
--- /dev/null
+++ b/example_hashmap/old/conts.h
@@ -0,0 +1,14 @@
+#ifndef CONTS_H
+#define CONTS_H
+
+#define CONTS_JOIN2(a, b) a##_##b
+#define CONTS_JOIN(a, b) CONTS_JOIN2(a, b)
+
+#define CONTAINER_OF(ptr, type, member) \
+ (type *)((char *)(ptr) - offsetof(type, member))
+
+#define foreach(name, i, s)\
+ for (CONTS_JOIN(name, iter) i = CONTS_JOIN(name, begin)(s);\
+ !CONTS_JOIN(name, end)(s, i);\
+ i = CONTS_JOIN(name, next)(i))
+#endif /* CONTS_H */
diff --git a/example_hashmap/old/main.c b/example_hashmap/old/main.c
new file mode 100644
index 0000000..6c42787
--- /dev/null
+++ b/example_hashmap/old/main.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+#include <stdio.h>
+
+#define MAP_KEY int
+#define MAP_TYPE int
+#define MAP_HASH(a) CONTS_MAP_NO_HASH(a)
+#define MAP_CMP(a, b) ((a) - (b))
+#define MAP_NAME ints
+#include <conts/map.h>
+
+int main()
+{
+ /* heuristic, but if we know how many elements we'll need, we should
+ * give it to the create function. */
+ struct ints ints = ints_create(0);
+ for (int i = 0; i < 1000000; ++i) {
+ ints_insert(&ints, i, i);
+ }
+ assert(ints_len(&ints) == 1000000);
+
+ for (int i = 0; i < 1000000; ++i) {
+ int *v = ints_find(&ints, i);
+ assert(v && *v == i);
+ }
+
+ size_t count = 0;
+ foreach(ints, iter, &ints) {
+ assert(iter->key == iter->data);
+ count++;
+ }
+
+ assert(count == 1000000);
+
+ for (int i = 0; i < 1000000; ++i) {
+ ints_remove(&ints, i);
+ }
+
+ assert(ints_len(&ints) == 0);
+ ints_destroy(&ints);
+}
diff --git a/example_hashmap/old/map.h b/example_hashmap/old/map.h
new file mode 100644
index 0000000..db1e386
--- /dev/null
+++ b/example_hashmap/old/map.h
@@ -0,0 +1,250 @@
+#ifndef MAP_KEY
+#error "Need map key"
+#endif
+
+#ifndef MAP_TYPE
+#error "Need map type"
+#endif
+
+#ifndef MAP_CMP
+#error "Need map cmp"
+#endif
+
+#ifndef MAP_HASH
+#error "Need map hash"
+#endif
+
+#ifndef MAP_NAME
+#error "Need map name"
+#endif
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "conts.h"
+
+#define MAP(a) CONTS_JOIN(MAP_NAME, a)
+
+#define MAP_TUPLE MAP(tuple)
+#define MAP_NODE MAP(node)
+#define MAP_BUCKET MAP(bucket)
+#define MAP_ROOT MAP_NAME
+
+#ifndef CONTS_MAP_H
+#define CONTS_MAP_H
+
+static inline size_t conts_map_generic_hash(const char *s, size_t l)
+{
+ /* djb2 */
+ size_t hash = 5381;
+ for (size_t i = 0; i < l; ++i)
+ hash = ((hash << 5) + hash) + s[i];
+
+ return hash;
+}
+
+#define CONTS_MAP_NO_HASH(a) (a)
+#define CONTS_MAP_STR_HASH(a) conts_map_generic_hash(a, strlen(a))
+
+/* fast modulo pow2 */
+#define CONTS_BUCKET_IDX(hash, pow2, bucket) \
+ ((hash) & (((pow2) << bucket) - 1))
+
+#endif /* CONTS_MAP_H */
+
+static inline size_t MAP(generic_hash)(MAP_KEY *key)
+{
+ return conts_map_generic_hash((const char *)key, sizeof(*key));
+}
+
+struct MAP_TUPLE {
+ MAP_KEY key;
+ MAP_TYPE data;
+};
+
+typedef struct MAP_TUPLE *MAP(iter);
+
+struct MAP_NODE {
+ struct MAP_BUCKET *bucket;
+ size_t hash;
+ struct MAP_TUPLE t;
+};
+
+struct MAP_BUCKET {
+ size_t size; /* bucket size */
+ struct MAP_BUCKET *next;
+ struct MAP_NODE nodes[];
+};
+
+struct MAP_ROOT {
+ size_t len; /* number of items */
+ size_t count; /* bucket count */
+ size_t pow2; /* how many nodes in smallest bucket */
+ struct MAP_BUCKET **buckets;
+};
+
+static inline struct MAP_ROOT MAP(create)(size_t count)
+{
+ size_t pow2 = 1;
+ while (pow2 < count)
+ pow2 <<= 1;
+
+ return (struct MAP_ROOT){
+ .len = 0,
+ .count = 0,
+ .pow2 = pow2,
+ .buckets = NULL
+ };
+}
+
+static inline void MAP(destroy)(struct MAP_ROOT *root)
+{
+ for (size_t i = 0; i < root->count; ++i)
+ free(root->buckets[i]);
+
+ free(root->buckets);
+}
+
+static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data)
+{
+ size_t hash = MAP_HASH(key);
+ /* look through buckets in order */
+ for (size_t b = 0; b < root->count; ++b) {
+ struct MAP_BUCKET *bucket = root->buckets[b];
+ size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b);
+
+ struct MAP_NODE *node = &bucket->nodes[idx];
+ /* free to use this slot */
+ if (!node->bucket) {
+ node->bucket = bucket;
+ node->hash = hash;
+ node->t.data = data;
+ node->t.key = key;
+ root->len++;
+ return &node->t.data;
+ }
+
+ /* there already exists a node like this */
+ if (node->hash == hash && MAP_CMP(node->t.key, key) == 0)
+ return &node->t.data;
+ }
+
+ /* no bucket available, create new one */
+ size_t size = root->pow2 << root->count;
+ size_t bytes = sizeof(struct MAP_BUCKET) + sizeof(struct MAP_NODE) * size;
+ struct MAP_BUCKET *bucket = calloc(1, bytes);
+ bucket->size = size;
+
+ size_t buckets_bytes = sizeof(struct MAP_BUCKET *) * (root->count + 1);
+ root->buckets = realloc(root->buckets, buckets_bytes);
+ if (root->count != 0)
+ root->buckets[root->count - 1]->next = bucket;
+
+ root->buckets[root->count] = bucket;
+
+ /* populate node */
+ size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, root->count);
+ struct MAP_NODE *node = &bucket->nodes[idx];
+ node->bucket = bucket;
+ node->hash = hash;
+ node->t.key = key;
+ node->t.data = data;
+ root->count++;
+ root->len++;
+ return &node->t.data;
+}
+
+static inline MAP_TYPE *MAP(find)(struct MAP_ROOT *root, MAP_KEY key)
+{
+ if (root->len == 0)
+ return NULL;
+
+ size_t hash = MAP_HASH(key);
+ for (size_t b = 0; b < root->count; ++b) {
+ struct MAP_BUCKET *bucket = root->buckets[b];
+ size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b);
+
+ struct MAP_NODE *node = &bucket->nodes[idx];
+ if (node->hash != hash)
+ continue;
+
+ if (MAP_CMP(node->t.key, key) != 0)
+ continue;
+
+ return &node->t.data;
+ }
+
+ return NULL;
+}
+
+static inline void MAP(remove_found)(struct MAP_ROOT *root, MAP_TYPE *data)
+{
+ struct MAP_TUPLE *tuple = CONTAINER_OF(data, struct MAP_TUPLE, data);
+ struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t);
+ node->bucket = NULL;
+ root->len--;
+}
+
+static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key)
+{
+ MAP_TYPE *found = MAP(find)(root, key);
+ if (!found)
+ return;
+
+ MAP(remove_found)(root, found);
+}
+
+static inline struct MAP_TUPLE *MAP(find_next)(struct MAP_BUCKET *bucket, struct MAP_TUPLE *tuple)
+{
+ struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t);
+ size_t idx = node - bucket->nodes;
+ for (; idx < bucket->size; ++idx) {
+ struct MAP_NODE *candidate = &bucket->nodes[idx];
+ if (candidate->bucket)
+ return &candidate->t;
+ }
+
+ struct MAP_BUCKET *next = bucket->next;
+ if (!next)
+ return NULL;
+
+ return MAP(find_next)(next, &next->nodes[0].t);
+}
+
+static inline struct MAP_TUPLE *MAP(begin)(struct MAP_ROOT *root)
+{
+ if (root->len == 0)
+ return NULL;
+
+ struct MAP_BUCKET *bucket = root->buckets[0];
+ return MAP(find_next)(bucket, &bucket->nodes[0].t);
+}
+
+static inline struct MAP_TUPLE *MAP(next)(struct MAP_TUPLE *t)
+{
+ struct MAP_NODE *node = CONTAINER_OF(t, struct MAP_NODE, t);
+ return MAP(find_next)(node->bucket, &(node + 1)->t);
+}
+
+static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_TUPLE *t)
+{
+ (void)root;
+ return t == NULL;
+}
+
+static inline size_t MAP(len)(struct MAP_ROOT *root)
+{
+ return root->len;
+}
+
+#undef MAP
+#undef MAP_NODE
+#undef MAP_TUPLE
+#undef MAP_BUCKET
+#undef MAP_ROOT
+#undef MAP_KEY
+#undef MAP_TYPE
+#undef MAP_CMP
+#undef MAP_HASH
+#undef MAP_NAME