add hashmap example

+ Doesn't quite work yet because <type> expansion isn't implemented yet
author: Kimplul <kimi.h.kuparinen@gmail.com> 2025-08-30 00:01:15 +0300
committer: Kimplul <kimi.h.kuparinen@gmail.com> 2025-08-30 00:01:15 +0300
commit: 20ce6fa81cd9f55dca3212699d5949f8ebe7d95b (patch)
tree: fdb89a8b290668ef61cc4747ac5236b817fdd686 /example_hashmap
parent: 9a31a10d18ff5de50b4532a7f461ec97c5880d11 (diff)
download: ngc-20ce6fa81cd9f55dca3212699d5949f8ebe7d95b.tar.gz
ngc-20ce6fa81cd9f55dca3212699d5949f8ebe7d95b.zip
5 files changed, 575 insertions, 0 deletions
diff --git a/example_hashmap/new/main.c b/example_hashmap/new/main.c
new file mode 100644
index 0000000..4846ab5
--- /dev/null
+++ b/example_hashmap/new/main.c
@@ -0,0 +1,62 @@
+#include <assert.h>
+#include <stdio.h>
+
+#include "map.h"
+
+#define foreach(name, i, s) \
+	for (name##_iter i = name##_begin(s); !name##_end(s, i); i = name##_next(i))
+
+/* int wrapper, provides cmp/hash for a generic integer type
+ * (here a trait like is_integer would be ideal but oh well) */
+typedef intw[any I]() {
+	typedef I <>;
+
+	size_t <>_hash(<> *a)
+	{
+		/* identity hash */
+		return *a;
+	}
+
+	int <>_cmp(<> *a, <> *b)
+	{
+		return *a - *b;
+	}
+};
+
+/* create wrapper for ints. */
+typedef intw = intw[int]();
+
+/* create hashmap of ints, using the intw wrapper we just created. Kind of
+ * boilerplatey, admittedly. */
+typedef ints = map[intw, intw]();
+
+int main()
+{
+	/* heuristic, but if we know how many elements we'll need, we should
+	 * give it to the create function. */
+	ints ints = ints_create(0);
+	for (int i = 0; i < 1000000; ++i) {
+		ints_insert(&ints, i, i);
+	}
+	assert(ints_len(&ints) == 1000000);
+
+	for (int i = 0; i < 1000000; ++i) {
+		int *v = ints_find(&ints, i);
+		assert(v && *v == i);
+	}
+
+	size_t count = 0;
+	foreach(ints, iter, &ints) {
+		assert(iter->key == iter->data);
+		count++;
+	}
+
+	assert(count == 1000000);
+
+	for (int i = 0; i < 1000000; ++i) {
+		ints_remove(&ints, i);
+	}
+
+	assert(ints_len(&ints) == 0);
+	ints_destroy(&ints);
+}
diff --git a/example_hashmap/new/map.h b/example_hashmap/new/map.h
new file mode 100644
index 0000000..e993139
--- /dev/null
+++ b/example_hashmap/new/map.h
@@ -0,0 +1,209 @@
+#ifndef MAP_H
+#define MAP_H
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#define NGC_BUCKET_IDX(hash, pow2, bucket) \
+	((hash) & (((pow2) << (bucket)) - 1))
+
+#ifndef NGC_CONTAINER_OF
+#define NGC_CONTAINER_OF(ptr, type, member) \
+	(type *)((char *)(ptr) - offsetof(type, member))
+#endif
+
+/* trait would be something like is_hashable + is_comparable, but that's not yet
+ * supported so just go with this for now */
+typedef map[any K, any D]() {
+	struct <>_tuple {
+		K key;
+		D data;
+	};
+
+	typedef struct <>_tuple *<>_iter;
+
+	struct <>_node {
+		size_t hash;
+		struct <>_bucket *bucket;
+		struct <>_tuple t;
+	};
+
+	struct <>_bucket {
+		size_t size;
+		struct <>_bucket *next;
+		struct <>_node nodes[];
+	};
+
+	typedef struct <>_root {
+		size_t len;
+		size_t count;
+		size_t pow2;
+		struct <>_bucket **buckets;
+	} <>;
+
+	<> <>_create(size_t count)
+	{
+		size_t pow2 = 1;
+		while (pow2 < count)
+			pow2 <<= 1;
+
+		return (<>) {
+			.len = 0,
+			.count = 0,
+			.pow2 = pow2,
+			.buckets = NULL
+		};
+	}
+
+	void <>_destroy(<> *root)
+	{
+		for (size_t i = 0; i < root->count; ++i)
+			free(root->buckets[i]);
+
+		free(root->buckets);
+	}
+
+	D *<>_insert(<>* root, K key, D data)
+	{
+		size_t hash = <key>_hash(&key);
+		/* look through buckets in order */
+		for (size_t b = 0; b < root->count; ++b) {
+			struct <>_bucket *bucket = root->buckets[b];
+			size_t idx = NGC_BUCKET_IDX(hash, root->pow2, b);
+
+			struct <>_node *node = &bucket->nodes[idx];
+			/* free to use this slot */
+			if (!node->bucket) {
+				node->bucket = bucket;
+				node->hash = hash;
+				node->t.data = data;
+				node->t.key = key;
+				root->len++;
+				return &node->t.data;
+			}
+
+			/* there already exists a node like this */
+			if (node->hash == hash && <key>_cmp(&node->t.key, &key) == 0)
+				return &node->t.data;
+		}
+
+		/* no bucket available, create new one */
+		size_t size = root->pow2 << root->count;
+		size_t bytes = sizeof(struct <>_bucket) + sizeof(struct <>_node) * size;
+		struct <>_bucket *bucket = calloc(1, bytes);
+		if (!bucket)
+			return NULL;
+
+		bucket->size = size;
+
+		size_t buckets_bytes = sizeof(struct <>_bucket *) * (root->count + 1);
+		struct <>_bucket **new_buckets = realloc(root->buckets, buckets_bytes);
+		if (!new_buckets) {
+			free(bucket);
+			return NULL;
+		}
+
+		root->buckets = new_buckets;
+		if (root->count != 0)
+			root->buckets[root->count - 1]->next = bucket;
+
+		root->buckets[root->count] = bucket;
+
+		/* populate node */
+		size_t idx = NGC_BUCKET_IDX(hash, root->pow2, root->count);
+		struct <>_node *node = &bucket->nodes[idx];
+		node->bucket = bucket;
+		node->hash = hash;
+		node->t.key = key;
+		node->t.data = data;
+		root->count++;
+		root->len++;
+		return &node->t.data;
+	}
+
+	D *<>_find(<> *root, K key)
+	{
+		if (root->len == 0)
+			return NULL;
+
+		size_t hash = <key>_hash(&key);
+		for (size_t b = 0; b < root->count; ++b) {
+			struct <>_bucket *bucket = root->buckets[b];
+			size_t idx = NGC_BUCKET_IDX(hash, root->pow2, b);
+
+			struct <>_node *node = &bucket->nodes[idx];
+			if (node->hash != hash)
+				continue;
+
+			if (<key>_cmp(&node->t.key, key) != 0)
+				continue;
+
+			return &node->t.data;
+		}
+
+		return NULL;
+	}
+
+	void <>_remove_found(<> *root, D *data)
+	{
+		struct <>_tuple *tuple = NGC_CONTAINER_OF(data, struct <>_tuple, data);
+		struct <>_node *node = NGC_CONTAINER_OF(tuple, struct <>_node, t);
+		node->bucket = NULL;
+		root->len--;
+	}
+
+	void <>_remove(<> *root, K key)
+	{
+		D *found = <>_find(root, key);
+		if (!found)
+			return;
+
+		<>_remove_found(root, found);
+	}
+
+	struct <>_tuple *<>_find_next(struct <>_bucket *bucket, struct <>_tuple *tuple)
+	{
+		struct <>_node *node = NGC_CONTAINER_OF(tuple, struct <>_node, t);
+		size_t idx = node - bucket->nodes;
+		for (; idx < bucket->size; ++idx) {
+			struct <>_node *candidate = &bucket->nodes[idx];
+			if (candidate->bucket)
+				return &candidate->t;
+		}
+
+		struct <>_bucket *next = bucket->next;
+		if (!next)
+			return NULL;
+
+		return <>_find_next(next, &next->nodes[0].t);
+	}
+
+	struct <>_tuple *<>_begin(<> *root)
+	{
+		if (root->len == 0)
+			return NULL;
+
+		struct <>_bucket *bucket = root->buckets[0];
+		return <>_find_next(bucket, &bucket->nodes[0].t);
+	}
+
+	struct <>_tuple *<>_next(struct <>_tuple *t)
+	{
+		struct <>_node *node = NGC_CONTAINER_OF(t, struct <>_node, t);
+		return <>_find_next(node->bucket, &(node + 1)->t);
+	}
+
+	bool <>_end(<> *root, struct <>_tuple *t)
+	{
+		(void)root;
+		return t == NULL;
+	}
+
+	size_t <>_len(<> *root)
+	{
+		return root->len;
+	}
+};
+
+#endif /* MAP_H */
diff --git a/example_hashmap/old/conts.h b/example_hashmap/old/conts.h
new file mode 100644
index 0000000..fb0094b
--- /dev/null
+++ b/example_hashmap/old/conts.h
@@ -0,0 +1,14 @@
+#ifndef CONTS_H
+#define CONTS_H
+
+#define CONTS_JOIN2(a, b) a##_##b
+#define CONTS_JOIN(a, b) CONTS_JOIN2(a, b)
+
+#define CONTAINER_OF(ptr, type, member) \
+	(type *)((char *)(ptr) - offsetof(type, member))
+
+#define foreach(name, i, s)\
+	for (CONTS_JOIN(name, iter) i = CONTS_JOIN(name, begin)(s);\
+			!CONTS_JOIN(name, end)(s, i);\
+			i = CONTS_JOIN(name, next)(i))
+#endif /* CONTS_H */
diff --git a/example_hashmap/old/main.c b/example_hashmap/old/main.c
new file mode 100644
index 0000000..6c42787
--- /dev/null
+++ b/example_hashmap/old/main.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+#include <stdio.h>
+
+#define MAP_KEY int
+#define MAP_TYPE int
+#define MAP_HASH(a) CONTS_MAP_NO_HASH(a)
+#define MAP_CMP(a, b) ((a) - (b))
+#define MAP_NAME ints
+#include <conts/map.h>
+
+int main()
+{
+	/* heuristic, but if we know how many elements we'll need, we should
+	 * give it to the create function. */
+	struct ints ints = ints_create(0);
+	for (int i = 0; i < 1000000; ++i) {
+		ints_insert(&ints, i, i);
+	}
+	assert(ints_len(&ints) == 1000000);
+
+	for (int i = 0; i < 1000000; ++i) {
+		int *v = ints_find(&ints, i);
+		assert(v && *v == i);
+	}
+
+	size_t count = 0;
+	foreach(ints, iter, &ints) {
+		assert(iter->key == iter->data);
+		count++;
+	}
+
+	assert(count == 1000000);
+
+	for (int i = 0; i < 1000000; ++i) {
+		ints_remove(&ints, i);
+	}
+
+	assert(ints_len(&ints) == 0);
+	ints_destroy(&ints);
+}
diff --git a/example_hashmap/old/map.h b/example_hashmap/old/map.h
new file mode 100644
index 0000000..db1e386
--- /dev/null
+++ b/example_hashmap/old/map.h
@@ -0,0 +1,250 @@
+#ifndef MAP_KEY
+#error "Need map key"
+#endif
+
+#ifndef MAP_TYPE
+#error "Need map type"
+#endif
+
+#ifndef MAP_CMP
+#error "Need map cmp"
+#endif
+
+#ifndef MAP_HASH
+#error "Need map hash"
+#endif
+
+#ifndef MAP_NAME
+#error "Need map name"
+#endif
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "conts.h"
+
+#define MAP(a) CONTS_JOIN(MAP_NAME, a)
+
+#define MAP_TUPLE MAP(tuple)
+#define MAP_NODE MAP(node)
+#define MAP_BUCKET MAP(bucket)
+#define MAP_ROOT MAP_NAME
+
+#ifndef CONTS_MAP_H
+#define CONTS_MAP_H
+
+static inline size_t conts_map_generic_hash(const char *s, size_t l)
+{
+	/* djb2 */
+	size_t hash = 5381;
+	for (size_t i = 0; i < l; ++i)
+		hash = ((hash << 5) + hash) + s[i];
+
+	return hash;
+}
+
+#define CONTS_MAP_NO_HASH(a) (a)
+#define CONTS_MAP_STR_HASH(a) conts_map_generic_hash(a, strlen(a))
+
+/* fast modulo pow2 */
+#define CONTS_BUCKET_IDX(hash, pow2, bucket) \
+	((hash) & (((pow2) << bucket) - 1))
+
+#endif /* CONTS_MAP_H */
+
+static inline size_t MAP(generic_hash)(MAP_KEY *key)
+{
+	return conts_map_generic_hash((const char *)key, sizeof(*key));
+}
+
+struct MAP_TUPLE {
+	MAP_KEY key;
+	MAP_TYPE data;
+};
+
+typedef struct MAP_TUPLE *MAP(iter);
+
+struct MAP_NODE {
+	struct MAP_BUCKET *bucket;
+	size_t hash;
+	struct MAP_TUPLE t;
+};
+
+struct MAP_BUCKET {
+	size_t size; /* bucket size */
+	struct MAP_BUCKET *next;
+	struct MAP_NODE nodes[];
+};
+
+struct MAP_ROOT {
+	size_t len; /* number of items */
+	size_t count; /* bucket count */
+	size_t pow2; /* how many nodes in smallest bucket */
+	struct MAP_BUCKET **buckets;
+};
+
+static inline struct MAP_ROOT MAP(create)(size_t count)
+{
+	size_t pow2 = 1;
+	while (pow2 < count)
+		pow2 <<= 1;
+
+	return (struct MAP_ROOT){
+		.len = 0,
+		.count = 0,
+		.pow2 = pow2,
+		.buckets = NULL
+	};
+}
+
+static inline void MAP(destroy)(struct MAP_ROOT *root)
+{
+	for (size_t i = 0; i < root->count; ++i)
+		free(root->buckets[i]);
+
+	free(root->buckets);
+}
+
+static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data)
+{
+	size_t hash = MAP_HASH(key);
+	/* look through buckets in order */
+	for (size_t b = 0; b < root->count; ++b) {
+		struct MAP_BUCKET *bucket = root->buckets[b];
+		size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b);
+
+		struct MAP_NODE *node = &bucket->nodes[idx];
+		/* free to use this slot */
+		if (!node->bucket) {
+			node->bucket = bucket;
+			node->hash = hash;
+			node->t.data = data;
+			node->t.key = key;
+			root->len++;
+			return &node->t.data;
+		}
+
+		/* there already exists a node like this */
+		if (node->hash == hash && MAP_CMP(node->t.key, key) == 0)
+			return &node->t.data;
+	}
+
+	/* no bucket available, create new one */
+	size_t size = root->pow2 << root->count;
+	size_t bytes = sizeof(struct MAP_BUCKET) + sizeof(struct MAP_NODE) * size;
+	struct MAP_BUCKET *bucket = calloc(1, bytes);
+	bucket->size = size;
+
+	size_t buckets_bytes = sizeof(struct MAP_BUCKET *) * (root->count + 1);
+	root->buckets = realloc(root->buckets, buckets_bytes);
+	if (root->count != 0)
+		root->buckets[root->count - 1]->next = bucket;
+
+	root->buckets[root->count] = bucket;
+
+	/* populate node */
+	size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, root->count);
+	struct MAP_NODE *node = &bucket->nodes[idx];
+	node->bucket = bucket;
+	node->hash = hash;
+	node->t.key = key;
+	node->t.data = data;
+	root->count++;
+	root->len++;
+	return &node->t.data;
+}
+
+static inline MAP_TYPE *MAP(find)(struct MAP_ROOT *root, MAP_KEY key)
+{
+	if (root->len == 0)
+		return NULL;
+
+	size_t hash = MAP_HASH(key);
+	for (size_t b = 0; b < root->count; ++b) {
+		struct MAP_BUCKET *bucket = root->buckets[b];
+		size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b);
+
+		struct MAP_NODE *node = &bucket->nodes[idx];
+		if (node->hash != hash)
+			continue;
+
+		if (MAP_CMP(node->t.key, key) != 0)
+			continue;
+
+		return &node->t.data;
+	}
+
+	return NULL;
+}
+
+static inline void MAP(remove_found)(struct MAP_ROOT *root, MAP_TYPE *data)
+{
+	struct MAP_TUPLE *tuple = CONTAINER_OF(data, struct MAP_TUPLE, data);
+	struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t);
+	node->bucket = NULL;
+	root->len--;
+}
+
+static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key)
+{
+	MAP_TYPE *found = MAP(find)(root, key);
+	if (!found)
+		return;
+
+	MAP(remove_found)(root, found);
+}
+
+static inline struct MAP_TUPLE *MAP(find_next)(struct MAP_BUCKET *bucket, struct MAP_TUPLE *tuple)
+{
+	struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t);
+	size_t idx = node - bucket->nodes;
+	for (; idx < bucket->size; ++idx) {
+		struct MAP_NODE *candidate = &bucket->nodes[idx];
+		if (candidate->bucket)
+			return &candidate->t;
+	}
+
+	struct MAP_BUCKET *next = bucket->next;
+	if (!next)
+		return NULL;
+
+	return MAP(find_next)(next, &next->nodes[0].t);
+}
+
+static inline struct MAP_TUPLE *MAP(begin)(struct MAP_ROOT *root)
+{
+	if (root->len == 0)
+		return NULL;
+
+	struct MAP_BUCKET *bucket = root->buckets[0];
+	return MAP(find_next)(bucket, &bucket->nodes[0].t);
+}
+
+static inline struct MAP_TUPLE *MAP(next)(struct MAP_TUPLE *t)
+{
+	struct MAP_NODE *node = CONTAINER_OF(t, struct MAP_NODE, t);
+	return MAP(find_next)(node->bucket, &(node + 1)->t);
+}
+
+static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_TUPLE *t)
+{
+	(void)root;
+	return t == NULL;
+}
+
+static inline size_t MAP(len)(struct MAP_ROOT *root)
+{
+	return root->len;
+}
+
+#undef MAP
+#undef MAP_NODE
+#undef MAP_TUPLE
+#undef MAP_BUCKET
+#undef MAP_ROOT
+#undef MAP_KEY
+#undef MAP_TYPE
+#undef MAP_CMP
+#undef MAP_HASH
+#undef MAP_NAME
author	Kimplul <kimi.h.kuparinen@gmail.com>	2025-08-30 00:01:15 +0300
committer	Kimplul <kimi.h.kuparinen@gmail.com>	2025-08-30 00:01:15 +0300
commit	20ce6fa81cd9f55dca3212699d5949f8ebe7d95b (patch)
tree	fdb89a8b290668ef61cc4747ac5236b817fdd686 /example_hashmap
parent	9a31a10d18ff5de50b4532a7f461ec97c5880d11 (diff)
download	ngc-20ce6fa81cd9f55dca3212699d5949f8ebe7d95b.tar.gz ngc-20ce6fa81cd9f55dca3212699d5949f8ebe7d95b.zip