diff options
Diffstat (limited to 'example_hashmap')
-rw-r--r-- | example_hashmap/new/main.c | 62 | ||||
-rw-r--r-- | example_hashmap/new/map.h | 209 | ||||
-rw-r--r-- | example_hashmap/old/conts.h | 14 | ||||
-rw-r--r-- | example_hashmap/old/main.c | 40 | ||||
-rw-r--r-- | example_hashmap/old/map.h | 250 |
5 files changed, 575 insertions, 0 deletions
diff --git a/example_hashmap/new/main.c b/example_hashmap/new/main.c new file mode 100644 index 0000000..4846ab5 --- /dev/null +++ b/example_hashmap/new/main.c @@ -0,0 +1,62 @@ +#include <assert.h> +#include <stdio.h> + +#include "map.h" + +#define foreach(name, i, s) \ + for (name##_iter i = name##_begin(s); !name##_end(s, i); i = name##_next(i)) + +/* int wrapper, provides cmp/hash for a generic integer type + * (here a trait like is_integer would be ideal but oh well) */ +typedef intw[any I]() { + typedef I <>; + + size_t <>_hash(<> *a) + { + /* identity hash */ + return *a; + } + + int <>_cmp(<> *a, <> *b) + { + return *a - *b; + } +}; + +/* create wrapper for ints. */ +typedef intw = intw[int](); + +/* create hashmap of ints, using the intw wrapper we just created. Kind of + * boilerplatey, admittedly. */ +typedef ints = map[intw, intw](); + +int main() +{ + /* heuristic, but if we know how many elements we'll need, we should + * give it to the create function. */ + ints ints = ints_create(0); + for (int i = 0; i < 1000000; ++i) { + ints_insert(&ints, i, i); + } + assert(ints_len(&ints) == 1000000); + + for (int i = 0; i < 1000000; ++i) { + int *v = ints_find(&ints, i); + assert(v && *v == i); + } + + size_t count = 0; + foreach(ints, iter, &ints) { + assert(iter->key == iter->data); + count++; + } + + assert(count == 1000000); + + for (int i = 0; i < 1000000; ++i) { + ints_remove(&ints, i); + } + + assert(ints_len(&ints) == 0); + ints_destroy(&ints); +} diff --git a/example_hashmap/new/map.h b/example_hashmap/new/map.h new file mode 100644 index 0000000..e993139 --- /dev/null +++ b/example_hashmap/new/map.h @@ -0,0 +1,209 @@ +#ifndef MAP_H +#define MAP_H + +#include <stdlib.h> +#include <stddef.h> +#include <stdbool.h> + +#define NGC_BUCKET_IDX(hash, pow2, bucket) \ + ((hash) & (((pow2) << (bucket)) - 1)) + +#ifndef NGC_CONTAINER_OF +#define NGC_CONTAINER_OF(ptr, type, member) \ + (type *)((char *)(ptr) - offsetof(type, member)) +#endif + +/* trait would be something like is_hashable + is_comparable, but that's not yet + * supported so just go with this for now */ +typedef map[any K, any D]() { + struct <>_tuple { + K key; + D data; + }; + + typedef struct <>_tuple *<>_iter; + + struct <>_node { + size_t hash; + struct <>_bucket *bucket; + struct <>_tuple t; + }; + + struct <>_bucket { + size_t size; + struct <>_bucket *next; + struct <>_node nodes[]; + }; + + typedef struct <>_root { + size_t len; + size_t count; + size_t pow2; + struct <>_bucket **buckets; + } <>; + + <> <>_create(size_t count) + { + size_t pow2 = 1; + while (pow2 < count) + pow2 <<= 1; + + return (<>) { + .len = 0, + .count = 0, + .pow2 = pow2, + .buckets = NULL + }; + } + + void <>_destroy(<> *root) + { + for (size_t i = 0; i < root->count; ++i) + free(root->buckets[i]); + + free(root->buckets); + } + + D *<>_insert(<>* root, K key, D data) + { + size_t hash = <key>_hash(&key); + /* look through buckets in order */ + for (size_t b = 0; b < root->count; ++b) { + struct <>_bucket *bucket = root->buckets[b]; + size_t idx = NGC_BUCKET_IDX(hash, root->pow2, b); + + struct <>_node *node = &bucket->nodes[idx]; + /* free to use this slot */ + if (!node->bucket) { + node->bucket = bucket; + node->hash = hash; + node->t.data = data; + node->t.key = key; + root->len++; + return &node->t.data; + } + + /* there already exists a node like this */ + if (node->hash == hash && <key>_cmp(&node->t.key, &key) == 0) + return &node->t.data; + } + + /* no bucket available, create new one */ + size_t size = root->pow2 << root->count; + size_t bytes = sizeof(struct <>_bucket) + sizeof(struct <>_node) * size; + struct <>_bucket *bucket = calloc(1, bytes); + if (!bucket) + return NULL; + + bucket->size = size; + + size_t buckets_bytes = sizeof(struct <>_bucket *) * (root->count + 1); + struct <>_bucket **new_buckets = realloc(root->buckets, buckets_bytes); + if (!new_buckets) { + free(bucket); + return NULL; + } + + root->buckets = new_buckets; + if (root->count != 0) + root->buckets[root->count - 1]->next = bucket; + + root->buckets[root->count] = bucket; + + /* populate node */ + size_t idx = NGC_BUCKET_IDX(hash, root->pow2, root->count); + struct <>_node *node = &bucket->nodes[idx]; + node->bucket = bucket; + node->hash = hash; + node->t.key = key; + node->t.data = data; + root->count++; + root->len++; + return &node->t.data; + } + + D *<>_find(<> *root, K key) + { + if (root->len == 0) + return NULL; + + size_t hash = <key>_hash(&key); + for (size_t b = 0; b < root->count; ++b) { + struct <>_bucket *bucket = root->buckets[b]; + size_t idx = NGC_BUCKET_IDX(hash, root->pow2, b); + + struct <>_node *node = &bucket->nodes[idx]; + if (node->hash != hash) + continue; + + if (<key>_cmp(&node->t.key, key) != 0) + continue; + + return &node->t.data; + } + + return NULL; + } + + void <>_remove_found(<> *root, D *data) + { + struct <>_tuple *tuple = NGC_CONTAINER_OF(data, struct <>_tuple, data); + struct <>_node *node = NGC_CONTAINER_OF(tuple, struct <>_node, t); + node->bucket = NULL; + root->len--; + } + + void <>_remove(<> *root, K key) + { + D *found = <>_find(root, key); + if (!found) + return; + + <>_remove_found(root, found); + } + + struct <>_tuple *<>_find_next(struct <>_bucket *bucket, struct <>_tuple *tuple) + { + struct <>_node *node = NGC_CONTAINER_OF(tuple, struct <>_node, t); + size_t idx = node - bucket->nodes; + for (; idx < bucket->size; ++idx) { + struct <>_node *candidate = &bucket->nodes[idx]; + if (candidate->bucket) + return &candidate->t; + } + + struct <>_bucket *next = bucket->next; + if (!next) + return NULL; + + return <>_find_next(next, &next->nodes[0].t); + } + + struct <>_tuple *<>_begin(<> *root) + { + if (root->len == 0) + return NULL; + + struct <>_bucket *bucket = root->buckets[0]; + return <>_find_next(bucket, &bucket->nodes[0].t); + } + + struct <>_tuple *<>_next(struct <>_tuple *t) + { + struct <>_node *node = NGC_CONTAINER_OF(t, struct <>_node, t); + return <>_find_next(node->bucket, &(node + 1)->t); + } + + bool <>_end(<> *root, struct <>_tuple *t) + { + (void)root; + return t == NULL; + } + + size_t <>_len(<> *root) + { + return root->len; + } +}; + +#endif /* MAP_H */ diff --git a/example_hashmap/old/conts.h b/example_hashmap/old/conts.h new file mode 100644 index 0000000..fb0094b --- /dev/null +++ b/example_hashmap/old/conts.h @@ -0,0 +1,14 @@ +#ifndef CONTS_H +#define CONTS_H + +#define CONTS_JOIN2(a, b) a##_##b +#define CONTS_JOIN(a, b) CONTS_JOIN2(a, b) + +#define CONTAINER_OF(ptr, type, member) \ + (type *)((char *)(ptr) - offsetof(type, member)) + +#define foreach(name, i, s)\ + for (CONTS_JOIN(name, iter) i = CONTS_JOIN(name, begin)(s);\ + !CONTS_JOIN(name, end)(s, i);\ + i = CONTS_JOIN(name, next)(i)) +#endif /* CONTS_H */ diff --git a/example_hashmap/old/main.c b/example_hashmap/old/main.c new file mode 100644 index 0000000..6c42787 --- /dev/null +++ b/example_hashmap/old/main.c @@ -0,0 +1,40 @@ +#include <assert.h> +#include <stdio.h> + +#define MAP_KEY int +#define MAP_TYPE int +#define MAP_HASH(a) CONTS_MAP_NO_HASH(a) +#define MAP_CMP(a, b) ((a) - (b)) +#define MAP_NAME ints +#include <conts/map.h> + +int main() +{ + /* heuristic, but if we know how many elements we'll need, we should + * give it to the create function. */ + struct ints ints = ints_create(0); + for (int i = 0; i < 1000000; ++i) { + ints_insert(&ints, i, i); + } + assert(ints_len(&ints) == 1000000); + + for (int i = 0; i < 1000000; ++i) { + int *v = ints_find(&ints, i); + assert(v && *v == i); + } + + size_t count = 0; + foreach(ints, iter, &ints) { + assert(iter->key == iter->data); + count++; + } + + assert(count == 1000000); + + for (int i = 0; i < 1000000; ++i) { + ints_remove(&ints, i); + } + + assert(ints_len(&ints) == 0); + ints_destroy(&ints); +} diff --git a/example_hashmap/old/map.h b/example_hashmap/old/map.h new file mode 100644 index 0000000..db1e386 --- /dev/null +++ b/example_hashmap/old/map.h @@ -0,0 +1,250 @@ +#ifndef MAP_KEY +#error "Need map key" +#endif + +#ifndef MAP_TYPE +#error "Need map type" +#endif + +#ifndef MAP_CMP +#error "Need map cmp" +#endif + +#ifndef MAP_HASH +#error "Need map hash" +#endif + +#ifndef MAP_NAME +#error "Need map name" +#endif + +#include <stddef.h> +#include <stdlib.h> +#include <stdbool.h> + +#include "conts.h" + +#define MAP(a) CONTS_JOIN(MAP_NAME, a) + +#define MAP_TUPLE MAP(tuple) +#define MAP_NODE MAP(node) +#define MAP_BUCKET MAP(bucket) +#define MAP_ROOT MAP_NAME + +#ifndef CONTS_MAP_H +#define CONTS_MAP_H + +static inline size_t conts_map_generic_hash(const char *s, size_t l) +{ + /* djb2 */ + size_t hash = 5381; + for (size_t i = 0; i < l; ++i) + hash = ((hash << 5) + hash) + s[i]; + + return hash; +} + +#define CONTS_MAP_NO_HASH(a) (a) +#define CONTS_MAP_STR_HASH(a) conts_map_generic_hash(a, strlen(a)) + +/* fast modulo pow2 */ +#define CONTS_BUCKET_IDX(hash, pow2, bucket) \ + ((hash) & (((pow2) << bucket) - 1)) + +#endif /* CONTS_MAP_H */ + +static inline size_t MAP(generic_hash)(MAP_KEY *key) +{ + return conts_map_generic_hash((const char *)key, sizeof(*key)); +} + +struct MAP_TUPLE { + MAP_KEY key; + MAP_TYPE data; +}; + +typedef struct MAP_TUPLE *MAP(iter); + +struct MAP_NODE { + struct MAP_BUCKET *bucket; + size_t hash; + struct MAP_TUPLE t; +}; + +struct MAP_BUCKET { + size_t size; /* bucket size */ + struct MAP_BUCKET *next; + struct MAP_NODE nodes[]; +}; + +struct MAP_ROOT { + size_t len; /* number of items */ + size_t count; /* bucket count */ + size_t pow2; /* how many nodes in smallest bucket */ + struct MAP_BUCKET **buckets; +}; + +static inline struct MAP_ROOT MAP(create)(size_t count) +{ + size_t pow2 = 1; + while (pow2 < count) + pow2 <<= 1; + + return (struct MAP_ROOT){ + .len = 0, + .count = 0, + .pow2 = pow2, + .buckets = NULL + }; +} + +static inline void MAP(destroy)(struct MAP_ROOT *root) +{ + for (size_t i = 0; i < root->count; ++i) + free(root->buckets[i]); + + free(root->buckets); +} + +static inline MAP_TYPE *MAP(insert)(struct MAP_ROOT *root, MAP_KEY key, MAP_TYPE data) +{ + size_t hash = MAP_HASH(key); + /* look through buckets in order */ + for (size_t b = 0; b < root->count; ++b) { + struct MAP_BUCKET *bucket = root->buckets[b]; + size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b); + + struct MAP_NODE *node = &bucket->nodes[idx]; + /* free to use this slot */ + if (!node->bucket) { + node->bucket = bucket; + node->hash = hash; + node->t.data = data; + node->t.key = key; + root->len++; + return &node->t.data; + } + + /* there already exists a node like this */ + if (node->hash == hash && MAP_CMP(node->t.key, key) == 0) + return &node->t.data; + } + + /* no bucket available, create new one */ + size_t size = root->pow2 << root->count; + size_t bytes = sizeof(struct MAP_BUCKET) + sizeof(struct MAP_NODE) * size; + struct MAP_BUCKET *bucket = calloc(1, bytes); + bucket->size = size; + + size_t buckets_bytes = sizeof(struct MAP_BUCKET *) * (root->count + 1); + root->buckets = realloc(root->buckets, buckets_bytes); + if (root->count != 0) + root->buckets[root->count - 1]->next = bucket; + + root->buckets[root->count] = bucket; + + /* populate node */ + size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, root->count); + struct MAP_NODE *node = &bucket->nodes[idx]; + node->bucket = bucket; + node->hash = hash; + node->t.key = key; + node->t.data = data; + root->count++; + root->len++; + return &node->t.data; +} + +static inline MAP_TYPE *MAP(find)(struct MAP_ROOT *root, MAP_KEY key) +{ + if (root->len == 0) + return NULL; + + size_t hash = MAP_HASH(key); + for (size_t b = 0; b < root->count; ++b) { + struct MAP_BUCKET *bucket = root->buckets[b]; + size_t idx = CONTS_BUCKET_IDX(hash, root->pow2, b); + + struct MAP_NODE *node = &bucket->nodes[idx]; + if (node->hash != hash) + continue; + + if (MAP_CMP(node->t.key, key) != 0) + continue; + + return &node->t.data; + } + + return NULL; +} + +static inline void MAP(remove_found)(struct MAP_ROOT *root, MAP_TYPE *data) +{ + struct MAP_TUPLE *tuple = CONTAINER_OF(data, struct MAP_TUPLE, data); + struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t); + node->bucket = NULL; + root->len--; +} + +static inline void MAP(remove)(struct MAP_ROOT *root, MAP_KEY key) +{ + MAP_TYPE *found = MAP(find)(root, key); + if (!found) + return; + + MAP(remove_found)(root, found); +} + +static inline struct MAP_TUPLE *MAP(find_next)(struct MAP_BUCKET *bucket, struct MAP_TUPLE *tuple) +{ + struct MAP_NODE *node = CONTAINER_OF(tuple, struct MAP_NODE, t); + size_t idx = node - bucket->nodes; + for (; idx < bucket->size; ++idx) { + struct MAP_NODE *candidate = &bucket->nodes[idx]; + if (candidate->bucket) + return &candidate->t; + } + + struct MAP_BUCKET *next = bucket->next; + if (!next) + return NULL; + + return MAP(find_next)(next, &next->nodes[0].t); +} + +static inline struct MAP_TUPLE *MAP(begin)(struct MAP_ROOT *root) +{ + if (root->len == 0) + return NULL; + + struct MAP_BUCKET *bucket = root->buckets[0]; + return MAP(find_next)(bucket, &bucket->nodes[0].t); +} + +static inline struct MAP_TUPLE *MAP(next)(struct MAP_TUPLE *t) +{ + struct MAP_NODE *node = CONTAINER_OF(t, struct MAP_NODE, t); + return MAP(find_next)(node->bucket, &(node + 1)->t); +} + +static inline bool MAP(end)(struct MAP_ROOT *root, struct MAP_TUPLE *t) +{ + (void)root; + return t == NULL; +} + +static inline size_t MAP(len)(struct MAP_ROOT *root) +{ + return root->len; +} + +#undef MAP +#undef MAP_NODE +#undef MAP_TUPLE +#undef MAP_BUCKET +#undef MAP_ROOT +#undef MAP_KEY +#undef MAP_TYPE +#undef MAP_CMP +#undef MAP_HASH +#undef MAP_NAME |