From 704baab677029882a5924a59e3fb92f2295132a4 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Tue, 1 Apr 2025 19:40:53 +0300
Subject: somewhat improved register allocator

---
 src/compile/compile.c | 188 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 166 insertions(+), 22 deletions(-)

(limited to 'src/compile/compile.c')

diff --git a/src/compile/compile.c b/src/compile/compile.c
index 50f12dc..aba44a0 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -2203,7 +2203,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1);
 
 			jit_operand_t arg;
-			if (i.r2 < physfpr_count()) {
+			if (f2 < physfpr_count()) {
 				/* regular register */
 				arg = jit_operand_fpr(jit_abi_from(i.r1),
 				                      physfpr_at(f2));
@@ -2421,43 +2421,187 @@ calli:
 	return size;
 }
 
-/* highest prio first */
-static int gpr_sort_prio(struct gpr_stat *a, struct gpr_stat *b)
+struct alive_slot {
+	long r;
+	size_t cost;
+	size_t idx;
+	size_t remap;
+};
+
+#define VEC_NAME alive
+#define VEC_TYPE struct alive_slot
+#include "../vec.h"
+
+static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b)
 {
-	return (int)b->prio - (int)a->prio;
+	if (a->cost > b->cost)
+		return -1;
+
+	return a->cost < b->cost;
+}
+
+/* slightly more parameters than I would like but I guess it's fine */
+static void calculate_alive(struct alive *alive, size_t idx,
+		size_t prio, size_t start, size_t end, size_t *rno,
+		void *regs, int (*dead)(void *regs, size_t idx, size_t start))
+{
+	/* single-shot */
+	if (end <= start + 1) {
+		*rno = 0;
+		struct alive_slot *a = alive_at(alive, 0);
+		a->cost += prio;
+		return;
+	}
+
+	long max_cost_idx = -1;
+	size_t max_cost = 0;
+	long counter = 0;
+	foreach_vec(ai, *alive) {
+		struct alive_slot *a = alive_at(alive, ai);
+
+		/* skip reserved slot */
+		if (idx == 0) {
+			counter++;
+			continue;
+		}
+
+		if (a->r < 0 && a->cost > max_cost) {
+			max_cost = a->cost;
+			max_cost_idx = counter;
+		}
+
+		counter++;
+
+		/* skip gravestones */
+		if (a->r < 0)
+			continue;
+
+		if (dead(regs, a->r, start))
+			a->r = -1; /* gravestone */
+	}
+
+	/* there's a suitable slot for us */
+	if (max_cost_idx >= 0) {
+		*rno = max_cost_idx;
+
+		struct alive_slot *a = alive_at(alive, max_cost_idx);
+		a->cost += prio;
+		a->r = idx;
+		return;
+	}
+
+	*rno = alive_len(alive);
+	struct alive_slot a = {
+		.cost = prio,
+		.r = idx,
+		.idx = *rno
+	};
+	alive_append(alive, a);
 }
 
-static int fpr_sort_prio(struct fpr_stat *a, struct fpr_stat *b)
+static int gpr_dead(void *regs, size_t idx, size_t start)
 {
-	return (int)b->prio - (int)a->prio;
+	struct gpr_stats *gprs = regs;
+	return gpr_stats_at(gprs, idx)->end < start;
 }
 
-/* sort registers by highest priority first, then renumber registers in the
- * given order. Higher priority is given a physical register first.
- *
- * Note that the `->r` field becomes 'meaningless' after sorting, and you should
- * only use the `->rno` field after this point. Essentially, if you have a
- * register EJIT_GPR(2), you should use `gpr_stats_at(2)->rno` for the 'actual'
- * register number in `getloc` and the like.
- *
- * Can be a bit confusing, but this way we don't have to allocate any new
- * arrays, which is cool. */
+static void linear_gpr_alloc(struct ejit_func *f)
+{
+	foreach_vec(gi, f->gpr) {
+		gpr_stats_at(&f->gpr, gi)->rno = gi;
+	}
+}
+
+/* there's a fair bit of repetition between this and the gpr case, hmm */
 static void assign_gprs(struct ejit_func *f)
 {
-	gpr_stats_sort(&f->gpr, (vec_comp_t)gpr_sort_prio);
+	/* everything fits into registers, no need to start optimizing */
+	if (gpr_stats_len(&f->gpr) <= physgpr_count())
+		return linear_gpr_alloc(f);
+
+	struct alive alive = alive_create(gpr_stats_len(&f->gpr));
+
+	/* special oneshot register class */
+	struct alive_slot a = {.r = 0, .cost = 0, .idx = 0};
+	alive_append(&alive, a);
+
+	foreach_vec(gi, f->gpr) {
+		struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi);
+		calculate_alive(&alive, gi,
+				gpr->prio, gpr->start, gpr->end, &gpr->rno,
+				&f->gpr, gpr_dead);
+	}
+
+	/* sort so that the highest spill cost register classes are at the front and
+	 * as such more likely to be placed in registers */
+	alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+
+	/* update remapping info */
+	for(size_t i = 0; i < alive_len(&alive); ++i) {
+		struct alive_slot *a = alive_at(&alive, i);
+		alive_at(&alive, a->idx)->remap = i;
+	}
+
+	/* remap locations */
 	for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) {
-		size_t rno = gpr_stats_at(&f->gpr, i)->r.r;
-		gpr_stats_at(&f->gpr, rno)->rno = i;
+		struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i);
+		struct alive_slot *a = alive_at(&alive, gpr->rno);
+		gpr->rno = a->remap;
+	}
+
+	alive_destroy(&alive);
+}
+
+static int fpr_dead(void *regs, size_t idx, size_t start)
+{
+	struct fpr_stats *fprs = regs;
+	return fpr_stats_at(fprs, idx)->end < start;
+}
+
+static void linear_fpr_alloc(struct ejit_func *f)
+{
+	foreach_vec(fi, f->fpr) {
+		fpr_stats_at(&f->fpr, fi)->fno = fi;
 	}
 }
 
 static void assign_fprs(struct ejit_func *f)
 {
-	fpr_stats_sort(&f->fpr, (vec_comp_t)fpr_sort_prio);
+	/* everything fits into registers, no need to start optimizing */
+	if (fpr_stats_len(&f->fpr) <= physfpr_count())
+		return linear_fpr_alloc(f);
+
+	struct alive alive = alive_create(fpr_stats_len(&f->fpr));
+
+	/* special oneshot register class */
+	struct alive_slot a = {.r = 0, .cost = 0, .idx = 0};
+	alive_append(&alive, a);
+
+	foreach_vec(fi, f->fpr) {
+		struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi);
+		calculate_alive(&alive, fi,
+				fpr->prio, fpr->start, fpr->end, &fpr->fno,
+				&f->fpr, fpr_dead);
+	}
+
+	/* sort so that the highest spill cost register classes are at the front and
+	 * as such more likely to be placed in registers */
+	alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+
+	/* update remapping info */
+	for(size_t i = 0; i < alive_len(&alive); ++i) {
+		struct alive_slot *a = alive_at(&alive, i);
+		alive_at(&alive, a->idx)->remap = i;
+	}
+
+	/* remap locations */
 	for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) {
-		size_t rno = fpr_stats_at(&f->fpr, i)->f.f;
-		fpr_stats_at(&f->fpr, rno)->fno = i;
+		struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i);
+		struct alive_slot *a = alive_at(&alive, fpr->fno);
+		fpr->fno = a->remap;
 	}
+
+	alive_destroy(&alive);
 }
 
 static size_t align_up(size_t a, size_t n)
-- 
cgit v1.2.3