aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2025-05-18 17:00:32 +0300
committerKimplul <kimi.h.kuparinen@gmail.com>2025-05-18 17:00:32 +0300
commitb0a6350b96f6ef3f8b2af5a385474d885a41759b (patch)
tree6b78e1cdd69b0f21ab2a08452259985ed6460d4d
parentc078339496f43ff3c5340ddd19badcf11038198d (diff)
downloadejit-b0a6350b96f6ef3f8b2af5a385474d885a41759b.tar.gz
ejit-b0a6350b96f6ef3f8b2af5a385474d885a41759b.zip
fix register ordering
+ The register allocator assumed that registers were ordered by increasing start address, but nothing sorted them to guarantee that condition. Most programs happen to produce registers in that order anyway, which made this a fairly easy bug to miss.
-rw-r--r--src/compile/compile.c90
1 files changed, 69 insertions, 21 deletions
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 8e0e250..5aab67e 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -2668,13 +2668,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
case EJIT_OP_CALLI: {
save_caller_save_regs(f, j);
-
- struct ejit_func *f = (struct ejit_func *)i.p;
+ struct ejit_func *t = (struct ejit_func *)i.p;
#if __WORDSIZE != 64
- assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64);
+ assert(t->rtype != EJIT_INT64 && t->rtype != EJIT_UINT64);
#endif
- if (f && f->direct_call) {
- jit_calli(j, f->direct_call, operands_len(&direct), direct.buf);
+ if (t && t->direct_call) {
+ jit_calli(j, t->direct_call, operands_len(&direct), direct.buf);
restore_caller_save_regs(f, j);
operands_reset(&src);
@@ -2693,7 +2692,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
};
void *call = NULL;
- switch (f->rtype) {
+ switch (t->rtype) {
case EJIT_INT64:
case EJIT_UINT64: call = checked_run_l; break;
case EJIT_FLOAT: call = checked_run_f; break;
@@ -2865,7 +2864,27 @@ static int barrier_sort(struct barrier_tuple *a, struct barrier_tuple *b)
if (a->start > b->start)
return 1;
- return a->end - b->end;
+ if (a->end < b->end)
+ return -1;
+
+ return 1;
+}
+
+/* order gprs by ascending start address; equal starts compare equal (0) */
+static int gpr_start_sort(struct gpr_stat *a, struct gpr_stat *b)
+{
+	if (a->start < b->start)
+		return -1;
+
+	return a->start > b->start; /* 1 if later, 0 if same: keeps cmp consistent */
+}
+
+static int fpr_start_sort(struct fpr_stat *a, struct fpr_stat *b)
+{
+	/* order fprs by ascending start address; equal starts compare equal (0) */
+	if (a->start < b->start)
+		return -1;
+	return a->start > b->start; /* 1 if later, 0 if same: keeps cmp consistent */
}
/* slightly more parameters than I would like but I guess it's fine */
@@ -2977,7 +2996,15 @@ static void assign_gprs(struct ejit_func *f)
if (gpr_stats_len(&f->gpr) <= physgpr_count())
return linear_gpr_alloc(f);
- struct alive alive = alive_create(gpr_stats_len(&f->gpr));
+ /* create temporary buffer to sort */
+ struct gpr_stats gprs = gpr_stats_create(gpr_stats_len(&f->gpr));
+ foreach(gpr_stats, g, &f->gpr) {
+ gpr_stats_append(&gprs, *g);
+ }
+
+ gpr_stats_sort(&gprs, gpr_start_sort);
+
+ struct alive alive = alive_create(gpr_stats_len(&gprs));
/* special oneshot register class */
struct alive_slot a = {.r = -1, .cost = 0, .idx = 0};
@@ -2987,8 +3014,10 @@ static void assign_gprs(struct ejit_func *f)
* dealing with. Since register start addresses grow upward, we can
* fairly easily keep track of which barrier a register cannot cross */
size_t bi = 0;
- for (size_t gi = 0; gi < gpr_stats_len(&f->gpr); ++gi) {
- struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi);
+ for (size_t gi = 0; gi < gpr_stats_len(&gprs); ++gi) {
+ struct gpr_stat *gpr = gpr_stats_at(&gprs, gi);
+ if (gpr->prio == 0)
+ continue;
extend_gpr_lifetime(gpr, &f->barriers, bi);
if (bi < barriers_len(&f->barriers)) {
@@ -2999,7 +3028,7 @@ static void assign_gprs(struct ejit_func *f)
calculate_alive(&alive, gi,
gpr->prio, gpr->start, gpr->end, &gpr->rno,
- &f->gpr, gpr_dead);
+ &gprs, gpr_dead);
}
/* sort so that the highest spill cost register classes are at the front and
@@ -3013,13 +3042,18 @@ static void assign_gprs(struct ejit_func *f)
}
/* remap locations */
- for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) {
- struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i);
+ for (size_t i = 0; i < gpr_stats_len(&gprs); ++i) {
+ struct gpr_stat *gpr = gpr_stats_at(&gprs, i);
+ if (gpr->prio == 0)
+ continue;
+
struct alive_slot *a = alive_at(&alive, gpr->rno);
- gpr->rno = a->remap;
+ struct gpr_stat *orig = gpr_stats_at(&f->gpr, gpr->r.r);
+ orig->rno = a->remap;
}
alive_destroy(&alive);
+ gpr_stats_destroy(&gprs);
}
static int fpr_dead(void *regs, size_t idx, size_t start)
@@ -3041,6 +3075,13 @@ static void assign_fprs(struct ejit_func *f)
if (fpr_stats_len(&f->fpr) <= physfpr_count())
return linear_fpr_alloc(f);
+ struct fpr_stats fprs = fpr_stats_create(fpr_stats_len(&f->fpr));
+ foreach(fpr_stats, r, &f->fpr) {
+ fpr_stats_append(&fprs, *r);
+ }
+
+ fpr_stats_sort(&fprs, fpr_start_sort);
+
struct alive alive = alive_create(fpr_stats_len(&f->fpr));
/* special oneshot register class */
@@ -3048,8 +3089,10 @@ static void assign_fprs(struct ejit_func *f)
alive_append(&alive, a);
size_t bi = 0;
- for (size_t fi = 0; fi < fpr_stats_len(&f->fpr); ++fi) {
- struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi);
+ for (size_t fi = 0; fi < fpr_stats_len(&fprs); ++fi) {
+ struct fpr_stat *fpr = fpr_stats_at(&fprs, fi);
+ if (fpr->prio == 0)
+ continue;
extend_fpr_lifetime(fpr, &f->barriers, bi);
if (bi < barriers_len(&f->barriers)) {
@@ -3060,7 +3103,7 @@ static void assign_fprs(struct ejit_func *f)
calculate_alive(&alive, fi,
fpr->prio, fpr->start, fpr->end, &fpr->fno,
- &f->fpr, fpr_dead);
+ &fprs, fpr_dead);
}
/* sort so that the highest spill cost register classes are at the front and
@@ -3074,13 +3117,18 @@ static void assign_fprs(struct ejit_func *f)
}
/* remap locations */
- for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) {
- struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i);
+ for (size_t i = 0; i < fpr_stats_len(&fprs); ++i) {
+ struct fpr_stat *fpr = fpr_stats_at(&fprs, i);
+ if (fpr->prio == 0)
+ continue;
+
struct alive_slot *a = alive_at(&alive, fpr->fno);
- fpr->fno = a->remap;
+ struct fpr_stat *orig = fpr_stats_at(&f->fpr, fpr->f.f);
+ orig->fno = a->remap;
}
alive_destroy(&alive);
+ fpr_stats_destroy(&fprs);
}
static size_t align_up(size_t a, size_t n)
@@ -3102,7 +3150,7 @@ bool ejit_compile(struct ejit_func *f, bool use_64, bool im_scawed)
if (!init_jit())
return false;
- /* sort barriers so they can be used to extend register life times in
+ /* sort barriers so they can be used to extend register lifetimes in
* loops */
barriers_sort(&f->barriers, barrier_sort);
assign_gprs(f);