diff options
Diffstat (limited to 'src/compile/compile.c')
-rw-r--r-- | src/compile/compile.c | 188 |
1 files changed, 166 insertions, 22 deletions
diff --git a/src/compile/compile.c b/src/compile/compile.c index 50f12dc..aba44a0 100644 --- a/src/compile/compile.c +++ b/src/compile/compile.c @@ -2203,7 +2203,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1); jit_operand_t arg; - if (i.r2 < physfpr_count()) { + if (f2 < physfpr_count()) { /* regular register */ arg = jit_operand_fpr(jit_abi_from(i.r1), physfpr_at(f2)); @@ -2421,43 +2421,187 @@ calli: return size; } -/* highest prio first */ -static int gpr_sort_prio(struct gpr_stat *a, struct gpr_stat *b) +struct alive_slot { + long r; + size_t cost; + size_t idx; + size_t remap; +}; + +#define VEC_NAME alive +#define VEC_TYPE struct alive_slot +#include "../vec.h" + +static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b) { - return (int)b->prio - (int)a->prio; + if (a->cost > b->cost) + return -1; + + return a->cost < b->cost; +} + +/* slightly more parameters than I would like but I guess it's fine */ +static void calculate_alive(struct alive *alive, size_t idx, + size_t prio, size_t start, size_t end, size_t *rno, + void *regs, int (*dead)(void *regs, size_t idx, size_t start)) +{ + /* single-shot */ + if (end <= start + 1) { + *rno = 0; + struct alive_slot *a = alive_at(alive, 0); + a->cost += prio; + return; + } + + long max_cost_idx = -1; + size_t max_cost = 0; + long counter = 0; + foreach_vec(ai, *alive) { + struct alive_slot *a = alive_at(alive, ai); + + /* skip reserved slot */ + if (idx == 0) { + counter++; + continue; + } + + if (a->r < 0 && a->cost > max_cost) { + max_cost = a->cost; + max_cost_idx = counter; + } + + counter++; + + /* skip gravestones */ + if (a->r < 0) + continue; + + if (dead(regs, a->r, start)) + a->r = -1; /* gravestone */ + } + + /* there's a suitable slot for us */ + if (max_cost_idx >= 0) { + *rno = max_cost_idx; + + struct alive_slot *a = alive_at(alive, max_cost_idx); + a->cost += prio; + a->r = idx; + return; + } + + *rno = alive_len(alive); + struct alive_slot a = { + .cost = prio, + .r = idx, + .idx = *rno + }; + alive_append(alive, a); } -static int fpr_sort_prio(struct fpr_stat *a, struct fpr_stat *b) +static int gpr_dead(void *regs, size_t idx, size_t start) { - return (int)b->prio - (int)a->prio; + struct gpr_stats *gprs = regs; + return gpr_stats_at(gprs, idx)->end < start; } -/* sort registers by highest priority first, then renumber registers in the - * given order. Higher priority is given a physical register first. - * - * Note that the `->r` field becomes 'meaningless' after sorting, and you should - * only use the `->rno` field after this point. Essentially, if you have a - * register EJIT_GPR(2), you should use `gpr_stats_at(2)->rno` for the 'actual' - * register number in `getloc` and the like. - * - * Can be a bit confusing, but this way we don't have to allocate any new - * arrays, which is cool. */ +static void linear_gpr_alloc(struct ejit_func *f) +{ + foreach_vec(gi, f->gpr) { + gpr_stats_at(&f->gpr, gi)->rno = gi; + } +} + +/* there's a fair bit of repetition between this and the gpr case, hmm */ static void assign_gprs(struct ejit_func *f) { - gpr_stats_sort(&f->gpr, (vec_comp_t)gpr_sort_prio); + /* everything fits into registers, no need to start optimizing */ + if (gpr_stats_len(&f->gpr) <= physgpr_count()) + return linear_gpr_alloc(f); + + struct alive alive = alive_create(gpr_stats_len(&f->gpr)); + + /* special oneshot register class */ + struct alive_slot a = {.r = 0, .cost = 0, .idx = 0}; + alive_append(&alive, a); + + foreach_vec(gi, f->gpr) { + struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi); + calculate_alive(&alive, gi, + gpr->prio, gpr->start, gpr->end, &gpr->rno, + &f->gpr, gpr_dead); + } + + /* sort so that the highest spill cost register classes are at the front and + * as such more likely to be placed in registers */ + alive_sort(&alive, (vec_comp_t)spill_cost_sort); + + /* update remapping info */ + for(size_t i = 0; i < alive_len(&alive); ++i) { + struct alive_slot *a = alive_at(&alive, i); + alive_at(&alive, a->idx)->remap = i; + } + + /* remap locations */ for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) { - size_t rno = gpr_stats_at(&f->gpr, i)->r.r; - gpr_stats_at(&f->gpr, rno)->rno = i; + struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i); + struct alive_slot *a = alive_at(&alive, gpr->rno); + gpr->rno = a->remap; + } + + alive_destroy(&alive); +} + +static int fpr_dead(void *regs, size_t idx, size_t start) +{ + struct fpr_stats *fprs = regs; + return fpr_stats_at(fprs, idx)->end < start; +} + +static void linear_fpr_alloc(struct ejit_func *f) +{ + foreach_vec(fi, f->fpr) { + fpr_stats_at(&f->fpr, fi)->fno = fi; } } static void assign_fprs(struct ejit_func *f) { - fpr_stats_sort(&f->fpr, (vec_comp_t)fpr_sort_prio); + /* everything fits into registers, no need to start optimizing */ + if (fpr_stats_len(&f->fpr) <= physfpr_count()) + return linear_fpr_alloc(f); + + struct alive alive = alive_create(fpr_stats_len(&f->fpr)); + + /* special oneshot register class */ + struct alive_slot a = {.r = 0, .cost = 0, .idx = 0}; + alive_append(&alive, a); + + foreach_vec(fi, f->fpr) { + struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi); + calculate_alive(&alive, fi, + fpr->prio, fpr->start, fpr->end, &fpr->fno, + &f->fpr, fpr_dead); + } + + /* sort so that the highest spill cost register classes are at the front and + * as such more likely to be placed in registers */ + alive_sort(&alive, (vec_comp_t)spill_cost_sort); + + /* update remapping info */ + for(size_t i = 0; i < alive_len(&alive); ++i) { + struct alive_slot *a = alive_at(&alive, i); + alive_at(&alive, a->idx)->remap = i; + } + + /* remap locations */ for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) { - size_t rno = fpr_stats_at(&f->fpr, i)->f.f; - fpr_stats_at(&f->fpr, rno)->fno = i; + struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i); + struct alive_slot *a = alive_at(&alive, fpr->fno); + fpr->fno = a->remap; } + + alive_destroy(&alive); } static size_t align_up(size_t a, size_t n) |