diff options
Diffstat (limited to 'src/compile/compile.c')
-rw-r--r-- | src/compile/compile.c | 223 |
1 files changed, 174 insertions, 49 deletions
diff --git a/src/compile/compile.c b/src/compile/compile.c index 5432bc1..2c72b91 100644 --- a/src/compile/compile.c +++ b/src/compile/compile.c @@ -4,7 +4,7 @@ #define VEC_TYPE jit_operand_t #define VEC_NAME operands -#include "../vec.h" +#include <conts/vec.h> struct reloc_helper { jit_reloc_t r; @@ -13,11 +13,11 @@ struct reloc_helper { #define VEC_TYPE struct reloc_helper #define VEC_NAME relocs -#include "../vec.h" +#include <conts/vec.h> #define VEC_TYPE jit_addr_t #define VEC_NAME addrs -#include "../vec.h" +#include <conts/vec.h> /* skip assertions since we know they must be valid due to type checking earlier */ static long checked_run_i(struct ejit_func *f, size_t argc, struct ejit_arg args[argc]) @@ -1880,13 +1880,11 @@ static jit_off_t type_offset(struct ejit_insn i) static void fixup_operands(struct operands *operands, size_t fixup) { - foreach_vec(i, *operands) { - jit_operand_t op = *operands_at(operands, i); - if (op.kind != JIT_OPERAND_KIND_MEM) + foreach(operands, op, operands) { + if (op->kind != JIT_OPERAND_KIND_MEM) continue; - op.loc.mem.offset += fixup; - *operands_at(operands, i) = op; + op->loc.mem.offset += fixup; } } @@ -1922,17 +1920,22 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j) struct operands args = operands_create(0); - foreach_vec(ii, f->insns) { - struct ejit_insn i = *insns_at(&f->insns, ii); - switch (i.op) { + foreach(insns, i, &f->insns) { + switch (i->op) { case EJIT_OP_PARAM: { - jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i)); + jit_operand_t p = jit_operand_mem( + jit_abi_from(i->r1), + JIT_R1, arg_offset(*i)); + operands_append(&args, p); break; } case EJIT_OP_PARAM_F: { - jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i)); + jit_operand_t p = jit_operand_mem( + jit_abi_from(i->r1), + JIT_R1, arg_offset(*i)); + operands_append(&args, p); break; } @@ -1971,17 +1974,21 @@ static void resolve_top_reloc(jit_state_t *j, struct relocs *relocs, struct addr assert(a); jit_patch_there(j, r, a); relocs_pop(relocs); + + /* hope this turns into a tailcall */ + if (relocs_len(relocs)) + resolve_top_reloc(j, relocs, addrs, ii); } static void resolve_relocs(jit_state_t *j, struct relocs *relocs, struct addrs *addrs, size_t ii) { - foreach_vec(ri, *relocs) { - struct reloc_helper h = *relocs_at(relocs, ri); - if (h.to != ii) + for (size_t ri = 0; ri < relocs_len(relocs); ++ri) { + struct reloc_helper *h = relocs_at(relocs, ri); + if (h->to != ii) continue; jit_addr_t a = *addrs_at(addrs, ii); - jit_reloc_t r = h.r; + jit_reloc_t r = h->r; assert(a); jit_patch_there(j, r, a); @@ -2052,16 +2059,16 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, size_t frame = jit_enter_jit_abi(j, gprs, fprs, 0); size_t stack = jit_align_stack(j, stack_size(f)); - struct operands src = operands_create(); - struct operands dst = operands_create(); - struct operands direct = operands_create(); + struct operands src = operands_create(f->max_args); + struct operands dst = operands_create(f->max_args); + struct operands direct = operands_create(f->max_args); - struct relocs relocs = relocs_create(); - struct addrs addrs = addrs_create(); + struct relocs relocs = relocs_create(labels_len(&f->labels)); + struct addrs addrs = addrs_create(0); addrs_reserve(&addrs, insns_len(&f->insns)); size_t label = 0; - foreach_vec(ii, f->insns) { + for (size_t ii = 0; ii < insns_len(&f->insns); ++ii) { /* if we've hit a label, add it to our vector of label addresses */ if (label < labels_len(&f->labels)) { if (*labels_at(&f->labels, label) == ii) { @@ -2530,7 +2537,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, /* now move args into place */ jit_operand_t args[2] = {}; - foreach_vec(oi, direct) { + for (size_t oi = 0; oi < operands_len(&direct); ++oi) { args[oi] = *operands_at(&direct, oi); } @@ -2665,13 +2672,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, case EJIT_OP_CALLI: { save_caller_save_regs(f, j); - - struct ejit_func *f = (struct ejit_func *)i.p; + struct ejit_func *t = (struct ejit_func *)i.p; #if __WORDSIZE != 64 - assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64); + assert(t->rtype != EJIT_INT64 && t->rtype != EJIT_UINT64); #endif - if (f && f->direct_call) { - jit_calli(j, f->direct_call, operands_len(&direct), direct.buf); + if (t && t->direct_call) { + jit_calli(j, t->direct_call, operands_len(&direct), direct.buf); restore_caller_save_regs(f, j); operands_reset(&src); @@ -2690,7 +2696,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, }; void *call = NULL; - switch (f->rtype) { + switch (t->rtype) { case EJIT_INT64: case EJIT_UINT64: call = checked_run_l; break; case EJIT_FLOAT: call = checked_run_f; break; @@ -2843,7 +2849,7 @@ struct alive_slot { #define VEC_NAME alive #define VEC_TYPE struct alive_slot -#include "../vec.h" +#include <conts/vec.h> static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b) { @@ -2853,6 +2859,38 @@ static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b) return a->cost < b->cost; } +/* sort barriers according to starting address, smallest to largest */ +static int barrier_sort(struct barrier_tuple *a, struct barrier_tuple *b) +{ + if (a->start < b->start) + return -1; + + if (a->start > b->start) + return 1; + + if (a->end < b->end) + return -1; + + return 1; +} + +/* sort gprs in order of starting address */ +static int gpr_start_sort(struct gpr_stat *a, struct gpr_stat *b) +{ + if (a->start < b->start) + return -1; + + return 1; +} + +static int fpr_start_sort(struct fpr_stat *a, struct fpr_stat *b) +{ + if (a->start < b->start) + return -1; + + return 1; +} + /* slightly more parameters than I would like but I guess it's fine */ static void calculate_alive(struct alive *alive, size_t idx, size_t prio, size_t start, size_t end, size_t *rno, @@ -2871,7 +2909,7 @@ static void calculate_alive(struct alive *alive, size_t idx, long max_cost_idx = -1; size_t max_cost = 0; long counter = 0; - foreach_vec(ai, *alive) { + for (size_t ai = 0; ai < alive_len(alive); ++ai) { /* skip oneshot */ if (ai == 0) goto next; @@ -2916,11 +2954,45 @@ static int gpr_dead(void *regs, size_t idx, size_t start) static void linear_gpr_alloc(struct ejit_func *f) { - foreach_vec(gi, f->gpr) { + for (size_t gi = 0; gi < gpr_stats_len(&f->gpr); ++gi) { gpr_stats_at(&f->gpr, gi)->rno = gi; } } +static void extend_gpr_lifetime(struct gpr_stat *gpr, struct barriers *barriers, size_t bi) +{ + if (bi >= barriers_len(barriers)) + return; + + struct barrier_tuple *barrier = barriers_at(barriers, bi); + if (gpr->end < barrier->start) + return; + + /* we cross a barrier */ + gpr->end = gpr->end > barrier->end ? gpr->end : barrier->end; + + /* check if we cross the next barrier as well, + * hopefully gets optimized into a tail call */ + return extend_gpr_lifetime(gpr, barriers, bi + 1); +} + +static void extend_fpr_lifetime(struct fpr_stat *fpr, struct barriers *barriers, size_t bi) +{ + if (bi >= barriers_len(barriers)) + return; + + struct barrier_tuple *barrier = barriers_at(barriers, bi); + if (fpr->end < barrier->start) + return; + + /* we cross a barrier */ + fpr->end = fpr->end > barrier->end ? fpr->end : barrier->end; + + /* check if we cross the next barrier as well, + * hopefully gets optimized into a tail call */ + return extend_fpr_lifetime(fpr, barriers, bi + 1); +} + /* there's a fair bit of repetition between this and the gpr case, hmm */ static void assign_gprs(struct ejit_func *f) { @@ -2928,22 +3000,44 @@ static void assign_gprs(struct ejit_func *f) if (gpr_stats_len(&f->gpr) <= physgpr_count()) return linear_gpr_alloc(f); - struct alive alive = alive_create(gpr_stats_len(&f->gpr)); + /* create temporary buffer to sort */ + struct gpr_stats gprs = gpr_stats_create(gpr_stats_len(&f->gpr)); + foreach(gpr_stats, g, &f->gpr) { + gpr_stats_append(&gprs, *g); + } + + gpr_stats_sort(&gprs, gpr_start_sort); + + struct alive alive = alive_create(gpr_stats_len(&gprs)); /* special oneshot register class */ struct alive_slot a = {.r = -1, .cost = 0, .idx = 0}; alive_append(&alive, a); - foreach_vec(gi, f->gpr) { - struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi); + /* barrier index, keeps track of earliest possible barrier we can be + * dealing with. Since register start addresses grow upward, we can + * fairly easily keep track of which barrier a register cannot cross */ + size_t bi = 0; + for (size_t gi = 0; gi < gpr_stats_len(&gprs); ++gi) { + struct gpr_stat *gpr = gpr_stats_at(&gprs, gi); + if (gpr->prio == 0) + continue; + + extend_gpr_lifetime(gpr, &f->barriers, bi); + if (bi < barriers_len(&f->barriers)) { + struct barrier_tuple *barrier = barriers_at(&f->barriers, bi); + if (gpr->start >= barrier->start) + bi++; + } + calculate_alive(&alive, gi, gpr->prio, gpr->start, gpr->end, &gpr->rno, - &f->gpr, gpr_dead); + &gprs, gpr_dead); } /* sort so that the highest spill cost register classes are at the front and * as such more likely to be placed in registers */ - alive_sort(&alive, (vec_comp_t)spill_cost_sort); + alive_sort(&alive, (alive_comp_t)spill_cost_sort); /* update remapping info */ for(size_t i = 0; i < alive_len(&alive); ++i) { @@ -2952,13 +3046,18 @@ static void assign_gprs(struct ejit_func *f) } /* remap locations */ - for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) { - struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i); + for (size_t i = 0; i < gpr_stats_len(&gprs); ++i) { + struct gpr_stat *gpr = gpr_stats_at(&gprs, i); + if (gpr->prio == 0) + continue; + struct alive_slot *a = alive_at(&alive, gpr->rno); - gpr->rno = a->remap; + struct gpr_stat *orig = gpr_stats_at(&f->gpr, gpr->r.r); + orig->rno = a->remap; } alive_destroy(&alive); + gpr_stats_destroy(&gprs); } static int fpr_dead(void *regs, size_t idx, size_t start) @@ -2969,7 +3068,7 @@ static int fpr_dead(void *regs, size_t idx, size_t start) static void linear_fpr_alloc(struct ejit_func *f) { - foreach_vec(fi, f->fpr) { + for (size_t fi = 0; fi < fpr_stats_len(&f->fpr); ++fi) { fpr_stats_at(&f->fpr, fi)->fno = fi; } } @@ -2980,22 +3079,40 @@ static void assign_fprs(struct ejit_func *f) if (fpr_stats_len(&f->fpr) <= physfpr_count()) return linear_fpr_alloc(f); + struct fpr_stats fprs = fpr_stats_create(fpr_stats_len(&f->fpr)); + foreach(fpr_stats, r, &f->fpr) { + fpr_stats_append(&fprs, *r); + } + + fpr_stats_sort(&fprs, fpr_start_sort); + struct alive alive = alive_create(fpr_stats_len(&f->fpr)); /* special oneshot register class */ struct alive_slot a = {.r = -1, .cost = 0, .idx = 0}; alive_append(&alive, a); - foreach_vec(fi, f->fpr) { - struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi); + size_t bi = 0; + for (size_t fi = 0; fi < fpr_stats_len(&fprs); ++fi) { + struct fpr_stat *fpr = fpr_stats_at(&fprs, fi); + if (fpr->prio == 0) + continue; + + extend_fpr_lifetime(fpr, &f->barriers, bi); + if (bi < barriers_len(&f->barriers)) { + struct barrier_tuple *barrier = barriers_at(&f->barriers, bi); + if (fpr->start >= barrier->start) + bi++; + } + calculate_alive(&alive, fi, fpr->prio, fpr->start, fpr->end, &fpr->fno, - &f->fpr, fpr_dead); + &fprs, fpr_dead); } /* sort so that the highest spill cost register classes are at the front and * as such more likely to be placed in registers */ - alive_sort(&alive, (vec_comp_t)spill_cost_sort); + alive_sort(&alive, (alive_comp_t)spill_cost_sort); /* update remapping info */ for(size_t i = 0; i < alive_len(&alive); ++i) { @@ -3004,13 +3121,18 @@ static void assign_fprs(struct ejit_func *f) } /* remap locations */ - for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) { - struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i); + for (size_t i = 0; i < fpr_stats_len(&fprs); ++i) { + struct fpr_stat *fpr = fpr_stats_at(&fprs, i); + if (fpr->prio == 0) + continue; + struct alive_slot *a = alive_at(&alive, fpr->fno); - fpr->fno = a->remap; + struct fpr_stat *orig = fpr_stats_at(&f->fpr, fpr->f.f); + orig->fno = a->remap; } alive_destroy(&alive); + fpr_stats_destroy(&fprs); } static size_t align_up(size_t a, size_t n) @@ -3032,6 +3154,9 @@ bool ejit_compile(struct ejit_func *f, bool use_64, bool im_scawed) if (!init_jit()) return false; + /* sort barriers so they can be used to extend register lifetimes in + * loops */ + barriers_sort(&f->barriers, barrier_sort); assign_gprs(f); assign_fprs(f); |