Diffstat (limited to 'src/compile')
-rw-r--r--  src/compile/compile.c | 223
1 file changed, 174 insertions, 49 deletions
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 5432bc1..2c72b91 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -4,7 +4,7 @@
#define VEC_TYPE jit_operand_t
#define VEC_NAME operands
-#include "../vec.h"
+#include <conts/vec.h>
struct reloc_helper {
jit_reloc_t r;
@@ -13,11 +13,11 @@ struct reloc_helper {
#define VEC_TYPE struct reloc_helper
#define VEC_NAME relocs
-#include "../vec.h"
+#include <conts/vec.h>
#define VEC_TYPE jit_addr_t
#define VEC_NAME addrs
-#include "../vec.h"
+#include <conts/vec.h>
/* skip assertions since we know they must be valid due to type checking earlier */
static long checked_run_i(struct ejit_func *f, size_t argc, struct ejit_arg args[argc])
@@ -1880,13 +1880,11 @@ static jit_off_t type_offset(struct ejit_insn i)
static void fixup_operands(struct operands *operands, size_t fixup)
{
- foreach_vec(i, *operands) {
- jit_operand_t op = *operands_at(operands, i);
- if (op.kind != JIT_OPERAND_KIND_MEM)
+ foreach(operands, op, operands) {
+ if (op->kind != JIT_OPERAND_KIND_MEM)
continue;
- op.loc.mem.offset += fixup;
- *operands_at(operands, i) = op;
+ op->loc.mem.offset += fixup;
}
}
@@ -1922,17 +1920,22 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j)
struct operands args = operands_create(0);
- foreach_vec(ii, f->insns) {
- struct ejit_insn i = *insns_at(&f->insns, ii);
- switch (i.op) {
+ foreach(insns, i, &f->insns) {
+ switch (i->op) {
case EJIT_OP_PARAM: {
- jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i));
+ jit_operand_t p = jit_operand_mem(
+ jit_abi_from(i->r1),
+ JIT_R1, arg_offset(*i));
+
operands_append(&args, p);
break;
}
case EJIT_OP_PARAM_F: {
- jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i));
+ jit_operand_t p = jit_operand_mem(
+ jit_abi_from(i->r1),
+ JIT_R1, arg_offset(*i));
+
operands_append(&args, p);
break;
}
@@ -1971,17 +1974,21 @@ static void resolve_top_reloc(jit_state_t *j, struct relocs *relocs, struct addr
assert(a);
jit_patch_there(j, r, a);
relocs_pop(relocs);
+
+ /* hope this turns into a tailcall */
+ if (relocs_len(relocs))
+ resolve_top_reloc(j, relocs, addrs, ii);
}
static void resolve_relocs(jit_state_t *j, struct relocs *relocs, struct addrs *addrs, size_t ii)
{
- foreach_vec(ri, *relocs) {
- struct reloc_helper h = *relocs_at(relocs, ri);
- if (h.to != ii)
+ for (size_t ri = 0; ri < relocs_len(relocs); ++ri) {
+ struct reloc_helper *h = relocs_at(relocs, ri);
+ if (h->to != ii)
continue;
jit_addr_t a = *addrs_at(addrs, ii);
- jit_reloc_t r = h.r;
+ jit_reloc_t r = h->r;
assert(a);
jit_patch_there(j, r, a);
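
The comment in resolve_top_reloc() hopes the compiler turns the trailing self-call into a tail call. As a rough illustration only, the same draining logic written as an explicit loop might look like the sketch below; it assumes the reloc vector is used as a stack (last element on top) and that the guard against non-matching targets mirrors resolve_relocs(), neither of which is visible in this hunk, and it relies on the surrounding translation unit for the jit_* types and assert().

static void resolve_top_relocs_iter(jit_state_t *j, struct relocs *relocs,
                                    struct addrs *addrs, size_t ii)
{
	/* drain relocations targeting instruction ii from the top of the
	 * stack; same spirit as the recursive version, but without relying
	 * on tail-call optimization */
	while (relocs_len(relocs)) {
		struct reloc_helper *h = relocs_at(relocs,
		                                   relocs_len(relocs) - 1);
		if (h->to != ii)
			break;

		jit_addr_t a = *addrs_at(addrs, ii);
		assert(a);
		jit_patch_there(j, h->r, a);
		relocs_pop(relocs);
	}
}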
@@ -2052,16 +2059,16 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
size_t frame = jit_enter_jit_abi(j, gprs, fprs, 0);
size_t stack = jit_align_stack(j, stack_size(f));
- struct operands src = operands_create();
- struct operands dst = operands_create();
- struct operands direct = operands_create();
+ struct operands src = operands_create(f->max_args);
+ struct operands dst = operands_create(f->max_args);
+ struct operands direct = operands_create(f->max_args);
- struct relocs relocs = relocs_create();
- struct addrs addrs = addrs_create();
+ struct relocs relocs = relocs_create(labels_len(&f->labels));
+ struct addrs addrs = addrs_create(0);
addrs_reserve(&addrs, insns_len(&f->insns));
size_t label = 0;
- foreach_vec(ii, f->insns) {
+ for (size_t ii = 0; ii < insns_len(&f->insns); ++ii) {
/* if we've hit a label, add it to our vector of label addresses */
if (label < labels_len(&f->labels)) {
if (*labels_at(&f->labels, label) == ii) {
@@ -2530,7 +2537,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
/* now move args into place */
jit_operand_t args[2] = {};
- foreach_vec(oi, direct) {
+ for (size_t oi = 0; oi < operands_len(&direct); ++oi) {
args[oi] = *operands_at(&direct, oi);
}
@@ -2665,13 +2672,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
case EJIT_OP_CALLI: {
save_caller_save_regs(f, j);
-
- struct ejit_func *f = (struct ejit_func *)i.p;
+ struct ejit_func *t = (struct ejit_func *)i.p;
#if __WORDSIZE != 64
- assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64);
+ assert(t->rtype != EJIT_INT64 && t->rtype != EJIT_UINT64);
#endif
- if (f && f->direct_call) {
- jit_calli(j, f->direct_call, operands_len(&direct), direct.buf);
+ if (t && t->direct_call) {
+ jit_calli(j, t->direct_call, operands_len(&direct), direct.buf);
restore_caller_save_regs(f, j);
operands_reset(&src);
@@ -2690,7 +2696,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
};
void *call = NULL;
- switch (f->rtype) {
+ switch (t->rtype) {
case EJIT_INT64:
case EJIT_UINT64: call = checked_run_l; break;
case EJIT_FLOAT: call = checked_run_f; break;
@@ -2843,7 +2849,7 @@ struct alive_slot {
#define VEC_NAME alive
#define VEC_TYPE struct alive_slot
-#include "../vec.h"
+#include <conts/vec.h>
static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b)
{
@@ -2853,6 +2859,38 @@ static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b)
return a->cost < b->cost;
}
+/* sort barriers according to starting address, smallest to largest */
+static int barrier_sort(struct barrier_tuple *a, struct barrier_tuple *b)
+{
+ if (a->start < b->start)
+ return -1;
+
+ if (a->start > b->start)
+ return 1;
+
+ if (a->end < b->end)
+ return -1;
+
+ return 1;
+}
+
+/* sort gprs in order of starting address */
+static int gpr_start_sort(struct gpr_stat *a, struct gpr_stat *b)
+{
+ if (a->start < b->start)
+ return -1;
+
+ return 1;
+}
+
+static int fpr_start_sort(struct fpr_stat *a, struct fpr_stat *b)
+{
+ if (a->start < b->start)
+ return -1;
+
+ return 1;
+}
+
/* slightly more parameters than I would like but I guess it's fine */
static void calculate_alive(struct alive *alive, size_t idx,
size_t prio, size_t start, size_t end, size_t *rno,
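
The new comparators order entries by start address, with barriers additionally ordered by end. For reference, the same ordering rule expressed against plain libc qsort, with a hypothetical struct interval standing in for struct barrier_tuple, looks roughly like this; the explicit comparisons avoid the truncation that subtracting size_t values into an int would cause.

#include <stdlib.h>

struct interval { size_t start, end; };

/* order by start, then by end; return 0 only on full equality */
static int interval_cmp(const void *pa, const void *pb)
{
	const struct interval *a = pa, *b = pb;
	if (a->start != b->start)
		return a->start < b->start ? -1 : 1;
	if (a->end != b->end)
		return a->end < b->end ? -1 : 1;
	return 0;
}

/* usage: qsort(intervals, n, sizeof(struct interval), interval_cmp); */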
@@ -2871,7 +2909,7 @@ static void calculate_alive(struct alive *alive, size_t idx,
long max_cost_idx = -1;
size_t max_cost = 0;
long counter = 0;
- foreach_vec(ai, *alive) {
+ for (size_t ai = 0; ai < alive_len(alive); ++ai) {
/* skip oneshot */
if (ai == 0)
goto next;
@@ -2916,11 +2954,45 @@ static int gpr_dead(void *regs, size_t idx, size_t start)
static void linear_gpr_alloc(struct ejit_func *f)
{
- foreach_vec(gi, f->gpr) {
+ for (size_t gi = 0; gi < gpr_stats_len(&f->gpr); ++gi) {
gpr_stats_at(&f->gpr, gi)->rno = gi;
}
}
+static void extend_gpr_lifetime(struct gpr_stat *gpr, struct barriers *barriers, size_t bi)
+{
+ if (bi >= barriers_len(barriers))
+ return;
+
+ struct barrier_tuple *barrier = barriers_at(barriers, bi);
+ if (gpr->end < barrier->start)
+ return;
+
+ /* we cross a barrier */
+ gpr->end = gpr->end > barrier->end ? gpr->end : barrier->end;
+
+ /* check if we cross the next barrier as well,
+ * hopefully gets optimized into a tail call */
+ return extend_gpr_lifetime(gpr, barriers, bi + 1);
+}
+
+static void extend_fpr_lifetime(struct fpr_stat *fpr, struct barriers *barriers, size_t bi)
+{
+ if (bi >= barriers_len(barriers))
+ return;
+
+ struct barrier_tuple *barrier = barriers_at(barriers, bi);
+ if (fpr->end < barrier->start)
+ return;
+
+ /* we cross a barrier */
+ fpr->end = fpr->end > barrier->end ? fpr->end : barrier->end;
+
+ /* check if we cross the next barrier as well,
+ * hopefully gets optimized into a tail call */
+ return extend_fpr_lifetime(fpr, barriers, bi + 1);
+}
+
/* there's a fair bit of repetition between this and the gpr case, hmm */
static void assign_gprs(struct ejit_func *f)
{
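
The idea behind extend_gpr_lifetime()/extend_fpr_lifetime() above is that a value still live when a barrier (roughly, a loop region) begins must stay allocated until that barrier ends, since a later iteration may still read it; and because extending the end can push the lifetime across the next barrier as well, the extension chains forward. A minimal standalone sketch of the same rule, assuming barriers sorted by start address and a hypothetical struct span in place of the gpr/fpr stat types:

struct span { size_t start, end; };

/* grow live->end so it covers every barrier the interval overlaps;
 * barriers must be sorted by start address, bi is the first barrier
 * worth considering */
static void extend_over_barriers(struct span *live,
                                 const struct span *barriers, size_t n,
                                 size_t bi)
{
	for (; bi < n; ++bi) {
		/* barrier starts only after we die: nothing left to extend */
		if (live->end < barriers[bi].start)
			break;

		/* we cross this barrier, so survive at least to its end */
		if (live->end < barriers[bi].end)
			live->end = barriers[bi].end;
	}
}

For example, a lifetime [3, 10) combined with barriers [5, 20) and [18, 25) first grows to 20 and then, now crossing the second barrier, to 25, matching what the chained recursion in the patch computes.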
@@ -2928,22 +3000,44 @@ static void assign_gprs(struct ejit_func *f)
if (gpr_stats_len(&f->gpr) <= physgpr_count())
return linear_gpr_alloc(f);
- struct alive alive = alive_create(gpr_stats_len(&f->gpr));
+ /* create temporary buffer to sort */
+ struct gpr_stats gprs = gpr_stats_create(gpr_stats_len(&f->gpr));
+ foreach(gpr_stats, g, &f->gpr) {
+ gpr_stats_append(&gprs, *g);
+ }
+
+ gpr_stats_sort(&gprs, gpr_start_sort);
+
+ struct alive alive = alive_create(gpr_stats_len(&gprs));
/* special oneshot register class */
struct alive_slot a = {.r = -1, .cost = 0, .idx = 0};
alive_append(&alive, a);
- foreach_vec(gi, f->gpr) {
- struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi);
+ /* barrier index, keeps track of earliest possible barrier we can be
+ * dealing with. Since register start addresses grow upward, we can
+ * fairly easily keep track of which barrier a register cannot cross */
+ size_t bi = 0;
+ for (size_t gi = 0; gi < gpr_stats_len(&gprs); ++gi) {
+ struct gpr_stat *gpr = gpr_stats_at(&gprs, gi);
+ if (gpr->prio == 0)
+ continue;
+
+ extend_gpr_lifetime(gpr, &f->barriers, bi);
+ if (bi < barriers_len(&f->barriers)) {
+ struct barrier_tuple *barrier = barriers_at(&f->barriers, bi);
+ if (gpr->start >= barrier->start)
+ bi++;
+ }
+
calculate_alive(&alive, gi,
gpr->prio, gpr->start, gpr->end, &gpr->rno,
- &f->gpr, gpr_dead);
+ &gprs, gpr_dead);
}
/* sort so that the highest spill cost register classes are at the front and
* as such more likely to be placed in registers */
- alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+ alive_sort(&alive, (alive_comp_t)spill_cost_sort);
/* update remapping info */
for(size_t i = 0; i < alive_len(&alive); ++i) {
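
The bi cursor in the loop above only moves forward: the temporary gprs copy is sorted by start address, so once a register starts at or past the current barrier's start, the patch stops considering that barrier for later registers, which start even further along. A condensed sketch of that sweep, reusing struct span and extend_over_barriers() from the earlier sketch (hypothetical names, not the project's API):

static void sweep_lifetimes(struct span *regs, size_t nregs,
                            const struct span *barriers, size_t nbarriers)
{
	size_t bi = 0;
	for (size_t i = 0; i < nregs; ++i) {
		/* extend this register across any barriers it crosses */
		extend_over_barriers(&regs[i], barriers, nbarriers, bi);

		/* registers are sorted by start, so once one starts at or
		 * past the current barrier, that barrier is left behind */
		if (bi < nbarriers && regs[i].start >= barriers[bi].start)
			bi++;
	}
}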
@@ -2952,13 +3046,18 @@ static void assign_gprs(struct ejit_func *f)
}
/* remap locations */
- for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) {
- struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i);
+ for (size_t i = 0; i < gpr_stats_len(&gprs); ++i) {
+ struct gpr_stat *gpr = gpr_stats_at(&gprs, i);
+ if (gpr->prio == 0)
+ continue;
+
struct alive_slot *a = alive_at(&alive, gpr->rno);
- gpr->rno = a->remap;
+ struct gpr_stat *orig = gpr_stats_at(&f->gpr, gpr->r.r);
+ orig->rno = a->remap;
}
alive_destroy(&alive);
+ gpr_stats_destroy(&gprs);
}
static int fpr_dead(void *regs, size_t idx, size_t start)
@@ -2969,7 +3068,7 @@ static int fpr_dead(void *regs, size_t idx, size_t start)
static void linear_fpr_alloc(struct ejit_func *f)
{
- foreach_vec(fi, f->fpr) {
+ for (size_t fi = 0; fi < fpr_stats_len(&f->fpr); ++fi) {
fpr_stats_at(&f->fpr, fi)->fno = fi;
}
}
@@ -2980,22 +3079,40 @@ static void assign_fprs(struct ejit_func *f)
if (fpr_stats_len(&f->fpr) <= physfpr_count())
return linear_fpr_alloc(f);
+ struct fpr_stats fprs = fpr_stats_create(fpr_stats_len(&f->fpr));
+ foreach(fpr_stats, r, &f->fpr) {
+ fpr_stats_append(&fprs, *r);
+ }
+
+ fpr_stats_sort(&fprs, fpr_start_sort);
+
struct alive alive = alive_create(fpr_stats_len(&f->fpr));
/* special oneshot register class */
struct alive_slot a = {.r = -1, .cost = 0, .idx = 0};
alive_append(&alive, a);
- foreach_vec(fi, f->fpr) {
- struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi);
+ size_t bi = 0;
+ for (size_t fi = 0; fi < fpr_stats_len(&fprs); ++fi) {
+ struct fpr_stat *fpr = fpr_stats_at(&fprs, fi);
+ if (fpr->prio == 0)
+ continue;
+
+ extend_fpr_lifetime(fpr, &f->barriers, bi);
+ if (bi < barriers_len(&f->barriers)) {
+ struct barrier_tuple *barrier = barriers_at(&f->barriers, bi);
+ if (fpr->start >= barrier->start)
+ bi++;
+ }
+
calculate_alive(&alive, fi,
fpr->prio, fpr->start, fpr->end, &fpr->fno,
- &f->fpr, fpr_dead);
+ &fprs, fpr_dead);
}
/* sort so that the highest spill cost register classes are at the front and
* as such more likely to be placed in registers */
- alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+ alive_sort(&alive, (alive_comp_t)spill_cost_sort);
/* update remapping info */
for(size_t i = 0; i < alive_len(&alive); ++i) {
@@ -3004,13 +3121,18 @@ static void assign_fprs(struct ejit_func *f)
}
/* remap locations */
- for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) {
- struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i);
+ for (size_t i = 0; i < fpr_stats_len(&fprs); ++i) {
+ struct fpr_stat *fpr = fpr_stats_at(&fprs, i);
+ if (fpr->prio == 0)
+ continue;
+
struct alive_slot *a = alive_at(&alive, fpr->fno);
- fpr->fno = a->remap;
+ struct fpr_stat *orig = fpr_stats_at(&f->fpr, fpr->f.f);
+ orig->fno = a->remap;
}
alive_destroy(&alive);
+ fpr_stats_destroy(&fprs);
}
static size_t align_up(size_t a, size_t n)
@@ -3032,6 +3154,9 @@ bool ejit_compile(struct ejit_func *f, bool use_64, bool im_scawed)
if (!init_jit())
return false;
+ /* sort barriers so they can be used to extend register lifetimes in
+ * loops */
+ barriers_sort(&f->barriers, barrier_sort);
assign_gprs(f);
assign_fprs(f);