aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common.h25
-rw-r--r--src/compile/compile.c223
-rw-r--r--src/ejit.c26
-rw-r--r--src/vec.h119
4 files changed, 207 insertions, 186 deletions
diff --git a/src/common.h b/src/common.h
index dc970f0..41a17cf 100644
--- a/src/common.h
+++ b/src/common.h
@@ -4,21 +4,29 @@
#include <ejit/ejit.h>
#include <stdbool.h>
+struct barrier_tuple {
+ size_t start, end;
+};
+
+#define VEC_TYPE struct barrier_tuple
+#define VEC_NAME barriers
+#include <conts/vec.h>
+
#define VEC_TYPE struct ejit_arg
#define VEC_NAME args
-#include "vec.h"
+#include <conts/vec.h>
#define VEC_TYPE int64_t
#define VEC_NAME gprs
-#include "vec.h"
+#include <conts/vec.h>
#define VEC_TYPE double
#define VEC_NAME fprs
-#include "vec.h"
+#include <conts/vec.h>
#define VEC_TYPE size_t
#define VEC_NAME labels
-#include "vec.h"
+#include <conts/vec.h>
enum ejit_opcode {
EJIT_OP_MOVI,
@@ -263,11 +271,11 @@ struct ejit_insn {
#define VEC_TYPE struct ejit_insn
#define VEC_NAME insns
-#include "vec.h"
+#include <conts/vec.h>
#define VEC_TYPE enum ejit_type
#define VEC_NAME types
-#include "vec.h"
+#include <conts/vec.h>
struct fpr_stat {
struct ejit_fpr f;
@@ -276,7 +284,7 @@ struct fpr_stat {
#define VEC_NAME fpr_stats
#define VEC_TYPE struct fpr_stat
-#include "vec.h"
+#include <conts/vec.h>
struct gpr_stat {
struct ejit_gpr r;
@@ -285,12 +293,13 @@ struct gpr_stat {
#define VEC_NAME gpr_stats
#define VEC_TYPE struct gpr_stat
-#include "vec.h"
+#include <conts/vec.h>
struct ejit_func {
struct types sign;
struct insns insns;
struct labels labels;
+ struct barriers barriers;
enum ejit_type rtype;
struct gpr_stats gpr;
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 5432bc1..2c72b91 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -4,7 +4,7 @@
#define VEC_TYPE jit_operand_t
#define VEC_NAME operands
-#include "../vec.h"
+#include <conts/vec.h>
struct reloc_helper {
jit_reloc_t r;
@@ -13,11 +13,11 @@ struct reloc_helper {
#define VEC_TYPE struct reloc_helper
#define VEC_NAME relocs
-#include "../vec.h"
+#include <conts/vec.h>
#define VEC_TYPE jit_addr_t
#define VEC_NAME addrs
-#include "../vec.h"
+#include <conts/vec.h>
/* skip assertions since we know they must be valid due to type checking earlier */
static long checked_run_i(struct ejit_func *f, size_t argc, struct ejit_arg args[argc])
@@ -1880,13 +1880,11 @@ static jit_off_t type_offset(struct ejit_insn i)
static void fixup_operands(struct operands *operands, size_t fixup)
{
- foreach_vec(i, *operands) {
- jit_operand_t op = *operands_at(operands, i);
- if (op.kind != JIT_OPERAND_KIND_MEM)
+ foreach(operands, op, operands) {
+ if (op->kind != JIT_OPERAND_KIND_MEM)
continue;
- op.loc.mem.offset += fixup;
- *operands_at(operands, i) = op;
+ op->loc.mem.offset += fixup;
}
}
@@ -1922,17 +1920,22 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j)
struct operands args = operands_create(0);
- foreach_vec(ii, f->insns) {
- struct ejit_insn i = *insns_at(&f->insns, ii);
- switch (i.op) {
+ foreach(insns, i, &f->insns) {
+ switch (i->op) {
case EJIT_OP_PARAM: {
- jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i));
+ jit_operand_t p = jit_operand_mem(
+ jit_abi_from(i->r1),
+ JIT_R1, arg_offset(*i));
+
operands_append(&args, p);
break;
}
case EJIT_OP_PARAM_F: {
- jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i));
+ jit_operand_t p = jit_operand_mem(
+ jit_abi_from(i->r1),
+ JIT_R1, arg_offset(*i));
+
operands_append(&args, p);
break;
}
@@ -1971,17 +1974,21 @@ static void resolve_top_reloc(jit_state_t *j, struct relocs *relocs, struct addr
assert(a);
jit_patch_there(j, r, a);
relocs_pop(relocs);
+
+ /* hope this turns into a tailcall */
+ if (relocs_len(relocs))
+ resolve_top_reloc(j, relocs, addrs, ii);
}
static void resolve_relocs(jit_state_t *j, struct relocs *relocs, struct addrs *addrs, size_t ii)
{
- foreach_vec(ri, *relocs) {
- struct reloc_helper h = *relocs_at(relocs, ri);
- if (h.to != ii)
+ for (size_t ri = 0; ri < relocs_len(relocs); ++ri) {
+ struct reloc_helper *h = relocs_at(relocs, ri);
+ if (h->to != ii)
continue;
jit_addr_t a = *addrs_at(addrs, ii);
- jit_reloc_t r = h.r;
+ jit_reloc_t r = h->r;
assert(a);
jit_patch_there(j, r, a);
@@ -2052,16 +2059,16 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
size_t frame = jit_enter_jit_abi(j, gprs, fprs, 0);
size_t stack = jit_align_stack(j, stack_size(f));
- struct operands src = operands_create();
- struct operands dst = operands_create();
- struct operands direct = operands_create();
+ struct operands src = operands_create(f->max_args);
+ struct operands dst = operands_create(f->max_args);
+ struct operands direct = operands_create(f->max_args);
- struct relocs relocs = relocs_create();
- struct addrs addrs = addrs_create();
+ struct relocs relocs = relocs_create(labels_len(&f->labels));
+ struct addrs addrs = addrs_create(0);
addrs_reserve(&addrs, insns_len(&f->insns));
size_t label = 0;
- foreach_vec(ii, f->insns) {
+ for (size_t ii = 0; ii < insns_len(&f->insns); ++ii) {
/* if we've hit a label, add it to our vector of label addresses */
if (label < labels_len(&f->labels)) {
if (*labels_at(&f->labels, label) == ii) {
@@ -2530,7 +2537,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
/* now move args into place */
jit_operand_t args[2] = {};
- foreach_vec(oi, direct) {
+ for (size_t oi = 0; oi < operands_len(&direct); ++oi) {
args[oi] = *operands_at(&direct, oi);
}
@@ -2665,13 +2672,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
case EJIT_OP_CALLI: {
save_caller_save_regs(f, j);
-
- struct ejit_func *f = (struct ejit_func *)i.p;
+ struct ejit_func *t = (struct ejit_func *)i.p;
#if __WORDSIZE != 64
- assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64);
+ assert(t->rtype != EJIT_INT64 && t->rtype != EJIT_UINT64);
#endif
- if (f && f->direct_call) {
- jit_calli(j, f->direct_call, operands_len(&direct), direct.buf);
+ if (t && t->direct_call) {
+ jit_calli(j, t->direct_call, operands_len(&direct), direct.buf);
restore_caller_save_regs(f, j);
operands_reset(&src);
@@ -2690,7 +2696,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
};
void *call = NULL;
- switch (f->rtype) {
+ switch (t->rtype) {
case EJIT_INT64:
case EJIT_UINT64: call = checked_run_l; break;
case EJIT_FLOAT: call = checked_run_f; break;
@@ -2843,7 +2849,7 @@ struct alive_slot {
#define VEC_NAME alive
#define VEC_TYPE struct alive_slot
-#include "../vec.h"
+#include <conts/vec.h>
static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b)
{
@@ -2853,6 +2859,38 @@ static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b)
return a->cost < b->cost;
}
+/* sort barriers according to starting address, smallest to largest */
+static int barrier_sort(struct barrier_tuple *a, struct barrier_tuple *b)
+{
+ if (a->start < b->start)
+ return -1;
+
+ if (a->start > b->start)
+ return 1;
+
+ if (a->end < b->end)
+ return -1;
+
+ return 1;
+}
+
+/* sort gprs in order of starting address */
+static int gpr_start_sort(struct gpr_stat *a, struct gpr_stat *b)
+{
+ if (a->start < b->start)
+ return -1;
+
+ return 1;
+}
+
+static int fpr_start_sort(struct fpr_stat *a, struct fpr_stat *b)
+{
+ if (a->start < b->start)
+ return -1;
+
+ return 1;
+}
+
/* slightly more parameters than I would like but I guess it's fine */
static void calculate_alive(struct alive *alive, size_t idx,
size_t prio, size_t start, size_t end, size_t *rno,
@@ -2871,7 +2909,7 @@ static void calculate_alive(struct alive *alive, size_t idx,
long max_cost_idx = -1;
size_t max_cost = 0;
long counter = 0;
- foreach_vec(ai, *alive) {
+ for (size_t ai = 0; ai < alive_len(alive); ++ai) {
/* skip oneshot */
if (ai == 0)
goto next;
@@ -2916,11 +2954,45 @@ static int gpr_dead(void *regs, size_t idx, size_t start)
static void linear_gpr_alloc(struct ejit_func *f)
{
- foreach_vec(gi, f->gpr) {
+ for (size_t gi = 0; gi < gpr_stats_len(&f->gpr); ++gi) {
gpr_stats_at(&f->gpr, gi)->rno = gi;
}
}
+static void extend_gpr_lifetime(struct gpr_stat *gpr, struct barriers *barriers, size_t bi)
+{
+ if (bi >= barriers_len(barriers))
+ return;
+
+ struct barrier_tuple *barrier = barriers_at(barriers, bi);
+ if (gpr->end < barrier->start)
+ return;
+
+ /* we cross a barrier */
+ gpr->end = gpr->end > barrier->end ? gpr->end : barrier->end;
+
+ /* check if we cross the next barrier as well,
+ * hopefully gets optimized into a tail call */
+ return extend_gpr_lifetime(gpr, barriers, bi + 1);
+}
+
+static void extend_fpr_lifetime(struct fpr_stat *fpr, struct barriers *barriers, size_t bi)
+{
+ if (bi >= barriers_len(barriers))
+ return;
+
+ struct barrier_tuple *barrier = barriers_at(barriers, bi);
+ if (fpr->end < barrier->start)
+ return;
+
+ /* we cross a barrier */
+ fpr->end = fpr->end > barrier->end ? fpr->end : barrier->end;
+
+ /* check if we cross the next barrier as well,
+ * hopefully gets optimized into a tail call */
+ return extend_fpr_lifetime(fpr, barriers, bi + 1);
+}
+
/* there's a fair bit of repetition between this and the gpr case, hmm */
static void assign_gprs(struct ejit_func *f)
{
@@ -2928,22 +3000,44 @@ static void assign_gprs(struct ejit_func *f)
if (gpr_stats_len(&f->gpr) <= physgpr_count())
return linear_gpr_alloc(f);
- struct alive alive = alive_create(gpr_stats_len(&f->gpr));
+ /* create temporary buffer to sort */
+ struct gpr_stats gprs = gpr_stats_create(gpr_stats_len(&f->gpr));
+ foreach(gpr_stats, g, &f->gpr) {
+ gpr_stats_append(&gprs, *g);
+ }
+
+ gpr_stats_sort(&gprs, gpr_start_sort);
+
+ struct alive alive = alive_create(gpr_stats_len(&gprs));
/* special oneshot register class */
struct alive_slot a = {.r = -1, .cost = 0, .idx = 0};
alive_append(&alive, a);
- foreach_vec(gi, f->gpr) {
- struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi);
+ /* barrier index, keeps track of earliest possible barrier we can be
+ * dealing with. Since register start addresses grow upward, we can
+ * fairly easily keep track of which barrier a register cannot cross */
+ size_t bi = 0;
+ for (size_t gi = 0; gi < gpr_stats_len(&gprs); ++gi) {
+ struct gpr_stat *gpr = gpr_stats_at(&gprs, gi);
+ if (gpr->prio == 0)
+ continue;
+
+ extend_gpr_lifetime(gpr, &f->barriers, bi);
+ if (bi < barriers_len(&f->barriers)) {
+ struct barrier_tuple *barrier = barriers_at(&f->barriers, bi);
+ if (gpr->start >= barrier->start)
+ bi++;
+ }
+
calculate_alive(&alive, gi,
gpr->prio, gpr->start, gpr->end, &gpr->rno,
- &f->gpr, gpr_dead);
+ &gprs, gpr_dead);
}
/* sort so that the highest spill cost register classes are at the front and
* as such more likely to be placed in registers */
- alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+ alive_sort(&alive, (alive_comp_t)spill_cost_sort);
/* update remapping info */
for(size_t i = 0; i < alive_len(&alive); ++i) {
@@ -2952,13 +3046,18 @@ static void assign_gprs(struct ejit_func *f)
}
/* remap locations */
- for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) {
- struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i);
+ for (size_t i = 0; i < gpr_stats_len(&gprs); ++i) {
+ struct gpr_stat *gpr = gpr_stats_at(&gprs, i);
+ if (gpr->prio == 0)
+ continue;
+
struct alive_slot *a = alive_at(&alive, gpr->rno);
- gpr->rno = a->remap;
+ struct gpr_stat *orig = gpr_stats_at(&f->gpr, gpr->r.r);
+ orig->rno = a->remap;
}
alive_destroy(&alive);
+ gpr_stats_destroy(&gprs);
}
static int fpr_dead(void *regs, size_t idx, size_t start)
@@ -2969,7 +3068,7 @@ static int fpr_dead(void *regs, size_t idx, size_t start)
static void linear_fpr_alloc(struct ejit_func *f)
{
- foreach_vec(fi, f->fpr) {
+ for (size_t fi = 0; fi < fpr_stats_len(&f->fpr); ++fi) {
fpr_stats_at(&f->fpr, fi)->fno = fi;
}
}
@@ -2980,22 +3079,40 @@ static void assign_fprs(struct ejit_func *f)
if (fpr_stats_len(&f->fpr) <= physfpr_count())
return linear_fpr_alloc(f);
+ struct fpr_stats fprs = fpr_stats_create(fpr_stats_len(&f->fpr));
+ foreach(fpr_stats, r, &f->fpr) {
+ fpr_stats_append(&fprs, *r);
+ }
+
+ fpr_stats_sort(&fprs, fpr_start_sort);
+
struct alive alive = alive_create(fpr_stats_len(&f->fpr));
/* special oneshot register class */
struct alive_slot a = {.r = -1, .cost = 0, .idx = 0};
alive_append(&alive, a);
- foreach_vec(fi, f->fpr) {
- struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi);
+ size_t bi = 0;
+ for (size_t fi = 0; fi < fpr_stats_len(&fprs); ++fi) {
+ struct fpr_stat *fpr = fpr_stats_at(&fprs, fi);
+ if (fpr->prio == 0)
+ continue;
+
+ extend_fpr_lifetime(fpr, &f->barriers, bi);
+ if (bi < barriers_len(&f->barriers)) {
+ struct barrier_tuple *barrier = barriers_at(&f->barriers, bi);
+ if (fpr->start >= barrier->start)
+ bi++;
+ }
+
calculate_alive(&alive, fi,
fpr->prio, fpr->start, fpr->end, &fpr->fno,
- &f->fpr, fpr_dead);
+ &fprs, fpr_dead);
}
/* sort so that the highest spill cost register classes are at the front and
* as such more likely to be placed in registers */
- alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+ alive_sort(&alive, (alive_comp_t)spill_cost_sort);
/* update remapping info */
for(size_t i = 0; i < alive_len(&alive); ++i) {
@@ -3004,13 +3121,18 @@ static void assign_fprs(struct ejit_func *f)
}
/* remap locations */
- for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) {
- struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i);
+ for (size_t i = 0; i < fpr_stats_len(&fprs); ++i) {
+ struct fpr_stat *fpr = fpr_stats_at(&fprs, i);
+ if (fpr->prio == 0)
+ continue;
+
struct alive_slot *a = alive_at(&alive, fpr->fno);
- fpr->fno = a->remap;
+ struct fpr_stat *orig = fpr_stats_at(&f->fpr, fpr->f.f);
+ orig->fno = a->remap;
}
alive_destroy(&alive);
+ fpr_stats_destroy(&fprs);
}
static size_t align_up(size_t a, size_t n)
@@ -3032,6 +3154,9 @@ bool ejit_compile(struct ejit_func *f, bool use_64, bool im_scawed)
if (!init_jit())
return false;
+ /* sort barriers so they can be used to extend register lifetimes in
+ * loops */
+ barriers_sort(&f->barriers, barrier_sort);
assign_gprs(f);
assign_fprs(f);
diff --git a/src/ejit.c b/src/ejit.c
index 6a0ffb0..1bb272d 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -338,11 +338,12 @@ struct ejit_func *ejit_create_func(enum ejit_type rtype, size_t argc,
f->rtype = rtype;
- f->sign = types_create();
- f->insns = insns_create();
- f->labels = labels_create();
- f->gpr = gpr_stats_create();
- f->fpr = fpr_stats_create();
+ f->sign = types_create(0);
+ f->insns = insns_create(0);
+ f->labels = labels_create(0);
+ f->barriers = barriers_create(0);
+ f->gpr = gpr_stats_create(0);
+ f->fpr = fpr_stats_create(0);
f->arena = NULL;
f->direct_call = NULL;
f->extern_call = NULL;
@@ -416,12 +417,10 @@ void ejit_select_compile_func(struct ejit_func *f, size_t gpr, size_t fpr,
/* just get labels, don't actually run anything yet */
ejit_run(f, 0, NULL, &labels);
- foreach_vec(ii, f->insns) {
- struct ejit_insn i = *insns_at(&f->insns, ii);
- void *addr = labels[i.op];
+ foreach(insns, i, &f->insns) {
+ void *addr = labels[i->op];
assert(addr);
- i.addr = addr;
- *insns_at(&f->insns, ii) = i;
+ i->addr = addr;
}
/* doesn't really matter what we put here as long as it isn't 0 */
@@ -436,6 +435,7 @@ void ejit_destroy_func(struct ejit_func *f)
types_destroy(&f->sign);
insns_destroy(&f->insns);
labels_destroy(&f->labels);
+ barriers_destroy(&f->barriers);
gpr_stats_destroy(&f->gpr);
fpr_stats_destroy(&f->fpr);
free(f);
@@ -454,6 +454,12 @@ void ejit_patch(struct ejit_func *f, struct ejit_reloc r, struct ejit_label l)
/** @todo some assert that checks the opcode? */
i.r0 = l.addr;
*insns_at(&f->insns, r.insn) = i;
+
+ struct barrier_tuple tuple = {
+ .start = r.insn > l.addr ? l.addr : r.insn,
+ .end = r.insn > l.addr ? r.insn : l.addr
+ };
+ barriers_append(&f->barriers, tuple);
}
void ejit_taili(struct ejit_func *s, struct ejit_func *f,
diff --git a/src/vec.h b/src/vec.h
deleted file mode 100644
index f5a6fd9..0000000
--- a/src/vec.h
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef VEC_TYPE
-#error "Need vector type"
-#endif
-
-#ifndef VEC_NAME
-#error "Need vector name"
-#endif
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#define UNDERSCORE2(a, b) a##_##b
-#define UNDERSCORE(a, b) UNDERSCORE2(a, b)
-#define VEC(n) UNDERSCORE(VEC_NAME, n)
-
-
-#define VEC_STRUCT VEC_NAME
-struct VEC_STRUCT {
- size_t n;
- size_t s;
- VEC_TYPE *buf;
-};
-
-#ifndef VEC_H
-#define VEC_H
-
-#define foreach_vec(iter, v) \
- for (size_t iter = 0; iter < (v).n; ++iter)
-
-#define vec_uninit(v) \
- (v.buf == NULL)
-#endif
-
-static inline struct VEC_STRUCT VEC(create)()
-{
- const size_t s = 8;
- return (struct VEC_STRUCT) {
- .n = 0,
- .s = s,
- .buf = malloc(s * sizeof(VEC_TYPE)),
- };
-}
-
-static inline size_t VEC(len)(struct VEC_STRUCT *v)
-{
- return v->n;
-}
-
-static inline VEC_TYPE *VEC(at)(struct VEC_STRUCT *v, size_t i)
-{
- assert(i < v->n && "out of vector bounds");
- return &v->buf[i];
-}
-
-static inline VEC_TYPE *VEC(back)(struct VEC_STRUCT *v)
-{
- assert(v->n);
- return &v->buf[v->n - 1];
-}
-
-static inline VEC_TYPE *VEC(pop)(struct VEC_STRUCT *v)
-{
- assert(v->n && "attempting to pop empty vector");
- v->n--;
- return &v->buf[v->n];
-}
-
-static inline void VEC(append)(struct VEC_STRUCT *v, VEC_TYPE n)
-{
- v->n++;
- if (v->n >= v->s) {
- v->s *= 2;
- v->buf = realloc(v->buf, v->s * sizeof(VEC_TYPE));
- assert(v->buf);
- }
-
- v->buf[v->n - 1] = n;
-}
-
-static inline void VEC(reset)(struct VEC_STRUCT *v)
-{
- v->n = 0;
-}
-
-static inline void VEC(destroy)(struct VEC_STRUCT *v) {
- free(v->buf);
-}
-
-typedef int (*vec_comp_t)(void *a, void *b);
-static inline void VEC(sort)(struct VEC_STRUCT *v, vec_comp_t comp)
-{
- qsort(v->buf, v->n, sizeof(VEC_TYPE), (__compar_fn_t)comp);
-}
-
-static inline void VEC(reserve)(struct VEC_STRUCT *v, size_t n)
-{
- if (v->n >= n)
- return;
-
- v->n = n;
- if (v->s >= v->n)
- return;
-
- while (v->s < v->n)
- v->s *= 2;
-
- v->buf = realloc(v->buf, v->s * sizeof(VEC_TYPE));
-}
-
-static inline void VEC(shrink)(struct VEC_STRUCT *v, size_t n)
-{
- /* assert(v->n >= n); */
- v->n = n;
-}
-
-#undef VEC_TYPE
-#undef VEC_NAME