about | summary | refs | log | tree | commit | diff
path: root/src/compile/compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/compile/compile.c')
-rw-r--r-- src/compile/compile.c 1115
1 files changed, 857 insertions, 258 deletions
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 490bc43..5432bc1 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -19,6 +19,27 @@ struct reloc_helper {
#define VEC_NAME addrs
#include "../vec.h"
+/* Call target picked by the EJIT_OP_CALLI case of compile_fn_body for any
+ * callee rtype that is not 64-bit integer, float or double. Assertions are
+ * skipped since earlier type checking already guarantees they would hold.
+ * Runs f through ejit_run with a NULL fourth argument and hands back the `i`
+ * member of the result. */
+static long checked_run_i(struct ejit_func *f, size_t argc,
+			  struct ejit_arg args[argc])
+{
+	/* the cast spells out the conversion the return statement performs */
+	return (long)ejit_run(f, argc, args, NULL).i;
+}
+
+/* Call target picked by the EJIT_OP_CALLI case of compile_fn_body when the
+ * callee rtype is EJIT_INT64 or EJIT_UINT64. Runs f through ejit_run with a
+ * NULL fourth argument and hands back the `i` member of the result.
+ * NOTE(review): this reads the same `i` member as checked_run_i; confirm that
+ * member is 64 bits wide on every supported target. */
+static int64_t checked_run_l(struct ejit_func *f, size_t argc,
+			     struct ejit_arg args[argc])
+{
+	/* the cast spells out the conversion the return statement performs */
+	return (int64_t)ejit_run(f, argc, args, NULL).i;
+}
+
+/* Call target picked by the EJIT_OP_CALLI case of compile_fn_body when the
+ * callee rtype is EJIT_FLOAT. Runs f through ejit_run with a NULL fourth
+ * argument and hands back the `f` member of the result as a float. */
+static float checked_run_f(struct ejit_func *f, size_t argc,
+			   struct ejit_arg args[argc])
+{
+	/* the cast spells out the conversion the return statement performs */
+	return (float)ejit_run(f, argc, args, NULL).f;
+}
+
+/* Call target picked by the EJIT_OP_CALLI case of compile_fn_body when the
+ * callee rtype is EJIT_DOUBLE. Runs f through ejit_run with a NULL fourth
+ * argument and hands back the `f` member of the result as a double.
+ * NOTE(review): this reads the same `f` member as checked_run_f; confirm that
+ * member carries full double precision. */
+static double checked_run_d(struct ejit_func *f, size_t argc,
+			    struct ejit_arg args[argc])
+{
+	/* the cast spells out the conversion the return statement performs */
+	return (double)ejit_run(f, argc, args, NULL).f;
+}
+
static void *alloc_arena(size_t size, bool im_scawed)
{
return mmap(NULL, size,
@@ -26,6 +47,24 @@ static void *alloc_arena(size_t size, bool im_scawed)
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}
+static void assert_helper(const char *msg) /* runtime failure hook: the DEBUG checks in EJIT_OP_TAILR emit a jit_calli_1 to this function with a message string as the only argument */
+{
+ assert(false && msg); /* always-false condition, so this trips whenever assertions are compiled in; msg only takes part in the expression */
+}
+
+/* Tell whether register r is unused by the first argc operands of args.
+ * Only operands of kind JIT_OPERAND_KIND_GPR are considered; the answer is
+ * false as soon as one of them has the same register number as r, and true
+ * when none does (including when argc is 0). */
+static bool gpr_free(size_t argc, jit_operand_t args[argc], jit_gpr_t r)
+{
+	for (size_t n = 0; n < argc; ++n) {
+		const jit_operand_t *op = &args[n];
+
+		/* the register number is only compared for GPR operands */
+		if (op->kind == JIT_OPERAND_KIND_GPR
+		    && jit_gpr_regno(op->loc.gpr.gpr) == jit_gpr_regno(r))
+			return false;
+	}
+
+	return true;
+}
+
static void free_arena(void *arena, size_t size)
{
munmap(arena, size);
@@ -545,7 +584,7 @@ static void compile_absr_d(struct ejit_func *f, jit_state_t *j,
{
jit_fpr_t r0 = getfpr(f, i.r0, 0);
jit_fpr_t r1 = getloc_d(f, j, i.r1, 1);
- jit_absr_f(j, r0, r1);
+ jit_absr_d(j, r0, r1);
putloc_d(f, j, i.r0, r0);
}
@@ -1305,6 +1344,23 @@ static void compile_truncr_f_32(struct ejit_func *f, jit_state_t *j,
#endif
}
+/* Code generation for EJIT_OP_SQRTR_F (dispatched from compile_fn_body).
+ * Obtains the destination register for i.r0 via getfpr and the source for
+ * i.r1 via getloc_f, emits jit_sqrtr_f on them, then writes the destination
+ * back with putloc_f. */
+static void compile_sqrtr_f(struct ejit_func *f, jit_state_t *j,
+			    struct ejit_insn i)
+{
+	jit_fpr_t r0 = getfpr(f, i.r0, 0);
+	jit_fpr_t r1 = getloc_f(f, j, i.r1, 1);
+	jit_sqrtr_f(j, r0, r1);
+	/* single-precision result: store with putloc_f, matching getloc_f
+	 * above and the other *_f helpers (compile_maxr_f, compile_minr_f);
+	 * the previous putloc_d was the double-precision store */
+	putloc_f(f, j, i.r0, r0);
+}
+
+/* Code generation for EJIT_OP_SQRTR_D (dispatched from compile_fn_body).
+ * Obtains the destination register for i.r0 via getfpr and the source for
+ * i.r1 via getloc_d, emits jit_sqrtr_d on them, then writes the destination
+ * back with putloc_d. */
+static void compile_sqrtr_d(struct ejit_func *f, jit_state_t *j,
+			    struct ejit_insn i)
+{
+	jit_fpr_t dst = getfpr(f, i.r0, 0);
+	jit_fpr_t src = getloc_d(f, j, i.r1, 1);
+
+	jit_sqrtr_d(j, dst, src);
+	putloc_d(f, j, i.r0, dst);
+}
static void compile_reg_cmp(struct ejit_func *f, jit_state_t *j,
struct ejit_insn i,
@@ -1783,7 +1839,7 @@ static enum jit_operand_abi jit_abi_from(enum ejit_type t)
case EJIT_POINTER: return JIT_OPERAND_ABI_POINTER;
case EJIT_FLOAT: return JIT_OPERAND_ABI_FLOAT;
case EJIT_DOUBLE: return JIT_OPERAND_ABI_DOUBLE;
- default:
+ default: break;
}
abort();
@@ -1803,7 +1859,7 @@ static size_t arg_offsetof(enum ejit_type t)
case EJIT_POINTER: return offsetof(struct ejit_arg, p);
case EJIT_FLOAT: return offsetof(struct ejit_arg, f);
case EJIT_DOUBLE: return offsetof(struct ejit_arg, d);
- default:
+ default: break;
};
abort();
@@ -1869,19 +1925,19 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j)
foreach_vec(ii, f->insns) {
struct ejit_insn i = *insns_at(&f->insns, ii);
switch (i.op) {
- case PARAM: {
+ case EJIT_OP_PARAM: {
jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i));
operands_append(&args, p);
break;
}
- case PARAM_F: {
+ case EJIT_OP_PARAM_F: {
jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i));
operands_append(&args, p);
break;
}
- case START: {
+ case EJIT_OP_START: {
/* callr should be smart enough to avoid JIT_R0 if some
* other register wants to write to it */
jit_reloc_t r = jit_mov_addr(j, JIT_R0);
@@ -1889,7 +1945,7 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j)
jit_leave_jit_abi(j, 0, 0, frame);
jit_ret(j); /* should just forward the return value */
- f->direct_call = jit_address(j);
+ f->direct_call = jit_address_to_function_pointer(jit_address(j));
jit_patch_here(j, r);
operands_destroy(&args);
@@ -1901,6 +1957,84 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j)
}
}
+/* Try to patch the most recently recorded relocation immediately.
+ * Looks at the last entry of relocs: when its target index `to` is smaller
+ * than ii, the target address is read from addrs, the relocation is patched
+ * with jit_patch_there and the entry is popped. Otherwise the entry is left
+ * in relocs untouched. relocs must not be empty on entry. */
+static void resolve_top_reloc(jit_state_t *j, struct relocs *relocs,
+			      struct addrs *addrs, size_t ii)
+{
+	assert(relocs_len(relocs) != 0);
+
+	struct reloc_helper top = *relocs_back(relocs);
+	if (top.to < ii) {
+		jit_addr_t dest = *addrs_at(addrs, top.to);
+
+		assert(dest);
+		jit_patch_there(j, top.r, dest);
+		relocs_pop(relocs);
+	}
+}
+
+/* Patch every pending relocation whose target index equals ii.
+ * Each matching entry is patched with jit_patch_there to the address stored
+ * in addrs at index ii and then removed from relocs by moving the last entry
+ * into its slot and shrinking the vector by one. Entries with a different
+ * target are left in place; their relative order is not preserved. */
+static void resolve_relocs(jit_state_t *j, struct relocs *relocs, struct addrs *addrs, size_t ii)
+{
+	size_t ri = 0;
+	while (ri < relocs_len(relocs)) {
+		struct reloc_helper h = *relocs_at(relocs, ri);
+		if (h.to != ii) {
+			ri++;
+			continue;
+		}
+
+		jit_addr_t a = *addrs_at(addrs, ii);
+		jit_reloc_t r = h.r;
+
+		assert(a);
+		jit_patch_there(j, r, a);
+
+		/* 'shift' down: fill the hole with the last entry */
+		if (ri != relocs_len(relocs) - 1)
+			*relocs_at(relocs, ri) = *relocs_back(relocs);
+
+		relocs_shrink(relocs, relocs_len(relocs) - 1);
+
+		/* ri is deliberately not advanced here: the entry that was
+		 * just moved into this slot has not been examined yet and may
+		 * target ii as well */
+	}
+}
+
+/* Code generation for EJIT_OP_MAXR_F (dispatched from compile_fn_body).
+ * Obtains the destination for i.r0 via getfpr and the two sources for i.r1
+ * and i.r2 via getloc_f, emits jit_maxr_f, then writes the destination back
+ * with putloc_f. */
+static void compile_maxr_f(struct ejit_func *f, jit_state_t *j,
+			   struct ejit_insn i)
+{
+	jit_fpr_t dst = getfpr(f, i.r0, 0);
+	jit_fpr_t lhs = getloc_f(f, j, i.r1, 1);
+	jit_fpr_t rhs = getloc_f(f, j, i.r2, 2);
+
+	jit_maxr_f(j, dst, lhs, rhs);
+	putloc_f(f, j, i.r0, dst);
+}
+
+/* Code generation for EJIT_OP_MAXR_D (dispatched from compile_fn_body).
+ * Obtains the destination for i.r0 via getfpr and the two sources for i.r1
+ * and i.r2 via getloc_d, emits jit_maxr_d, then writes the destination back
+ * with putloc_d. */
+static void compile_maxr_d(struct ejit_func *f, jit_state_t *j,
+			   struct ejit_insn i)
+{
+	jit_fpr_t dst = getfpr(f, i.r0, 0);
+	jit_fpr_t lhs = getloc_d(f, j, i.r1, 1);
+	jit_fpr_t rhs = getloc_d(f, j, i.r2, 2);
+
+	jit_maxr_d(j, dst, lhs, rhs);
+	putloc_d(f, j, i.r0, dst);
+}
+
+/* Code generation for EJIT_OP_MINR_F (dispatched from compile_fn_body).
+ * Obtains the destination for i.r0 via getfpr and the two sources for i.r1
+ * and i.r2 via getloc_f, emits jit_minr_f, then writes the destination back
+ * with putloc_f. */
+static void compile_minr_f(struct ejit_func *f, jit_state_t *j,
+			   struct ejit_insn i)
+{
+	jit_fpr_t dst = getfpr(f, i.r0, 0);
+	jit_fpr_t lhs = getloc_f(f, j, i.r1, 1);
+	jit_fpr_t rhs = getloc_f(f, j, i.r2, 2);
+
+	jit_minr_f(j, dst, lhs, rhs);
+	putloc_f(f, j, i.r0, dst);
+}
+
+/* Code generation for EJIT_OP_MINR_D (dispatched from compile_fn_body).
+ * Obtains the destination for i.r0 via getfpr and the two sources for i.r1
+ * and i.r2 via getloc_d, emits jit_minr_d, then writes the destination back
+ * with putloc_d. */
+static void compile_minr_d(struct ejit_func *f, jit_state_t *j,
+			   struct ejit_insn i)
+{
+	jit_fpr_t dst = getfpr(f, i.r0, 0);
+	jit_fpr_t lhs = getloc_d(f, j, i.r1, 1);
+	jit_fpr_t rhs = getloc_d(f, j, i.r2, 2);
+
+	jit_minr_d(j, dst, lhs, rhs);
+	putloc_d(f, j, i.r0, dst);
+}
+
static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
size_t size)
{
@@ -1922,204 +2056,373 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
struct operands dst = operands_create();
struct operands direct = operands_create();
- struct relocs relocs = relocs_create(sizeof(struct reloc_helper));
+ struct relocs relocs = relocs_create();
struct addrs addrs = addrs_create();
addrs_reserve(&addrs, insns_len(&f->insns));
- void *call = NULL;
-
size_t label = 0;
foreach_vec(ii, f->insns) {
/* if we've hit a label, add it to our vector of label addresses */
if (label < labels_len(&f->labels)) {
if (*labels_at(&f->labels, label) == ii) {
compile_label(j, ii, &addrs);
+ resolve_relocs(j, &relocs, &addrs, ii);
label++;
}
}
struct ejit_insn i = *insns_at(&f->insns, ii);
switch (i.op) {
- case MOVR: compile_movr(f, j, i); break;
- case MOVR_F: compile_movr_f(f, j, i); break;
- case MOVR_D: compile_movr_d(f, j, i); break;
-
- case MOVI: compile_movi(f, j, i); break;
- case MOVI_F: compile_movi_f(f, j, i); break;
- case MOVI_D: compile_movi_d(f, j, i); break;
-
- case ADDR: compile_addr(f, j, i); break;
- case ADDI: compile_addi(f, j, i); break;
- case ADDR_F: compile_addr_f(f, j, i); break;
- case ADDR_D: compile_addr_d(f, j, i); break;
-
- case SUBR: compile_subr(f, j, i); break;
- case SUBI: compile_subi(f, j, i); break;
- case SUBR_F: compile_subr_f(f, j, i); break;
- case SUBR_D: compile_subr_d(f, j, i); break;
-
- case MULR: compile_mulr(f, j, i); break;
- case MULR_F: compile_mulr_f(f, j, i); break;
- case MULR_D: compile_mulr_d(f, j, i); break;
-
- case ANDI: compile_andi(f, j, i); break;
- case ANDR: compile_andr(f, j, i); break;
-
- case ORI: compile_ori(f, j, i); break;
- case ORR: compile_orr(f, j, i); break;
-
- case XORI: compile_xori(f, j, i); break;
- case XORR: compile_xorr(f, j, i); break;
-
- case DIVR: compile_divr(f, j, i); break;
- case DIVR_U: compile_divr_u(f, j, i); break;
- case DIVR_F: compile_divr_f(f, j, i); break;
- case DIVR_D: compile_divr_d(f, j, i); break;
-
- case REMR: compile_remr(f, j, i); break;
- case REMR_U: compile_remr_u(f, j, i); break;
-
- case ABSR_F: compile_absr_f(f, j, i); break;
- case ABSR_D: compile_absr_d(f, j, i); break;
-
- case LSHI: compile_lshi(f, j, i); break;
- case LSHR: compile_lshr(f, j, i); break;
- case RSHI: compile_rshi(f, j, i); break;
- case RSHI_U: compile_rshi_u(f, j, i); break;
- case RSHR: compile_rshr(f, j, i); break;
- case RSHR_U: compile_rshr_u(f, j, i); break;
-
- case STI8: compile_sti8(f, j, i); break;
- case STI16: compile_sti16(f, j, i); break;
- case STI32: compile_sti32(f, j, i); break;
- case STI64: compile_sti64(f, j, i); break;
- case STIF: compile_stif(f, j, i); break;
- case STID: compile_stid(f, j, i); break;
-
- case STXI8: compile_stxi8(f, j, i); break;
- case STXI16: compile_stxi16(f, j, i); break;
- case STXI32: compile_stxi32(f, j, i); break;
- case STXI64: compile_stxi64(f, j, i); break;
- case STXIF: compile_stxif(f, j, i); break;
- case STXID: compile_stxid(f, j, i); break;
-
- case STXR8: compile_stxr8(f, j, i); break;
- case STXR16: compile_stxr16(f, j, i); break;
- case STXR32: compile_stxr32(f, j, i); break;
- case STXR64: compile_stxr64(f, j, i); break;
- case STXRF: compile_stxrf(f, j, i); break;
- case STXRD: compile_stxrd(f, j, i); break;
-
- case LDI8: compile_ldi8(f, j, i); break;
- case LDI16: compile_ldi16(f, j, i); break;
- case LDI32: compile_ldi32(f, j, i); break;
- case LDI64: compile_ldi64(f, j, i); break;
- case LDIU8: compile_ldiu8(f, j, i); break;
- case LDIU16: compile_ldiu16(f, j, i); break;
- case LDIU32: compile_ldiu32(f, j, i); break;
- case LDIU64: compile_ldiu64(f, j, i); break;
- case LDIF: compile_ldif(f, j, i); break;
- case LDID: compile_ldid(f, j, i); break;
-
- case LDXI8: compile_ldxi8(f, j, i); break;
- case LDXI16: compile_ldxi16(f, j, i); break;
- case LDXI32: compile_ldxi32(f, j, i); break;
- case LDXI64: compile_ldxi64(f, j, i); break;
- case LDXIU8: compile_ldxiu8(f, j, i); break;
- case LDXIU16: compile_ldxiu16(f, j, i); break;
- case LDXIU32: compile_ldxiu32(f, j, i); break;
- case LDXIU64: compile_ldxiu64(f, j, i); break;
- case LDXIF: compile_ldxif(f, j, i); break;
- case LDXID: compile_ldxid(f, j, i); break;
-
- case LDXR8: compile_ldxr8(f, j, i); break;
- case LDXR16: compile_ldxr16(f, j, i); break;
- case LDXR32: compile_ldxr32(f, j, i); break;
- case LDXR64: compile_ldxr64(f, j, i); break;
- case LDXRU8: compile_ldxru8(f, j, i); break;
- case LDXRU16: compile_ldxru16(f, j, i); break;
- case LDXRU32: compile_ldxru32(f, j, i); break;
- case LDXRU64: compile_ldxru64(f, j, i); break;
- case LDXRF: compile_ldxrf(f, j, i); break;
- case LDXRD: compile_ldxrd(f, j, i); break;
-
- case COMR: compile_comr(f, j, i); break;
-
- case NEGR: compile_negr(f, j, i); break;
- case NEGR_F: compile_negr_f(f, j, i); break;
- case NEGR_D: compile_negr_d(f, j, i); break;
-
- case EXTR8: compile_extr8(f, j, i); break;
- case EXTR16: compile_extr16(f, j, i); break;
- case EXTR32: compile_extr32(f, j, i); break;
- case EXTRU8: compile_extru8(f, j, i); break;
- case EXTRU16: compile_extru16(f, j, i); break;
- case EXTRU32: compile_extru32(f, j, i); break;
- case EXTRF: compile_extrf(f, j, i); break;
- case EXTRD: compile_extrd(f, j, i); break;
-
- case TRUNCR_D_32: compile_truncr_d_32(f, j, i); break;
- case TRUNCR_D_64: compile_truncr_d_64(f, j, i); break;
- case TRUNCR_F_32: compile_truncr_f_32(f, j, i); break;
- case TRUNCR_F_64: compile_truncr_f_64(f, j, i); break;
-
- case EQR: compile_eqr(f, j, i); break;
- case EQR_F: compile_eqr_f(f, j, i); break;
- case EQR_D: compile_eqr_d(f, j, i); break;
-
- case NER: compile_ner(f, j, i); break;
- case NER_F: compile_ner_f(f, j, i); break;
- case NER_D: compile_ner_d(f, j, i); break;
-
- case GER: compile_ger(f, j, i); break;
- case GER_U: compile_ger_u(f, j, i); break;
- case GER_F: compile_ger_f(f, j, i); break;
- case GER_D: compile_ger_d(f, j, i); break;
-
- case GTR: compile_gtr(f, j, i); break;
- case GTR_U: compile_gtr_u(f, j, i); break;
- case GTR_F: compile_gtr_f(f, j, i); break;
- case GTR_D: compile_gtr_d(f, j, i); break;
-
- case BMCI: compile_bmci(f, j, i, &relocs); break;
- case BMCR: compile_bmcr(f, j, i, &relocs); break;
-
- case BMSI: compile_bmsi(f, j, i, &relocs); break;
- case BMSR: compile_bmsr(f, j, i, &relocs); break;
-
- case BEQR: compile_beqr(f, j, i, &relocs); break;
- case BEQI: compile_beqi(f, j, i, &relocs); break;
- case BEQR_F: compile_beqr_f(f, j, i, &relocs); break;
- case BEQR_D: compile_beqr_d(f, j, i, &relocs); break;
-
- case BNER: compile_bner(f, j, i, &relocs); break;
- case BNEI: compile_bnei(f, j, i, &relocs); break;
- case BNER_F: compile_bner_f(f, j, i, &relocs); break;
- case BNER_D: compile_bner_d(f, j, i, &relocs); break;
-
- case BGER: compile_bger(f, j, i, &relocs); break;
- case BGER_U: compile_bger_u(f, j, i, &relocs); break;
- case BGEI: compile_bgei(f, j, i, &relocs); break;
- case BGEI_U: compile_bgei_u(f, j, i, &relocs); break;
- case BGER_F: compile_bger_f(f, j, i, &relocs); break;
- case BGER_D: compile_bger_d(f, j, i, &relocs); break;
-
- case BGTR: compile_bgtr(f, j, i, &relocs); break;
- case BGTR_U: compile_bgtr_u(f, j, i, &relocs); break;
- case BGTI: compile_bgti(f, j, i, &relocs); break;
- case BGTI_U: compile_bgti_u(f, j, i, &relocs); break;
- case BGTR_F: compile_bgtr_f(f, j, i, &relocs); break;
- case BGTR_D: compile_bgtr_d(f, j, i, &relocs); break;
-
- case BLEI: compile_blei(f, j, i, &relocs); break;
- case BLEI_U: compile_blei_u(f, j, i, &relocs); break;
-
- case BLTI: compile_blti(f, j, i, &relocs); break;
- case BLTI_U: compile_blti_u(f, j, i, &relocs); break;
-
- case JMP: compile_jmp(f, j, i, &relocs); break;
-
- case ARG: {
+ case EJIT_OP_MOVR: compile_movr(f, j, i); break;
+ case EJIT_OP_MOVR_F: compile_movr_f(f, j, i); break;
+ case EJIT_OP_MOVR_D: compile_movr_d(f, j, i); break;
+
+ case EJIT_OP_MOVI: compile_movi(f, j, i); break;
+ case EJIT_OP_MOVI_F: compile_movi_f(f, j, i); break;
+ case EJIT_OP_MOVI_D: compile_movi_d(f, j, i); break;
+
+ case EJIT_OP_ADDR: compile_addr(f, j, i); break;
+ case EJIT_OP_ADDI: compile_addi(f, j, i); break;
+ case EJIT_OP_ADDR_F: compile_addr_f(f, j, i); break;
+ case EJIT_OP_ADDR_D: compile_addr_d(f, j, i); break;
+
+ case EJIT_OP_SUBR: compile_subr(f, j, i); break;
+ case EJIT_OP_SUBI: compile_subi(f, j, i); break;
+ case EJIT_OP_SUBR_F: compile_subr_f(f, j, i); break;
+ case EJIT_OP_SUBR_D: compile_subr_d(f, j, i); break;
+
+ case EJIT_OP_MULR: compile_mulr(f, j, i); break;
+ case EJIT_OP_MULR_F: compile_mulr_f(f, j, i); break;
+ case EJIT_OP_MULR_D: compile_mulr_d(f, j, i); break;
+
+ case EJIT_OP_ANDI: compile_andi(f, j, i); break;
+ case EJIT_OP_ANDR: compile_andr(f, j, i); break;
+
+ case EJIT_OP_ORI: compile_ori(f, j, i); break;
+ case EJIT_OP_ORR: compile_orr(f, j, i); break;
+
+ case EJIT_OP_XORI: compile_xori(f, j, i); break;
+ case EJIT_OP_XORR: compile_xorr(f, j, i); break;
+
+ case EJIT_OP_DIVR: compile_divr(f, j, i); break;
+ case EJIT_OP_DIVR_U: compile_divr_u(f, j, i); break;
+ case EJIT_OP_DIVR_F: compile_divr_f(f, j, i); break;
+ case EJIT_OP_DIVR_D: compile_divr_d(f, j, i); break;
+
+ case EJIT_OP_REMR: compile_remr(f, j, i); break;
+ case EJIT_OP_REMR_U: compile_remr_u(f, j, i); break;
+
+ case EJIT_OP_ABSR_F: compile_absr_f(f, j, i); break;
+ case EJIT_OP_ABSR_D: compile_absr_d(f, j, i); break;
+
+ case EJIT_OP_LSHI: compile_lshi(f, j, i); break;
+ case EJIT_OP_LSHR: compile_lshr(f, j, i); break;
+ case EJIT_OP_RSHI: compile_rshi(f, j, i); break;
+ case EJIT_OP_RSHI_U: compile_rshi_u(f, j, i); break;
+ case EJIT_OP_RSHR: compile_rshr(f, j, i); break;
+ case EJIT_OP_RSHR_U: compile_rshr_u(f, j, i); break;
+
+ case EJIT_OP_STI8: compile_sti8(f, j, i); break;
+ case EJIT_OP_STI16: compile_sti16(f, j, i); break;
+ case EJIT_OP_STI32: compile_sti32(f, j, i); break;
+ case EJIT_OP_STI64: compile_sti64(f, j, i); break;
+ case EJIT_OP_STIF: compile_stif(f, j, i); break;
+ case EJIT_OP_STID: compile_stid(f, j, i); break;
+
+ case EJIT_OP_STXI8: compile_stxi8(f, j, i); break;
+ case EJIT_OP_STXI16: compile_stxi16(f, j, i); break;
+ case EJIT_OP_STXI32: compile_stxi32(f, j, i); break;
+ case EJIT_OP_STXI64: compile_stxi64(f, j, i); break;
+ case EJIT_OP_STXIF: compile_stxif(f, j, i); break;
+ case EJIT_OP_STXID: compile_stxid(f, j, i); break;
+
+ case EJIT_OP_STXR8: compile_stxr8(f, j, i); break;
+ case EJIT_OP_STXR16: compile_stxr16(f, j, i); break;
+ case EJIT_OP_STXR32: compile_stxr32(f, j, i); break;
+ case EJIT_OP_STXR64: compile_stxr64(f, j, i); break;
+ case EJIT_OP_STXRF: compile_stxrf(f, j, i); break;
+ case EJIT_OP_STXRD: compile_stxrd(f, j, i); break;
+
+ case EJIT_OP_LDI8: compile_ldi8(f, j, i); break;
+ case EJIT_OP_LDI16: compile_ldi16(f, j, i); break;
+ case EJIT_OP_LDI32: compile_ldi32(f, j, i); break;
+ case EJIT_OP_LDI64: compile_ldi64(f, j, i); break;
+ case EJIT_OP_LDIU8: compile_ldiu8(f, j, i); break;
+ case EJIT_OP_LDIU16: compile_ldiu16(f, j, i); break;
+ case EJIT_OP_LDIU32: compile_ldiu32(f, j, i); break;
+ case EJIT_OP_LDIU64: compile_ldiu64(f, j, i); break;
+ case EJIT_OP_LDIF: compile_ldif(f, j, i); break;
+ case EJIT_OP_LDID: compile_ldid(f, j, i); break;
+
+ case EJIT_OP_LDXI8: compile_ldxi8(f, j, i); break;
+ case EJIT_OP_LDXI16: compile_ldxi16(f, j, i); break;
+ case EJIT_OP_LDXI32: compile_ldxi32(f, j, i); break;
+ case EJIT_OP_LDXI64: compile_ldxi64(f, j, i); break;
+ case EJIT_OP_LDXIU8: compile_ldxiu8(f, j, i); break;
+ case EJIT_OP_LDXIU16: compile_ldxiu16(f, j, i); break;
+ case EJIT_OP_LDXIU32: compile_ldxiu32(f, j, i); break;
+ case EJIT_OP_LDXIU64: compile_ldxiu64(f, j, i); break;
+ case EJIT_OP_LDXIF: compile_ldxif(f, j, i); break;
+ case EJIT_OP_LDXID: compile_ldxid(f, j, i); break;
+
+ case EJIT_OP_LDXR8: compile_ldxr8(f, j, i); break;
+ case EJIT_OP_LDXR16: compile_ldxr16(f, j, i); break;
+ case EJIT_OP_LDXR32: compile_ldxr32(f, j, i); break;
+ case EJIT_OP_LDXR64: compile_ldxr64(f, j, i); break;
+ case EJIT_OP_LDXRU8: compile_ldxru8(f, j, i); break;
+ case EJIT_OP_LDXRU16: compile_ldxru16(f, j, i); break;
+ case EJIT_OP_LDXRU32: compile_ldxru32(f, j, i); break;
+ case EJIT_OP_LDXRU64: compile_ldxru64(f, j, i); break;
+ case EJIT_OP_LDXRF: compile_ldxrf(f, j, i); break;
+ case EJIT_OP_LDXRD: compile_ldxrd(f, j, i); break;
+
+ case EJIT_OP_COMR: compile_comr(f, j, i); break;
+
+ case EJIT_OP_NEGR: compile_negr(f, j, i); break;
+ case EJIT_OP_NEGR_F: compile_negr_f(f, j, i); break;
+ case EJIT_OP_NEGR_D: compile_negr_d(f, j, i); break;
+
+ case EJIT_OP_EXTR8: compile_extr8(f, j, i); break;
+ case EJIT_OP_EXTR16: compile_extr16(f, j, i); break;
+ case EJIT_OP_EXTR32: compile_extr32(f, j, i); break;
+ case EJIT_OP_EXTRU8: compile_extru8(f, j, i); break;
+ case EJIT_OP_EXTRU16: compile_extru16(f, j, i); break;
+ case EJIT_OP_EXTRU32: compile_extru32(f, j, i); break;
+ case EJIT_OP_EXTRF: compile_extrf(f, j, i); break;
+ case EJIT_OP_EXTRD: compile_extrd(f, j, i); break;
+
+ case EJIT_OP_TRUNCR_D_32: compile_truncr_d_32(f, j, i); break;
+ case EJIT_OP_TRUNCR_D_64: compile_truncr_d_64(f, j, i); break;
+ case EJIT_OP_TRUNCR_F_32: compile_truncr_f_32(f, j, i); break;
+ case EJIT_OP_TRUNCR_F_64: compile_truncr_f_64(f, j, i); break;
+
+ case EJIT_OP_SQRTR_F: compile_sqrtr_f(f, j, i); break;
+ case EJIT_OP_SQRTR_D: compile_sqrtr_d(f, j, i); break;
+
+ case EJIT_OP_MINR_F: compile_minr_f(f, j, i); break;
+ case EJIT_OP_MINR_D: compile_minr_d(f, j, i); break;
+
+ case EJIT_OP_MAXR_F: compile_maxr_f(f, j, i); break;
+ case EJIT_OP_MAXR_D: compile_maxr_d(f, j, i); break;
+
+ case EJIT_OP_EQR: compile_eqr(f, j, i); break;
+ case EJIT_OP_EQR_F: compile_eqr_f(f, j, i); break;
+ case EJIT_OP_EQR_D: compile_eqr_d(f, j, i); break;
+
+ case EJIT_OP_NER: compile_ner(f, j, i); break;
+ case EJIT_OP_NER_F: compile_ner_f(f, j, i); break;
+ case EJIT_OP_NER_D: compile_ner_d(f, j, i); break;
+
+ case EJIT_OP_GER: compile_ger(f, j, i); break;
+ case EJIT_OP_GER_U: compile_ger_u(f, j, i); break;
+ case EJIT_OP_GER_F: compile_ger_f(f, j, i); break;
+ case EJIT_OP_GER_D: compile_ger_d(f, j, i); break;
+
+ case EJIT_OP_GTR: compile_gtr(f, j, i); break;
+ case EJIT_OP_GTR_U: compile_gtr_u(f, j, i); break;
+ case EJIT_OP_GTR_F: compile_gtr_f(f, j, i); break;
+ case EJIT_OP_GTR_D: compile_gtr_d(f, j, i); break;
+
+ case EJIT_OP_BMCI: {
+ compile_bmci(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BMCR: {
+ compile_bmcr(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BMSI: {
+ compile_bmsi(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BMSR: {
+ compile_bmsr(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BEQR: {
+ compile_beqr(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BEQI: {
+ compile_beqi(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BEQR_F: {
+ compile_beqr_f(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BEQR_D: {
+ compile_beqr_d(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BNER: {
+ compile_bner(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BNEI: {
+ compile_bnei(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BNER_F: {
+ compile_bner_f(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BNER_D: {
+ compile_bner_d(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGER: {
+ compile_bger(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGER_U: {
+ compile_bger_u(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGEI: {
+ compile_bgei(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGEI_U: {
+ compile_bgei_u(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGER_F: {
+ compile_bger_f(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGER_D: {
+ compile_bger_d(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGTR: {
+ compile_bgtr(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGTR_U: {
+ compile_bgtr_u(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGTI: {
+ compile_bgti(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGTI_U: {
+ compile_bgti_u(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGTR_F: {
+ compile_bgtr_f(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BGTR_D: {
+ compile_bgtr_d(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BLEI: {
+ compile_blei(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BLEI_U: {
+ compile_blei_u(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BLTI: {
+ compile_blti(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_BLTI_U: {
+ compile_blti_u(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_JMP: {
+ compile_jmp(f, j, i, &relocs);
+ resolve_top_reloc(j, &relocs, &addrs, ii);
+ break;
+ }
+
+ case EJIT_OP_ARG_I: {
+ jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1);
+ jit_operand_t arg = jit_operand_imm(jit_abi_from(i.r1), i.o);
+ operands_append(&src, type);
+ operands_append(&src, arg);
+ operands_append(&direct, arg);
+
+ jit_operand_t to[2] = {
+ jit_operand_mem(JIT_OPERAND_ABI_WORD, JIT_SP,
+ type_offset(i)),
+ jit_operand_mem(jit_abi_from(i.r1), JIT_SP,
+ arg_offset(i))
+ };
+
+ operands_append(&dst, to[0]);
+ operands_append(&dst, to[1]);
+ break;
+ }
+
+ case EJIT_OP_ARG_FI: {
+ assert(false && "immediate floats (currently?) not supported");
+ abort();
+ break;
+ }
+
+ case EJIT_OP_ARG: {
size_t r2 = gpr_stats_at(&f->gpr, i.r2)->rno;
jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1);
@@ -2152,12 +2455,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
break;
}
- case ARG_F: {
+ case EJIT_OP_ARG_F: {
size_t f2 = fpr_stats_at(&f->fpr, i.r2)->fno;
jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1);
jit_operand_t arg;
- if (i.r2 < physfpr_count()) {
+ if (f2 < physfpr_count()) {
/* regular register */
arg = jit_operand_fpr(jit_abi_from(i.r1),
physfpr_at(f2));
@@ -2186,17 +2489,10 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
break;
}
- case ESCAPEI_L:
-#if __WORDSIZE == 64
- /* fallthrough */
-#else
- assert(0 && "trying to compile escapei_l on 32bit arch");
- break;
-#endif
-
- case ESCAPEI_D:
- case ESCAPEI_F:
- case ESCAPEI_I: {
+ case EJIT_OP_ESCAPEI_I:
+ case EJIT_OP_ESCAPEI_L:
+ case EJIT_OP_ESCAPEI_F:
+ case EJIT_OP_ESCAPEI_D: {
save_caller_save_regs(f, j);
jit_operand_t args[2] = {
@@ -2206,7 +2502,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
* argument stack address */
jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0)
};
- compile_imm_call(j, &src, &dst, (void *)(uintptr_t)i.o, 2, args);
+ compile_imm_call(j, &src, &dst, (void *)i.p, 2, args);
restore_caller_save_regs(f, j);
operands_reset(&src);
@@ -2215,21 +2511,165 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
break;
}
- case CALLI_L:
-#if __WORDSIZE == 64
- call = ejit_run_func_l; goto calli;
-#else
- assert(0 && "trying to compile calli_l on 32bit arch");
- break;
+ case EJIT_OP_TAILI: {
+ /* a bit of copy-paste between this and the next func,
+ * hmm */
+ assert(operands_len(&direct) <= 2);
+ struct ejit_func *f = (struct ejit_func *)i.p;
+ assert(f->direct_call);
+
+ jit_operand_t regs[2] = {
+ jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1),
+ jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2)
+ };
+ jit_move_operands(j, regs, direct.buf, operands_len(&direct));
+
+ int frame_size = j->frame_size;
+ jit_shrink_stack(j, stack);
+ jit_leave_jit_abi(j, gprs, fprs, frame);
+
+ /* now move args into place */
+ jit_operand_t args[2] = {};
+ foreach_vec(oi, direct) {
+ args[oi] = *operands_at(&direct, oi);
+ }
+
+ jit_locate_args(j, operands_len(&direct), args);
+ jit_move_operands(j, args, regs, operands_len(&direct));
+ jit_jmpi(j, f->direct_call);
+ j->frame_size = frame_size;
+
+ operands_reset(&src);
+ operands_reset(&dst);
+ operands_reset(&direct);
+ break;
+ }
+
+ case EJIT_OP_TAILR: {
+ /* this is admittedly a slightly roundabout way of
+ * implementing tail calls and is arguably not the most
+ * performant way (if it works at all, heh) but for now
+ * I'm more interested in functionality than raw
+ * performance. Currently only supports two gpr
+ * registers, but should be fairly easy to extend with
+ * fprs as well */
+
+ assert(operands_len(&direct) <= 2);
+ jit_gpr_t r = getloc(f, j, i.r1, 0);
+
+#if defined(DEBUG)
+ jit_ldxi(j, JIT_R1, r, offsetof(struct ejit_func, rtype));
+ jit_reloc_t rtype_reloc = jit_beqi(j, JIT_R1, f->rtype);
+ jit_calli_1(j, assert_helper,
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER,
+ (jit_imm_t)"trying to tail call different rtype"));
+
+ jit_patch_here(j, rtype_reloc);
+
+ jit_ldxi(j, JIT_R1, r, offsetof(struct ejit_func, direct_call));
+ jit_reloc_t direct_reloc = jit_bnei(j, JIT_R1, 0); /* null */
+ jit_calli_1(j, assert_helper,
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER,
+ (jit_imm_t)"trying to tail call interpreted function"));
+ jit_patch_here(j, direct_reloc);
#endif
+ size_t argc = operands_len(&direct);
+
+ /* r0 = target, r1 = arg1, r2 = arg2 */
+ jit_ldxi(j, JIT_R0, r, offsetof(struct ejit_func, direct_call));
+ jit_operand_t regs[3] = {
+ jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1),
+ jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2)
+ };
+ jit_move_operands(j, regs, direct.buf, argc);
+
+ /* with args safely in registers, reset stack/state
+ * while avoiding overwriting the call target */
+ int frame_size = j->frame_size;
+ jit_shrink_stack(j, stack);
+ jit_leave_jit_abi(j, gprs, fprs, frame);
+
+ /* now move args into place, making sure we avoid our
+ * target register */
+ jit_operand_t args[3] = {};
+ for (size_t oi = 0; oi < argc; ++oi) {
+ args[oi] = *operands_at(&direct, oi);
+ }
+
+ jit_locate_args(j, argc, args);
+
+ /* we know that at least one gpr must be free */
+ jit_gpr_t target = gpr_free(argc, args, JIT_R0) ? JIT_R0
+ : gpr_free(argc, args, JIT_R1) ? JIT_R1
+ : gpr_free(argc, args, JIT_R2) ? JIT_R2
+ : (abort(), JIT_R0);
+
+ /* move our target in JIT_R0 to whatever the free
+ * register is to avoid it being clobbered when we move
+ * the actual arguments */
+ args[argc] = jit_operand_gpr(JIT_OPERAND_ABI_POINTER, target);
+ regs[argc] = jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0);
+ jit_move_operands(j, args, regs, argc + 1);
+ jit_jmpr(j, target);
+ j->frame_size = frame_size;
+
+ operands_reset(&src);
+ operands_reset(&dst);
+ operands_reset(&direct);
+ break;
+ }
+
+ case EJIT_OP_CALLR_I:
+ case EJIT_OP_CALLR_L:
+ case EJIT_OP_CALLR_F:
+ case EJIT_OP_CALLR_D: {
+ save_caller_save_regs(f, j);
+
+ jit_gpr_t target = getgpr(f, i.r1, 0);
+
+ /* check if there's a direct call available */
+ jit_ldxi(j, JIT_R1, target, offsetof(struct ejit_func, direct_call));
+ jit_reloc_t direct_reloc = jit_beqi(j, JIT_R0, 0);
+ /* we can do a jit -> jit call */
+ jit_callr(j, JIT_R1, operands_len(&direct), direct.buf);
+ jit_reloc_t out_reloc = jit_jmp(j);
+
+ jit_patch_here(j, direct_reloc);
- case CALLI_F: { call = ejit_run_func_f; goto calli; }
- case CALLI_D: { call = ejit_run_func_d; goto calli; }
- case CALLI_I: { call = ejit_run_func_i; goto calli;
-calli:
+ /* we must do a jit -> bytecode call */
+ jit_operand_t args[3] = {
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R1),
+ jit_operand_imm(JIT_OPERAND_ABI_WORD, operands_len(&src) / 2),
+ /* compile_imm_call populate JIT_R0 with the
+ * argument stack address */
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0)
+ };
+ void *call = NULL;
+ switch (i.op) {
+ case EJIT_OP_CALLR_I: call = ejit_run_func_i; break;
+ case EJIT_OP_CALLR_L: call = ejit_run_func_l; break;
+ case EJIT_OP_CALLR_F: call = ejit_run_func_f; break;
+ case EJIT_OP_CALLR_D: call = ejit_run_func_d; break;
+ default: abort();
+ }
+
+ compile_imm_call(j, &src, &dst, call, 3, args);
+ jit_patch_here(j, out_reloc);
+ restore_caller_save_regs(f, j);
+
+ operands_reset(&src);
+ operands_reset(&dst);
+ operands_reset(&direct);
+ break;
+ }
+
+ case EJIT_OP_CALLI: {
save_caller_save_regs(f, j);
- struct ejit_func *f = (struct ejit_func *)(uintptr_t)i.o;
+ struct ejit_func *f = (struct ejit_func *)i.p;
+#if __WORDSIZE != 64
+ assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64);
+#endif
if (f && f->direct_call) {
jit_calli(j, f->direct_call, operands_len(&direct), direct.buf);
restore_caller_save_regs(f, j);
@@ -2248,6 +2688,16 @@ calli:
* argument stack address */
jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0)
};
+
+ void *call = NULL;
+ switch (f->rtype) {
+ case EJIT_INT64:
+ case EJIT_UINT64: call = checked_run_l; break;
+ case EJIT_FLOAT: call = checked_run_f; break;
+ case EJIT_DOUBLE: call = checked_run_d; break;
+ default: call = checked_run_i; break;
+ }
+
compile_imm_call(j, &src, &dst, call, 3, args);
restore_caller_save_regs(f, j);
@@ -2257,54 +2707,70 @@ calli:
break;
}
- case RETVAL: compile_retval(f, j, i); break;
- case RETVAL_F: compile_retval_f(f, j, i); break;
- case RETVAL_D: compile_retval_d(f, j, i); break;
+ case EJIT_OP_RETVAL: compile_retval(f, j, i); break;
+ case EJIT_OP_RETVAL_F: compile_retval_f(f, j, i); break;
+ case EJIT_OP_RETVAL_D: compile_retval_d(f, j, i); break;
- case RETR: {
+ case EJIT_OP_RETR: {
jit_gpr_t r = getloc(f, j, i.r1, 0);
/* R0 won't get overwritten by jit_leave_jit_abi */
jit_movr(j, JIT_R0, r);
+
+ /* keep track of frame size so we can continue
+ * generating code after 'leaving' the ABI. Bit of a
+ * hack, should maybe codify this better in the
+ * lightening API? */
+ int frame_size = j->frame_size;
jit_shrink_stack(j, stack);
jit_leave_jit_abi(j, gprs, fprs, frame);
jit_retr(j, JIT_R0);
+ j->frame_size = frame_size;
break;
}
- case RETR_F: {
+ case EJIT_OP_RETR_F: {
jit_fpr_t r = getloc_f(f, j, i.r1, 0);
jit_movr_f(j, JIT_F0, r);
+
+ int frame_size = j->frame_size;
jit_shrink_stack(j, stack);
jit_leave_jit_abi(j, gprs, fprs, frame);
jit_retr_f(j, JIT_F0);
+ j->frame_size = frame_size;
break;
}
- case RETR_D: {
+ case EJIT_OP_RETR_D: {
jit_fpr_t r = getloc_d(f, j, i.r1, 0);
jit_movr_d(j, JIT_F0, r);
+
+ int frame_size = j->frame_size;
jit_shrink_stack(j, stack);
jit_leave_jit_abi(j, gprs, fprs, frame);
jit_retr_d(j, JIT_F0);
+ j->frame_size = frame_size;
break;
}
- case RETI: {
+ case EJIT_OP_RETI: {
+ int frame_size = j->frame_size;
jit_shrink_stack(j, stack);
jit_leave_jit_abi(j, gprs, fprs, frame);
jit_reti(j, i.o);
+ j->frame_size = frame_size;
break;
}
- case END: {
- /* 'void' return */
+ case EJIT_OP_END: {
+ /* 'void' return, must be last thing in function so no
+ * need to keep track of frame size */
jit_shrink_stack(j, stack);
jit_leave_jit_abi(j, gprs, fprs, frame);
jit_reti(j, 0);
break;
}
- case PARAM_F: {
+ case EJIT_OP_PARAM_F: {
size_t f2 = fpr_stats_at(&f->fpr, i.r2)->fno;
jit_operand_t to;
@@ -2323,7 +2789,7 @@ calli:
break;
}
- case PARAM: {
+ case EJIT_OP_PARAM: {
size_t r2 = gpr_stats_at(&f->gpr, i.r2)->rno;
jit_operand_t to;
@@ -2342,7 +2808,7 @@ calli:
break;
}
- case START: {
+ case EJIT_OP_START: {
/* parameters should be done by now */
jit_load_args(j, operands_len(&dst), dst.buf);
/* reuse for arguments */
@@ -2354,14 +2820,7 @@ calli:
}
}
- foreach_vec(ri, relocs) {
- struct reloc_helper h = *relocs_at(&relocs, ri);
- jit_addr_t a = *addrs_at(&addrs, h.to);
- jit_reloc_t r = h.r;
-
- assert(a);
- jit_patch_there(j, r, a);
- }
+ assert(relocs_len(&relocs) == 0);
operands_destroy(&src);
operands_destroy(&dst);
@@ -2369,49 +2828,189 @@ calli:
relocs_destroy(&relocs);
addrs_destroy(&addrs);
- if (jit_end(j, &size))
+ if ((f->extern_call = jit_end(j, &size)))
return 0;
return size;
}
-/* highest prio first */
-static int gpr_sort_prio(struct gpr_stat *a, struct gpr_stat *b)
+struct alive_slot { /* one allocation slot; registers whose lifetimes don't overlap may be placed in the same slot */
+ long r; /* index of the register currently occupying the slot, negative ('gravestone') when free */
+ size_t cost; /* sum of the priorities of every register that has been placed in this slot */
+ size_t idx; /* position the slot had in the vector when it was created, i.e. before sorting */
+ size_t remap; /* filled in after sorting: element k holds the sorted position of the slot created at position k */
+};
+
+#define VEC_NAME alive
+#define VEC_TYPE struct alive_slot
+#include "../vec.h"
+
+/* comparator handed to alive_sort: slots with a higher accumulated cost
+ * compare as 'smaller' and therefore end up at the front */
+static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b)
{
- return (int)b->prio - (int)a->prio;
+	const int a_cheaper = a->cost < b->cost;
+	const int a_dearer = a->cost > b->cost;
+
+	/* -1 when a costs more, 1 when it costs less, 0 on a tie */
+	return a_cheaper - a_dearer;
}
-static int fpr_sort_prio(struct fpr_stat *a, struct fpr_stat *b)
+/* Choose a slot in `alive` for register `idx` and write the slot number to
+ * `*rno`. `prio` is added to the chosen slot's cost, `start`/`end` describe
+ * the register's lifetime, and `dead(regs, r, start)` reports whether the
+ * register `r` currently occupying a slot is finished by `start`.
+ *
+ * Slightly more parameters than I would like but I guess it's fine.
+ *
+ * NOTE(review): a free slot is only picked when its cost is strictly above
+ * zero, so a free slot with cost 0 is never reused -- confirm that `prio`
+ * can't be zero for registers that reach the slot search. */
+static void calculate_alive(struct alive *alive, size_t idx,
+		size_t prio, size_t start, size_t end, size_t *rno,
+		void *regs, int (*dead)(void *regs, size_t idx, size_t start))
{
- return (int)b->prio - (int)a->prio;
+	/* single-shot registers all share the reserved slot 0 */
+	if (end <= start + 1) {
+		struct alive_slot *oneshot = alive_at(alive, 0);
+		oneshot->cost += prio;
+		*rno = 0;
+		return;
+	}
+
+	/* retire registers whose lifetime has ended and remember the free
+	 * slot with the highest cost; slot 0 is the oneshot slot, skip it */
+	struct alive_slot *best = NULL;
+	size_t best_idx = 0;
+	for (size_t si = 1; si < alive_len(alive); ++si) {
+		struct alive_slot *slot = alive_at(alive, si);
+		if (slot->r >= 0 && dead(regs, slot->r, start))
+			slot->r = -1; /* gravestone */
+
+		/* still occupied, not a candidate */
+		if (slot->r >= 0)
+			continue;
+
+		if (slot->cost > (best ? best->cost : 0)) {
+			best = slot;
+			best_idx = si;
+		}
+	}
+
+	/* there's a suitable free slot, move in */
+	if (best) {
+		best->cost += prio;
+		best->r = idx;
+		*rno = best_idx;
+		return;
+	}
+
+	/* nothing to reuse, open a new slot at the end */
+	size_t fresh_idx = alive_len(alive);
+	struct alive_slot fresh = {
+		.r = idx,
+		.cost = prio,
+		.idx = fresh_idx
+	};
+	*rno = fresh_idx;
+	alive_append(alive, fresh);
}
-/* sort registers by highest priority first, then renumber registers in the
- * given order. Higher priority is given a physical register first.
- *
- * Note that the `->r` field becomes 'meaningless' after sorting, and you should
- * only use the `->rno` field after this point. Essentially, if you have a
- * register EJIT_GPR(2), you should use `gpr_stats_at(2)->rno` for the 'actual'
- * register number in `getloc` and the like.
- *
- * Can be a bit confusing, but this way we don't have to allocate any new
- * arrays, which is cool. */
+/* lifetime predicate passed to calculate_alive for general purpose registers:
+ * nonzero when register `idx` ends at or before `start` */
+static int gpr_dead(void *regs, size_t idx, size_t start)
+{
+	struct gpr_stat *stat = gpr_stats_at((struct gpr_stats *)regs, idx);
+	return start >= stat->end;
+}
+
+/* identity allocation: general purpose register n gets location n */
+static void linear_gpr_alloc(struct ejit_func *f)
+{
+	const size_t count = gpr_stats_len(&f->gpr);
+	for (size_t n = 0; n < count; ++n)
+		gpr_stats_at(&f->gpr, n)->rno = n;
+}
+
+/* Give every general purpose register in `f` a location number (`->rno`).
+ *
+ * When there are at most physgpr_count() registers they are numbered in
+ * order. Otherwise registers are grouped into slots by calculate_alive, the
+ * slots are sorted by accumulated cost, and each register receives the sorted
+ * position of its slot, so costlier slots get lower numbers.
+ *
+ * There's a fair bit of repetition between this and the fpr case, hmm */
static void assign_gprs(struct ejit_func *f)
{
- gpr_stats_sort(&f->gpr, (vec_comp_t)gpr_sort_prio);
+	/* everything fits into registers, no need to start optimizing */
+	if (gpr_stats_len(&f->gpr) <= physgpr_count()) {
+		linear_gpr_alloc(f);
+		return;
+	}
+
+	struct alive alive = alive_create(gpr_stats_len(&f->gpr));
+
+	/* special oneshot register class, always slot 0 before sorting */
+	struct alive_slot oneshot = {.r = -1, .cost = 0, .idx = 0};
+	alive_append(&alive, oneshot);
+
+	/* at this point ->rno is the pre-sort slot index */
+	foreach_vec(gi, f->gpr) {
+		struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi);
+		calculate_alive(&alive, gi,
+				gpr->prio, gpr->start, gpr->end, &gpr->rno,
+				&f->gpr, gpr_dead);
+	}
+
+	/* sort so that the highest spill cost register classes are at the front and
+	 * as such more likely to be placed in registers */
+	alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+
+	/* update remapping info: the element at a slot's pre-sort index
+	 * records where that slot ended up after sorting */
+	for (size_t i = 0; i < alive_len(&alive); ++i) {
+		struct alive_slot *sorted = alive_at(&alive, i);
+		alive_at(&alive, sorted->idx)->remap = i;
+	}
+
+	/* remap locations from pre-sort slot index to sorted position */
for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) {
- size_t rno = gpr_stats_at(&f->gpr, i)->r.r;
- gpr_stats_at(&f->gpr, rno)->rno = i;
+		struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i);
+		struct alive_slot *lookup = alive_at(&alive, gpr->rno);
+		gpr->rno = lookup->remap;
+	}
+
+	alive_destroy(&alive);
+}
+
+/* lifetime predicate passed to calculate_alive for floating point registers:
+ * nonzero when register `idx` ends at or before `start` */
+static int fpr_dead(void *regs, size_t idx, size_t start)
+{
+	struct fpr_stat *stat = fpr_stats_at((struct fpr_stats *)regs, idx);
+	return start >= stat->end;
+}
+
+/* identity allocation: floating point register n gets location n */
+static void linear_fpr_alloc(struct ejit_func *f)
+{
+	const size_t count = fpr_stats_len(&f->fpr);
+	for (size_t n = 0; n < count; ++n) {
+		fpr_stats_at(&f->fpr, n)->fno = n;
}
}
+/* Give every floating point register in `f` a location number (`->fno`).
+ *
+ * When there are at most physfpr_count() registers they are numbered in
+ * order. Otherwise registers are grouped into slots by calculate_alive, the
+ * slots are sorted by accumulated cost, and each register receives the sorted
+ * position of its slot, so costlier slots get lower numbers. */
static void assign_fprs(struct ejit_func *f)
{
- fpr_stats_sort(&f->fpr, (vec_comp_t)fpr_sort_prio);
+	/* everything fits into registers, no need to start optimizing */
+	if (fpr_stats_len(&f->fpr) <= physfpr_count()) {
+		linear_fpr_alloc(f);
+		return;
+	}
+
+	struct alive alive = alive_create(fpr_stats_len(&f->fpr));
+
+	/* special oneshot register class, always slot 0 before sorting */
+	struct alive_slot oneshot = {.r = -1, .cost = 0, .idx = 0};
+	alive_append(&alive, oneshot);
+
+	/* at this point ->fno is the pre-sort slot index */
+	foreach_vec(fi, f->fpr) {
+		struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi);
+		calculate_alive(&alive, fi,
+				fpr->prio, fpr->start, fpr->end, &fpr->fno,
+				&f->fpr, fpr_dead);
+	}
+
+	/* sort so that the highest spill cost register classes are at the front and
+	 * as such more likely to be placed in registers */
+	alive_sort(&alive, (vec_comp_t)spill_cost_sort);
+
+	/* update remapping info: the element at a slot's pre-sort index
+	 * records where that slot ended up after sorting */
+	for (size_t i = 0; i < alive_len(&alive); ++i) {
+		struct alive_slot *sorted = alive_at(&alive, i);
+		alive_at(&alive, sorted->idx)->remap = i;
+	}
+
+	/* remap locations from pre-sort slot index to sorted position */
for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) {
- size_t rno = fpr_stats_at(&f->fpr, i)->f.f;
- fpr_stats_at(&f->fpr, rno)->fno = i;
+		struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i);
+		struct alive_slot *lookup = alive_at(&alive, fpr->fno);
+		fpr->fno = lookup->remap;
}
+
+	alive_destroy(&alive);
}
static size_t align_up(size_t a, size_t n)
@@ -2425,7 +3024,7 @@ static size_t align_up(size_t a, size_t n)
bool ejit_compile(struct ejit_func *f, bool use_64, bool im_scawed)
{
(void)use_64;
-#if __WORDSIZE == 32
+#if __WORDSIZE != 64
/* can't compile 64bit code on 32bit systems, give up early */
if (use_64)
return false;