diff options
Diffstat (limited to 'src/compile/compile.c')
-rw-r--r-- | src/compile/compile.c | 1115 |
1 files changed, 857 insertions, 258 deletions
diff --git a/src/compile/compile.c b/src/compile/compile.c index 490bc43..5432bc1 100644 --- a/src/compile/compile.c +++ b/src/compile/compile.c @@ -19,6 +19,27 @@ struct reloc_helper { #define VEC_NAME addrs #include "../vec.h" +/* skip assertions since we know they must be valid due to type checking earlier */ +static long checked_run_i(struct ejit_func *f, size_t argc, struct ejit_arg args[argc]) +{ + return ejit_run(f, argc, args, NULL).i; +} + +static int64_t checked_run_l(struct ejit_func *f, size_t argc, struct ejit_arg args[argc]) +{ + return ejit_run(f, argc, args, NULL).i; +} + +static float checked_run_f(struct ejit_func *f, size_t argc, struct ejit_arg args[argc]) +{ + return ejit_run(f, argc, args, NULL).f; +} + +/* NOTE(review): reads the .f member for a double result, exactly like checked_run_f; confirm that the type returned by ejit_run() keeps doubles in .f */ +static double checked_run_d(struct ejit_func *f, size_t argc, struct ejit_arg args[argc]) +{ + return ejit_run(f, argc, args, NULL).f; +} + static void *alloc_arena(size_t size, bool im_scawed) { return mmap(NULL, size, @@ -26,6 +47,24 @@ static void *alloc_arena(size_t size, bool im_scawed) MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } +/* always fails its assertion; used as a call target by the generated DEBUG checks */ +static void assert_helper(const char *msg) +{ + assert(false && msg); +} + +/* true when no GPR operand in args[0..argc) uses the same register number as r */ +static bool gpr_free(size_t argc, jit_operand_t args[argc], jit_gpr_t r) +{ + for (size_t i = 0; i < argc; ++i) { + if (args[i].kind != JIT_OPERAND_KIND_GPR) + continue; + + if (jit_gpr_regno(args[i].loc.gpr.gpr) == jit_gpr_regno(r)) + return false; + } + + return true; +} + static void free_arena(void *arena, size_t size) { munmap(arena, size); @@ -545,7 +584,7 @@ static void compile_absr_d(struct ejit_func *f, jit_state_t *j, { jit_fpr_t r0 = getfpr(f, i.r0, 0); jit_fpr_t r1 = getloc_d(f, j, i.r1, 1); - jit_absr_f(j, r0, r1); + jit_absr_d(j, r0, r1); putloc_d(f, j, i.r0, r0); } @@ -1305,6 +1344,23 @@ static void compile_truncr_f_32(struct ejit_func *f, jit_state_t *j, #endif } +/* single-precision square root, r0 = sqrt(r1); load, compute and store all use the float (_f) helpers */ +static void compile_sqrtr_f(struct ejit_func *f, jit_state_t *j, + struct ejit_insn i) +{ + jit_fpr_t r0 = getfpr(f, i.r0, 0); + jit_fpr_t r1 = getloc_f(f, j, i.r1, 
1); + jit_sqrtr_f(j, r0, r1); + putloc_f(f, j, i.r0, r0); +} + +/* double-precision square root, r0 = sqrt(r1) */ +static void compile_sqrtr_d(struct ejit_func *f, jit_state_t *j, + struct ejit_insn i) +{ + jit_fpr_t r0 = getfpr(f, i.r0, 0); + jit_fpr_t r1 = getloc_d(f, j, i.r1, 1); + jit_sqrtr_d(j, r0, r1); + putloc_d(f, j, i.r0, r0); +} static void compile_reg_cmp(struct ejit_func *f, jit_state_t *j, struct ejit_insn i, @@ -1783,7 +1839,7 @@ static enum jit_operand_abi jit_abi_from(enum ejit_type t) case EJIT_POINTER: return JIT_OPERAND_ABI_POINTER; case EJIT_FLOAT: return JIT_OPERAND_ABI_FLOAT; case EJIT_DOUBLE: return JIT_OPERAND_ABI_DOUBLE; - default: + default: break; } abort(); @@ -1803,7 +1859,7 @@ static size_t arg_offsetof(enum ejit_type t) case EJIT_POINTER: return offsetof(struct ejit_arg, p); case EJIT_FLOAT: return offsetof(struct ejit_arg, f); case EJIT_DOUBLE: return offsetof(struct ejit_arg, d); - default: + default: break; }; abort(); @@ -1869,19 +1925,19 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j) foreach_vec(ii, f->insns) { struct ejit_insn i = *insns_at(&f->insns, ii); switch (i.op) { - case PARAM: { + case EJIT_OP_PARAM: { jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i)); operands_append(&args, p); break; } - case PARAM_F: { + case EJIT_OP_PARAM_F: { jit_operand_t p = jit_operand_mem(jit_abi_from(i.r1), JIT_R1, arg_offset(i)); operands_append(&args, p); break; } - case START: { + case EJIT_OP_START: { /* callr should be smart enough to avoid JIT_R0 if some * other register wants to write to it */ jit_reloc_t r = jit_mov_addr(j, JIT_R0); @@ -1889,7 +1945,7 @@ static void compile_trampoline(struct ejit_func *f, jit_state_t *j) jit_leave_jit_abi(j, 0, 0, frame); jit_ret(j); /* should just forward the return value */ - f->direct_call = jit_address(j); + f->direct_call = jit_address_to_function_pointer(jit_address(j)); jit_patch_here(j, r); operands_destroy(&args); @@ -1901,6 +1957,84 @@ static void compile_trampoline(struct ejit_func *f, 
jit_state_t *j) } } +/* Called right after the branch at instruction ii queued its reloc: a target whose label has already been emitted (h.to <= ii) is patched and popped immediately, a forward target stays queued until resolve_relocs() reaches its label */ +static void resolve_top_reloc(jit_state_t *j, struct relocs *relocs, struct addrs *addrs, size_t ii) +{ + assert(relocs_len(relocs) != 0); + + struct reloc_helper h = *relocs_back(relocs); + /* the label for ii itself is emitted before instruction ii, so only + * strictly later targets are still unknown; a branch to its own + * label must be patched here or it is never patched at all */ + if (h.to > ii) + return; + + jit_addr_t a = *addrs_at(addrs, h.to); + jit_reloc_t r = h.r; + + assert(a); + jit_patch_there(j, r, a); + relocs_pop(relocs); +} + +/* Patch every queued reloc that targets label ii (whose address must already be in addrs) and drop it from the queue; order of the remaining entries is not preserved */ +static void resolve_relocs(jit_state_t *j, struct relocs *relocs, struct addrs *addrs, size_t ii) +{ + size_t ri = 0; + while (ri < relocs_len(relocs)) { + struct reloc_helper h = *relocs_at(relocs, ri); + if (h.to != ii) { + ri++; + continue; + } + + jit_addr_t a = *addrs_at(addrs, ii); + jit_reloc_t r = h.r; + + assert(a); + jit_patch_there(j, r, a); + + /* swap-remove: move the last entry into this slot and look at + * the slot again instead of advancing, since the entry that + * was moved in may target this label as well */ + if (ri != relocs_len(relocs) - 1) + *relocs_at(relocs, ri) = *relocs_back(relocs); + + assert(relocs_len(relocs) != 0); + relocs_shrink(relocs, relocs_len(relocs) - 1); + } +} + +static void compile_maxr_f(struct ejit_func *f, jit_state_t *j, + struct ejit_insn i) +{ + jit_fpr_t r0 = getfpr(f, i.r0, 0); + jit_fpr_t r1 = getloc_f(f, j, i.r1, 1); + jit_fpr_t r2 = getloc_f(f, j, i.r2, 2); + jit_maxr_f(j, r0, r1, r2); + putloc_f(f, j, i.r0, r0); +} + +static void compile_maxr_d(struct ejit_func *f, jit_state_t *j, + struct ejit_insn i) +{ + jit_fpr_t r0 = getfpr(f, i.r0, 0); + jit_fpr_t r1 = getloc_d(f, j, i.r1, 1); + jit_fpr_t r2 = getloc_d(f, j, i.r2, 2); + jit_maxr_d(j, r0, r1, r2); + putloc_d(f, j, i.r0, r0); +} + +static void compile_minr_f(struct ejit_func *f, jit_state_t *j, + struct ejit_insn i) +{ + jit_fpr_t r0 = getfpr(f, i.r0, 0); + jit_fpr_t r1 = getloc_f(f, j, i.r1, 1); + jit_fpr_t r2 = getloc_f(f, j, i.r2, 2); + jit_minr_f(j, r0, r1, r2); + putloc_f(f, j, i.r0, r0); +} + +static void compile_minr_d(struct ejit_func *f, jit_state_t *j, + struct ejit_insn i) +{ + jit_fpr_t r0 = getfpr(f, i.r0, 0); + jit_fpr_t r1 = getloc_d(f, j, i.r1, 1); + jit_fpr_t r2 = getloc_d(f, j, i.r2, 2); + jit_minr_d(j, r0, r1, r2); + putloc_d(f, j, i.r0, r0); +} + static 
size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, size_t size) { @@ -1922,204 +2056,373 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, struct operands dst = operands_create(); struct operands direct = operands_create(); - struct relocs relocs = relocs_create(sizeof(struct reloc_helper)); + struct relocs relocs = relocs_create(); struct addrs addrs = addrs_create(); addrs_reserve(&addrs, insns_len(&f->insns)); - void *call = NULL; - size_t label = 0; foreach_vec(ii, f->insns) { /* if we've hit a label, add it to our vector of label addresses */ if (label < labels_len(&f->labels)) { if (*labels_at(&f->labels, label) == ii) { compile_label(j, ii, &addrs); + resolve_relocs(j, &relocs, &addrs, ii); label++; } } struct ejit_insn i = *insns_at(&f->insns, ii); switch (i.op) { - case MOVR: compile_movr(f, j, i); break; - case MOVR_F: compile_movr_f(f, j, i); break; - case MOVR_D: compile_movr_d(f, j, i); break; - - case MOVI: compile_movi(f, j, i); break; - case MOVI_F: compile_movi_f(f, j, i); break; - case MOVI_D: compile_movi_d(f, j, i); break; - - case ADDR: compile_addr(f, j, i); break; - case ADDI: compile_addi(f, j, i); break; - case ADDR_F: compile_addr_f(f, j, i); break; - case ADDR_D: compile_addr_d(f, j, i); break; - - case SUBR: compile_subr(f, j, i); break; - case SUBI: compile_subi(f, j, i); break; - case SUBR_F: compile_subr_f(f, j, i); break; - case SUBR_D: compile_subr_d(f, j, i); break; - - case MULR: compile_mulr(f, j, i); break; - case MULR_F: compile_mulr_f(f, j, i); break; - case MULR_D: compile_mulr_d(f, j, i); break; - - case ANDI: compile_andi(f, j, i); break; - case ANDR: compile_andr(f, j, i); break; - - case ORI: compile_ori(f, j, i); break; - case ORR: compile_orr(f, j, i); break; - - case XORI: compile_xori(f, j, i); break; - case XORR: compile_xorr(f, j, i); break; - - case DIVR: compile_divr(f, j, i); break; - case DIVR_U: compile_divr_u(f, j, i); break; - case DIVR_F: 
compile_divr_f(f, j, i); break; - case DIVR_D: compile_divr_d(f, j, i); break; - - case REMR: compile_remr(f, j, i); break; - case REMR_U: compile_remr_u(f, j, i); break; - - case ABSR_F: compile_absr_f(f, j, i); break; - case ABSR_D: compile_absr_d(f, j, i); break; - - case LSHI: compile_lshi(f, j, i); break; - case LSHR: compile_lshr(f, j, i); break; - case RSHI: compile_rshi(f, j, i); break; - case RSHI_U: compile_rshi_u(f, j, i); break; - case RSHR: compile_rshr(f, j, i); break; - case RSHR_U: compile_rshr_u(f, j, i); break; - - case STI8: compile_sti8(f, j, i); break; - case STI16: compile_sti16(f, j, i); break; - case STI32: compile_sti32(f, j, i); break; - case STI64: compile_sti64(f, j, i); break; - case STIF: compile_stif(f, j, i); break; - case STID: compile_stid(f, j, i); break; - - case STXI8: compile_stxi8(f, j, i); break; - case STXI16: compile_stxi16(f, j, i); break; - case STXI32: compile_stxi32(f, j, i); break; - case STXI64: compile_stxi64(f, j, i); break; - case STXIF: compile_stxif(f, j, i); break; - case STXID: compile_stxid(f, j, i); break; - - case STXR8: compile_stxr8(f, j, i); break; - case STXR16: compile_stxr16(f, j, i); break; - case STXR32: compile_stxr32(f, j, i); break; - case STXR64: compile_stxr64(f, j, i); break; - case STXRF: compile_stxrf(f, j, i); break; - case STXRD: compile_stxrd(f, j, i); break; - - case LDI8: compile_ldi8(f, j, i); break; - case LDI16: compile_ldi16(f, j, i); break; - case LDI32: compile_ldi32(f, j, i); break; - case LDI64: compile_ldi64(f, j, i); break; - case LDIU8: compile_ldiu8(f, j, i); break; - case LDIU16: compile_ldiu16(f, j, i); break; - case LDIU32: compile_ldiu32(f, j, i); break; - case LDIU64: compile_ldiu64(f, j, i); break; - case LDIF: compile_ldif(f, j, i); break; - case LDID: compile_ldid(f, j, i); break; - - case LDXI8: compile_ldxi8(f, j, i); break; - case LDXI16: compile_ldxi16(f, j, i); break; - case LDXI32: compile_ldxi32(f, j, i); break; - case LDXI64: compile_ldxi64(f, j, i); break; - 
case LDXIU8: compile_ldxiu8(f, j, i); break; - case LDXIU16: compile_ldxiu16(f, j, i); break; - case LDXIU32: compile_ldxiu32(f, j, i); break; - case LDXIU64: compile_ldxiu64(f, j, i); break; - case LDXIF: compile_ldxif(f, j, i); break; - case LDXID: compile_ldxid(f, j, i); break; - - case LDXR8: compile_ldxr8(f, j, i); break; - case LDXR16: compile_ldxr16(f, j, i); break; - case LDXR32: compile_ldxr32(f, j, i); break; - case LDXR64: compile_ldxr64(f, j, i); break; - case LDXRU8: compile_ldxru8(f, j, i); break; - case LDXRU16: compile_ldxru16(f, j, i); break; - case LDXRU32: compile_ldxru32(f, j, i); break; - case LDXRU64: compile_ldxru64(f, j, i); break; - case LDXRF: compile_ldxrf(f, j, i); break; - case LDXRD: compile_ldxrd(f, j, i); break; - - case COMR: compile_comr(f, j, i); break; - - case NEGR: compile_negr(f, j, i); break; - case NEGR_F: compile_negr_f(f, j, i); break; - case NEGR_D: compile_negr_d(f, j, i); break; - - case EXTR8: compile_extr8(f, j, i); break; - case EXTR16: compile_extr16(f, j, i); break; - case EXTR32: compile_extr32(f, j, i); break; - case EXTRU8: compile_extru8(f, j, i); break; - case EXTRU16: compile_extru16(f, j, i); break; - case EXTRU32: compile_extru32(f, j, i); break; - case EXTRF: compile_extrf(f, j, i); break; - case EXTRD: compile_extrd(f, j, i); break; - - case TRUNCR_D_32: compile_truncr_d_32(f, j, i); break; - case TRUNCR_D_64: compile_truncr_d_64(f, j, i); break; - case TRUNCR_F_32: compile_truncr_f_32(f, j, i); break; - case TRUNCR_F_64: compile_truncr_f_64(f, j, i); break; - - case EQR: compile_eqr(f, j, i); break; - case EQR_F: compile_eqr_f(f, j, i); break; - case EQR_D: compile_eqr_d(f, j, i); break; - - case NER: compile_ner(f, j, i); break; - case NER_F: compile_ner_f(f, j, i); break; - case NER_D: compile_ner_d(f, j, i); break; - - case GER: compile_ger(f, j, i); break; - case GER_U: compile_ger_u(f, j, i); break; - case GER_F: compile_ger_f(f, j, i); break; - case GER_D: compile_ger_d(f, j, i); break; - - case 
GTR: compile_gtr(f, j, i); break; - case GTR_U: compile_gtr_u(f, j, i); break; - case GTR_F: compile_gtr_f(f, j, i); break; - case GTR_D: compile_gtr_d(f, j, i); break; - - case BMCI: compile_bmci(f, j, i, &relocs); break; - case BMCR: compile_bmcr(f, j, i, &relocs); break; - - case BMSI: compile_bmsi(f, j, i, &relocs); break; - case BMSR: compile_bmsr(f, j, i, &relocs); break; - - case BEQR: compile_beqr(f, j, i, &relocs); break; - case BEQI: compile_beqi(f, j, i, &relocs); break; - case BEQR_F: compile_beqr_f(f, j, i, &relocs); break; - case BEQR_D: compile_beqr_d(f, j, i, &relocs); break; - - case BNER: compile_bner(f, j, i, &relocs); break; - case BNEI: compile_bnei(f, j, i, &relocs); break; - case BNER_F: compile_bner_f(f, j, i, &relocs); break; - case BNER_D: compile_bner_d(f, j, i, &relocs); break; - - case BGER: compile_bger(f, j, i, &relocs); break; - case BGER_U: compile_bger_u(f, j, i, &relocs); break; - case BGEI: compile_bgei(f, j, i, &relocs); break; - case BGEI_U: compile_bgei_u(f, j, i, &relocs); break; - case BGER_F: compile_bger_f(f, j, i, &relocs); break; - case BGER_D: compile_bger_d(f, j, i, &relocs); break; - - case BGTR: compile_bgtr(f, j, i, &relocs); break; - case BGTR_U: compile_bgtr_u(f, j, i, &relocs); break; - case BGTI: compile_bgti(f, j, i, &relocs); break; - case BGTI_U: compile_bgti_u(f, j, i, &relocs); break; - case BGTR_F: compile_bgtr_f(f, j, i, &relocs); break; - case BGTR_D: compile_bgtr_d(f, j, i, &relocs); break; - - case BLEI: compile_blei(f, j, i, &relocs); break; - case BLEI_U: compile_blei_u(f, j, i, &relocs); break; - - case BLTI: compile_blti(f, j, i, &relocs); break; - case BLTI_U: compile_blti_u(f, j, i, &relocs); break; - - case JMP: compile_jmp(f, j, i, &relocs); break; - - case ARG: { + case EJIT_OP_MOVR: compile_movr(f, j, i); break; + case EJIT_OP_MOVR_F: compile_movr_f(f, j, i); break; + case EJIT_OP_MOVR_D: compile_movr_d(f, j, i); break; + + case EJIT_OP_MOVI: compile_movi(f, j, i); break; + case 
EJIT_OP_MOVI_F: compile_movi_f(f, j, i); break; + case EJIT_OP_MOVI_D: compile_movi_d(f, j, i); break; + + case EJIT_OP_ADDR: compile_addr(f, j, i); break; + case EJIT_OP_ADDI: compile_addi(f, j, i); break; + case EJIT_OP_ADDR_F: compile_addr_f(f, j, i); break; + case EJIT_OP_ADDR_D: compile_addr_d(f, j, i); break; + + case EJIT_OP_SUBR: compile_subr(f, j, i); break; + case EJIT_OP_SUBI: compile_subi(f, j, i); break; + case EJIT_OP_SUBR_F: compile_subr_f(f, j, i); break; + case EJIT_OP_SUBR_D: compile_subr_d(f, j, i); break; + + case EJIT_OP_MULR: compile_mulr(f, j, i); break; + case EJIT_OP_MULR_F: compile_mulr_f(f, j, i); break; + case EJIT_OP_MULR_D: compile_mulr_d(f, j, i); break; + + case EJIT_OP_ANDI: compile_andi(f, j, i); break; + case EJIT_OP_ANDR: compile_andr(f, j, i); break; + + case EJIT_OP_ORI: compile_ori(f, j, i); break; + case EJIT_OP_ORR: compile_orr(f, j, i); break; + + case EJIT_OP_XORI: compile_xori(f, j, i); break; + case EJIT_OP_XORR: compile_xorr(f, j, i); break; + + case EJIT_OP_DIVR: compile_divr(f, j, i); break; + case EJIT_OP_DIVR_U: compile_divr_u(f, j, i); break; + case EJIT_OP_DIVR_F: compile_divr_f(f, j, i); break; + case EJIT_OP_DIVR_D: compile_divr_d(f, j, i); break; + + case EJIT_OP_REMR: compile_remr(f, j, i); break; + case EJIT_OP_REMR_U: compile_remr_u(f, j, i); break; + + case EJIT_OP_ABSR_F: compile_absr_f(f, j, i); break; + case EJIT_OP_ABSR_D: compile_absr_d(f, j, i); break; + + case EJIT_OP_LSHI: compile_lshi(f, j, i); break; + case EJIT_OP_LSHR: compile_lshr(f, j, i); break; + case EJIT_OP_RSHI: compile_rshi(f, j, i); break; + case EJIT_OP_RSHI_U: compile_rshi_u(f, j, i); break; + case EJIT_OP_RSHR: compile_rshr(f, j, i); break; + case EJIT_OP_RSHR_U: compile_rshr_u(f, j, i); break; + + case EJIT_OP_STI8: compile_sti8(f, j, i); break; + case EJIT_OP_STI16: compile_sti16(f, j, i); break; + case EJIT_OP_STI32: compile_sti32(f, j, i); break; + case EJIT_OP_STI64: compile_sti64(f, j, i); break; + case EJIT_OP_STIF: 
compile_stif(f, j, i); break; + case EJIT_OP_STID: compile_stid(f, j, i); break; + + case EJIT_OP_STXI8: compile_stxi8(f, j, i); break; + case EJIT_OP_STXI16: compile_stxi16(f, j, i); break; + case EJIT_OP_STXI32: compile_stxi32(f, j, i); break; + case EJIT_OP_STXI64: compile_stxi64(f, j, i); break; + case EJIT_OP_STXIF: compile_stxif(f, j, i); break; + case EJIT_OP_STXID: compile_stxid(f, j, i); break; + + case EJIT_OP_STXR8: compile_stxr8(f, j, i); break; + case EJIT_OP_STXR16: compile_stxr16(f, j, i); break; + case EJIT_OP_STXR32: compile_stxr32(f, j, i); break; + case EJIT_OP_STXR64: compile_stxr64(f, j, i); break; + case EJIT_OP_STXRF: compile_stxrf(f, j, i); break; + case EJIT_OP_STXRD: compile_stxrd(f, j, i); break; + + case EJIT_OP_LDI8: compile_ldi8(f, j, i); break; + case EJIT_OP_LDI16: compile_ldi16(f, j, i); break; + case EJIT_OP_LDI32: compile_ldi32(f, j, i); break; + case EJIT_OP_LDI64: compile_ldi64(f, j, i); break; + case EJIT_OP_LDIU8: compile_ldiu8(f, j, i); break; + case EJIT_OP_LDIU16: compile_ldiu16(f, j, i); break; + case EJIT_OP_LDIU32: compile_ldiu32(f, j, i); break; + case EJIT_OP_LDIU64: compile_ldiu64(f, j, i); break; + case EJIT_OP_LDIF: compile_ldif(f, j, i); break; + case EJIT_OP_LDID: compile_ldid(f, j, i); break; + + case EJIT_OP_LDXI8: compile_ldxi8(f, j, i); break; + case EJIT_OP_LDXI16: compile_ldxi16(f, j, i); break; + case EJIT_OP_LDXI32: compile_ldxi32(f, j, i); break; + case EJIT_OP_LDXI64: compile_ldxi64(f, j, i); break; + case EJIT_OP_LDXIU8: compile_ldxiu8(f, j, i); break; + case EJIT_OP_LDXIU16: compile_ldxiu16(f, j, i); break; + case EJIT_OP_LDXIU32: compile_ldxiu32(f, j, i); break; + case EJIT_OP_LDXIU64: compile_ldxiu64(f, j, i); break; + case EJIT_OP_LDXIF: compile_ldxif(f, j, i); break; + case EJIT_OP_LDXID: compile_ldxid(f, j, i); break; + + case EJIT_OP_LDXR8: compile_ldxr8(f, j, i); break; + case EJIT_OP_LDXR16: compile_ldxr16(f, j, i); break; + case EJIT_OP_LDXR32: compile_ldxr32(f, j, i); break; + case 
EJIT_OP_LDXR64: compile_ldxr64(f, j, i); break; + case EJIT_OP_LDXRU8: compile_ldxru8(f, j, i); break; + case EJIT_OP_LDXRU16: compile_ldxru16(f, j, i); break; + case EJIT_OP_LDXRU32: compile_ldxru32(f, j, i); break; + case EJIT_OP_LDXRU64: compile_ldxru64(f, j, i); break; + case EJIT_OP_LDXRF: compile_ldxrf(f, j, i); break; + case EJIT_OP_LDXRD: compile_ldxrd(f, j, i); break; + + case EJIT_OP_COMR: compile_comr(f, j, i); break; + + case EJIT_OP_NEGR: compile_negr(f, j, i); break; + case EJIT_OP_NEGR_F: compile_negr_f(f, j, i); break; + case EJIT_OP_NEGR_D: compile_negr_d(f, j, i); break; + + case EJIT_OP_EXTR8: compile_extr8(f, j, i); break; + case EJIT_OP_EXTR16: compile_extr16(f, j, i); break; + case EJIT_OP_EXTR32: compile_extr32(f, j, i); break; + case EJIT_OP_EXTRU8: compile_extru8(f, j, i); break; + case EJIT_OP_EXTRU16: compile_extru16(f, j, i); break; + case EJIT_OP_EXTRU32: compile_extru32(f, j, i); break; + case EJIT_OP_EXTRF: compile_extrf(f, j, i); break; + case EJIT_OP_EXTRD: compile_extrd(f, j, i); break; + + case EJIT_OP_TRUNCR_D_32: compile_truncr_d_32(f, j, i); break; + case EJIT_OP_TRUNCR_D_64: compile_truncr_d_64(f, j, i); break; + case EJIT_OP_TRUNCR_F_32: compile_truncr_f_32(f, j, i); break; + case EJIT_OP_TRUNCR_F_64: compile_truncr_f_64(f, j, i); break; + + case EJIT_OP_SQRTR_F: compile_sqrtr_f(f, j, i); break; + case EJIT_OP_SQRTR_D: compile_sqrtr_d(f, j, i); break; + + case EJIT_OP_MINR_F: compile_minr_f(f, j, i); break; + case EJIT_OP_MINR_D: compile_minr_d(f, j, i); break; + + case EJIT_OP_MAXR_F: compile_maxr_f(f, j, i); break; + case EJIT_OP_MAXR_D: compile_maxr_d(f, j, i); break; + + case EJIT_OP_EQR: compile_eqr(f, j, i); break; + case EJIT_OP_EQR_F: compile_eqr_f(f, j, i); break; + case EJIT_OP_EQR_D: compile_eqr_d(f, j, i); break; + + case EJIT_OP_NER: compile_ner(f, j, i); break; + case EJIT_OP_NER_F: compile_ner_f(f, j, i); break; + case EJIT_OP_NER_D: compile_ner_d(f, j, i); break; + + case EJIT_OP_GER: compile_ger(f, j, i); 
break; + case EJIT_OP_GER_U: compile_ger_u(f, j, i); break; + case EJIT_OP_GER_F: compile_ger_f(f, j, i); break; + case EJIT_OP_GER_D: compile_ger_d(f, j, i); break; + + case EJIT_OP_GTR: compile_gtr(f, j, i); break; + case EJIT_OP_GTR_U: compile_gtr_u(f, j, i); break; + case EJIT_OP_GTR_F: compile_gtr_f(f, j, i); break; + case EJIT_OP_GTR_D: compile_gtr_d(f, j, i); break; + + case EJIT_OP_BMCI: { + compile_bmci(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BMCR: { + compile_bmcr(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BMSI: { + compile_bmsi(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BMSR: { + compile_bmsr(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BEQR: { + compile_beqr(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BEQI: { + compile_beqi(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BEQR_F: { + compile_beqr_f(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BEQR_D: { + compile_beqr_d(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BNER: { + compile_bner(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BNEI: { + compile_bnei(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BNER_F: { + compile_bner_f(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BNER_D: { + compile_bner_d(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGER: { + compile_bger(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGER_U: { + compile_bger_u(f, j, i, &relocs); + resolve_top_reloc(j, 
&relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGEI: { + compile_bgei(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGEI_U: { + compile_bgei_u(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGER_F: { + compile_bger_f(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGER_D: { + compile_bger_d(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGTR: { + compile_bgtr(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGTR_U: { + compile_bgtr_u(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGTI: { + compile_bgti(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGTI_U: { + compile_bgti_u(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGTR_F: { + compile_bgtr_f(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BGTR_D: { + compile_bgtr_d(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BLEI: { + compile_blei(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BLEI_U: { + compile_blei_u(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BLTI: { + compile_blti(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_BLTI_U: { + compile_blti_u(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_JMP: { + compile_jmp(f, j, i, &relocs); + resolve_top_reloc(j, &relocs, &addrs, ii); + break; + } + + case EJIT_OP_ARG_I: { + jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1); + jit_operand_t arg = jit_operand_imm(jit_abi_from(i.r1), i.o); + 
operands_append(&src, type); + operands_append(&src, arg); + operands_append(&direct, arg); + + jit_operand_t to[2] = { + jit_operand_mem(JIT_OPERAND_ABI_WORD, JIT_SP, + type_offset(i)), + jit_operand_mem(jit_abi_from(i.r1), JIT_SP, + arg_offset(i)) + }; + + operands_append(&dst, to[0]); + operands_append(&dst, to[1]); + break; + } + + case EJIT_OP_ARG_FI: { + assert(false && "immediate floats (currently?) not supported"); + abort(); + break; + } + + case EJIT_OP_ARG: { size_t r2 = gpr_stats_at(&f->gpr, i.r2)->rno; jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1); @@ -2152,12 +2455,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, break; } - case ARG_F: { + case EJIT_OP_ARG_F: { size_t f2 = fpr_stats_at(&f->fpr, i.r2)->fno; jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1); jit_operand_t arg; - if (i.r2 < physfpr_count()) { + if (f2 < physfpr_count()) { /* regular register */ arg = jit_operand_fpr(jit_abi_from(i.r1), physfpr_at(f2)); @@ -2186,17 +2489,10 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, break; } - case ESCAPEI_L: -#if __WORDSIZE == 64 - /* fallthrough */ -#else - assert(0 && "trying to compile escapei_l on 32bit arch"); - break; -#endif - - case ESCAPEI_D: - case ESCAPEI_F: - case ESCAPEI_I: { + case EJIT_OP_ESCAPEI_I: + case EJIT_OP_ESCAPEI_L: + case EJIT_OP_ESCAPEI_F: + case EJIT_OP_ESCAPEI_D: { save_caller_save_regs(f, j); jit_operand_t args[2] = { @@ -2206,7 +2502,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, * argument stack address */ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0) }; - compile_imm_call(j, &src, &dst, (void *)(uintptr_t)i.o, 2, args); + compile_imm_call(j, &src, &dst, (void *)i.p, 2, args); restore_caller_save_regs(f, j); operands_reset(&src); @@ -2215,21 +2511,165 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, break; } - case CALLI_L: -#if __WORDSIZE 
== 64 - call = ejit_run_func_l; goto calli; -#else - assert(0 && "trying to compile calli_l on 32bit arch"); - break; + case EJIT_OP_TAILI: { + /* a bit of copy-paste between this and the next func, + * hmm */ + assert(operands_len(&direct) <= 2); + struct ejit_func *f = (struct ejit_func *)i.p; + assert(f->direct_call); + + jit_operand_t regs[2] = { + jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1), + jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2) + }; + jit_move_operands(j, regs, direct.buf, operands_len(&direct)); + + int frame_size = j->frame_size; + jit_shrink_stack(j, stack); + jit_leave_jit_abi(j, gprs, fprs, frame); + + /* now move args into place */ + jit_operand_t args[2] = {}; + foreach_vec(oi, direct) { + args[oi] = *operands_at(&direct, oi); + } + + jit_locate_args(j, operands_len(&direct), args); + jit_move_operands(j, args, regs, operands_len(&direct)); + jit_jmpi(j, f->direct_call); + j->frame_size = frame_size; + + operands_reset(&src); + operands_reset(&dst); + operands_reset(&direct); + break; + } + + case EJIT_OP_TAILR: { + /* this is admittedly a slightly roundabout way of + * implementing tail calls and is arguably not the most + * performant way (if it works at all, heh) but for now + * I'm more interested in functionality than raw + * performance. 
Currently only supports two gpr + * registers, but should be fairly easy to extend with + * fprs as well */ + + assert(operands_len(&direct) <= 2); + jit_gpr_t r = getloc(f, j, i.r1, 0); + +#if defined(DEBUG) + jit_ldxi(j, JIT_R1, r, offsetof(struct ejit_func, rtype)); + jit_reloc_t rtype_reloc = jit_beqi(j, JIT_R1, f->rtype); + jit_calli_1(j, assert_helper, + jit_operand_imm(JIT_OPERAND_ABI_POINTER, + (jit_imm_t)"trying to tail call different rtype")); + + jit_patch_here(j, rtype_reloc); + + jit_ldxi(j, JIT_R1, r, offsetof(struct ejit_func, direct_call)); + jit_reloc_t direct_reloc = jit_bnei(j, JIT_R1, 0); /* null */ + jit_calli_1(j, assert_helper, + jit_operand_imm(JIT_OPERAND_ABI_POINTER, + (jit_imm_t)"trying to tail call interpreted function")); + jit_patch_here(j, direct_reloc); #endif + size_t argc = operands_len(&direct); + + /* r0 = target, r1 = arg1, r2 = arg2 */ + jit_ldxi(j, JIT_R0, r, offsetof(struct ejit_func, direct_call)); + jit_operand_t regs[3] = { + jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1), + jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2) + }; + jit_move_operands(j, regs, direct.buf, argc); + + /* with args safely in registers, reset stack/state + * while avoiding overwriting the call target */ + int frame_size = j->frame_size; + jit_shrink_stack(j, stack); + jit_leave_jit_abi(j, gprs, fprs, frame); + + /* now move args into place, making sure we avoid our + * target register */ + jit_operand_t args[3] = {}; + for (size_t oi = 0; oi < argc; ++oi) { + args[oi] = *operands_at(&direct, oi); + } + + jit_locate_args(j, argc, args); + + /* we know that at least one gpr must be free */ + jit_gpr_t target = gpr_free(argc, args, JIT_R0) ? JIT_R0 + : gpr_free(argc, args, JIT_R1) ? JIT_R1 + : gpr_free(argc, args, JIT_R2) ? 
JIT_R2 + : (abort(), JIT_R0); + + /* move our target in JIT_R0 to whatever the free + * register is to avoid it being clobbered when we move + * the actual arguments */ + args[argc] = jit_operand_gpr(JIT_OPERAND_ABI_POINTER, target); + regs[argc] = jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0); + jit_move_operands(j, args, regs, argc + 1); + jit_jmpr(j, target); + j->frame_size = frame_size; + + operands_reset(&src); + operands_reset(&dst); + operands_reset(&direct); + break; + } + + case EJIT_OP_CALLR_I: + case EJIT_OP_CALLR_L: + case EJIT_OP_CALLR_F: + case EJIT_OP_CALLR_D: { + save_caller_save_regs(f, j); + + jit_gpr_t target = getgpr(f, i.r1, 0); + + /* check if there's a direct call avaiable */ + jit_ldxi(j, JIT_R1, target, offsetof(struct ejit_func, direct_call)); + jit_reloc_t direct_reloc = jit_beqi(j, JIT_R0, 0); + /* we can do a jit -> jit call */ + jit_callr(j, JIT_R1, operands_len(&direct), direct.buf); + jit_reloc_t out_reloc = jit_jmp(j); + + jit_patch_here(j, direct_reloc); - case CALLI_F: { call = ejit_run_func_f; goto calli; } - case CALLI_D: { call = ejit_run_func_d; goto calli; } - case CALLI_I: { call = ejit_run_func_i; goto calli; -calli: + /* we must do a jit -> bytecode call */ + jit_operand_t args[3] = { + jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R1), + jit_operand_imm(JIT_OPERAND_ABI_WORD, operands_len(&src) / 2), + /* compile_imm_call populate JIT_R0 with the + * argument stack address */ + jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0) + }; + void *call = NULL; + switch (i.op) { + case EJIT_OP_CALLR_I: call = ejit_run_func_i; break; + case EJIT_OP_CALLR_L: call = ejit_run_func_l; break; + case EJIT_OP_CALLR_F: call = ejit_run_func_f; break; + case EJIT_OP_CALLR_D: call = ejit_run_func_d; break; + default: abort(); + } + + compile_imm_call(j, &src, &dst, call, 3, args); + jit_patch_here(j, out_reloc); + restore_caller_save_regs(f, j); + + operands_reset(&src); + operands_reset(&dst); + operands_reset(&direct); + break; + } + + 
case EJIT_OP_CALLI: { save_caller_save_regs(f, j); - struct ejit_func *f = (struct ejit_func *)(uintptr_t)i.o; + struct ejit_func *f = (struct ejit_func *)i.p; +#if __WORDSIZE != 64 + assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64); +#endif if (f && f->direct_call) { jit_calli(j, f->direct_call, operands_len(&direct), direct.buf); restore_caller_save_regs(f, j); @@ -2248,6 +2688,16 @@ calli: * argument stack address */ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0) }; + + void *call = NULL; + switch (f->rtype) { + case EJIT_INT64: + case EJIT_UINT64: call = checked_run_l; break; + case EJIT_FLOAT: call = checked_run_f; break; + case EJIT_DOUBLE: call = checked_run_d; break; + default: call = checked_run_i; break; + } + compile_imm_call(j, &src, &dst, call, 3, args); restore_caller_save_regs(f, j); @@ -2257,54 +2707,70 @@ calli: break; } - case RETVAL: compile_retval(f, j, i); break; - case RETVAL_F: compile_retval_f(f, j, i); break; - case RETVAL_D: compile_retval_d(f, j, i); break; + case EJIT_OP_RETVAL: compile_retval(f, j, i); break; + case EJIT_OP_RETVAL_F: compile_retval_f(f, j, i); break; + case EJIT_OP_RETVAL_D: compile_retval_d(f, j, i); break; - case RETR: { + case EJIT_OP_RETR: { jit_gpr_t r = getloc(f, j, i.r1, 0); /* R0 won't get overwritten by jit_leave_jit_abi */ jit_movr(j, JIT_R0, r); + + /* keep track of frame size so we can continue + * generating code after 'leaving' the ABI. Bit of a + * hack, should maybe codify this better in the + * lightening API? 
*/ + int frame_size = j->frame_size; jit_shrink_stack(j, stack); jit_leave_jit_abi(j, gprs, fprs, frame); jit_retr(j, JIT_R0); + j->frame_size = frame_size; break; } - case RETR_F: { + case EJIT_OP_RETR_F: { jit_fpr_t r = getloc_f(f, j, i.r1, 0); jit_movr_f(j, JIT_F0, r); + + int frame_size = j->frame_size; jit_shrink_stack(j, stack); jit_leave_jit_abi(j, gprs, fprs, frame); jit_retr_f(j, JIT_F0); + j->frame_size = frame_size; break; } - case RETR_D: { + case EJIT_OP_RETR_D: { jit_fpr_t r = getloc_d(f, j, i.r1, 0); jit_movr_d(j, JIT_F0, r); + + int frame_size = j->frame_size; jit_shrink_stack(j, stack); jit_leave_jit_abi(j, gprs, fprs, frame); jit_retr_d(j, JIT_F0); + j->frame_size = frame_size; break; } - case RETI: { + case EJIT_OP_RETI: { + int frame_size = j->frame_size; jit_shrink_stack(j, stack); jit_leave_jit_abi(j, gprs, fprs, frame); jit_reti(j, i.o); + j->frame_size = frame_size; break; } - case END: { - /* 'void' return */ + case EJIT_OP_END: { + /* 'void' return, must be last thing in function so no + * need to keep track of frame size */ jit_shrink_stack(j, stack); jit_leave_jit_abi(j, gprs, fprs, frame); jit_reti(j, 0); break; } - case PARAM_F: { + case EJIT_OP_PARAM_F: { size_t f2 = fpr_stats_at(&f->fpr, i.r2)->fno; jit_operand_t to; @@ -2323,7 +2789,7 @@ calli: break; } - case PARAM: { + case EJIT_OP_PARAM: { size_t r2 = gpr_stats_at(&f->gpr, i.r2)->rno; jit_operand_t to; @@ -2342,7 +2808,7 @@ calli: break; } - case START: { + case EJIT_OP_START: { /* parameters should be done by now */ jit_load_args(j, operands_len(&dst), dst.buf); /* reuse for arguments */ @@ -2354,14 +2820,7 @@ calli: } } - foreach_vec(ri, relocs) { - struct reloc_helper h = *relocs_at(&relocs, ri); - jit_addr_t a = *addrs_at(&addrs, h.to); - jit_reloc_t r = h.r; - - assert(a); - jit_patch_there(j, r, a); - } + assert(relocs_len(&relocs) == 0); operands_destroy(&src); operands_destroy(&dst); @@ -2369,49 +2828,189 @@ calli: relocs_destroy(&relocs); addrs_destroy(&addrs); - if 
(jit_end(j, &size)) + if ((f->extern_call = jit_end(j, &size))) return 0; return size; } -/* highest prio first */ -static int gpr_sort_prio(struct gpr_stat *a, struct gpr_stat *b) +struct alive_slot { + long r; + size_t cost; + size_t idx; + size_t remap; +}; + +#define VEC_NAME alive +#define VEC_TYPE struct alive_slot +#include "../vec.h" + +static int spill_cost_sort(struct alive_slot *a, struct alive_slot *b) { - return (int)b->prio - (int)a->prio; + if (a->cost > b->cost) + return -1; + + return a->cost < b->cost; } -static int fpr_sort_prio(struct fpr_stat *a, struct fpr_stat *b) +/* slightly more parameters than I would like but I guess it's fine */ +static void calculate_alive(struct alive *alive, size_t idx, + size_t prio, size_t start, size_t end, size_t *rno, + void *regs, int (*dead)(void *regs, size_t idx, size_t start)) { - return (int)b->prio - (int)a->prio; + /* single-shot registers go in the special reserved slot */ + if (end <= start + 1) { + *rno = 0; + + struct alive_slot *a = alive_at(alive, 0); + a->cost += prio; + return; + } + + /* kill registers whose lifetime has ended */ + long max_cost_idx = -1; + size_t max_cost = 0; + long counter = 0; + foreach_vec(ai, *alive) { + /* skip oneshot */ + if (ai == 0) + goto next; + + struct alive_slot *a = alive_at(alive, ai); + if (a->r >= 0 && dead(regs, a->r, start)) + a->r = -1; /* gravestone */ + + if (a->r < 0 && a->cost > max_cost) { + max_cost = a->cost; + max_cost_idx = counter; + } + +next: + counter++; + } + + /* there's a suitable slot for us */ + if (max_cost_idx >= 0) { + *rno = max_cost_idx; + + struct alive_slot *a = alive_at(alive, max_cost_idx); + a->cost += prio; + a->r = idx; + return; + } + + *rno = alive_len(alive); + struct alive_slot a = { + .cost = prio, + .r = idx, + .idx = *rno + }; + alive_append(alive, a); } -/* sort registers by highest priority first, then renumber registers in the - * given order. Higher priority is given a physical register first. 
- * - * Note that the `->r` field becomes 'meaningless' after sorting, and you should - * only use the `->rno` field after this point. Essentially, if you have a - * register EJIT_GPR(2), you should use `gpr_stats_at(2)->rno` for the 'actual' - * register number in `getloc` and the like. - * - * Can be a bit confusing, but this way we don't have to allocate any new - * arrays, which is cool. */ +static int gpr_dead(void *regs, size_t idx, size_t start) +{ + struct gpr_stats *gprs = regs; + return gpr_stats_at(gprs, idx)->end <= start; +} + +static void linear_gpr_alloc(struct ejit_func *f) +{ + foreach_vec(gi, f->gpr) { + gpr_stats_at(&f->gpr, gi)->rno = gi; + } +} + +/* there's a fair bit of repetition between this and the gpr case, hmm */ static void assign_gprs(struct ejit_func *f) { - gpr_stats_sort(&f->gpr, (vec_comp_t)gpr_sort_prio); + /* everything fits into registers, no need to start optimizing */ + if (gpr_stats_len(&f->gpr) <= physgpr_count()) + return linear_gpr_alloc(f); + + struct alive alive = alive_create(gpr_stats_len(&f->gpr)); + + /* special oneshot register class */ + struct alive_slot a = {.r = -1, .cost = 0, .idx = 0}; + alive_append(&alive, a); + + foreach_vec(gi, f->gpr) { + struct gpr_stat *gpr = gpr_stats_at(&f->gpr, gi); + calculate_alive(&alive, gi, + gpr->prio, gpr->start, gpr->end, &gpr->rno, + &f->gpr, gpr_dead); + } + + /* sort so that the highest spill cost register classes are at the front and + * as such more likely to be placed in registers */ + alive_sort(&alive, (vec_comp_t)spill_cost_sort); + + /* update remapping info */ + for(size_t i = 0; i < alive_len(&alive); ++i) { + struct alive_slot *a = alive_at(&alive, i); + alive_at(&alive, a->idx)->remap = i; + } + + /* remap locations */ for (size_t i = 0; i < gpr_stats_len(&f->gpr); ++i) { - size_t rno = gpr_stats_at(&f->gpr, i)->r.r; - gpr_stats_at(&f->gpr, rno)->rno = i; + struct gpr_stat *gpr = gpr_stats_at(&f->gpr, i); + struct alive_slot *a = alive_at(&alive, gpr->rno); + 
gpr->rno = a->remap; + } + + alive_destroy(&alive); +} + +static int fpr_dead(void *regs, size_t idx, size_t start) +{ + struct fpr_stats *fprs = regs; + return fpr_stats_at(fprs, idx)->end <= start; +} + +static void linear_fpr_alloc(struct ejit_func *f) +{ + foreach_vec(fi, f->fpr) { + fpr_stats_at(&f->fpr, fi)->fno = fi; } } static void assign_fprs(struct ejit_func *f) { - fpr_stats_sort(&f->fpr, (vec_comp_t)fpr_sort_prio); + /* everything fits into registers, no need to start optimizing */ + if (fpr_stats_len(&f->fpr) <= physfpr_count()) + return linear_fpr_alloc(f); + + struct alive alive = alive_create(fpr_stats_len(&f->fpr)); + + /* special oneshot register class */ + struct alive_slot a = {.r = -1, .cost = 0, .idx = 0}; + alive_append(&alive, a); + + foreach_vec(fi, f->fpr) { + struct fpr_stat *fpr = fpr_stats_at(&f->fpr, fi); + calculate_alive(&alive, fi, + fpr->prio, fpr->start, fpr->end, &fpr->fno, + &f->fpr, fpr_dead); + } + + /* sort so that the highest spill cost register classes are at the front and + * as such more likely to be placed in registers */ + alive_sort(&alive, (vec_comp_t)spill_cost_sort); + + /* update remapping info */ + for(size_t i = 0; i < alive_len(&alive); ++i) { + struct alive_slot *a = alive_at(&alive, i); + alive_at(&alive, a->idx)->remap = i; + } + + /* remap locations */ for (size_t i = 0; i < fpr_stats_len(&f->fpr); ++i) { - size_t rno = fpr_stats_at(&f->fpr, i)->f.f; - fpr_stats_at(&f->fpr, rno)->fno = i; + struct fpr_stat *fpr = fpr_stats_at(&f->fpr, i); + struct alive_slot *a = alive_at(&alive, fpr->fno); + fpr->fno = a->remap; } + + alive_destroy(&alive); } static size_t align_up(size_t a, size_t n) @@ -2425,7 +3024,7 @@ static size_t align_up(size_t a, size_t n) bool ejit_compile(struct ejit_func *f, bool use_64, bool im_scawed) { (void)use_64; -#if __WORDSIZE == 32 +#if __WORDSIZE != 64 /* can't compile 64bit code on 32bit systems, give up early */ if (use_64) return false; |