From 827dec28e4c0b1c4972f1419e0ac23e4dbd9d916 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Wed, 26 Jun 2024 21:51:15 +0300
Subject: enough functionality to implement posthaste

---
 include/ejit/ejit.h    |   5 +-
 scripts/select-compile |   2 -
 src/common.h           |   9 ++
 src/compile/compile.c  | 273 ++++++++++++++++++++++++++++++++++++++++++++++++-
 src/ejit.c             | 104 ++++++++++++++++---
 src/interp.c           | 166 +++++++++++++++++++++---------
 6 files changed, 487 insertions(+), 72 deletions(-)

diff --git a/include/ejit/ejit.h b/include/ejit/ejit.h
index ee3f466..3063ceb 100644
--- a/include/ejit/ejit.h
+++ b/include/ejit/ejit.h
@@ -112,8 +112,6 @@ static inline struct ejit_arg ejit_build_arg_f(enum ejit_type type, double x)
 	return a;
 }
 
-#define EJIT_C(x) ((struct ejit_arg){.c = (int8_t)(x), .type = EJIT_INT8})
-
 /* register allocator could be just pushing everything above V0 or whatever onto
  * the stack, heh */
 struct ejit_gpr {
@@ -160,6 +158,8 @@ void ejit_destroy_func(struct ejit_func *s);
 #define EJIT_GPR(x) ((struct ejit_gpr){.r = (x)})
 #define EJIT_FPR(x) ((struct ejit_fpr){.f = (x)})
 
+#define EJIT_ARG_POINTER(x) ((struct ejit_arg){.p = (x), .type = EJIT_POINTER})
+
 #define EJIT_OPERAND_GPR(x, t) ((struct ejit_operand){ .kind = EJIT_OPERAND_GPR, .r = (long)(x), .type = (t)})
 #define EJIT_OPERAND_FPR(x, t) ((struct ejit_operand){ .kind = EJIT_OPERAND_FPR, .r = (long)(x) .type =(t)})
 #define EJIT_OPERAND_IMM(x, t) ((struct ejit_operand){ .kind = EJIT_OPERAND_IMM, .r = (long)(x), .type = (t)})
@@ -259,6 +259,7 @@ void ejit_ltr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, struc
 
 struct ejit_reloc ejit_bltr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1);
 struct ejit_reloc ejit_bnei(struct ejit_func *s, struct ejit_gpr r0, long o);
+struct ejit_reloc ejit_beqi(struct ejit_func *s, struct ejit_gpr r0, long o);
 struct ejit_reloc ejit_jmp(struct ejit_func *s);
 
 void ejit_patch(struct ejit_func *s, struct ejit_reloc r, struct ejit_label l);
diff --git a/scripts/select-compile b/scripts/select-compile
index be8b40d..8d87409 100755
--- a/scripts/select-compile
+++ b/scripts/select-compile
@@ -5,8 +5,6 @@ if [ -z "$ARCH" ]; then
 	ARCH=$(uname -m)
 fi
 
->&2 echo $ARCH
-
 JIT="src/compile/compile.c"
 NOJIT="src/compile/nocompile.c"
 
diff --git a/src/common.h b/src/common.h
index 4f4fcd7..921fca2 100644
--- a/src/common.h
+++ b/src/common.h
@@ -57,8 +57,17 @@ enum ejit_opcode {
 	ADDR_F,
 	SUBR,
 	SUBR_F,
+	MULR,
+	DIVR,
+	NEGR,
+
+	EQR,
+	LTR,
 
 	BLTR,
+	BNEI,
+	BEQI,
+	JMP,
 
 	PARAM,
 	PARAM_F,
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 807ecc4..d716ca3 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -30,7 +30,7 @@ static size_t stack_size(struct ejit_func *f)
 		+ frploc_count(f) * sizeof(jit_float64_t);
 }
 
-static jit_off_t stack_loc(struct ejit_func *f, size_t l)
+static jit_off_t stack_loc(size_t l)
 {
 	assert(l >= jit_v_num());
 	return (l - jit_v_num()) * sizeof(jit_uword_t);
@@ -43,6 +43,7 @@ static jit_off_t stack_loc_f(struct ejit_func *f, size_t l)
 		+ (l - jit_vf_num()) * sizeof(jit_float64_t);
 }
 
+
 struct reloc_helper {
 	jit_reloc_t r;
 	size_t to;
@@ -50,15 +51,17 @@ struct reloc_helper {
 
 static jit_gpr_t getloc(struct ejit_func *f, jit_state_t *j, size_t l, size_t i)
 {
+	(void)(f);
 	if (l < jit_v_num())
 		return jit_v(l);
 
-	jit_ldxi(j, jit_r(i), JIT_SP, stack_loc(f, l));
+	jit_ldxi(j, jit_r(i), JIT_SP, stack_loc(l));
 	return jit_r(i);
 }
 
 static jit_gpr_t getreg(struct ejit_func *f, size_t l, size_t i)
 {
+	(void)(f);
 	if (l < jit_v_num())
 		return jit_v(l);
 
@@ -67,12 +70,13 @@ static jit_gpr_t getreg(struct ejit_func *f, size_t l, size_t i)
 
 static void putloc(struct ejit_func *f, jit_state_t *j, size_t l, jit_gpr_t r)
 {
+	(void)(f);
 	if (l < jit_v_num()) {
 		assert(jit_v(l).regno == r.regno);
 		return;
 	}
 
-	jit_stxi(j, stack_loc(f, l), JIT_SP, r);
+	jit_stxi(j, stack_loc(l), JIT_SP, r);
 }
 
 static void compile_label(jit_state_t *j, size_t ii, struct vec *labels)
@@ -87,6 +91,14 @@ static void compile_movi(struct ejit_func *f, jit_state_t *j, struct ejit_insn i
 	putloc(f, j, i.r0, r);
 }
 
+/* Emit r0 = r1; getloc() loads the source first when it lives in a stack slot. */
+static void compile_movr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i)
+{
+	jit_gpr_t to = getreg(f, i.r0, 0);
+	jit_movr(j, to, getloc(f, j, i.r1, 1));
+	putloc(f, j, i.r0, to);
+}
+
 static void compile_addr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i)
 {
 	jit_gpr_t dst = getreg(f, i.r0, 0);
@@ -96,6 +108,58 @@ static void compile_addr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i
 	putloc(f, j, i.r0, dst);
 }
 
+static void compile_stxi64(struct ejit_func *f, jit_state_t *j, struct ejit_insn i)
+{
+	jit_gpr_t value = getloc(f, j, i.r0, 0); /* i.r0: the register being stored */
+	jit_gpr_t base = getloc(f, j, i.r1, 1); /* i.r1: base address, displaced by i.o */
+	jit_stxi_l(j, i.o, base, value);
+}
+
+/* Emit a load of the word at (i.r1 + i.o) into i.r0, spilling i.r0 if needed. */
+static void compile_ldxiu64(struct ejit_func *f, jit_state_t *j, struct ejit_insn i)
+{
+	jit_gpr_t dst = getreg(f, i.r0, 0);
+	jit_ldxi_l(j, dst, getloc(f, j, i.r1, 1), i.o);
+	putloc(f, j, i.r0, dst);
+}
+
+static void compile_reg_cmp(struct ejit_func *f, jit_state_t *j, struct ejit_insn i,
+		jit_reloc_t (*bcomp)(jit_state_t *, jit_gpr_t, jit_gpr_t), long same)
+{
+	jit_gpr_t r0 = getreg(f, i.r0, 0);
+	if (i.r1 == i.r2) { /* same register on both sides: result is the constant 'same' */
+		jit_movi(j, r0, same);
+		putloc(f, j, i.r0, r0);
+		return;
+	}
+
+	jit_gpr_t r1 = getloc(f, j, i.r1, 1);
+	jit_gpr_t r2 = getloc(f, j, i.r2, 2);
+	jit_reloc_t branch = bcomp(j, r1, r2);
+
+	/* fallthrough path: bcomp's branch was not taken, result is 0 */
+	jit_movi(j, r0, 0);
+	jit_reloc_t jump = jit_jmp(j);
+	jit_patch_there(j, branch, jit_address(j));
+
+	/* branch target: bcomp's condition held, result is 1 */
+	jit_movi(j, r0, 1);
+	jit_patch_there(j, jump, jit_address(j));
+
+	/* both paths join here; store the result if i.r0 is a stack slot */
+	putloc(f, j, i.r0, r0);
+}
+
+static void compile_eqr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i)
+{
+	compile_reg_cmp(f, j, i, jit_beqr, 1); /* 1: a register always equals itself */
+}
+
+static void compile_ltr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i)
+{
+	compile_reg_cmp(f, j, i, jit_bltr, 0); /* 0: a register is never less than itself */
+}
+
 static void compile_bltr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i, struct vec *relocs)
 {
 	jit_gpr_t c0 = getloc(f, j, i.r1, 0);
@@ -105,6 +169,102 @@ static void compile_bltr(struct ejit_func *f, jit_state_t *j, struct ejit_insn i
 	vect_append(struct reloc_helper, *relocs, &h);
 }
 
+static void compile_beqi(struct ejit_func *f, jit_state_t *j, struct ejit_insn i, struct vec *relocs)
+{
+	/* branch taken when register i.r1 equals immediate i.o; target i.r0 is queued in relocs */
+	jit_gpr_t lhs = getloc(f, j, i.r1, 0);
+	struct reloc_helper pending = {.r = jit_beqi(j, lhs, i.o), .to = i.r0};
+	vect_append(struct reloc_helper, *relocs, &pending);
+}
+
+static void compile_bnei(struct ejit_func *f, jit_state_t *j, struct ejit_insn i, struct vec *relocs)
+{
+	/* branch taken when register i.r1 differs from immediate i.o; target i.r0 is queued in relocs */
+	jit_gpr_t lhs = getloc(f, j, i.r1, 0);
+	struct reloc_helper pending = {.r = jit_bnei(j, lhs, i.o), .to = i.r0};
+	vect_append(struct reloc_helper, *relocs, &pending);
+}
+
+static enum jit_operand_abi jit_abi_from(enum ejit_type t)
+{
+	switch (t) {
+	case EJIT_INT8: return JIT_OPERAND_ABI_INT8;
+	case EJIT_INT16: return JIT_OPERAND_ABI_INT16;
+	case EJIT_INT32: return JIT_OPERAND_ABI_INT32;
+	case EJIT_INT64: return JIT_OPERAND_ABI_INT64;
+	case EJIT_UINT8: return JIT_OPERAND_ABI_UINT8;
+	case EJIT_UINT16: return JIT_OPERAND_ABI_UINT16;
+	case EJIT_UINT32: return JIT_OPERAND_ABI_UINT32;
+	case EJIT_UINT64: return JIT_OPERAND_ABI_UINT64;
+	case EJIT_POINTER: return JIT_OPERAND_ABI_POINTER;
+	case EJIT_FLOAT: return JIT_OPERAND_ABI_FLOAT;
+	case EJIT_DOUBLE: return JIT_OPERAND_ABI_DOUBLE;
+	default: break; /* a label must be followed by a statement before C23 */
+	}
+	/* t has no operand ABI equivalent: treat it as a programming error */
+	abort();
+}
+
+static size_t arg_offsetof(enum ejit_type t)
+{
+	switch (t) {
+	case EJIT_INT8: return offsetof(struct ejit_arg, c);
+	case EJIT_INT16: return offsetof(struct ejit_arg, s);
+	case EJIT_INT32: return offsetof(struct ejit_arg, i);
+	case EJIT_INT64: return offsetof(struct ejit_arg, l);
+	case EJIT_UINT8: return offsetof(struct ejit_arg, uc);
+	case EJIT_UINT16: return offsetof(struct ejit_arg, us);
+	case EJIT_UINT32: return offsetof(struct ejit_arg, ui);
+	case EJIT_UINT64: return offsetof(struct ejit_arg, ul);
+	case EJIT_POINTER: return offsetof(struct ejit_arg, p);
+	case EJIT_FLOAT: return offsetof(struct ejit_arg, f);
+	case EJIT_DOUBLE: return offsetof(struct ejit_arg, d);
+	default: break; /* a label must be followed by a statement before C23 */
+	}
+	/* t names no member of struct ejit_arg: treat it as a programming error */
+	abort();
+}
+
+static jit_off_t arg_offset(struct ejit_insn i)
+{
+	/* i.r0 selects the ejit_arg slot, i.r1 (the type) the member inside it */
+	size_t slot = sizeof(struct ejit_arg) * i.r0;
+	return slot + arg_offsetof(i.r1);
+}
+
+static jit_off_t type_offset(struct ejit_insn i)
+{
+	return offsetof(struct ejit_arg, type) + i.r0 * sizeof(struct ejit_arg); /* type tag of slot i.r0 */
+}
+
+static void fixup_operands(struct vec *operands, size_t fixup)
+{
+	/* add 'fixup' to the offset of every memory operand; others are untouched */
+	foreach_vec(idx, *operands) {
+		jit_operand_t cur = vect_at(jit_operand_t, *operands, idx);
+		if (cur.kind == JIT_OPERAND_KIND_MEM) {
+			cur.loc.mem.offset += fixup;
+			vect_at(jit_operand_t, *operands, idx) = cur;
+		}
+	}
+}
+
+static void compile_imm_call(jit_state_t *j, struct vec *src, struct vec *dst, void *addr, size_t argc, jit_operand_t args[argc])
+{
+	/* every queued argument contributes two operands (type tag + value) */
+	size_t movec = vec_len(src) / 2;
+	size_t fixup = jit_align_stack(j, movec * sizeof(struct ejit_arg));
+	fixup_operands(src, fixup);
+	/* destinations are deliberately not fixed up, only the sources are */
+	/* movec counts arguments, so movec * 2 covers every queued operand */
+	jit_move_operands(j, dst->buf, src->buf, movec * 2);
+	jit_calli(j, addr, argc, args);
+
+	jit_shrink_stack(j, fixup); /* undo the jit_align_stack() above */
+	vec_reset(src); /* both queues start empty for the next call */
+	vec_reset(dst);
+}
+
 static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena, size_t size)
 {
 	jit_begin(j, arena, size);
@@ -112,8 +272,17 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 	size_t fprs = f->fpr >= jit_vf_num() ? jit_vf_num() : f->fpr;
 	size_t frame = jit_enter_jit_abi(j, gprs, fprs, 0);
 
+	/* very important, argc we don't really do anything with but JIT_R1
+	 * contains the argument stack! */
+	jit_load_args_2(j,
+			jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R0),
+			jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R1));
+
 	size_t stack = jit_align_stack(j, stack_size(f));
 
+	struct vec src = vec_create(sizeof(jit_operand_t));
+	struct vec dst = vec_create(sizeof(jit_operand_t));
+
 	struct vec relocs = vec_create(sizeof(struct reloc_helper));
 	struct vec labels = vec_create(sizeof(jit_addr_t));
 	vec_reserve(&labels, vec_len(&f->insns));
@@ -121,10 +290,64 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 	foreach_vec(ii, f->insns) {
 		struct ejit_insn i = vect_at(struct ejit_insn, f->insns, ii);
 		switch (i.op) {
+		case MOVR: compile_movr(f, j, i); break;
 		case MOVI: compile_movi(f, j, i); break;
 		case ADDR: compile_addr(f, j, i); break;
 
+		case STXI64: compile_stxi64(f, j, i); break;
+		case LDXIU64: compile_ldxiu64(f, j, i); break;
+
+		case EQR: compile_eqr(f, j, i); break;
+		case LTR: compile_ltr(f, j, i); break;
+
 		case BLTR: compile_bltr(f, j, i, &relocs); break;
+		case BEQI: compile_beqi(f, j, i, &relocs); break;
+		case BNEI: compile_bnei(f, j, i, &relocs); break;
+
+		case ARG: {
+			jit_operand_t type = jit_operand_imm(JIT_OPERAND_ABI_WORD, i.r1);
+			jit_operand_t arg;
+			if (i.r2 < jit_v_num()) {
+				/* i.r2 is the source register; i.r0 is only the argument index */
+				arg = jit_operand_gpr(jit_abi_from(i.r1), jit_v(i.r2));
+			}
+			else {
+				/* stack location, note that we'll fix up the SP
+				 * offset before doing the actual call */
+				arg = jit_operand_mem(jit_abi_from(i.r1), JIT_SP, stack_loc(i.r2));
+			}
+
+			vec_append(&src, &type);
+			vec_append(&src, &arg);
+
+			jit_operand_t to[2] = {
+				jit_operand_mem(JIT_OPERAND_ABI_WORD, JIT_SP, type_offset(i)),
+				jit_operand_mem(jit_abi_from(i.r1), JIT_SP, arg_offset(i))
+			};
+
+			vec_append(&dst, &to[0]);
+			vec_append(&dst, &to[1]);
+			break;
+		}
+
+		case ESCAPEI: {
+			jit_operand_t args[2] = {
+				jit_operand_imm(JIT_OPERAND_ABI_WORD, vec_len(&src) / 2),
+				jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_SP)
+			};
+			compile_imm_call(j, &src, &dst, (void *)i.o, 2, args);
+			break;
+		}
+
+		case CALLI: {
+			jit_operand_t args[3] = {
+				jit_operand_imm(JIT_OPERAND_ABI_POINTER, i.o),
+				jit_operand_imm(JIT_OPERAND_ABI_WORD, vec_len(&src) / 2),
+				jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_SP)
+			};
+			compile_imm_call(j, &src, &dst, ejit_run_func, 3, args);
+			break;
+		}
 
 		case LABEL: compile_label(j, ii, &labels); break;
 		case RET: {
@@ -137,8 +360,46 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			break;
 		}
 
-		case START: continue;
-		case END: continue;
+		case END: {
+			/* 'void' return */
+			jit_shrink_stack(j, stack);
+			jit_leave_jit_abi(j, gprs, fprs, frame);
+			jit_reti(j, 0);
+			break;
+		}
+
+		case PARAM: {
+			/* move from argument stack (slot i.r0) to location i.r2 */
+			jit_operand_t from = jit_operand_mem(
+					jit_abi_from(i.r1),
+					JIT_R1,
+					arg_offset(i)
+					);
+
+			jit_operand_t to;
+			if (i.r2 < jit_v_num()) {
+				/* destination fits in a regular register */
+				to = jit_operand_gpr(jit_abi_from(i.r1), jit_v(i.r2));
+			}
+			else {
+				/* stack location */
+				to = jit_operand_mem(jit_abi_from(i.r1), JIT_SP, stack_loc(i.r2));
+			}
+
+			vec_append(&src, &from);
+			vec_append(&dst, &to);
+			break;
+		}
+
+		case START: {
+			/* parameters should be done by now */
+			jit_move_operands(j, dst.buf, src.buf, vec_len(&src));
+			/* reuse for arguments */
+			vec_reset(&dst);
+			vec_reset(&src);
+			break;
+		}
+
 		default: abort();
 		}
 	}
@@ -152,6 +413,8 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 		jit_patch_there(j, r, a);
 	}
 
+	vec_destroy(&src);
+	vec_destroy(&dst);
 	vec_destroy(&relocs);
 	vec_destroy(&labels);
 
diff --git a/src/ejit.c b/src/ejit.c
index 9393dc0..246f1fc 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -42,8 +42,8 @@ struct ejit_func *ejit_create_func(enum ejit_type rtype, size_t argc, const stru
 
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_i(f, PARAM, args[i].r, 0, args[i].type); break;
-		case EJIT_OPERAND_FPR: emit_insn_i(f, PARAM_F, args[i].r, 0, args[i].type); break;
+		case EJIT_OPERAND_GPR: emit_insn_r(f, PARAM, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_FPR: emit_insn_r(f, PARAM_F, i, args[i].type, args[i].r); break;
 		default: abort();
 		}
 	}
@@ -54,6 +54,9 @@ struct ejit_func *ejit_create_func(enum ejit_type rtype, size_t argc, const stru
 
 void ejit_compile_func(struct ejit_func *f, size_t gpr, size_t fpr)
 {
+	/* emit a final end instruction in case user didn't do a return */
+	emit_insn_i(f, END, 0, 0, 0);
+
 	f->gpr = gpr;
 	f->fpr = fpr;
 
@@ -68,7 +71,9 @@ void ejit_compile_func(struct ejit_func *f, size_t gpr, size_t fpr)
 	ejit_interp(f, 0, NULL, false, &labels);
 	foreach_vec(ii, f->insns) {
 		struct ejit_insn i = vect_at(struct ejit_insn, f->insns, ii);
-		i.addr = labels[i.op];
+		void *addr = labels[i.op];
+		assert(addr);
+		i.addr = addr;
 		vect_at(struct ejit_insn, f->insns, ii) = i;
 	}
 }
@@ -101,10 +106,10 @@ void ejit_calli(struct ejit_func *s, struct ejit_func *f, size_t argc, const str
 {
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_i(s, ARG, args[i].r, args[i].type, 0); break;
-		case EJIT_OPERAND_FPR: emit_insn_i(s, ARG_F, args[i].r, args[i].type, 0); break;
-		case EJIT_OPERAND_IMM: emit_insn_i(s, ARG_I, 0, args[i].type, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_f(s, ARG_FI, 0, args[i].type, args[i].d); break;
+		case EJIT_OPERAND_GPR: emit_insn_r(s, ARG, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_FPR: emit_insn_r(s, ARG_F, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_IMM: emit_insn_i(s, ARG_I, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_FLT: emit_insn_f(s, ARG_FI, i, args[i].type, args[i].d); break;
 		default: abort();
 		}
 	}
@@ -116,10 +121,10 @@ void ejit_escapei(struct ejit_func *s, ejit_escape_t f, size_t argc, const struc
 {
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_i(s, ARG, args[i].r, 0, 0); break;
-		case EJIT_OPERAND_FPR: emit_insn_i(s, ARG_F, args[i].r, 0, 0); break;
-		case EJIT_OPERAND_IMM: emit_insn_i(s, ARG_I, 0, 0, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_f(s, ARG_FI, 0, 0, args[i].d); break;
+		case EJIT_OPERAND_GPR: emit_insn_r(s, ARG, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_FPR: emit_insn_r(s, ARG_F, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_IMM: emit_insn_i(s, ARG_I, i, args[i].type, args[i].r); break; /* immediate, as in ejit_calli */
+		case EJIT_OPERAND_FLT: emit_insn_f(s, ARG_FI, i, args[i].type, args[i].d); break;
 		default: abort();
 		}
 	}
@@ -131,10 +136,10 @@ void ejit_escapei_f(struct ejit_func *s, ejit_escape_f_t f, size_t argc, const s
 {
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_i(s, ARG, args[i].r, 0, 0); break;
-		case EJIT_OPERAND_FPR: emit_insn_i(s, ARG_F, args[i].r, 0, 0); break;
-		case EJIT_OPERAND_IMM: emit_insn_i(s, ARG_I, 0, 0, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_f(s, ARG_FI, 0, 0, args[i].d); break;
+		case EJIT_OPERAND_GPR: emit_insn_r(s, ARG, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_FPR: emit_insn_r(s, ARG_F, i, args[i].type, args[i].r); break;
+		case EJIT_OPERAND_IMM: emit_insn_i(s, ARG_I, i, args[i].type, args[i].r); break; /* immediate, as in ejit_calli */
+		case EJIT_OPERAND_FLT: emit_insn_f(s, ARG_FI, i, args[i].type, args[i].d); break;
 		default: abort();
 		}
 	}
@@ -142,6 +147,21 @@ void ejit_escapei_f(struct ejit_func *s, ejit_escape_f_t f, size_t argc, const s
 	emit_insn_p(s, ESCAPEI_F, 0, 0, f);
 }
 
+void ejit_retval(struct ejit_func *s, struct ejit_gpr r0)
+{
+	emit_insn_i(s, RETVAL, r0.r, 0, 0); /* NOTE(review): no RETVAL entry is visible in this patch's interpreter label table - confirm */
+}
+
+void ejit_stxi_64(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, long o)
+{
+	emit_insn_i(s, STXI64, r0.r, r1.r, o); /* store r0 to the address r1 + o */
+}
+
+void ejit_ldxi_u64(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, long o)
+{
+	emit_insn_i(s, LDXIU64, r0.r, r1.r, o); /* load r0 from the address r1 + o */
+}
+
 void ejit_ret(struct ejit_func *s, struct ejit_gpr r0)
 {
 	emit_insn_r(s, RET, r0.r, 0, 0);
@@ -182,11 +202,44 @@ void ejit_subr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, st
 	emit_insn_r(s, SUBR_F, r0.f, r1.f, r2.f);
 }
 
+void ejit_mulr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, struct ejit_gpr r2)
+{
+	emit_insn_r(s, MULR, r0.r, r1.r, r2.r); /* r0 = r1 * r2 */
+}
+
+void ejit_divr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, struct ejit_gpr r2)
+{
+	emit_insn_r(s, DIVR, r0.r, r1.r, r2.r); /* r0 = r1 / r2 */
+}
+
+void ejit_negr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1)
+{
+	emit_insn_i(s, NEGR, r0.r, r1.r, 0); /* NOTE(review): no NEGR entry is visible in this patch's interpreter label table - confirm */
+}
+
 void ejit_movi(struct ejit_func *s, struct ejit_gpr r0, long o)
 {
 	emit_insn_i(s, MOVI, r0.r, 0, o);
 }
 
+/* Emit r0 = r1; a move of a register onto itself emits nothing. */
+void ejit_movr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1)
+{
+	/* only distinct registers need an instruction */
+	if (r0.r != r1.r)
+		emit_insn_i(s, MOVR, r0.r, r1.r, 0);
+}
+
+void ejit_eqr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, struct ejit_gpr r2)
+{
+	emit_insn_r(s, EQR, r0.r, r1.r, r2.r); /* r0 = (r1 == r2) */
+}
+
+void ejit_ltr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1, struct ejit_gpr r2)
+{
+	emit_insn_r(s, LTR, r0.r, r1.r, r2.r); /* r0 = (r1 < r2) */
+}
+
 struct ejit_reloc ejit_bltr(struct ejit_func *s, struct ejit_gpr r0, struct ejit_gpr r1)
 {
 	size_t addr = vec_len(&s->insns);
@@ -194,6 +247,27 @@ struct ejit_reloc ejit_bltr(struct ejit_func *s, struct ejit_gpr r0, struct ejit
 	return (struct ejit_reloc){.insn = addr};
 }
 
+struct ejit_reloc ejit_bnei(struct ejit_func *s, struct ejit_gpr r0, long o)
+{
+	struct ejit_reloc here = {.insn = vec_len(&s->insns)}; /* index the branch will occupy */
+	emit_insn_i(s, BNEI, 0, r0.r, o);
+	return here;
+}
+
+struct ejit_reloc ejit_beqi(struct ejit_func *s, struct ejit_gpr r0, long o)
+{
+	struct ejit_reloc here = {.insn = vec_len(&s->insns)}; /* index the branch will occupy */
+	emit_insn_i(s, BEQI, 0, r0.r, o);
+	return here;
+}
+
+struct ejit_reloc ejit_jmp(struct ejit_func *s)
+{
+	struct ejit_reloc here = {.insn = vec_len(&s->insns)}; /* index the jump will occupy */
+	emit_insn_i(s, JMP, 0, 0, 0);
+	return here;
+}
+
 long ejit_run_func(struct ejit_func *f, size_t argc, struct ejit_arg args[argc])
 {
 	assert(f->gpr && "trying to run a function that hasn't been compiled");
diff --git a/src/interp.c b/src/interp.c
index 158606b..f15adc5 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -6,35 +6,49 @@
 union interp_ret ejit_interp(struct ejit_func *f, size_t argc, struct ejit_arg args[argc], bool run, void ***labels_wb)
 {
 	static void *labels[OPCODE_COUNT] = {
-		[MOVI] = &&movi,
-		[MOVR] = &&movr,
-		[MOVR_F] = &&movr_f,
+		[MOVI] = &&MOVI,
+		[MOVR] = &&MOVR,
+		[MOVR_F] = &&MOVR_F,
 
-		[ADDR] = &&addr,
-		[ADDR_F] = &&addr_f,
-		[SUBR] = &&subr,
-		[SUBR_F] = &&subr_f,
+		[ADDR] = &&ADDR,
+		[ADDR_F] = &&ADDR_F,
+		[SUBR] = &&SUBR,
+		[SUBR_F] = &&SUBR_F,
 
-		[BLTR] = &&bltr,
+		[MULR] = &&MULR,
+		[DIVR] = &&DIVR,
 
-		[RET] = &&ret,
-		[RET_I] = &&ret_i,
-		[RET_F] = &&ret_f,
-		[RET_FI] = &&ret_fi,
+		[EQR] = &&EQR,
+		[LTR] = &&LTR,
 
-		[ARG] = &&arg,
-		[ARG_I] = &&arg_i,
-		[ARG_F] = &&arg_f,
-		[ARG_FI] = &&arg_fi,
+		[STXI64] = &&STXI64,
+		[LDXIU64] = &&LDXIU64,
 
-		[PARAM] = &&param,
-		[PARAM_F] = &&param_f,
+		[BLTR] = &&BLTR,
+		[BNEI] = &&BNEI,
+		[BEQI] = &&BEQI,
+		[JMP] = &&JMP,
 
-		[CALLI] = &&calli,
-		[CALLI_F] = &&calli_f,
+		[RET] = &&RET,
+		[RET_I] = &&RET_I,
+		[RET_F] = &&RET_F,
+		[RET_FI] = &&RET_FI,
 
-		[START] = &&start,
-		[END] = &&end,
+		[ARG] = &&ARG,
+		[ARG_I] = &&ARG_I,
+		[ARG_F] = &&ARG_F,
+		[ARG_FI] = &&ARG_FI,
+
+		[PARAM] = &&PARAM,
+		[PARAM_F] = &&PARAM_F,
+
+		[CALLI] = &&CALLI,
+		[CALLI_F] = &&CALLI_F,
+		[ESCAPEI] = &&ESCAPEI,
+
+		[LABEL] = &&LABEL,
+		[START] = &&START,
+		[END] = &&END,
 	};
 
 	if (!run) {
@@ -50,6 +64,9 @@ union interp_ret ejit_interp(struct ejit_func *f, size_t argc, struct ejit_arg a
 	struct ejit_insn *insns = f->insns.buf;
 
 	struct ejit_insn i;
+	/* retval is kind of an unfortunate extra bit of state to keep track of,
+	 * but having call and return value separated is pretty convenient for
+	 * void calls so I guess I don't mind? */
 	long retval = 0; double retval_f = 0.;
 	size_t pc = 0;
 
@@ -57,105 +74,158 @@ union interp_ret ejit_interp(struct ejit_func *f, size_t argc, struct ejit_arg a
 #define JUMP(a) goto *insns[pc = a].addr;
 #define DISPATCH() } goto *insns[++pc].addr;
 
-	DO(start);
+	JUMP(0);
+
+	DO(START);
 	DISPATCH();
 
-	DO(end);
+	DO(LABEL);
+	DISPATCH();
+
+	DO(END);
 	goto out_int;
 	DISPATCH();
 
-	DO(movi);
+	DO(MOVI);
 	gpr[i.r0] = i.o;
 	DISPATCH();
 
-	DO(movr);
+	DO(MOVR);
 	gpr[i.r0] = gpr[i.r1];
 	DISPATCH();
 
-	DO(movr_f);
+	DO(MOVR_F);
 	fpr[i.r0] = fpr[i.r1];
 	DISPATCH();
 
-	DO(addr);
+	DO(ADDR);
 	gpr[i.r0] = gpr[i.r1] + gpr[i.r2];
 	DISPATCH();
 
-	DO(addr_f);
+	DO(ADDR_F);
 	fpr[i.r0] = fpr[i.r1] + fpr[i.r2];
 	DISPATCH();
 
-	DO(subr);
+	DO(SUBR);
 	gpr[i.r0] = gpr[i.r1] - gpr[i.r2];
 	DISPATCH();
 
-	DO(subr_f);
+	DO(SUBR_F);
 	fpr[i.r0] = fpr[i.r1] - fpr[i.r2];
 	DISPATCH();
 
-	DO(bltr);
+	DO(MULR);
+	gpr[i.r0] = gpr[i.r1] * gpr[i.r2];
+	DISPATCH();
+
+	DO(DIVR);
+	gpr[i.r0] = gpr[i.r1] / gpr[i.r2];
+	DISPATCH();
+
+	DO(EQR);
+	gpr[i.r0] = gpr[i.r1] == gpr[i.r2];
+	DISPATCH();
+
+	DO(LTR);
+	gpr[i.r0] = gpr[i.r1] < gpr[i.r2];
+	DISPATCH();
+
+	DO(STXI64);
+	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(LDXIU64);
+	uint64_t *addr = (uint64_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(BLTR);
 	if (gpr[i.r1] < gpr[i.r2])
 		JUMP(i.r0);
 
 	DISPATCH();
 
-	DO(param);
-	gpr[i.r0] = args[i.o].l;
+	DO(BNEI);
+	if (gpr[i.r1] != i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BEQI);
+	if (gpr[i.r1] == i.o)
+		JUMP(i.r0);
+
 	DISPATCH();
 
-	DO(param_f);
-	fpr[i.r0] = args[i.o].d;
+	DO(JMP);
+	JUMP(i.r0);
 	DISPATCH();
 
-	DO(arg);
-	struct ejit_arg a = ejit_build_arg(i.r1, gpr[i.r0]);
+	DO(PARAM);
+	gpr[i.r2] = args[i.r0].l;
+	DISPATCH();
+
+	DO(PARAM_F);
+	fpr[i.r2] = args[i.r0].d;
+	DISPATCH();
+
+	DO(ARG);
+	struct ejit_arg a = ejit_build_arg(i.r1, gpr[i.r2]);
 	vec_append(&call_args, &a);
 	DISPATCH();
 
-	DO(arg_i);
+	DO(ARG_I);
 	struct ejit_arg a = ejit_build_arg(i.r1, i.o);
 	vec_append(&call_args, &a);
 	DISPATCH();
 
-	DO(arg_f);
-	struct ejit_arg a = ejit_build_arg_f(i.r1, fpr[i.r0]);
+	DO(ARG_F);
+	struct ejit_arg a = ejit_build_arg_f(i.r1, fpr[i.r2]);
 	vec_append(&call_args, &a);
 	DISPATCH();
 
-	DO(arg_fi);
+	DO(ARG_FI);
 	struct ejit_arg a = ejit_build_arg_f(i.r1, i.d);
 	vec_append(&call_args, &a);
 	DISPATCH();
 
-	DO(calli);
+	DO(CALLI);
 	struct ejit_func *f = i.p;
 	retval = ejit_run_func(f, vec_len(&call_args), call_args.buf);
 	vec_reset(&call_args);
 	DISPATCH();
 
-	DO(calli_f);
+	DO(CALLI_F);
 	struct ejit_func *f = i.p;
 	retval_f = ejit_run_func_f(f, vec_len(&call_args), call_args.buf);
 	vec_reset(&call_args);
 	DISPATCH();
 
+	DO(ESCAPEI);
+	ejit_escape_t f = i.p;
+	retval = f(vec_len(&call_args), call_args.buf);
+	vec_reset(&call_args);
+	DISPATCH();
+
 	/* dispatch is technically unnecessary for returns, but keep it for
 	 * symmetry */
-	DO(ret);
+	DO(RET);
 	retval = gpr[i.r0];
 	goto out_int;
 	DISPATCH();
 
-	DO(ret_i);
+	DO(RET_I);
 	retval = i.o;
 	goto out_int;
 	DISPATCH();
 
-	DO(ret_f);
+	DO(RET_F);
 	retval_f = fpr[i.r0];
 	goto out_float;
 	DISPATCH();
 
-	DO(ret_fi);
+	DO(RET_FI);
 	retval_f = i.d;
 	goto out_float;
 	DISPATCH();
-- 
cgit v1.2.3