From 3e8bbb6bcbb3b36e9813344e2f4528bb830d6ff4 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Tue, 22 Oct 2024 18:56:04 +0300
Subject: move interp into run_interp

+ This allows us to skip a potential extra function call
---
 Makefile       |    3 +-
 src/common.h   |   22 +-
 src/ejit.c     |   53 ++-
 src/interp.c   | 1088 --------------------------------------------------------
 src/interp.inc | 1077 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/vec.h      |    2 +-
 6 files changed, 1130 insertions(+), 1115 deletions(-)
 delete mode 100644 src/interp.c
 create mode 100644 src/interp.inc

diff --git a/Makefile b/Makefile
index 5fdaec7..133d796 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,8 @@ setup:
 	@echo -n > deps.mk
 	@./scripts/gen-deps -p EJIT -c COMPILE_EJIT -b ejit "$(EJIT_SOURCES)"
 
-CLEANUP		:= build deps.mk tests.md ejit.o examples/exec examples/*.d tests/test-*
+CLEANUP		:= build deps.mk tests.md ejit.o examples/fib examples/loop \
+			examples/*.d tests/test-*
 CLEANUP_CMD	:=
 EJIT_SOURCES	:=
 
diff --git a/src/common.h b/src/common.h
index eb91409..64ca250 100644
--- a/src/common.h
+++ b/src/common.h
@@ -261,27 +261,33 @@ struct ejit_func {
 };
 
 
-union interp_ret {
-	int64_t r;
-	double d;
-};
-
 struct interp_state {
 	struct gprs gprs;
 	struct fprs fprs;
 	struct args args;
 };
 
-union interp_ret ejit_interp(struct ejit_func *f, size_t argc,
+int64_t ejit_interp(struct ejit_func *f, size_t argc,
+                             struct ejit_arg args[argc],
+                             struct interp_state *state, bool run,
+                             void ***labels_wb);
+
+double ejit_interp_f(struct ejit_func *f, size_t argc,
                              struct ejit_arg args[argc],
                              struct interp_state *state, bool run,
                              void ***labels_wb);
 
 int64_t ejit_run_interp(struct ejit_func *f, size_t argc,
-                        struct ejit_arg args[static argc], struct interp_state *state);
+                        struct ejit_arg args[argc],
+			struct interp_state *state,
+			bool run,
+			void ***labels_wb);
 
 double ejit_run_interp_f(struct ejit_func *f, size_t argc,
-		struct ejit_arg args[static argc], struct interp_state *state);
+		struct ejit_arg args[argc],
+		struct interp_state *state,
+		bool run,
+		void ***labels_wb);
 
 bool ejit_compile(struct ejit_func *f, bool use_64);
 
diff --git a/src/ejit.c b/src/ejit.c
index 86329de..c202c7a 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -1,3 +1,4 @@
+#include <math.h>
 #include <assert.h>
 #include <sys/mman.h>
 
@@ -96,7 +97,11 @@ void ejit_select_compile_func(struct ejit_func *f, size_t gpr, size_t fpr,
 
 	void **labels;
 	/* just get labels, don't actually run anything yet */
-	ejit_interp(f, 0, NULL, NULL, false, &labels);
+	if (ejit_float_type(f->rtype))
+		ejit_run_interp_f(f, 0, NULL, NULL, false, &labels);
+	else
+		ejit_run_interp(f, 0, NULL, NULL, false, &labels);
+
 	foreach_vec(ii, f->insns) {
 		struct ejit_insn i = *insns_at(&f->insns, ii);
 		void *addr = labels[i.op];
@@ -1251,26 +1256,40 @@ static void destroy_interp_state(struct interp_state state)
 	args_destroy(&state.args);
 }
 
-long ejit_run_interp(struct ejit_func *f, size_t argc,
-                     struct ejit_arg args[argc], struct interp_state *state)
+int64_t ejit_run_interp(struct ejit_func *f, size_t argc,
+                     struct ejit_arg args[argc],
+		     struct interp_state *state,
+		     bool run,
+		     void ***labels_wb)
 {
-	assert(f->gpr && "trying to run a function that hasn't been compiled");
-	assert(f->rtype == EJIT_VOID || ejit_int_type(f->rtype));
-	if (f->arena)
-		return ((ejit_escape_t)f->arena)(argc, args);
+	if (run) {
+		assert(f->gpr && "trying to run a function that hasn't been compiled");
+		assert(f->rtype == EJIT_VOID || ejit_int_type(f->rtype));
+		if (f->arena)
+			return ((ejit_escape_t)f->arena)(argc, args);
+	}
 
-	return ejit_interp(f, argc, args, state, true, NULL).r;
+	int64_t retval = 0; double retval_f = 0.0;
+#include "interp.inc"
+	return retval;
 }
 
 double ejit_run_interp_f(struct ejit_func *f, size_t argc,
-                     struct ejit_arg args[argc], struct interp_state *state)
-{
-	assert(f->gpr && "trying to run a function that hasn't been compiled");
-	assert(f->rtype == EJIT_VOID || ejit_int_type(f->rtype));
-	if (f->arena)
-		return ((ejit_escape_f_t)f->arena)(argc, args);
+                     struct ejit_arg args[argc],
+		     struct interp_state *state,
+		     bool run,
+		     void ***labels_wb)
+{
+	if (run) { /* label-only queries (run == false) skip these checks */
+		assert(f->gpr && "trying to run a function that hasn't been compiled");
+		assert(ejit_float_type(f->rtype)); /* float entry point, not int */
+		if (f->arena)
+			return ((ejit_escape_f_t)f->arena)(argc, args);
+	}
 
-	return ejit_interp(f, argc, args, state, true, NULL).d;
+	int64_t retval = 0; double retval_f = 0.0;
+#include "interp.inc"
+	return retval_f;
 }
 
 int64_t ejit_run_func(struct ejit_func *f, size_t argc,
@@ -1282,7 +1301,7 @@ int64_t ejit_run_func(struct ejit_func *f, size_t argc,
 		return (int64_t)((ejit_escape_t)f->arena)(argc, args);
 
 	struct interp_state state = create_interp_state();
-	long r = ejit_interp(f, argc, args, &state, true, NULL).r;
+	long r = ejit_run_interp(f, argc, args, &state, true, NULL);
 	destroy_interp_state(state);
 	return r;
 }
@@ -1296,7 +1315,7 @@ double ejit_run_func_f(struct ejit_func *f, size_t argc,
 		return ((ejit_escape_f_t)f->arena)(argc, args);
 
 	struct interp_state state = create_interp_state();
-	double r = ejit_interp(f, argc, args, &state, true, NULL).d;
+	double r = ejit_run_interp_f(f, argc, args, &state, true, NULL);
 	destroy_interp_state(state);
 	return r;
 }
diff --git a/src/interp.c b/src/interp.c
deleted file mode 100644
index 01121e5..0000000
--- a/src/interp.c
+++ /dev/null
@@ -1,1088 +0,0 @@
-#include <ejit/ejit.h>
-#include <math.h>
-
-#include "common.h"
-
-union interp_ret ejit_interp(struct ejit_func *f, size_t argc,
-                             struct ejit_arg args[argc],
-                             struct interp_state *state, bool run,
-                             void ***labels_wb)
-{
-	static void *labels[OPCODE_COUNT] = {
-		[MOVI] = &&MOVI,
-		[MOVI_F] = &&MOVI_F,
-		[MOVI_D] = &&MOVI_D,
-		[MOVR] = &&MOVR,
-		[MOVR_F] = &&MOVR_F,
-		[MOVR_D] = &&MOVR_D,
-
-		[EXTR8] = &&EXTR8,
-		[EXTR16] = &&EXTR16,
-		[EXTR32] = &&EXTR32,
-		[EXTRU8] = &&EXTRU8,
-		[EXTRU16] = &&EXTRU16,
-		[EXTRU32] = &&EXTRU32,
-		[EXTRF] = &&EXTRF,
-		[EXTRD] = &&EXTRD,
-
-		[ADDR] = &&ADDR,
-		[ADDR_F] = &&ADDR_F,
-		[ADDR_D] = &&ADDR_D,
-		[ADDI] = &&ADDI,
-
-		[ABSR_F] = &&ABSR_F,
-		[ABSR_D] = &&ABSR_D,
-
-		[SUBR] = &&SUBR,
-		[SUBR_F] = &&SUBR_F,
-		[SUBR_D] = &&SUBR_D,
-		[SUBI] = &&SUBI,
-
-		[MULR] = &&MULR,
-		[MULR_F] = &&MULR_F,
-		[MULR_D] = &&MULR_D,
-
-		[DIVR] = &&DIVR,
-		[DIVR_U] = &&DIVR_U,
-		[DIVR_F] = &&DIVR_F,
-		[DIVR_D] = &&DIVR_D,
-
-		[REMR] = &&REMR,
-		[REMR_U] = &&REMR_U,
-
-		[LSHI] = &&LSHI,
-		[LSHR] = &&LSHR,
-		[RSHI] = &&RSHI,
-		[RSHR] = &&RSHR,
-		[RSHI_U] = &&RSHI_U,
-		[RSHR_U] = &&RSHR_U,
-
-		[ANDR] = &&ANDR,
-		[ANDI] = &&ANDI,
-
-		[ORR] = &&ORR,
-		[ORI] = &&ORI,
-
-		[XORR] = &&XORR,
-		[XORI] = &&XORI,
-
-		[COMR] = &&COMR,
-		[NEGR] = &&NEGR,
-		[NEGR_F] = &&NEGR_F,
-		[NEGR_D] = &&NEGR_D,
-
-		[EQR] = &&EQR,
-		[EQR_F] = &&EQR_F,
-		[EQR_D] = &&EQR_D,
-
-		[NER] = &&NER,
-		[NER_F] = &&NER_F,
-		[NER_D] = &&NER_D,
-
-		[GTR] = &&GTR,
-		[GTR_U] = &&GTR_U,
-		[GTR_F] = &&GTR_F,
-		[GTR_D] = &&GTR_D,
-
-		[GER] = &&GER,
-		[GER_U] = &&GER_U,
-		[GER_F] = &&GER_F,
-		[GER_D] = &&GER_D,
-
-		[STI8] = &&STI8,
-		[STI16] = &&STI16,
-		[STI32] = &&STI32,
-		[STI64] = &&STI64,
-		[STIF] = &&STIF,
-		[STID] = &&STID,
-
-		[STXI8] = &&STXI8,
-		[STXI16] = &&STXI16,
-		[STXI32] = &&STXI32,
-		[STXI64] = &&STXI64,
-		[STXIF] = &&STXIF,
-		[STXID] = &&STXID,
-
-		[STXR8] = &&STXR8,
-		[STXR16] = &&STXR16,
-		[STXR32] = &&STXR32,
-		[STXR64] = &&STXR64,
-		[STXRF] = &&STXRF,
-		[STXRD] = &&STXRD,
-
-		[LDI8] = &&LDI8,
-		[LDI16] = &&LDI16,
-		[LDI32] = &&LDI32,
-		[LDI64] = &&LDI64,
-		[LDIU8] = &&LDIU8,
-		[LDIU16] = &&LDIU16,
-		[LDIU32] = &&LDIU32,
-		[LDIU64] = &&LDIU64,
-		[LDIF] = &&LDIF,
-		[LDID] = &&LDID,
-
-		[LDXI8] = &&LDXI8,
-		[LDXI16] = &&LDXI16,
-		[LDXI32] = &&LDXI32,
-		[LDXI64] = &&LDXI64,
-		[LDXIU8] = &&LDXIU8,
-		[LDXIU16] = &&LDXIU16,
-		[LDXIU32] = &&LDXIU32,
-		[LDXIU64] = &&LDXIU64,
-		[LDXIF] = &&LDXIF,
-		[LDXID] = &&LDXID,
-
-		[LDXR8] = &&LDXR8,
-		[LDXR16] = &&LDXR16,
-		[LDXR32] = &&LDXR32,
-		[LDXR64] = &&LDXR64,
-		[LDXRU8] = &&LDXRU8,
-		[LDXRU16] = &&LDXRU16,
-		[LDXRU32] = &&LDXRU32,
-		[LDXRU64] = &&LDXRU64,
-		[LDXRF] = &&LDXRF,
-		[LDXRD] = &&LDXRD,
-
-		[TRUNCR_D_32] = &&TRUNCR_D_32,
-		[TRUNCR_D_64] = &&TRUNCR_D_64,
-		[TRUNCR_F_32] = &&TRUNCR_F_32,
-		[TRUNCR_F_64] = &&TRUNCR_F_64,
-
-		[BNER] = &&BNER,
-		[BNEI] = &&BNEI,
-		[BNER_F] = &&BNER_F,
-		[BNER_D] = &&BNER_D,
-
-		[BEQR] = &&BEQR,
-		[BEQI] = &&BEQI,
-		[BEQR_F] = &&BEQR_F,
-		[BEQR_D] = &&BEQR_D,
-
-		[BGER] = &&BGER,
-		[BGER_U] = &&BGER_U,
-		[BGEI] = &&BGEI,
-		[BGEI_U] = &&BGEI_U,
-		[BGER_F] = &&BGER_F,
-		[BGER_D] = &&BGER_D,
-
-		[BLEI] = &&BLEI,
-		[BLEI_U] = &&BLEI_U,
-
-		[BGTR] = &&BGTR,
-		[BGTR_U] = &&BGTR_U,
-		[BGTI] = &&BGTI,
-		[BGTI_U] = &&BGTI_U,
-		[BGTR_F] = &&BGTR_F,
-		[BGTR_D] = &&BGTR_D,
-
-		[BLTI] = &&BLTI,
-		[BLTI_U] = &&BLTI_U,
-
-		[JMP] = &&JMP,
-		[JMPR] = &&JMPR,
-
-		[BMCI] = &&BMCI,
-		[BMCR] = &&BMCR,
-		[BMSI] = &&BMSI,
-		[BMSR] = &&BMSR,
-
-		[RETR] = &&RETR,
-		[RETI] = &&RETI,
-		[RETR_F] = &&RETR_F,
-		[RETI_F] = &&RETI_F,
-		[RETR_D] = &&RETR_D,
-		[RETI_D] = &&RETI_D,
-
-		[RETVAL] = &&RETVAL,
-		[RETVAL_F] = &&RETVAL_F,
-		[RETVAL_D] = &&RETVAL_D,
-
-		[ARG] = &&ARG,
-		[ARG_I] = &&ARG_I,
-		[ARG_F] = &&ARG_F,
-		[ARG_FI] = &&ARG_FI,
-
-		[PARAM] = &&PARAM,
-		[PARAM_F] = &&PARAM_F,
-
-		[CALLI] = &&CALLI,
-		[CALLI_F] = &&CALLI_F,
-		[ESCAPEI] = &&ESCAPEI,
-		[ESCAPEI_F] = &&ESCAPEI_F,
-
-		[START] = &&START,
-		[END] = &&END,
-	};
-
-	if (!run) {
-		*labels_wb = labels;
-		return (union interp_ret){.r = 0};
-	}
-
-	size_t prev_gprs = gprs_len(&state->gprs);
-	size_t prev_fprs = fprs_len(&state->fprs);
-	size_t prev_argc = args_len(&state->args);
-
-	gprs_reserve(&state->gprs, prev_gprs + f->gpr);
-	fprs_reserve(&state->fprs, prev_fprs + f->fpr);
-
-	union fpr {
-		double d;
-		float f;
-	};
-	long *gpr = ((long *)state->gprs.buf) + prev_gprs;
-	union fpr *fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
-
-	struct ejit_insn *insns = f->insns.buf;
-
-	/* retval is kind of an unfortunate extra bit of state to keep track of,
-	 * but having call and return value separated is pretty convenient for
-	 * void calls so I guess I don't mind? */
-	int64_t retval = 0; double retval_f = 0.;
-	size_t pc = 0;
-
-#define DO(x) x : { struct ejit_insn i = insns[pc]; (void)i;
-#define JUMP(a) goto *insns[pc = a].addr;
-#define DISPATCH() } goto *insns[++pc].addr;
-
-	JUMP(0);
-
-	DO(START);
-	DISPATCH();
-
-	DO(END);
-	goto out_int;
-	DISPATCH();
-
-	DO(MOVI);
-	gpr[i.r0] = i.o;
-	DISPATCH();
-
-	DO(MOVI_F);
-	fpr[i.r0].f = i.f;
-	DISPATCH();
-
-	DO(MOVI_D);
-	fpr[i.r0].d = i.d;
-	DISPATCH();
-
-	DO(MOVR);
-	gpr[i.r0] = gpr[i.r1];
-	DISPATCH();
-
-	DO(MOVR_F);
-	fpr[i.r0].f = fpr[i.r1].f;
-	DISPATCH();
-
-	DO(MOVR_D);
-	fpr[i.r0].d = fpr[i.r1].d;
-	DISPATCH();
-
-	DO(EXTR8);
-	gpr[i.r0] = (int8_t)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTR16);
-	gpr[i.r0] = (int16_t)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTR32);
-	gpr[i.r0] = (int32_t)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTRU8);
-	gpr[i.r0] = (uint8_t)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTRU16);
-	gpr[i.r0] = (uint16_t)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTRU32);
-	gpr[i.r0] = (uint32_t)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTRF);
-	fpr[i.r0].f = (float)gpr[i.r1];
-	DISPATCH();
-
-	DO(EXTRD);
-	fpr[i.r0].d = (double)gpr[i.r1];
-	DISPATCH();
-
-	DO(ADDR);
-	gpr[i.r0] = gpr[i.r1] + gpr[i.r2];
-	DISPATCH();
-
-	DO(ADDR_F);
-	fpr[i.r0].f = fpr[i.r1].f + fpr[i.r2].f;
-	DISPATCH();
-
-	DO(ADDR_D);
-	fpr[i.r0].d = fpr[i.r1].d + fpr[i.r2].d;
-	DISPATCH();
-
-	DO(ADDI);
-	gpr[i.r0] = gpr[i.r1] + i.o;
-	DISPATCH();
-
-	DO(ABSR_F);
-	fpr[i.r0].f = fabs(fpr[i.r1].f);
-	DISPATCH();
-
-	DO(ABSR_D);
-	fpr[i.r0].d = fabs(fpr[i.r1].d);
-	DISPATCH();
-
-	DO(SUBR);
-	gpr[i.r0] = gpr[i.r1] - gpr[i.r2];
-	DISPATCH();
-
-	DO(SUBR_F);
-	fpr[i.r0].f = fpr[i.r1].f - fpr[i.r2].f;
-	DISPATCH();
-
-	DO(SUBR_D);
-	fpr[i.r0].d = fpr[i.r1].d - fpr[i.r2].d;
-	DISPATCH();
-
-	DO(SUBI);
-	gpr[i.r0] = gpr[i.r1] - i.o;
-	DISPATCH();
-
-	DO(MULR);
-	gpr[i.r0] = gpr[i.r1] * gpr[i.r2];
-	DISPATCH();
-
-	DO(MULR_F);
-	fpr[i.r0].f = fpr[i.r1].f * fpr[i.r2].f;
-	DISPATCH();
-
-	DO(MULR_D);
-	fpr[i.r0].d = fpr[i.r1].d * fpr[i.r2].d;
-	DISPATCH();
-
-	DO(DIVR);
-	gpr[i.r0] = gpr[i.r1] / gpr[i.r2];
-	DISPATCH();
-
-	DO(DIVR_U);
-	gpr[i.r0] = (uint64_t)gpr[i.r1] / (uint64_t)gpr[i.r2];
-	DISPATCH();
-
-	DO(REMR);
-	gpr[i.r0] = gpr[i.r1] % gpr[i.r2];
-	DISPATCH();
-
-	DO(REMR_U);
-	gpr[i.r0] = (uint64_t)gpr[i.r1] % (uint64_t)gpr[i.r2];
-	DISPATCH();
-
-	DO(DIVR_F);
-	fpr[i.r0].f = fpr[i.r1].f / fpr[i.r2].f;
-	DISPATCH();
-
-	DO(DIVR_D);
-	fpr[i.r0].d = fpr[i.r1].d / fpr[i.r2].d;
-	DISPATCH();
-
-	DO(LSHI);
-	gpr[i.r0] = gpr[i.r1] << i.o;
-	DISPATCH();
-
-	DO(LSHR);
-	gpr[i.r0] = gpr[i.r1] << gpr[i.r2];
-	DISPATCH();
-
-	DO(RSHI);
-	gpr[i.r0] = gpr[i.r1] >> i.o;
-	DISPATCH();
-
-	DO(RSHR);
-	gpr[i.r0] = gpr[i.r1] >> gpr[i.r2];
-	DISPATCH();
-
-	DO(RSHI_U);
-	gpr[i.r0] = (uint64_t)gpr[i.r1] >> i.o;
-	DISPATCH();
-
-	DO(RSHR_U);
-	gpr[i.r0] = (uint64_t)gpr[i.r1] >> gpr[i.r2];
-	DISPATCH();
-
-	DO(ANDR);
-	gpr[i.r0] = gpr[i.r1] & gpr[i.r2];
-	DISPATCH();
-
-	DO(ANDI);
-	gpr[i.r0] = gpr[i.r1] & i.o;
-	DISPATCH();
-
-	DO(ORR);
-	gpr[i.r0] = gpr[i.r1] | gpr[i.r2];
-	DISPATCH();
-
-	DO(ORI);
-	gpr[i.r0] = gpr[i.r1] | i.o;
-	DISPATCH();
-
-	DO(XORR);
-	gpr[i.r0] = gpr[i.r1] ^ gpr[i.r2];
-	DISPATCH();
-
-	DO(XORI);
-	gpr[i.r0] = gpr[i.r1] ^ i.o;
-	DISPATCH();
-
-	DO(COMR);
-	gpr[i.r0] = ~gpr[i.r1];
-	DISPATCH();
-
-	DO(NEGR);
-	gpr[i.r0] = -gpr[i.r1];
-	DISPATCH();
-
-	DO(NEGR_F);
-	fpr[i.r0].f = -fpr[i.r1].f;
-	DISPATCH();
-
-	DO(NEGR_D);
-	fpr[i.r0].d = -fpr[i.r1].d;
-	DISPATCH();
-
-	DO(EQR);
-	gpr[i.r0] = gpr[i.r1] == gpr[i.r2];
-	DISPATCH();
-
-	DO(EQR_F);
-	gpr[i.r0] = fpr[i.r1].f == fpr[i.r2].f;
-	DISPATCH();
-
-	DO(EQR_D);
-	gpr[i.r0] = fpr[i.r1].d == fpr[i.r2].d;
-	DISPATCH();
-
-	DO(NER);
-	gpr[i.r0] = gpr[i.r1] != gpr[i.r2];
-	DISPATCH();
-
-	DO(NER_F);
-	gpr[i.r0] = fpr[i.r1].f != fpr[i.r2].f;
-	DISPATCH();
-
-	DO(NER_D);
-	gpr[i.r0] = fpr[i.r1].d != fpr[i.r2].d;
-	DISPATCH();
-
-	DO(GTR);
-	gpr[i.r0] = gpr[i.r1] > gpr[i.r2];
-	DISPATCH();
-
-	DO(GTR_U);
-	gpr[i.r0] = (uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2];
-	DISPATCH();
-
-	DO(GTR_F);
-	gpr[i.r0] = fpr[i.r1].f > fpr[i.r2].f;
-	DISPATCH();
-
-	DO(GTR_D);
-	gpr[i.r0] = fpr[i.r1].d > fpr[i.r2].d;
-	DISPATCH();
-
-	DO(GER);
-	gpr[i.r0] = gpr[i.r1] >= gpr[i.r2];
-	DISPATCH();
-
-	DO(GER_U);
-	gpr[i.r0] = (uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2];
-	DISPATCH();
-
-	DO(GER_F);
-	gpr[i.r0] = fpr[i.r1].f >= fpr[i.r2].f;
-	DISPATCH();
-
-	DO(GER_D);
-	gpr[i.r0] = fpr[i.r1].d >= fpr[i.r2].d;
-	DISPATCH();
-
-	DO(STI8);
-	int8_t *addr = (int8_t *)(i.p);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STI16);
-	int16_t *addr = (int16_t *)(i.p);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STI32);
-	int32_t *addr = (int32_t *)(i.p);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STI64);
-	int64_t *addr = (int64_t *)(i.p);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STIF);
-	float *addr = (float *)(i.p);
-	*addr = fpr[i.r0].f;
-	DISPATCH();
-
-	DO(STID);
-	double *addr = (double *)(i.p);
-	*addr = fpr[i.r0].d;
-	DISPATCH();
-
-	DO(STXI8);
-	int8_t *addr = (int8_t *)(gpr[i.r1] + i.o);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXI16);
-	int16_t *addr = (int16_t *)(gpr[i.r1] + i.o);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXI32);
-	int32_t *addr = (int32_t *)(gpr[i.r1] + i.o);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXI64);
-	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXIF);
-	float *addr = (float *)(gpr[i.r1] + i.o);
-	*addr = fpr[i.r0].f;
-	DISPATCH();
-
-	DO(STXID);
-	double *addr = (double *)(gpr[i.r1] + i.o);
-	*addr = fpr[i.r0].d;
-	DISPATCH();
-
-	DO(STXR8);
-	int8_t *addr = (int8_t *)(gpr[i.r1] + gpr[i.r2]);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXR16);
-	int16_t *addr = (int16_t *)(gpr[i.r1] + gpr[i.r2]);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXR32);
-	int32_t *addr = (int32_t *)(gpr[i.r1] + gpr[i.r2]);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXR64);
-	int64_t *addr = (int64_t *)(gpr[i.r1] + gpr[i.r2]);
-	*addr = gpr[i.r0];
-	DISPATCH();
-
-	DO(STXRF);
-	float *addr = (float *)(gpr[i.r1] + gpr[i.r2]);
-	*addr = fpr[i.r0].f;
-	DISPATCH();
-
-	DO(STXRD);
-	double *addr = (double *)(gpr[i.r1] + gpr[i.r2]);
-	*addr = fpr[i.r0].d;
-	DISPATCH();
-
-	DO(LDI8);
-	int8_t *addr = (int8_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDI16);
-	int16_t *addr = (int16_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDI32);
-	int32_t *addr = (int32_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDI64);
-	int64_t *addr = (int64_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDIU8);
-	uint8_t *addr = (uint8_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDIU16);
-	uint16_t *addr = (uint16_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDIU32);
-	uint32_t *addr = (uint32_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDIU64);
-	uint64_t *addr = (uint64_t *)i.p;
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDIF);
-	float *addr = (float *)i.p;
-	fpr[i.r0].f = *addr;
-	DISPATCH();
-
-	DO(LDID);
-	double *addr = (double *)i.p;
-	fpr[i.r0].d = *addr;
-	DISPATCH();
-
-	DO(LDXI8);
-	int8_t *addr = (int8_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXI16);
-	int16_t *addr = (int16_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXI32);
-	int32_t *addr = (int32_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXI64);
-	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXIU8);
-	uint8_t *addr = (uint8_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXIU16);
-	uint16_t *addr = (uint16_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXIU32);
-	uint32_t *addr = (uint32_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXIU64);
-	uint64_t *addr = (uint64_t *)(gpr[i.r1] + i.o);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXIF);
-	float *addr = (float *)(gpr[i.r1] + i.o);
-	fpr[i.r0].f = *addr;
-	DISPATCH();
-
-	DO(LDXID);
-	double *addr = (double *)(gpr[i.r1] + i.o);
-	fpr[i.r0].d = *addr;
-	DISPATCH();
-
-	DO(LDXR8);
-	int8_t *addr = (int8_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXR16);
-	int16_t *addr = (int16_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXR32);
-	int32_t *addr = (int32_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXR64);
-	int64_t *addr = (int64_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXRU8);
-	uint8_t *addr = (uint8_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXRU16);
-	uint16_t *addr = (uint16_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXRU32);
-	uint32_t *addr = (uint32_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXRU64);
-	uint64_t *addr = (uint64_t *)(gpr[i.r1] + gpr[i.r2]);
-	gpr[i.r0] = *addr;
-	DISPATCH();
-
-	DO(LDXRF);
-	float *addr = (float *)(gpr[i.r1] + gpr[i.r2]);
-	fpr[i.r0].f = *addr;
-	DISPATCH();
-
-	DO(LDXRD);
-	double *addr = (double *)(gpr[i.r1] + gpr[i.r2]);
-	fpr[i.r0].d = *addr;
-	DISPATCH();
-
-	DO(TRUNCR_D_32);
-	gpr[i.r0] = (int32_t)fpr[i.r1].d;
-	DISPATCH();
-
-	DO(TRUNCR_D_64);
-	gpr[i.r0] = (int64_t)fpr[i.r1].d;
-	DISPATCH();
-
-	DO(TRUNCR_F_32);
-	gpr[i.r0] = (int32_t)fpr[i.r1].f;
-	DISPATCH();
-
-	DO(TRUNCR_F_64);
-	gpr[i.r0] = (int64_t)fpr[i.r1].f;
-	DISPATCH();
-
-	DO(BNER);
-	if (gpr[i.r1] != gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BNEI);
-	if (gpr[i.r1] != i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BNER_F);
-	if (fpr[i.r1].f != fpr[i.r2].f)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BNER_D);
-	if (fpr[i.r1].d != fpr[i.r2].d)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BEQR);
-	if (gpr[i.r1] == gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BEQI);
-	if (gpr[i.r1] == i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BEQR_F);
-	if (fpr[i.r1].f == fpr[i.r2].f)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BEQR_D);
-	if (fpr[i.r1].d == fpr[i.r2].d)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGTR);
-	if (gpr[i.r1] > gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGTR_U);
-	if ((uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGTI);
-	if (gpr[i.r1] > i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGTI_U);
-	if ((uint64_t)gpr[i.r1] > (uint64_t)i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGTR_F);
-	if (fpr[i.r1].f > fpr[i.r2].f)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGTR_D);
-	if (fpr[i.r1].d > fpr[i.r2].d)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BLTI);
-	if (gpr[i.r1] < i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BLTI_U);
-	if ((uint64_t)gpr[i.r1] < (uint64_t)i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGER);
-	if (gpr[i.r1] >= gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGER_U);
-	if ((uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGEI);
-	if (gpr[i.r1] >= i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGEI_U);
-	if ((uint64_t)gpr[i.r1] >= (uint64_t)i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGER_F);
-	if (fpr[i.r1].f >= fpr[i.r2].f)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BGER_D);
-	if (fpr[i.r1].d >= fpr[i.r2].d)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BLEI);
-	if (gpr[i.r1] <= i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BLEI_U);
-	if ((uint64_t)gpr[i.r1] <= (uint64_t)i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(JMP);
-	JUMP(i.r0);
-	DISPATCH();
-
-	DO(JMPR);
-	JUMP(gpr[i.r1]);
-	DISPATCH();
-
-	DO(BMSR);
-	if (gpr[i.r1] & gpr[i.r2])
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BMSI);
-	if (gpr[i.r1] & i.o)
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BMCR);
-	if (!(gpr[i.r1] & gpr[i.r2]))
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(BMCI);
-	if (!(gpr[i.r1] & i.o))
-		JUMP(i.r0);
-
-	DISPATCH();
-
-	DO(RETVAL);
-	gpr[i.r0] = retval;
-	DISPATCH();
-
-	DO(RETVAL_F);
-	fpr[i.r0].f = retval_f;
-	DISPATCH();
-
-	DO(RETVAL_D);
-	fpr[i.r0].d = retval_f;
-	DISPATCH();
-
-	DO(PARAM);
-	gpr[i.r2] = args[i.r0].u64;
-	DISPATCH();
-
-	DO(PARAM_F);
-	if (i.r1 == EJIT_FLOAT)
-		fpr[i.r2].f = args[i.r0].f;
-	else
-		fpr[i.r2].d = args[i.r0].d;
-
-	DISPATCH();
-
-	DO(ARG);
-	struct ejit_arg a = ejit_build_arg(i.r1, gpr[i.r2]);
-	args_append(&state->args, a);
-	DISPATCH();
-
-	DO(ARG_I);
-	struct ejit_arg a = ejit_build_arg(i.r1, i.o);
-	args_append(&state->args, a);
-	DISPATCH();
-
-	DO(ARG_F);
-	struct ejit_arg a;
-	if (i.r1 == EJIT_DOUBLE)
-		a = ejit_build_arg_f(i.r1, fpr[i.r2].d);
-	else
-		a = ejit_build_arg_f(i.r1, fpr[i.r2].f);
-
-	args_append(&state->args, a);
-	DISPATCH();
-
-	DO(ARG_FI);
-	struct ejit_arg a;
-	if (i.r1 == EJIT_DOUBLE)
-		a = ejit_build_arg_f(i.r1, i.d);
-	else
-		a = ejit_build_arg_f(i.r1, i.f);
-
-	args_append(&state->args, a);
-	DISPATCH();
-
-	DO(CALLI);
-	struct ejit_func *f = i.p;
-	size_t argc = args_len(&state->args) - prev_argc;
-	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
-	                        prev_argc;
-
-	retval = ejit_run_interp(f, argc, args, state);
-
-	gpr = ((long *)state->gprs.buf) + prev_gprs;
-	fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
-	args_shrink(&state->args, prev_argc);
-	DISPATCH();
-
-	DO(CALLI_F);
-	struct ejit_func *f = i.p;
-	size_t argc = args_len(&state->args) - prev_argc;
-	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
-	                        prev_argc;
-
-	retval_f = ejit_run_interp_f(f, argc, args, state);
-
-	gpr = ((long *)state->gprs.buf) + prev_gprs;
-	fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
-	args_shrink(&state->args, prev_argc);
-	DISPATCH();
-
-	DO(ESCAPEI);
-	ejit_escape_t f = i.p;
-	size_t argc = args_len(&state->args) - prev_argc;
-	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
-	                        prev_argc;
-
-	retval = f(argc, args);
-
-	args_shrink(&state->args, prev_argc);
-	DISPATCH();
-
-	DO(ESCAPEI_F);
-	ejit_escape_f_t f = i.p;
-	size_t argc = args_len(&state->args) - prev_argc;
-	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
-	                        prev_argc;
-
-	retval_f = f(argc, args);
-
-	args_shrink(&state->args, prev_argc);
-	DISPATCH();
-
-	/* dispatch is technically unnecessary for returns, but keep it for
-	 * symmetry */
-	DO(RETR);
-	retval = gpr[i.r0];
-	goto out_int;
-	DISPATCH();
-
-	DO(RETI);
-	retval = i.o;
-	goto out_int;
-	DISPATCH();
-
-	DO(RETR_F);
-	retval_f = fpr[i.r0].f;
-	goto out_float;
-	DISPATCH();
-
-	DO(RETR_D);
-	retval_f = fpr[i.r0].d;
-	goto out_float;
-	DISPATCH();
-
-	DO(RETI_F);
-	retval_f = i.f;
-	goto out_float;
-	DISPATCH();
-
-	DO(RETI_D);
-	retval_f = i.d;
-	goto out_float;
-	DISPATCH();
-
-#undef DISPATCH
-#undef JUMP
-#undef DO
-
-out_int:
-	gprs_shrink(&state->gprs, prev_gprs);
-	fprs_shrink(&state->fprs, prev_fprs);
-	return (union interp_ret){.r = retval};
-
-out_float:
-	gprs_shrink(&state->gprs, prev_gprs);
-	fprs_shrink(&state->fprs, prev_fprs);
-	return (union interp_ret){.d = retval_f};
-}
diff --git a/src/interp.inc b/src/interp.inc
new file mode 100644
index 0000000..75e7ff2
--- /dev/null
+++ b/src/interp.inc
@@ -0,0 +1,1077 @@
+/* This is the body of a given ejit_interp function. It assumes the including
+ * function declares an int64_t retval and a double retval_f, into which it
+ * places the value to be returned. Included from src/ejit.c */
+{
+	static void *labels[OPCODE_COUNT] = { /* opcode -> address of its handler label (GNU &&label) */
+		[MOVI] = &&MOVI,
+		[MOVI_F] = &&MOVI_F,
+		[MOVI_D] = &&MOVI_D,
+		[MOVR] = &&MOVR,
+		[MOVR_F] = &&MOVR_F,
+		[MOVR_D] = &&MOVR_D,
+
+		[EXTR8] = &&EXTR8,
+		[EXTR16] = &&EXTR16,
+		[EXTR32] = &&EXTR32,
+		[EXTRU8] = &&EXTRU8,
+		[EXTRU16] = &&EXTRU16,
+		[EXTRU32] = &&EXTRU32,
+		[EXTRF] = &&EXTRF,
+		[EXTRD] = &&EXTRD,
+
+		[ADDR] = &&ADDR,
+		[ADDR_F] = &&ADDR_F,
+		[ADDR_D] = &&ADDR_D,
+		[ADDI] = &&ADDI,
+
+		[ABSR_F] = &&ABSR_F,
+		[ABSR_D] = &&ABSR_D,
+
+		[SUBR] = &&SUBR,
+		[SUBR_F] = &&SUBR_F,
+		[SUBR_D] = &&SUBR_D,
+		[SUBI] = &&SUBI,
+
+		[MULR] = &&MULR,
+		[MULR_F] = &&MULR_F,
+		[MULR_D] = &&MULR_D,
+
+		[DIVR] = &&DIVR,
+		[DIVR_U] = &&DIVR_U,
+		[DIVR_F] = &&DIVR_F,
+		[DIVR_D] = &&DIVR_D,
+
+		[REMR] = &&REMR,
+		[REMR_U] = &&REMR_U,
+
+		[LSHI] = &&LSHI,
+		[LSHR] = &&LSHR,
+		[RSHI] = &&RSHI,
+		[RSHR] = &&RSHR,
+		[RSHI_U] = &&RSHI_U,
+		[RSHR_U] = &&RSHR_U,
+
+		[ANDR] = &&ANDR,
+		[ANDI] = &&ANDI,
+
+		[ORR] = &&ORR,
+		[ORI] = &&ORI,
+
+		[XORR] = &&XORR,
+		[XORI] = &&XORI,
+
+		[COMR] = &&COMR,
+		[NEGR] = &&NEGR,
+		[NEGR_F] = &&NEGR_F,
+		[NEGR_D] = &&NEGR_D,
+
+		[EQR] = &&EQR,
+		[EQR_F] = &&EQR_F,
+		[EQR_D] = &&EQR_D,
+
+		[NER] = &&NER,
+		[NER_F] = &&NER_F,
+		[NER_D] = &&NER_D,
+
+		[GTR] = &&GTR,
+		[GTR_U] = &&GTR_U,
+		[GTR_F] = &&GTR_F,
+		[GTR_D] = &&GTR_D,
+
+		[GER] = &&GER,
+		[GER_U] = &&GER_U,
+		[GER_F] = &&GER_F,
+		[GER_D] = &&GER_D,
+
+		[STI8] = &&STI8,
+		[STI16] = &&STI16,
+		[STI32] = &&STI32,
+		[STI64] = &&STI64,
+		[STIF] = &&STIF,
+		[STID] = &&STID,
+
+		[STXI8] = &&STXI8,
+		[STXI16] = &&STXI16,
+		[STXI32] = &&STXI32,
+		[STXI64] = &&STXI64,
+		[STXIF] = &&STXIF,
+		[STXID] = &&STXID,
+
+		[STXR8] = &&STXR8,
+		[STXR16] = &&STXR16,
+		[STXR32] = &&STXR32,
+		[STXR64] = &&STXR64,
+		[STXRF] = &&STXRF,
+		[STXRD] = &&STXRD,
+
+		[LDI8] = &&LDI8,
+		[LDI16] = &&LDI16,
+		[LDI32] = &&LDI32,
+		[LDI64] = &&LDI64,
+		[LDIU8] = &&LDIU8,
+		[LDIU16] = &&LDIU16,
+		[LDIU32] = &&LDIU32,
+		[LDIU64] = &&LDIU64,
+		[LDIF] = &&LDIF,
+		[LDID] = &&LDID,
+
+		[LDXI8] = &&LDXI8,
+		[LDXI16] = &&LDXI16,
+		[LDXI32] = &&LDXI32,
+		[LDXI64] = &&LDXI64,
+		[LDXIU8] = &&LDXIU8,
+		[LDXIU16] = &&LDXIU16,
+		[LDXIU32] = &&LDXIU32,
+		[LDXIU64] = &&LDXIU64,
+		[LDXIF] = &&LDXIF,
+		[LDXID] = &&LDXID,
+
+		[LDXR8] = &&LDXR8,
+		[LDXR16] = &&LDXR16,
+		[LDXR32] = &&LDXR32,
+		[LDXR64] = &&LDXR64,
+		[LDXRU8] = &&LDXRU8,
+		[LDXRU16] = &&LDXRU16,
+		[LDXRU32] = &&LDXRU32,
+		[LDXRU64] = &&LDXRU64,
+		[LDXRF] = &&LDXRF,
+		[LDXRD] = &&LDXRD,
+
+		[TRUNCR_D_32] = &&TRUNCR_D_32,
+		[TRUNCR_D_64] = &&TRUNCR_D_64,
+		[TRUNCR_F_32] = &&TRUNCR_F_32,
+		[TRUNCR_F_64] = &&TRUNCR_F_64,
+
+		[BNER] = &&BNER,
+		[BNEI] = &&BNEI,
+		[BNER_F] = &&BNER_F,
+		[BNER_D] = &&BNER_D,
+
+		[BEQR] = &&BEQR,
+		[BEQI] = &&BEQI,
+		[BEQR_F] = &&BEQR_F,
+		[BEQR_D] = &&BEQR_D,
+
+		[BGER] = &&BGER,
+		[BGER_U] = &&BGER_U,
+		[BGEI] = &&BGEI,
+		[BGEI_U] = &&BGEI_U,
+		[BGER_F] = &&BGER_F,
+		[BGER_D] = &&BGER_D,
+
+		[BLEI] = &&BLEI, /* BLE/BLT appear only in immediate form in this table */
+		[BLEI_U] = &&BLEI_U,
+
+		[BGTR] = &&BGTR,
+		[BGTR_U] = &&BGTR_U,
+		[BGTI] = &&BGTI,
+		[BGTI_U] = &&BGTI_U,
+		[BGTR_F] = &&BGTR_F,
+		[BGTR_D] = &&BGTR_D,
+
+		[BLTI] = &&BLTI,
+		[BLTI_U] = &&BLTI_U,
+
+		[JMP] = &&JMP,
+		[JMPR] = &&JMPR,
+
+		[BMCI] = &&BMCI,
+		[BMCR] = &&BMCR,
+		[BMSI] = &&BMSI,
+		[BMSR] = &&BMSR,
+
+		[RETR] = &&RETR,
+		[RETI] = &&RETI,
+		[RETR_F] = &&RETR_F,
+		[RETI_F] = &&RETI_F,
+		[RETR_D] = &&RETR_D,
+		[RETI_D] = &&RETI_D,
+
+		[RETVAL] = &&RETVAL,
+		[RETVAL_F] = &&RETVAL_F,
+		[RETVAL_D] = &&RETVAL_D,
+
+		[ARG] = &&ARG,
+		[ARG_I] = &&ARG_I,
+		[ARG_F] = &&ARG_F,
+		[ARG_FI] = &&ARG_FI,
+
+		[PARAM] = &&PARAM,
+		[PARAM_F] = &&PARAM_F,
+
+		[CALLI] = &&CALLI,
+		[CALLI_F] = &&CALLI_F,
+		[ESCAPEI] = &&ESCAPEI,
+		[ESCAPEI_F] = &&ESCAPEI_F,
+
+		[START] = &&START,
+		[END] = &&END,
+	};
+
+	if (!run) { /* query mode: publish the label table and execute nothing */
+		*labels_wb = labels;
+		goto zero_out;
+	}
+
+	size_t prev_gprs = gprs_len(&state->gprs); /* stack heights on entry, restored on exit */
+	size_t prev_fprs = fprs_len(&state->fprs);
+	size_t prev_argc = args_len(&state->args);
+
+	gprs_reserve(&state->gprs, prev_gprs + f->gpr); /* presumably ensures capacity for this frame */
+	fprs_reserve(&state->fprs, prev_fprs + f->fpr);
+
+	union fpr { /* a float register is read as either a double or a float */
+		double d;
+		float f;
+	};
+	long *gpr = ((long *)state->gprs.buf) + prev_gprs; /* this frame's registers start at the old length */
+	union fpr *fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
+
+	struct ejit_insn *insns = f->insns.buf;
+
+	/* retval and retval_f are declared by the function that includes this
+	 * file; keeping call and return value separated costs that extra state
+	 * but is convenient for void calls. pc indexes the current insn. */
+	size_t pc = 0;
+
+#define DO(x) x : { struct ejit_insn i = insns[pc]; (void)i; /* label + open scope, i = current insn */
+#define JUMP(a) goto *insns[pc = a].addr; /* set pc to a and go to that insn's handler */
+#define DISPATCH() } goto *insns[++pc].addr; /* close scope, go to the next insn's handler */
+
+	JUMP(0); /* enter at the first instruction */
+
+	DO(START); /* no-op: continues with the next instruction */
+	DISPATCH();
+
+	DO(END); /* leave without assigning retval/retval_f */
+	goto out;
+	DISPATCH();
+
+	DO(MOVI); /* r0 = integer immediate */
+	gpr[i.r0] = i.o;
+	DISPATCH();
+
+	DO(MOVI_F); /* r0 = float immediate */
+	fpr[i.r0].f = i.f;
+	DISPATCH();
+
+	DO(MOVI_D); /* r0 = double immediate */
+	fpr[i.r0].d = i.d;
+	DISPATCH();
+
+	DO(MOVR); /* register-to-register copies, one per register class/width */
+	gpr[i.r0] = gpr[i.r1];
+	DISPATCH();
+
+	DO(MOVR_F);
+	fpr[i.r0].f = fpr[i.r1].f;
+	DISPATCH();
+
+	DO(MOVR_D);
+	fpr[i.r0].d = fpr[i.r1].d;
+	DISPATCH();
+
+	DO(EXTR8); /* EXTR<n>: narrow r1 to a signed n-bit value, widened back with its sign */
+	gpr[i.r0] = (int8_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTR16);
+	gpr[i.r0] = (int16_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTR32);
+	gpr[i.r0] = (int32_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTRU8); /* EXTRU<n>: keep the low n bits of r1, upper bits become zero */
+	gpr[i.r0] = (uint8_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTRU16);
+	gpr[i.r0] = (uint16_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTRU32);
+	gpr[i.r0] = (uint32_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTRF); /* integer register -> float register (value conversion) */
+	fpr[i.r0].f = (float)gpr[i.r1];
+	DISPATCH();
+
+	DO(EXTRD); /* integer register -> double register (value conversion) */
+	fpr[i.r0].d = (double)gpr[i.r1];
+	DISPATCH();
+
+	DO(ADDR); /* integer add/sub/mul run on uint64_t so overflow wraps instead of being UB */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] + (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(ADDR_F); /* r0 = r1 + r2, single precision */
+	fpr[i.r0].f = fpr[i.r1].f + fpr[i.r2].f;
+	DISPATCH();
+
+	DO(ADDR_D); /* r0 = r1 + r2, double precision */
+	fpr[i.r0].d = fpr[i.r1].d + fpr[i.r2].d;
+	DISPATCH();
+
+	DO(ADDI); /* r0 = r1 + immediate, wrapping */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] + (uint64_t)i.o;
+	DISPATCH();
+
+	DO(ABSR_F); /* fabsf keeps the operation in single precision */
+	fpr[i.r0].f = fabsf(fpr[i.r1].f);
+	DISPATCH();
+
+	DO(ABSR_D); /* r0 = |r1|, double precision */
+	fpr[i.r0].d = fabs(fpr[i.r1].d);
+	DISPATCH();
+
+	DO(SUBR); /* r0 = r1 - r2, wrapping */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] - (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(SUBR_F);
+	fpr[i.r0].f = fpr[i.r1].f - fpr[i.r2].f;
+	DISPATCH();
+
+	DO(SUBR_D);
+	fpr[i.r0].d = fpr[i.r1].d - fpr[i.r2].d;
+	DISPATCH();
+
+	DO(SUBI); /* r0 = r1 - immediate, wrapping */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] - (uint64_t)i.o;
+	DISPATCH();
+
+	DO(MULR); /* low 64 bits of the product are the same for signed and unsigned */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] * (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(MULR_F);
+	fpr[i.r0].f = fpr[i.r1].f * fpr[i.r2].f;
+	DISPATCH();
+
+	DO(MULR_D);
+	fpr[i.r0].d = fpr[i.r1].d * fpr[i.r2].d;
+	DISPATCH();
+
+	DO(DIVR); /* signed C division; NOTE(review): a zero divisor is not guarded here */
+	gpr[i.r0] = gpr[i.r1] / gpr[i.r2];
+	DISPATCH();
+
+	DO(DIVR_U); /* same, with both operands reinterpreted as unsigned 64-bit */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] / (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(REMR); /* signed C remainder */
+	gpr[i.r0] = gpr[i.r1] % gpr[i.r2];
+	DISPATCH();
+
+	DO(REMR_U); /* unsigned 64-bit remainder */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] % (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(DIVR_F); /* floating-point division, single precision */
+	fpr[i.r0].f = fpr[i.r1].f / fpr[i.r2].f;
+	DISPATCH();
+
+	DO(DIVR_D); /* floating-point division, double precision */
+	fpr[i.r0].d = fpr[i.r1].d / fpr[i.r2].d;
+	DISPATCH();
+
+	DO(LSHI); /* shift as uint64_t: left-shifting a negative signed value is UB in C */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] << i.o;
+	DISPATCH();
+
+	DO(LSHR); /* same, shift count taken from r2 */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] << gpr[i.r2];
+	DISPATCH();
+
+	DO(RSHI); /* signed >>: result for negative values is implementation-defined in C */
+	gpr[i.r0] = gpr[i.r1] >> i.o;
+	DISPATCH();
+
+	DO(RSHR);
+	gpr[i.r0] = gpr[i.r1] >> gpr[i.r2];
+	DISPATCH();
+
+	DO(RSHI_U); /* logical right shift: operand reinterpreted as unsigned 64-bit */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] >> i.o;
+	DISPATCH();
+
+	DO(RSHR_U);
+	gpr[i.r0] = (uint64_t)gpr[i.r1] >> gpr[i.r2];
+	DISPATCH();
+
+	DO(ANDR); /* bitwise ops: register/register (..R) and register/immediate (..I) forms */
+	gpr[i.r0] = gpr[i.r1] & gpr[i.r2];
+	DISPATCH();
+
+	DO(ANDI);
+	gpr[i.r0] = gpr[i.r1] & i.o;
+	DISPATCH();
+
+	DO(ORR);
+	gpr[i.r0] = gpr[i.r1] | gpr[i.r2];
+	DISPATCH();
+
+	DO(ORI);
+	gpr[i.r0] = gpr[i.r1] | i.o;
+	DISPATCH();
+
+	DO(XORR);
+	gpr[i.r0] = gpr[i.r1] ^ gpr[i.r2];
+	DISPATCH();
+
+	DO(XORI);
+	gpr[i.r0] = gpr[i.r1] ^ i.o;
+	DISPATCH();
+
+	DO(COMR); /* bitwise complement */
+	gpr[i.r0] = ~gpr[i.r1];
+	DISPATCH();
+
+	DO(NEGR); /* negate as uint64_t: negating the minimum signed value would be UB */
+	gpr[i.r0] = -(uint64_t)gpr[i.r1];
+	DISPATCH();
+
+	DO(NEGR_F); /* floating-point negation, single precision */
+	fpr[i.r0].f = -fpr[i.r1].f;
+	DISPATCH();
+
+	DO(NEGR_D); /* floating-point negation, double precision */
+	fpr[i.r0].d = -fpr[i.r1].d;
+	DISPATCH();
+
+	DO(EQR); /* comparisons store 1 or 0 in integer register r0, also for float operands */
+	gpr[i.r0] = gpr[i.r1] == gpr[i.r2];
+	DISPATCH();
+
+	DO(EQR_F);
+	gpr[i.r0] = fpr[i.r1].f == fpr[i.r2].f;
+	DISPATCH();
+
+	DO(EQR_D);
+	gpr[i.r0] = fpr[i.r1].d == fpr[i.r2].d;
+	DISPATCH();
+
+	DO(NER); /* r0 = (r1 != r2) */
+	gpr[i.r0] = gpr[i.r1] != gpr[i.r2];
+	DISPATCH();
+
+	DO(NER_F);
+	gpr[i.r0] = fpr[i.r1].f != fpr[i.r2].f;
+	DISPATCH();
+
+	DO(NER_D);
+	gpr[i.r0] = fpr[i.r1].d != fpr[i.r2].d;
+	DISPATCH();
+
+	DO(GTR); /* r0 = (r1 > r2), signed */
+	gpr[i.r0] = gpr[i.r1] > gpr[i.r2];
+	DISPATCH();
+
+	DO(GTR_U); /* unsigned variant: operands compared as uint64_t */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(GTR_F);
+	gpr[i.r0] = fpr[i.r1].f > fpr[i.r2].f;
+	DISPATCH();
+
+	DO(GTR_D);
+	gpr[i.r0] = fpr[i.r1].d > fpr[i.r2].d;
+	DISPATCH();
+
+	DO(GER); /* r0 = (r1 >= r2), signed */
+	gpr[i.r0] = gpr[i.r1] >= gpr[i.r2];
+	DISPATCH();
+
+	DO(GER_U); /* unsigned variant: operands compared as uint64_t */
+	gpr[i.r0] = (uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2];
+	DISPATCH();
+
+	DO(GER_F);
+	gpr[i.r0] = fpr[i.r1].f >= fpr[i.r2].f;
+	DISPATCH();
+
+	DO(GER_D);
+	gpr[i.r0] = fpr[i.r1].d >= fpr[i.r2].d;
+	DISPATCH();
+
+	DO(STI8); /* STI*: store r0 to the absolute address i.p, narrowed to the access type */
+	int8_t *addr = (int8_t *)(i.p);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STI16);
+	int16_t *addr = (int16_t *)(i.p);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STI32);
+	int32_t *addr = (int32_t *)(i.p);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STI64);
+	int64_t *addr = (int64_t *)(i.p);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STIF); /* float/double stores read the value from the fpr bank */
+	float *addr = (float *)(i.p);
+	*addr = fpr[i.r0].f;
+	DISPATCH();
+
+	DO(STID);
+	double *addr = (double *)(i.p);
+	*addr = fpr[i.r0].d;
+	DISPATCH();
+
+	DO(STXI8); /* STXI*: address is gpr[r1] plus the immediate offset i.o */
+	int8_t *addr = (int8_t *)(gpr[i.r1] + i.o);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXI16);
+	int16_t *addr = (int16_t *)(gpr[i.r1] + i.o);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXI32);
+	int32_t *addr = (int32_t *)(gpr[i.r1] + i.o);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXI64);
+	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXIF);
+	float *addr = (float *)(gpr[i.r1] + i.o);
+	*addr = fpr[i.r0].f;
+	DISPATCH();
+
+	DO(STXID);
+	double *addr = (double *)(gpr[i.r1] + i.o);
+	*addr = fpr[i.r0].d;
+	DISPATCH();
+
+	DO(STXR8); /* STXR*: address is gpr[r1] + gpr[r2] */
+	int8_t *addr = (int8_t *)(gpr[i.r1] + gpr[i.r2]);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXR16);
+	int16_t *addr = (int16_t *)(gpr[i.r1] + gpr[i.r2]);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXR32);
+	int32_t *addr = (int32_t *)(gpr[i.r1] + gpr[i.r2]);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXR64);
+	int64_t *addr = (int64_t *)(gpr[i.r1] + gpr[i.r2]);
+	*addr = gpr[i.r0];
+	DISPATCH();
+
+	DO(STXRF);
+	float *addr = (float *)(gpr[i.r1] + gpr[i.r2]);
+	*addr = fpr[i.r0].f;
+	DISPATCH();
+
+	DO(STXRD);
+	double *addr = (double *)(gpr[i.r1] + gpr[i.r2]);
+	*addr = fpr[i.r0].d;
+	DISPATCH();
+
+	DO(LDI8); /* LDI*: load from the absolute address i.p; signed types widen with their sign */
+	int8_t *addr = (int8_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDI16);
+	int16_t *addr = (int16_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDI32);
+	int32_t *addr = (int32_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDI64);
+	int64_t *addr = (int64_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDIU8); /* LDIU*: unsigned loads, upper bits of r0 become zero */
+	uint8_t *addr = (uint8_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDIU16);
+	uint16_t *addr = (uint16_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDIU32);
+	uint32_t *addr = (uint32_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDIU64);
+	uint64_t *addr = (uint64_t *)i.p;
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDIF); /* float/double loads write the fpr bank */
+	float *addr = (float *)i.p;
+	fpr[i.r0].f = *addr;
+	DISPATCH();
+
+	DO(LDID);
+	double *addr = (double *)i.p;
+	fpr[i.r0].d = *addr;
+	DISPATCH();
+
+	DO(LDXI8); /* LDXI*: address is gpr[r1] plus the immediate offset i.o */
+	int8_t *addr = (int8_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXI16);
+	int16_t *addr = (int16_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXI32);
+	int32_t *addr = (int32_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXI64);
+	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXIU8);
+	uint8_t *addr = (uint8_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXIU16);
+	uint16_t *addr = (uint16_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXIU32);
+	uint32_t *addr = (uint32_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXIU64);
+	uint64_t *addr = (uint64_t *)(gpr[i.r1] + i.o);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXIF);
+	float *addr = (float *)(gpr[i.r1] + i.o);
+	fpr[i.r0].f = *addr;
+	DISPATCH();
+
+	DO(LDXID);
+	double *addr = (double *)(gpr[i.r1] + i.o);
+	fpr[i.r0].d = *addr;
+	DISPATCH();
+
+	DO(LDXR8); /* LDXR*: address is gpr[r1] + gpr[r2] */
+	int8_t *addr = (int8_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXR16);
+	int16_t *addr = (int16_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXR32);
+	int32_t *addr = (int32_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXR64);
+	int64_t *addr = (int64_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXRU8);
+	uint8_t *addr = (uint8_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXRU16);
+	uint16_t *addr = (uint16_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXRU32);
+	uint32_t *addr = (uint32_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXRU64);
+	uint64_t *addr = (uint64_t *)(gpr[i.r1] + gpr[i.r2]);
+	gpr[i.r0] = *addr;
+	DISPATCH();
+
+	DO(LDXRF);
+	float *addr = (float *)(gpr[i.r1] + gpr[i.r2]);
+	fpr[i.r0].f = *addr;
+	DISPATCH();
+
+	DO(LDXRD);
+	double *addr = (double *)(gpr[i.r1] + gpr[i.r2]);
+	fpr[i.r0].d = *addr;
+	DISPATCH();
+
+	DO(TRUNCR_D_32); /* TRUNCR_<src>_<bits>: C cast from float/double, fraction discarded toward zero */
+	gpr[i.r0] = (int32_t)fpr[i.r1].d;
+	DISPATCH();
+
+	DO(TRUNCR_D_64);
+	gpr[i.r0] = (int64_t)fpr[i.r1].d;
+	DISPATCH();
+
+	DO(TRUNCR_F_32);
+	gpr[i.r0] = (int32_t)fpr[i.r1].f;
+	DISPATCH();
+
+	DO(TRUNCR_F_64);
+	gpr[i.r0] = (int64_t)fpr[i.r1].f;
+	DISPATCH();
+
+	DO(BNER); /* conditional branches: i.r0 is the target instruction index, else fall through */
+	if (gpr[i.r1] != gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BNEI); /* ..I forms compare r1 against the immediate i.o */
+	if (gpr[i.r1] != i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BNER_F);
+	if (fpr[i.r1].f != fpr[i.r2].f)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BNER_D);
+	if (fpr[i.r1].d != fpr[i.r2].d)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BEQR);
+	if (gpr[i.r1] == gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BEQI);
+	if (gpr[i.r1] == i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BEQR_F);
+	if (fpr[i.r1].f == fpr[i.r2].f)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BEQR_D);
+	if (fpr[i.r1].d == fpr[i.r2].d)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGTR);
+	if (gpr[i.r1] > gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGTR_U); /* .._U forms compare both operands as uint64_t */
+	if ((uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGTI);
+	if (gpr[i.r1] > i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGTI_U);
+	if ((uint64_t)gpr[i.r1] > (uint64_t)i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGTR_F);
+	if (fpr[i.r1].f > fpr[i.r2].f)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGTR_D);
+	if (fpr[i.r1].d > fpr[i.r2].d)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BLTI);
+	if (gpr[i.r1] < i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BLTI_U);
+	if ((uint64_t)gpr[i.r1] < (uint64_t)i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGER);
+	if (gpr[i.r1] >= gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGER_U);
+	if ((uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGEI);
+	if (gpr[i.r1] >= i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGEI_U);
+	if ((uint64_t)gpr[i.r1] >= (uint64_t)i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGER_F);
+	if (fpr[i.r1].f >= fpr[i.r2].f)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BGER_D);
+	if (fpr[i.r1].d >= fpr[i.r2].d)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BLEI);
+	if (gpr[i.r1] <= i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BLEI_U);
+	if ((uint64_t)gpr[i.r1] <= (uint64_t)i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(JMP); /* unconditional jump; the DISPATCH below only closes the handler scope */
+	JUMP(i.r0);
+	DISPATCH();
+
+	DO(JMPR); /* indirect jump: target instruction index is read from gpr[r1] */
+	JUMP(gpr[i.r1]);
+	DISPATCH();
+
+	DO(BMSR); /* BMS*: branch if any bit of the mask is set in r1 */
+	if (gpr[i.r1] & gpr[i.r2])
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BMSI);
+	if (gpr[i.r1] & i.o)
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BMCR); /* BMC*: branch if no bit of the mask is set in r1 */
+	if (!(gpr[i.r1] & gpr[i.r2]))
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(BMCI);
+	if (!(gpr[i.r1] & i.o))
+		JUMP(i.r0);
+
+	DISPATCH();
+
+	DO(RETVAL); /* copy the pending integer result (retval) into r0 */
+	gpr[i.r0] = retval;
+	DISPATCH();
+
+	DO(RETVAL_F); /* copy the pending floating result (retval_f) into r0, narrowed to float */
+	fpr[i.r0].f = retval_f;
+	DISPATCH();
+
+	DO(RETVAL_D);
+	fpr[i.r0].d = retval_f;
+	DISPATCH();
+
+	DO(PARAM); /* read incoming argument number i.r0 into integer register r2 */
+	gpr[i.r2] = args[i.r0].u64;
+	DISPATCH();
+
+	DO(PARAM_F); /* i.r1 carries the type tag: EJIT_FLOAT reads .f, anything else reads .d */
+	if (i.r1 == EJIT_FLOAT)
+		fpr[i.r2].f = args[i.r0].f;
+	else
+		fpr[i.r2].d = args[i.r0].d;
+
+	DISPATCH();
+
+	DO(ARG); /* ARG*: build an outgoing argument of type i.r1 and append it to state->args */
+	struct ejit_arg a = ejit_build_arg(i.r1, gpr[i.r2]);
+	args_append(&state->args, a);
+	DISPATCH();
+
+	DO(ARG_I); /* immediate integer argument */
+	struct ejit_arg a = ejit_build_arg(i.r1, i.o);
+	args_append(&state->args, a);
+	DISPATCH();
+
+	DO(ARG_F); /* here the tag test is inverted: EJIT_DOUBLE reads .d, anything else reads .f */
+	struct ejit_arg a;
+	if (i.r1 == EJIT_DOUBLE)
+		a = ejit_build_arg_f(i.r1, fpr[i.r2].d);
+	else
+		a = ejit_build_arg_f(i.r1, fpr[i.r2].f);
+
+	args_append(&state->args, a);
+	DISPATCH();
+
+	DO(ARG_FI); /* immediate floating argument */
+	struct ejit_arg a;
+	if (i.r1 == EJIT_DOUBLE)
+		a = ejit_build_arg_f(i.r1, i.d);
+	else
+		a = ejit_build_arg_f(i.r1, i.f);
+
+	args_append(&state->args, a);
+	DISPATCH();
+
+	DO(CALLI); /* f/argc/args below shadow the includer's names inside this handler's scope */
+	struct ejit_func *f = i.p;
+	size_t argc = args_len(&state->args) - prev_argc; /* arguments appended since entry */
+	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
+	                        prev_argc;
+
+	retval = ejit_run_interp(f, argc, args, state, true, NULL);
+
+	gpr = ((long *)state->gprs.buf) + prev_gprs; /* re-derive: presumably the callee's reserve may move buf */
+	fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
+	args_shrink(&state->args, prev_argc); /* drop the consumed arguments */
+	DISPATCH();
+
+	DO(CALLI_F); /* same as CALLI, but the result lands in retval_f */
+	struct ejit_func *f = i.p;
+	size_t argc = args_len(&state->args) - prev_argc;
+	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
+	                        prev_argc;
+
+	retval_f = ejit_run_interp_f(f, argc, args, state, true, NULL);
+
+	gpr = ((long *)state->gprs.buf) + prev_gprs;
+	fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
+	args_shrink(&state->args, prev_argc);
+	DISPATCH();
+
+	DO(ESCAPEI); /* call the function pointer in i.p directly; gpr/fpr are not re-derived here */
+	ejit_escape_t f = i.p;
+	size_t argc = args_len(&state->args) - prev_argc;
+	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
+	                        prev_argc;
+
+	retval = f(argc, args);
+
+	args_shrink(&state->args, prev_argc);
+	DISPATCH();
+
+	DO(ESCAPEI_F); /* same as ESCAPEI, but the result lands in retval_f */
+	ejit_escape_f_t f = i.p;
+	size_t argc = args_len(&state->args) - prev_argc;
+	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
+	                        prev_argc;
+
+	retval_f = f(argc, args);
+
+	args_shrink(&state->args, prev_argc);
+	DISPATCH();
+
+	/* DISPATCH is never reached after 'goto out' in the return handlers; it
+	 * is kept for symmetry and because it closes the scope DO() opened */
+	DO(RETR); /* integer returns write retval */
+	retval = gpr[i.r0];
+	goto out;
+	DISPATCH();
+
+	DO(RETI);
+	retval = i.o;
+	goto out;
+	DISPATCH();
+
+	DO(RETR_F); /* floating returns write retval_f; float values are widened to double */
+	retval_f = fpr[i.r0].f;
+	goto out;
+	DISPATCH();
+
+	DO(RETR_D);
+	retval_f = fpr[i.r0].d;
+	goto out;
+	DISPATCH();
+
+	DO(RETI_F);
+	retval_f = i.f;
+	goto out;
+	DISPATCH();
+
+	DO(RETI_D);
+	retval_f = i.d;
+	goto out;
+	DISPATCH();
+
+#undef DISPATCH
+#undef JUMP
+#undef DO
+
+out: /* normal exit: cut both register stacks back to their length on entry */
+	gprs_shrink(&state->gprs, prev_gprs);
+	fprs_shrink(&state->fprs, prev_fprs);
+
+zero_out:; /* !run exit; the empty statement is needed, a label may not end a block before C23 */
+}
diff --git a/src/vec.h b/src/vec.h
index 37f29f6..90558ac 100644
--- a/src/vec.h
+++ b/src/vec.h
@@ -107,7 +107,7 @@ static inline void VEC(reserve)(struct VEC_STRUCT *v, size_t n)
 
 static inline void VEC(shrink)(struct VEC_STRUCT *v, size_t n)
 {
-	assert(v->n >= n);
+	/* assert(v->n >= n) is disabled: n is stored unchecked. NOTE(review): confirm why */
 	v->n = n;
 }
 
-- 
cgit v1.2.3