#include <math.h>
#include <stdio.h>
#include "common.h"

/* Threaded-code interpreter loop for compiled ejit functions. The integer and
 * floating point call results (retval / retval_f) are kept as locals inside
 * ejit_run(). Included from src/interp.c */
/**
 * Run (or introspect) the bytecode interpreter for one ejit function.
 *
 * The interpreter is threaded code built on the GNU "labels as values"
 * extension: every instruction carries the address of its handler in
 * insns[pc].addr and each handler jumps straight to the next one.
 *
 * @param f         Function to execute. Only read when @p run is true.
 * @param paramc    Number of entries in @p params.
 * @param params    Arguments for @p f; read by the PARAM/PARAM_F opcodes or
 *                  forwarded unchanged to f->extern_call.
 * @param run       false: only store the opcode -> handler address table in
 *                  *labels_wb and return; true: execute @p f.
 * @param labels_wb Out-pointer for the handler table. Only written when
 *                  @p run is false (nested calls pass NULL with run == true).
 * @return .i for integer returns, .f for float/double returns; .i == 0 when
 *         only the handler table was requested or the END opcode is reached.
 */
union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg params[paramc], bool run, void ***labels_wb)
{
	/* opcode -> handler address, indexed by enum value */
	static void *labels[EJIT_OPCODE_COUNT] = {
		[EJIT_OP_MOVI] = &&MOVI,
		[EJIT_OP_MOVI_F] = &&MOVI_F,
		[EJIT_OP_MOVI_D] = &&MOVI_D,
		[EJIT_OP_MOVR] = &&MOVR,
		[EJIT_OP_MOVR_F] = &&MOVR_F,
		[EJIT_OP_MOVR_D] = &&MOVR_D,

		[EJIT_OP_EXTR8] = &&EXTR8,
		[EJIT_OP_EXTR16] = &&EXTR16,
		[EJIT_OP_EXTR32] = &&EXTR32,
		[EJIT_OP_EXTRU8] = &&EXTRU8,
		[EJIT_OP_EXTRU16] = &&EXTRU16,
		[EJIT_OP_EXTRU32] = &&EXTRU32,
		[EJIT_OP_EXTRF] = &&EXTRF,
		[EJIT_OP_EXTRD] = &&EXTRD,

		[EJIT_OP_ADDR] = &&ADDR,
		[EJIT_OP_ADDR_F] = &&ADDR_F,
		[EJIT_OP_ADDR_D] = &&ADDR_D,
		[EJIT_OP_ADDI] = &&ADDI,

		[EJIT_OP_ABSR_F] = &&ABSR_F,
		[EJIT_OP_ABSR_D] = &&ABSR_D,

		[EJIT_OP_SUBR] = &&SUBR,
		[EJIT_OP_SUBR_F] = &&SUBR_F,
		[EJIT_OP_SUBR_D] = &&SUBR_D,
		[EJIT_OP_SUBI] = &&SUBI,

		[EJIT_OP_MULR] = &&MULR,
		[EJIT_OP_MULR_F] = &&MULR_F,
		[EJIT_OP_MULR_D] = &&MULR_D,

		[EJIT_OP_DIVR] = &&DIVR,
		[EJIT_OP_DIVR_U] = &&DIVR_U,
		[EJIT_OP_DIVR_F] = &&DIVR_F,
		[EJIT_OP_DIVR_D] = &&DIVR_D,

		[EJIT_OP_REMR] = &&REMR,
		[EJIT_OP_REMR_U] = &&REMR_U,

		[EJIT_OP_LSHI] = &&LSHI,
		[EJIT_OP_LSHR] = &&LSHR,
		[EJIT_OP_RSHI] = &&RSHI,
		[EJIT_OP_RSHR] = &&RSHR,
		[EJIT_OP_RSHI_U] = &&RSHI_U,
		[EJIT_OP_RSHR_U] = &&RSHR_U,

		[EJIT_OP_ANDR] = &&ANDR,
		[EJIT_OP_ANDI] = &&ANDI,

		[EJIT_OP_ORR] = &&ORR,
		[EJIT_OP_ORI] = &&ORI,

		[EJIT_OP_XORR] = &&XORR,
		[EJIT_OP_XORI] = &&XORI,

		[EJIT_OP_COMR] = &&COMR,
		[EJIT_OP_NEGR] = &&NEGR,
		[EJIT_OP_NEGR_F] = &&NEGR_F,
		[EJIT_OP_NEGR_D] = &&NEGR_D,

		[EJIT_OP_EQR] = &&EQR,
		[EJIT_OP_EQR_F] = &&EQR_F,
		[EJIT_OP_EQR_D] = &&EQR_D,

		[EJIT_OP_NER] = &&NER,
		[EJIT_OP_NER_F] = &&NER_F,
		[EJIT_OP_NER_D] = &&NER_D,

		[EJIT_OP_GTR] = &&GTR,
		[EJIT_OP_GTR_U] = &&GTR_U,
		[EJIT_OP_GTR_F] = &&GTR_F,
		[EJIT_OP_GTR_D] = &&GTR_D,

		[EJIT_OP_GER] = &&GER,
		[EJIT_OP_GER_U] = &&GER_U,
		[EJIT_OP_GER_F] = &&GER_F,
		[EJIT_OP_GER_D] = &&GER_D,

		[EJIT_OP_STI8] = &&STI8,
		[EJIT_OP_STI16] = &&STI16,
		[EJIT_OP_STI32] = &&STI32,
		[EJIT_OP_STI64] = &&STI64,
		[EJIT_OP_STIF] = &&STIF,
		[EJIT_OP_STID] = &&STID,

		[EJIT_OP_STXI8] = &&STXI8,
		[EJIT_OP_STXI16] = &&STXI16,
		[EJIT_OP_STXI32] = &&STXI32,
		[EJIT_OP_STXI64] = &&STXI64,
		[EJIT_OP_STXIF] = &&STXIF,
		[EJIT_OP_STXID] = &&STXID,

		[EJIT_OP_STXR8] = &&STXR8,
		[EJIT_OP_STXR16] = &&STXR16,
		[EJIT_OP_STXR32] = &&STXR32,
		[EJIT_OP_STXR64] = &&STXR64,
		[EJIT_OP_STXRF] = &&STXRF,
		[EJIT_OP_STXRD] = &&STXRD,

		[EJIT_OP_LDI8] = &&LDI8,
		[EJIT_OP_LDI16] = &&LDI16,
		[EJIT_OP_LDI32] = &&LDI32,
		[EJIT_OP_LDI64] = &&LDI64,
		[EJIT_OP_LDIU8] = &&LDIU8,
		[EJIT_OP_LDIU16] = &&LDIU16,
		[EJIT_OP_LDIU32] = &&LDIU32,
		[EJIT_OP_LDIU64] = &&LDIU64,
		[EJIT_OP_LDIF] = &&LDIF,
		[EJIT_OP_LDID] = &&LDID,

		[EJIT_OP_LDXI8] = &&LDXI8,
		[EJIT_OP_LDXI16] = &&LDXI16,
		[EJIT_OP_LDXI32] = &&LDXI32,
		[EJIT_OP_LDXI64] = &&LDXI64,
		[EJIT_OP_LDXIU8] = &&LDXIU8,
		[EJIT_OP_LDXIU16] = &&LDXIU16,
		[EJIT_OP_LDXIU32] = &&LDXIU32,
		[EJIT_OP_LDXIU64] = &&LDXIU64,
		[EJIT_OP_LDXIF] = &&LDXIF,
		[EJIT_OP_LDXID] = &&LDXID,

		[EJIT_OP_LDXR8] = &&LDXR8,
		[EJIT_OP_LDXR16] = &&LDXR16,
		[EJIT_OP_LDXR32] = &&LDXR32,
		[EJIT_OP_LDXR64] = &&LDXR64,
		[EJIT_OP_LDXRU8] = &&LDXRU8,
		[EJIT_OP_LDXRU16] = &&LDXRU16,
		[EJIT_OP_LDXRU32] = &&LDXRU32,
		[EJIT_OP_LDXRU64] = &&LDXRU64,
		[EJIT_OP_LDXRF] = &&LDXRF,
		[EJIT_OP_LDXRD] = &&LDXRD,

		[EJIT_OP_TRUNCR_D_32] = &&TRUNCR_D_32,
		[EJIT_OP_TRUNCR_D_64] = &&TRUNCR_D_64,
		[EJIT_OP_TRUNCR_F_32] = &&TRUNCR_F_32,
		[EJIT_OP_TRUNCR_F_64] = &&TRUNCR_F_64,

		[EJIT_OP_SQRTR_F] = &&SQRTR_F,
		[EJIT_OP_SQRTR_D] = &&SQRTR_D,

		[EJIT_OP_BNER] = &&BNER,
		[EJIT_OP_BNEI] = &&BNEI,
		[EJIT_OP_BNER_F] = &&BNER_F,
		[EJIT_OP_BNER_D] = &&BNER_D,

		[EJIT_OP_BEQR] = &&BEQR,
		[EJIT_OP_BEQI] = &&BEQI,
		[EJIT_OP_BEQR_F] = &&BEQR_F,
		[EJIT_OP_BEQR_D] = &&BEQR_D,

		[EJIT_OP_BGER] = &&BGER,
		[EJIT_OP_BGER_U] = &&BGER_U,
		[EJIT_OP_BGEI] = &&BGEI,
		[EJIT_OP_BGEI_U] = &&BGEI_U,
		[EJIT_OP_BGER_F] = &&BGER_F,
		[EJIT_OP_BGER_D] = &&BGER_D,

		[EJIT_OP_BLEI] = &&BLEI,
		[EJIT_OP_BLEI_U] = &&BLEI_U,

		[EJIT_OP_BGTR] = &&BGTR,
		[EJIT_OP_BGTR_U] = &&BGTR_U,
		[EJIT_OP_BGTI] = &&BGTI,
		[EJIT_OP_BGTI_U] = &&BGTI_U,
		[EJIT_OP_BGTR_F] = &&BGTR_F,
		[EJIT_OP_BGTR_D] = &&BGTR_D,

		[EJIT_OP_BLTI] = &&BLTI,
		[EJIT_OP_BLTI_U] = &&BLTI_U,

		[EJIT_OP_JMP] = &&JMP,
		[EJIT_OP_JMPR] = &&JMPR,

		[EJIT_OP_BMCI] = &&BMCI,
		[EJIT_OP_BMCR] = &&BMCR,
		[EJIT_OP_BMSI] = &&BMSI,
		[EJIT_OP_BMSR] = &&BMSR,

		[EJIT_OP_RETR] = &&RETR,
		[EJIT_OP_RETI] = &&RETI,
		[EJIT_OP_RETR_F] = &&RETR_F,
		[EJIT_OP_RETI_F] = &&RETI_F,
		[EJIT_OP_RETR_D] = &&RETR_D,
		[EJIT_OP_RETI_D] = &&RETI_D,

		[EJIT_OP_RETVAL] = &&RETVAL,
		[EJIT_OP_RETVAL_F] = &&RETVAL_F,
		[EJIT_OP_RETVAL_D] = &&RETVAL_D,

		[EJIT_OP_ARG] = &&ARG,
		[EJIT_OP_ARG_I] = &&ARG_I,
		[EJIT_OP_ARG_F] = &&ARG_F,
		[EJIT_OP_ARG_FI] = &&ARG_FI,

		[EJIT_OP_PARAM] = &&PARAM,
		[EJIT_OP_PARAM_F] = &&PARAM_F,

		[EJIT_OP_CALLI_I] = &&CALLI_I,
		[EJIT_OP_CALLI_L] = &&CALLI_L,
		[EJIT_OP_CALLI_F] = &&CALLI_F,
		[EJIT_OP_CALLI_D] = &&CALLI_D,
		[EJIT_OP_ESCAPEI_I] = &&ESCAPEI_I,
		[EJIT_OP_ESCAPEI_F] = &&ESCAPEI_F,
		[EJIT_OP_ESCAPEI_L] = &&ESCAPEI_L,
		[EJIT_OP_ESCAPEI_D] = &&ESCAPEI_D,

		[EJIT_OP_START] = &&START,
		[EJIT_OP_END] = &&END,
	};

	/* Label addresses are only visible inside this function, so callers
	 * that need them ask for the table with run == false. */
	if (!run) {
		*labels_wb = labels;
		goto zero_out;
	}

	assert(f->size && "trying to run a function that hasn't been compiled");

	/* Functions backed by a native callback bypass the interpreter; the
	 * callback is cast to the escape signature matching the return type. */
	if (f->extern_call) {
		if (f->rtype == EJIT_INT64 || f->rtype == EJIT_UINT64)
			return (union interp_ret){
				.i = ((ejit_escape_l_t)f->extern_call)(paramc, params)
			};

		if (f->rtype == EJIT_DOUBLE)
			return (union interp_ret){
				.f = ((ejit_escape_d_t)f->extern_call)(paramc, params)
			};

		if (f->rtype == EJIT_FLOAT)
			return (union interp_ret){
				.f = ((ejit_escape_f_t)f->extern_call)(paramc, params)
			};

		return (union interp_ret){
			.i = ((ejit_escape_i_t)f->extern_call)(paramc, params)
		};
	}

	/* retval is kind of an unfortunate extra bit of state to keep track of,
	 * but having call and return value separated is pretty convenient for
	 * void calls so I guess I don't mind? */
	int64_t retval = 0;
	double retval_f = 0.0;

	/* a floating point register holds either a float or a double */
	union fpr {
		double d;
		float f;
	};

	/* number of outgoing call arguments queued in args[] so far */
	size_t argc = 0;

	/* Register files and the outgoing argument buffer live on the stack
	 * for the duration of this call; sizes come from the function itself. */
	int64_t *gpr = alloca(sizeof(*gpr) * gpr_stats_len(&f->gpr));
	union fpr *fpr = alloca(sizeof(*fpr) * fpr_stats_len(&f->fpr));
	struct ejit_arg *args = alloca(sizeof(*args) * f->max_args);
	struct ejit_insn *insns = f->insns.buf;

	size_t pc = 0;

/* DO opens a handler: label + block with a local copy `i` of the current
 * instruction. DISPATCH closes the block and jumps to the next instruction's
 * handler. JUMP sets pc and jumps to that instruction's handler. */
#define DO(x) x : { struct ejit_insn i = insns[pc]; (void)i;
#define JUMP(a) goto *insns[pc = (a)].addr
#define DISPATCH() } goto *insns[++pc].addr

	JUMP(0);

	DO(START);
	DISPATCH();

	DO(END);
	goto zero_out;
	DISPATCH();

	/* --- moves --- */
	DO(MOVI);
	gpr[i.r0] = i.o;
	DISPATCH();

	DO(MOVI_F);
	fpr[i.r0].f = i.f;
	DISPATCH();

	DO(MOVI_D);
	fpr[i.r0].d = i.d;
	DISPATCH();

	DO(MOVR);
	gpr[i.r0] = gpr[i.r1];
	DISPATCH();

	DO(MOVR_F);
	fpr[i.r0].f = fpr[i.r1].f;
	DISPATCH();

	DO(MOVR_D);
	fpr[i.r0].d = fpr[i.r1].d;
	DISPATCH();

	/* --- sign/zero extension and int -> float conversion --- */
	DO(EXTR8);
	gpr[i.r0] = (int8_t)gpr[i.r1];
	DISPATCH();

	DO(EXTR16);
	gpr[i.r0] = (int16_t)gpr[i.r1];
	DISPATCH();

	DO(EXTR32);
	gpr[i.r0] = (int32_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRU8);
	gpr[i.r0] = (uint8_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRU16);
	gpr[i.r0] = (uint16_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRU32);
	gpr[i.r0] = (uint32_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRF);
	fpr[i.r0].f = (float)gpr[i.r1];
	DISPATCH();

	DO(EXTRD);
	fpr[i.r0].d = (double)gpr[i.r1];
	DISPATCH();

	/* --- arithmetic ---
	 * Integer add/sub/mul/neg are carried out in uint64_t and converted
	 * back so that overflow wraps around instead of being undefined
	 * behaviour on int64_t. */
	DO(ADDR);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] + (uint64_t)gpr[i.r2]);
	DISPATCH();

	DO(ADDR_F);
	fpr[i.r0].f = fpr[i.r1].f + fpr[i.r2].f;
	DISPATCH();

	DO(ADDR_D);
	fpr[i.r0].d = fpr[i.r1].d + fpr[i.r2].d;
	DISPATCH();

	DO(ADDI);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] + (uint64_t)i.o);
	DISPATCH();

	DO(ABSR_F);
	/* float variant avoids a float -> double -> float round trip */
	fpr[i.r0].f = fabsf(fpr[i.r1].f);
	DISPATCH();

	DO(ABSR_D);
	fpr[i.r0].d = fabs(fpr[i.r1].d);
	DISPATCH();

	DO(SUBR);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] - (uint64_t)gpr[i.r2]);
	DISPATCH();

	DO(SUBR_F);
	fpr[i.r0].f = fpr[i.r1].f - fpr[i.r2].f;
	DISPATCH();

	DO(SUBR_D);
	fpr[i.r0].d = fpr[i.r1].d - fpr[i.r2].d;
	DISPATCH();

	DO(SUBI);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] - (uint64_t)i.o);
	DISPATCH();

	DO(MULR);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] * (uint64_t)gpr[i.r2]);
	DISPATCH();

	DO(MULR_F);
	fpr[i.r0].f = fpr[i.r1].f * fpr[i.r2].f;
	DISPATCH();

	DO(MULR_D);
	fpr[i.r0].d = fpr[i.r1].d * fpr[i.r2].d;
	DISPATCH();

	/* Integer division and remainder are passed straight to C: a zero
	 * divisor (and INT64_MIN / -1) is not checked here. */
	DO(DIVR);
	gpr[i.r0] = gpr[i.r1] / gpr[i.r2];
	DISPATCH();

	DO(DIVR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] / (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(REMR);
	gpr[i.r0] = gpr[i.r1] % gpr[i.r2];
	DISPATCH();

	DO(REMR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] % (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(DIVR_F);
	fpr[i.r0].f = fpr[i.r1].f / fpr[i.r2].f;
	DISPATCH();

	DO(DIVR_D);
	fpr[i.r0].d = fpr[i.r1].d / fpr[i.r2].d;
	DISPATCH();

	/* --- shifts ---
	 * Left shifts operate on the unsigned bit pattern, since shifting a
	 * negative int64_t left is undefined. Shift counts are not range
	 * checked. */
	DO(LSHI);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] << i.o);
	DISPATCH();

	DO(LSHR);
	gpr[i.r0] = (int64_t)((uint64_t)gpr[i.r1] << gpr[i.r2]);
	DISPATCH();

	DO(RSHI);
	gpr[i.r0] = gpr[i.r1] >> i.o;
	DISPATCH();

	DO(RSHR);
	gpr[i.r0] = gpr[i.r1] >> gpr[i.r2];
	DISPATCH();

	DO(RSHI_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] >> i.o;
	DISPATCH();

	DO(RSHR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] >> gpr[i.r2];
	DISPATCH();

	/* --- bitwise logic --- */
	DO(ANDR);
	gpr[i.r0] = gpr[i.r1] & gpr[i.r2];
	DISPATCH();

	DO(ANDI);
	gpr[i.r0] = gpr[i.r1] & i.o;
	DISPATCH();

	DO(ORR);
	gpr[i.r0] = gpr[i.r1] | gpr[i.r2];
	DISPATCH();

	DO(ORI);
	gpr[i.r0] = gpr[i.r1] | i.o;
	DISPATCH();

	DO(XORR);
	gpr[i.r0] = gpr[i.r1] ^ gpr[i.r2];
	DISPATCH();

	DO(XORI);
	gpr[i.r0] = gpr[i.r1] ^ i.o;
	DISPATCH();

	DO(COMR);
	gpr[i.r0] = ~gpr[i.r1];
	DISPATCH();

	DO(NEGR);
	gpr[i.r0] = (int64_t)(0 - (uint64_t)gpr[i.r1]);
	DISPATCH();

	DO(NEGR_F);
	fpr[i.r0].f = -fpr[i.r1].f;
	DISPATCH();

	DO(NEGR_D);
	fpr[i.r0].d = -fpr[i.r1].d;
	DISPATCH();

	/* --- comparisons, result (0 or 1) goes to a general purpose register --- */
	DO(EQR);
	gpr[i.r0] = gpr[i.r1] == gpr[i.r2];
	DISPATCH();

	DO(EQR_F);
	gpr[i.r0] = fpr[i.r1].f == fpr[i.r2].f;
	DISPATCH();

	DO(EQR_D);
	gpr[i.r0] = fpr[i.r1].d == fpr[i.r2].d;
	DISPATCH();

	DO(NER);
	gpr[i.r0] = gpr[i.r1] != gpr[i.r2];
	DISPATCH();

	DO(NER_F);
	gpr[i.r0] = fpr[i.r1].f != fpr[i.r2].f;
	DISPATCH();

	DO(NER_D);
	gpr[i.r0] = fpr[i.r1].d != fpr[i.r2].d;
	DISPATCH();

	DO(GTR);
	gpr[i.r0] = gpr[i.r1] > gpr[i.r2];
	DISPATCH();

	DO(GTR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(GTR_F);
	gpr[i.r0] = fpr[i.r1].f > fpr[i.r2].f;
	DISPATCH();

	DO(GTR_D);
	gpr[i.r0] = fpr[i.r1].d > fpr[i.r2].d;
	DISPATCH();

	DO(GER);
	gpr[i.r0] = gpr[i.r1] >= gpr[i.r2];
	DISPATCH();

	DO(GER_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(GER_F);
	gpr[i.r0] = fpr[i.r1].f >= fpr[i.r2].f;
	DISPATCH();

	DO(GER_D);
	gpr[i.r0] = fpr[i.r1].d >= fpr[i.r2].d;
	DISPATCH();

	/* --- stores to an absolute address (i.p) --- */
	DO(STI8);
	int8_t *addr = (int8_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STI16);
	int16_t *addr = (int16_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STI32);
	int32_t *addr = (int32_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STI64);
	int64_t *addr = (int64_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STIF);
	float *addr = (float *)(i.p);
	*addr = fpr[i.r0].f;
	DISPATCH();

	DO(STID);
	double *addr = (double *)(i.p);
	*addr = fpr[i.r0].d;
	DISPATCH();

	/* --- stores to register base + immediate offset --- */
	DO(STXI8);
	int8_t *addr = (int8_t *)(uintptr_t)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXI16);
	int16_t *addr = (int16_t *)(uintptr_t)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXI32);
	int32_t *addr = (int32_t *)(uintptr_t)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXI64);
	int64_t *addr = (int64_t *)(uintptr_t)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXIF);
	float *addr = (float *)(uintptr_t)(gpr[i.r1] + i.o);
	*addr = fpr[i.r0].f;
	DISPATCH();

	DO(STXID);
	double *addr = (double *)(uintptr_t)(gpr[i.r1] + i.o);
	*addr = fpr[i.r0].d;
	DISPATCH();

	/* --- stores to register base + register offset --- */
	DO(STXR8);
	int8_t *addr = (int8_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXR16);
	int16_t *addr = (int16_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXR32);
	int32_t *addr = (int32_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXR64);
	int64_t *addr = (int64_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXRF);
	float *addr = (float *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	*addr = fpr[i.r0].f;
	DISPATCH();

	DO(STXRD);
	double *addr = (double *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	*addr = fpr[i.r0].d;
	DISPATCH();

	/* --- loads from an absolute address (i.p); the pointer type decides
	 * whether the value is sign or zero extended into the register --- */
	DO(LDI8);
	int8_t *addr = (int8_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDI16);
	int16_t *addr = (int16_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDI32);
	int32_t *addr = (int32_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDI64);
	int64_t *addr = (int64_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU8);
	uint8_t *addr = (uint8_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU16);
	uint16_t *addr = (uint16_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU32);
	uint32_t *addr = (uint32_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU64);
	uint64_t *addr = (uint64_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIF);
	float *addr = (float *)i.p;
	fpr[i.r0].f = *addr;
	DISPATCH();

	DO(LDID);
	double *addr = (double *)i.p;
	fpr[i.r0].d = *addr;
	DISPATCH();

	/* --- loads from register base + immediate offset --- */
	DO(LDXI8);
	int8_t *addr = (int8_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXI16);
	int16_t *addr = (int16_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXI32);
	int32_t *addr = (int32_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXI64);
	int64_t *addr = (int64_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU8);
	uint8_t *addr = (uint8_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU16);
	uint16_t *addr = (uint16_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU32);
	uint32_t *addr = (uint32_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU64);
	uint64_t *addr = (uint64_t *)(uintptr_t)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIF);
	float *addr = (float *)(uintptr_t)(gpr[i.r1] + i.o);
	fpr[i.r0].f = *addr;
	DISPATCH();

	DO(LDXID);
	double *addr = (double *)(uintptr_t)(gpr[i.r1] + i.o);
	fpr[i.r0].d = *addr;
	DISPATCH();

	/* --- loads from register base + register offset --- */
	DO(LDXR8);
	int8_t *addr = (int8_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXR16);
	int16_t *addr = (int16_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXR32);
	int32_t *addr = (int32_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXR64);
	int64_t *addr = (int64_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU8);
	uint8_t *addr = (uint8_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU16);
	uint16_t *addr = (uint16_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU32);
	uint32_t *addr = (uint32_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU64);
	uint64_t *addr = (uint64_t *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRF);
	float *addr = (float *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	fpr[i.r0].f = *addr;
	DISPATCH();

	DO(LDXRD);
	double *addr = (double *)(uintptr_t)(gpr[i.r1] + gpr[i.r2]);
	fpr[i.r0].d = *addr;
	DISPATCH();

	/* --- float -> int truncation --- */
	DO(TRUNCR_D_32);
	gpr[i.r0] = (int32_t)fpr[i.r1].d;
	DISPATCH();

	DO(TRUNCR_D_64);
	gpr[i.r0] = (int64_t)fpr[i.r1].d;
	DISPATCH();

	DO(TRUNCR_F_32);
	gpr[i.r0] = (int32_t)fpr[i.r1].f;
	DISPATCH();

	DO(TRUNCR_F_64);
	gpr[i.r0] = (int64_t)fpr[i.r1].f;
	DISPATCH();

	DO(SQRTR_F);
	/* float variant avoids a float -> double -> float round trip */
	fpr[i.r0].f = sqrtf(fpr[i.r1].f);
	DISPATCH();

	DO(SQRTR_D);
	fpr[i.r0].d = sqrt(fpr[i.r1].d);
	DISPATCH();

	/* --- conditional branches: i.r0 is the target instruction index,
	 * otherwise execution falls through to the next instruction --- */
	DO(BNER);
	if (gpr[i.r1] != gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BNEI);
	if (gpr[i.r1] != i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BNER_F);
	if (fpr[i.r1].f != fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BNER_D);
	if (fpr[i.r1].d != fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BEQR);
	if (gpr[i.r1] == gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BEQI);
	if (gpr[i.r1] == i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BEQR_F);
	if (fpr[i.r1].f == fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BEQR_D);
	if (fpr[i.r1].d == fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR);
	if (gpr[i.r1] > gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR_U);
	if ((uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGTI);
	if (gpr[i.r1] > i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTI_U);
	if ((uint64_t)gpr[i.r1] > (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR_F);
	if (fpr[i.r1].f > fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR_D);
	if (fpr[i.r1].d > fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BLTI);
	if (gpr[i.r1] < i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BLTI_U);
	if ((uint64_t)gpr[i.r1] < (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGER);
	if (gpr[i.r1] >= gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGER_U);
	if ((uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGEI);
	if (gpr[i.r1] >= i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGEI_U);
	if ((uint64_t)gpr[i.r1] >= (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGER_F);
	if (fpr[i.r1].f >= fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BGER_D);
	if (fpr[i.r1].d >= fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BLEI);
	if (gpr[i.r1] <= i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BLEI_U);
	if ((uint64_t)gpr[i.r1] <= (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	/* --- unconditional jumps --- */
	DO(JMP);
	JUMP(i.r0);
	DISPATCH();

	DO(JMPR);
	/* target instruction index is taken from a register */
	JUMP(gpr[i.r1]);
	DISPATCH();

	/* --- branch on mask set / mask clear --- */
	DO(BMSR);
	if (gpr[i.r1] & gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BMSI);
	if (gpr[i.r1] & i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BMCR);
	if (!(gpr[i.r1] & gpr[i.r2]))
		JUMP(i.r0);

	DISPATCH();

	DO(BMCI);
	if (!(gpr[i.r1] & i.o))
		JUMP(i.r0);

	DISPATCH();

	/* --- fetch the result of the most recent call/escape --- */
	DO(RETVAL);
	gpr[i.r0] = retval;
	DISPATCH();

	DO(RETVAL_F);
	fpr[i.r0].f = retval_f;
	DISPATCH();

	DO(RETVAL_D);
	fpr[i.r0].d = retval_f;
	DISPATCH();

	/* --- incoming parameters: r0 = index into params, r1 = type,
	 * r2 = destination register --- */
	DO(PARAM);
	switch (i.r1) {
	case EJIT_INT8:    gpr[i.r2] = params[i.r0].i8; break;
	case EJIT_INT16:   gpr[i.r2] = params[i.r0].i16; break;
	case EJIT_INT32:   gpr[i.r2] = params[i.r0].i32; break;
	case EJIT_INT64:   gpr[i.r2] = params[i.r0].i64; break;
	case EJIT_UINT8:   gpr[i.r2] = params[i.r0].u8; break;
	case EJIT_UINT16:  gpr[i.r2] = params[i.r0].u16; break;
	case EJIT_UINT32:  gpr[i.r2] = params[i.r0].u32; break;
	case EJIT_UINT64:  gpr[i.r2] = params[i.r0].u64; break;
	case EJIT_POINTER: gpr[i.r2] = (int64_t)params[i.r0].p; break;
	default: abort();
	}
	DISPATCH();

	DO(PARAM_F);
	if (i.r1 == EJIT_FLOAT)
		fpr[i.r2].f = params[i.r0].f;
	else
		fpr[i.r2].d = params[i.r0].d;

	DISPATCH();

	/* --- outgoing arguments: appended to args[] until the next
	 * call/escape consumes them and resets argc --- */
	DO(ARG);
	struct ejit_arg a = ejit_build_arg(i.r1, gpr[i.r2]);
	args[argc++] = a;
	DISPATCH();

	DO(ARG_I);
	struct ejit_arg a = ejit_build_arg(i.r1, i.o);
	args[argc++] = a;
	DISPATCH();

	DO(ARG_F);
	struct ejit_arg a;
	if (i.r1 == EJIT_DOUBLE)
		a = ejit_build_arg_f(i.r1, fpr[i.r2].d);
	else
		a = ejit_build_arg_f(i.r1, fpr[i.r2].f);

	args[argc++] = a;
	DISPATCH();

	DO(ARG_FI);
	/* Both float and double immediates are read from i.d here.
	 * NOTE(review): confirm the emitter stores float immediates in i.d
	 * rather than i.f for this opcode. */
	struct ejit_arg a = ejit_build_arg_f(i.r1, i.d);
	args[argc++] = a;
	DISPATCH();

	/* --- calls into other ejit functions (recursing into ejit_run) --- */
	DO(CALLI_I);
	struct ejit_func *callee = i.p;
	retval = ejit_run(callee, argc, args, true, NULL).i;
	argc = 0;
	DISPATCH();

	DO(CALLI_L);
	struct ejit_func *callee = i.p;
	retval = ejit_run(callee, argc, args, true, NULL).i;
	argc = 0;
	DISPATCH();

	DO(CALLI_F);
	struct ejit_func *callee = i.p;
	retval_f = ejit_run(callee, argc, args, true, NULL).f;
	argc = 0;
	DISPATCH();

	DO(CALLI_D);
	struct ejit_func *callee = i.p;
	retval_f = ejit_run(callee, argc, args, true, NULL).f;
	argc = 0;
	DISPATCH();

	/* --- escapes: direct calls to native callbacks stored in i.p --- */
	DO(ESCAPEI_I);
	ejit_escape_i_t fn = i.p;
	retval = fn(argc, args);
	argc = 0;
	DISPATCH();

	DO(ESCAPEI_L);
	ejit_escape_l_t fn = i.p;
	retval = fn(argc, args);
	argc = 0;
	DISPATCH();

	DO(ESCAPEI_F);
	ejit_escape_f_t fn = i.p;
	retval_f = fn(argc, args);
	argc = 0;
	DISPATCH();

	DO(ESCAPEI_D);
	ejit_escape_d_t fn = i.p;
	retval_f = fn(argc, args);
	argc = 0;
	DISPATCH();

	/* dispatch is technically unnecessary for returns, but keep it for
	 * symmetry (it also closes the block opened by DO) */
	DO(RETR);
	retval = gpr[i.r1];
	goto out_int;
	DISPATCH();

	DO(RETI);
	retval = i.o;
	goto out_int;
	DISPATCH();

	DO(RETR_F);
	retval_f = fpr[i.r1].f;
	goto out_float;
	DISPATCH();

	DO(RETR_D);
	retval_f = fpr[i.r1].d;
	goto out_float;
	DISPATCH();

	DO(RETI_F);
	retval_f = i.f;
	goto out_float;
	DISPATCH();

	DO(RETI_D);
	retval_f = i.d;
	goto out_float;
	DISPATCH();

#undef DISPATCH
#undef JUMP
#undef DO

out_float:
	return (union interp_ret){.f = retval_f};

out_int:
	return (union interp_ret){.i = retval};

zero_out:
	return (union interp_ret){.i = 0};
}