#include <ejit/ejit.h>
#include <math.h>

#include "common.h"

/*
 * Threaded-code interpreter for a single ejit function.
 *
 * Every instruction in f->insns carries the address of its handler in
 * .addr, and each handler ends by jumping straight to the handler of the
 * next instruction (GNU "labels as values" extension), so there is no
 * central switch.
 *
 * f         function to execute; f->gpr / f->fpr are passed to vec_reserve()
 *           for the register files and f->insns.buf is the instruction
 *           stream.
 * argc/args arguments of this invocation, read by PARAM / PARAM_F.
 * state     interpreter state shared between nested invocations: the
 *           general purpose register stack (gprs), the floating point
 *           register stack (fprs) and the outgoing argument stack (args).
 * run       when false nothing is executed: the opcode -> handler address
 *           table is stored through labels_wb and {.r = 0} is returned.
 * labels_wb only dereferenced when run is false.
 *
 * Returns .r for integer returns (RETR, RETI, END) and .d for floating
 * point returns (RETR_F, RETR_D, RETI_F, RETI_D).
 */
union interp_ret ejit_interp(struct ejit_func *f, size_t argc,
                             struct ejit_arg args[argc],
                             struct interp_state *state, bool run,
                             void ***labels_wb)
{
	/* Opcode -> handler address. Static so that the pointer handed out
	 * through labels_wb stays valid after this call returns. */
	static void *labels[OPCODE_COUNT] = {
		[MOVI] = &&MOVI,
		[MOVI_F] = &&MOVI_F,
		[MOVI_D] = &&MOVI_D,
		[MOVR] = &&MOVR,
		[MOVR_F] = &&MOVR_F,
		[MOVR_D] = &&MOVR_D,

		[EXTR8] = &&EXTR8,
		[EXTR16] = &&EXTR16,
		[EXTR32] = &&EXTR32,
		[EXTRU8] = &&EXTRU8,
		[EXTRU16] = &&EXTRU16,
		[EXTRU32] = &&EXTRU32,
		[EXTRF] = &&EXTRF,
		[EXTRD] = &&EXTRD,

		[ADDR] = &&ADDR,
		[ADDR_F] = &&ADDR_F,
		[ADDR_D] = &&ADDR_D,
		[ADDI] = &&ADDI,

		[ABSR_F] = &&ABSR_F,
		[ABSR_D] = &&ABSR_D,

		[SUBR] = &&SUBR,
		[SUBR_F] = &&SUBR_F,
		[SUBR_D] = &&SUBR_D,
		[SUBI] = &&SUBI,

		[MULR] = &&MULR,
		[MULR_F] = &&MULR_F,
		[MULR_D] = &&MULR_D,

		[DIVR] = &&DIVR,
		[DIVR_U] = &&DIVR_U,
		[DIVR_F] = &&DIVR_F,
		[DIVR_D] = &&DIVR_D,

		[REMR] = &&REMR,
		[REMR_U] = &&REMR_U,

		[LSHI] = &&LSHI,
		[LSHR] = &&LSHR,
		[RSHI] = &&RSHI,
		[RSHR] = &&RSHR,
		[RSHI_U] = &&RSHI_U,
		[RSHR_U] = &&RSHR_U,

		[ANDR] = &&ANDR,
		[ANDI] = &&ANDI,

		[ORR] = &&ORR,
		[ORI] = &&ORI,

		[XORR] = &&XORR,
		[XORI] = &&XORI,

		[COMR] = &&COMR,
		[NEGR] = &&NEGR,
		[NEGR_F] = &&NEGR_F,
		[NEGR_D] = &&NEGR_D,

		[EQR] = &&EQR,
		[EQR_F] = &&EQR_F,
		[EQR_D] = &&EQR_D,

		[NER] = &&NER,
		[NER_F] = &&NER_F,
		[NER_D] = &&NER_D,

		[GTR] = &&GTR,
		[GTR_U] = &&GTR_U,
		[GTR_F] = &&GTR_F,
		[GTR_D] = &&GTR_D,

		[GER] = &&GER,
		[GER_U] = &&GER_U,
		[GER_F] = &&GER_F,
		[GER_D] = &&GER_D,

		[STI8] = &&STI8,
		[STI16] = &&STI16,
		[STI32] = &&STI32,
		[STI64] = &&STI64,
		[STIF] = &&STIF,
		[STID] = &&STID,

		[STXI8] = &&STXI8,
		[STXI16] = &&STXI16,
		[STXI32] = &&STXI32,
		[STXI64] = &&STXI64,
		[STXIF] = &&STXIF,
		[STXID] = &&STXID,

		[STXR8] = &&STXR8,
		[STXR16] = &&STXR16,
		[STXR32] = &&STXR32,
		[STXR64] = &&STXR64,
		[STXRF] = &&STXRF,
		[STXRD] = &&STXRD,

		[LDI8] = &&LDI8,
		[LDI16] = &&LDI16,
		[LDI32] = &&LDI32,
		[LDI64] = &&LDI64,
		[LDIU8] = &&LDIU8,
		[LDIU16] = &&LDIU16,
		[LDIU32] = &&LDIU32,
		[LDIU64] = &&LDIU64,
		[LDIF] = &&LDIF,
		[LDID] = &&LDID,

		[LDXI8] = &&LDXI8,
		[LDXI16] = &&LDXI16,
		[LDXI32] = &&LDXI32,
		[LDXI64] = &&LDXI64,
		[LDXIU8] = &&LDXIU8,
		[LDXIU16] = &&LDXIU16,
		[LDXIU32] = &&LDXIU32,
		[LDXIU64] = &&LDXIU64,
		[LDXIF] = &&LDXIF,
		[LDXID] = &&LDXID,

		[LDXR8] = &&LDXR8,
		[LDXR16] = &&LDXR16,
		[LDXR32] = &&LDXR32,
		[LDXR64] = &&LDXR64,
		[LDXRU8] = &&LDXRU8,
		[LDXRU16] = &&LDXRU16,
		[LDXRU32] = &&LDXRU32,
		[LDXRU64] = &&LDXRU64,
		[LDXRF] = &&LDXRF,
		[LDXRD] = &&LDXRD,

		[TRUNCR_D_32] = &&TRUNCR_D_32,
		[TRUNCR_D_64] = &&TRUNCR_D_64,
		[TRUNCR_F_32] = &&TRUNCR_F_32,
		[TRUNCR_F_64] = &&TRUNCR_F_64,

		[BNER] = &&BNER,
		[BNEI] = &&BNEI,
		[BNER_F] = &&BNER_F,
		[BNER_D] = &&BNER_D,

		[BEQR] = &&BEQR,
		[BEQI] = &&BEQI,
		[BEQR_F] = &&BEQR_F,
		[BEQR_D] = &&BEQR_D,

		[BGER] = &&BGER,
		[BGER_U] = &&BGER_U,
		[BGEI] = &&BGEI,
		[BGEI_U] = &&BGEI_U,
		[BGER_F] = &&BGER_F,
		[BGER_D] = &&BGER_D,

		[BLEI] = &&BLEI,
		[BLEI_U] = &&BLEI_U,

		[BGTR] = &&BGTR,
		[BGTR_U] = &&BGTR_U,
		[BGTI] = &&BGTI,
		[BGTI_U] = &&BGTI_U,
		[BGTR_F] = &&BGTR_F,
		[BGTR_D] = &&BGTR_D,

		[BLTI] = &&BLTI,
		[BLTI_U] = &&BLTI_U,

		[JMP] = &&JMP,
		[JMPR] = &&JMPR,

		[BMCI] = &&BMCI,
		[BMCR] = &&BMCR,
		[BMSI] = &&BMSI,
		[BMSR] = &&BMSR,

		[RETR] = &&RETR,
		[RETI] = &&RETI,
		[RETR_F] = &&RETR_F,
		[RETI_F] = &&RETI_F,
		[RETR_D] = &&RETR_D,
		[RETI_D] = &&RETI_D,

		[RETVAL] = &&RETVAL,
		[RETVAL_F] = &&RETVAL_F,
		[RETVAL_D] = &&RETVAL_D,

		[ARG] = &&ARG,
		[ARG_I] = &&ARG_I,
		[ARG_F] = &&ARG_F,
		[ARG_FI] = &&ARG_FI,

		[PARAM] = &&PARAM,
		[PARAM_F] = &&PARAM_F,

		[CALLI] = &&CALLI,
		[CALLI_F] = &&CALLI_F,
		[ESCAPEI] = &&ESCAPEI,
		[ESCAPEI_F] = &&ESCAPEI_F,

		[START] = &&START,
		[END] = &&END,
	};

	/* Label addresses are only nameable from inside this function, so
	 * callers that need the table ask for it with run == false. */
	if (!run) {
		*labels_wb = labels;
		return (union interp_ret){.r = 0};
	}

	/* Remember how much of the shared stacks belongs to our callers so
	 * that everything can be restored on the way out. */
	size_t prev_gprs = vec_len(&state->gprs);
	size_t prev_fprs = vec_len(&state->fprs);
	size_t prev_argc = vec_len(&state->args);

	/* NOTE(review): this relies on vec_reserve() making
	 * [prev, prev + count) usable and not overlapping the registers of
	 * outer frames -- confirm against the vec implementation. */
	vec_reserve(&state->gprs, prev_gprs + f->gpr);
	vec_reserve(&state->fprs, prev_fprs + f->fpr);

	/* A floating point register holds either a float or a double */
	union fpr {
		double d;
		float f;
	};
	long *gpr = ((long *)state->gprs.buf) + prev_gprs;
	union fpr *fpr = ((union fpr *)state->fprs.buf) + prev_fprs;

	struct ejit_insn *insns = f->insns.buf;

	/* retval is kind of an unfortunate extra bit of state to keep track of,
	 * but having call and return value separated is pretty convenient for
	 * void calls so I guess I don't mind? */
	int64_t retval = 0; double retval_f = 0.;
	size_t pc = 0;

	/*
	 * DO(x)      opens the handler for opcode x and copies the current
	 *            instruction into 'i'.
	 * JUMP(a)    sets pc to a and jumps to that instruction's handler.
	 * DISPATCH() closes the handler and continues with the next
	 *            instruction.
	 */
#define DO(x) x : { struct ejit_insn i = insns[pc]; (void)i;
#define JUMP(a) goto *insns[pc = (a)].addr;
#define DISPATCH() } goto *insns[++pc].addr;

	JUMP(0);

	DO(START);
	DISPATCH();

	DO(END);
	goto out_int;
	DISPATCH();

	/* moves */
	DO(MOVI);
	gpr[i.r0] = i.o;
	DISPATCH();

	DO(MOVI_F);
	fpr[i.r0].f = i.f;
	DISPATCH();

	DO(MOVI_D);
	fpr[i.r0].d = i.d;
	DISPATCH();

	DO(MOVR);
	gpr[i.r0] = gpr[i.r1];
	DISPATCH();

	DO(MOVR_F);
	fpr[i.r0].f = fpr[i.r1].f;
	DISPATCH();

	DO(MOVR_D);
	fpr[i.r0].d = fpr[i.r1].d;
	DISPATCH();

	/* sign/zero extension and integer -> floating point conversion */
	DO(EXTR8);
	gpr[i.r0] = (int8_t)gpr[i.r1];
	DISPATCH();

	DO(EXTR16);
	gpr[i.r0] = (int16_t)gpr[i.r1];
	DISPATCH();

	DO(EXTR32);
	gpr[i.r0] = (int32_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRU8);
	gpr[i.r0] = (uint8_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRU16);
	gpr[i.r0] = (uint16_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRU32);
	gpr[i.r0] = (uint32_t)gpr[i.r1];
	DISPATCH();

	DO(EXTRF);
	fpr[i.r0].f = (float)gpr[i.r1];
	DISPATCH();

	DO(EXTRD);
	fpr[i.r0].d = (double)gpr[i.r1];
	DISPATCH();

	/* arithmetic */
	DO(ADDR);
	gpr[i.r0] = gpr[i.r1] + gpr[i.r2];
	DISPATCH();

	DO(ADDR_F);
	fpr[i.r0].f = fpr[i.r1].f + fpr[i.r2].f;
	DISPATCH();

	DO(ADDR_D);
	fpr[i.r0].d = fpr[i.r1].d + fpr[i.r2].d;
	DISPATCH();

	DO(ADDI);
	gpr[i.r0] = gpr[i.r1] + i.o;
	DISPATCH();

	DO(ABSR_F);
	/* single precision variant, no detour through double */
	fpr[i.r0].f = fabsf(fpr[i.r1].f);
	DISPATCH();

	DO(ABSR_D);
	fpr[i.r0].d = fabs(fpr[i.r1].d);
	DISPATCH();

	DO(SUBR);
	gpr[i.r0] = gpr[i.r1] - gpr[i.r2];
	DISPATCH();

	DO(SUBR_F);
	fpr[i.r0].f = fpr[i.r1].f - fpr[i.r2].f;
	DISPATCH();

	DO(SUBR_D);
	fpr[i.r0].d = fpr[i.r1].d - fpr[i.r2].d;
	DISPATCH();

	DO(SUBI);
	gpr[i.r0] = gpr[i.r1] - i.o;
	DISPATCH();

	DO(MULR);
	gpr[i.r0] = gpr[i.r1] * gpr[i.r2];
	DISPATCH();

	DO(MULR_F);
	fpr[i.r0].f = fpr[i.r1].f * fpr[i.r2].f;
	DISPATCH();

	DO(MULR_D);
	fpr[i.r0].d = fpr[i.r1].d * fpr[i.r2].d;
	DISPATCH();

	DO(DIVR);
	gpr[i.r0] = gpr[i.r1] / gpr[i.r2];
	DISPATCH();

	DO(DIVR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] / (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(REMR);
	gpr[i.r0] = gpr[i.r1] % gpr[i.r2];
	DISPATCH();

	DO(REMR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] % (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(DIVR_F);
	fpr[i.r0].f = fpr[i.r1].f / fpr[i.r2].f;
	DISPATCH();

	DO(DIVR_D);
	fpr[i.r0].d = fpr[i.r1].d / fpr[i.r2].d;
	DISPATCH();

	/* shifts, the _U variants shift in zeroes from the left */
	DO(LSHI);
	gpr[i.r0] = gpr[i.r1] << i.o;
	DISPATCH();

	DO(LSHR);
	gpr[i.r0] = gpr[i.r1] << gpr[i.r2];
	DISPATCH();

	DO(RSHI);
	gpr[i.r0] = gpr[i.r1] >> i.o;
	DISPATCH();

	DO(RSHR);
	gpr[i.r0] = gpr[i.r1] >> gpr[i.r2];
	DISPATCH();

	DO(RSHI_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] >> i.o;
	DISPATCH();

	DO(RSHR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] >> gpr[i.r2];
	DISPATCH();

	/* bitwise logic */
	DO(ANDR);
	gpr[i.r0] = gpr[i.r1] & gpr[i.r2];
	DISPATCH();

	DO(ANDI);
	gpr[i.r0] = gpr[i.r1] & i.o;
	DISPATCH();

	DO(ORR);
	gpr[i.r0] = gpr[i.r1] | gpr[i.r2];
	DISPATCH();

	DO(ORI);
	gpr[i.r0] = gpr[i.r1] | i.o;
	DISPATCH();

	DO(XORR);
	gpr[i.r0] = gpr[i.r1] ^ gpr[i.r2];
	DISPATCH();

	DO(XORI);
	gpr[i.r0] = gpr[i.r1] ^ i.o;
	DISPATCH();

	DO(COMR);
	gpr[i.r0] = ~gpr[i.r1];
	DISPATCH();

	DO(NEGR);
	gpr[i.r0] = -gpr[i.r1];
	DISPATCH();

	DO(NEGR_F);
	fpr[i.r0].f = -fpr[i.r1].f;
	DISPATCH();

	DO(NEGR_D);
	fpr[i.r0].d = -fpr[i.r1].d;
	DISPATCH();

	/* comparisons, the 0/1 result always lands in a general purpose
	 * register */
	DO(EQR);
	gpr[i.r0] = gpr[i.r1] == gpr[i.r2];
	DISPATCH();

	DO(EQR_F);
	gpr[i.r0] = fpr[i.r1].f == fpr[i.r2].f;
	DISPATCH();

	DO(EQR_D);
	gpr[i.r0] = fpr[i.r1].d == fpr[i.r2].d;
	DISPATCH();

	DO(NER);
	gpr[i.r0] = gpr[i.r1] != gpr[i.r2];
	DISPATCH();

	DO(NER_F);
	gpr[i.r0] = fpr[i.r1].f != fpr[i.r2].f;
	DISPATCH();

	DO(NER_D);
	gpr[i.r0] = fpr[i.r1].d != fpr[i.r2].d;
	DISPATCH();

	DO(GTR);
	gpr[i.r0] = gpr[i.r1] > gpr[i.r2];
	DISPATCH();

	DO(GTR_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(GTR_F);
	gpr[i.r0] = fpr[i.r1].f > fpr[i.r2].f;
	DISPATCH();

	DO(GTR_D);
	gpr[i.r0] = fpr[i.r1].d > fpr[i.r2].d;
	DISPATCH();

	DO(GER);
	gpr[i.r0] = gpr[i.r1] >= gpr[i.r2];
	DISPATCH();

	DO(GER_U);
	gpr[i.r0] = (uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2];
	DISPATCH();

	DO(GER_F);
	gpr[i.r0] = fpr[i.r1].f >= fpr[i.r2].f;
	DISPATCH();

	DO(GER_D);
	gpr[i.r0] = fpr[i.r1].d >= fpr[i.r2].d;
	DISPATCH();

	/* stores to the absolute address i.p */
	DO(STI8);
	int8_t *addr = (int8_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STI16);
	int16_t *addr = (int16_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STI32);
	int32_t *addr = (int32_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STI64);
	int64_t *addr = (int64_t *)(i.p);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STIF);
	float *addr = (float *)(i.p);
	*addr = fpr[i.r0].f;
	DISPATCH();

	DO(STID);
	double *addr = (double *)(i.p);
	*addr = fpr[i.r0].d;
	DISPATCH();

	/* stores to register base + immediate offset */
	DO(STXI8);
	int8_t *addr = (int8_t *)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXI16);
	int16_t *addr = (int16_t *)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXI32);
	int32_t *addr = (int32_t *)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXI64);
	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXIF);
	float *addr = (float *)(gpr[i.r1] + i.o);
	*addr = fpr[i.r0].f;
	DISPATCH();

	DO(STXID);
	double *addr = (double *)(gpr[i.r1] + i.o);
	*addr = fpr[i.r0].d;
	DISPATCH();

	/* stores to register base + register offset */
	DO(STXR8);
	int8_t *addr = (int8_t *)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXR16);
	int16_t *addr = (int16_t *)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXR32);
	int32_t *addr = (int32_t *)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXR64);
	int64_t *addr = (int64_t *)(gpr[i.r1] + gpr[i.r2]);
	*addr = gpr[i.r0];
	DISPATCH();

	DO(STXRF);
	float *addr = (float *)(gpr[i.r1] + gpr[i.r2]);
	*addr = fpr[i.r0].f;
	DISPATCH();

	DO(STXRD);
	double *addr = (double *)(gpr[i.r1] + gpr[i.r2]);
	*addr = fpr[i.r0].d;
	DISPATCH();

	/* loads from the absolute address i.p; the pointee type decides
	 * between sign and zero extension */
	DO(LDI8);
	int8_t *addr = (int8_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDI16);
	int16_t *addr = (int16_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDI32);
	int32_t *addr = (int32_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDI64);
	int64_t *addr = (int64_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU8);
	uint8_t *addr = (uint8_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU16);
	uint16_t *addr = (uint16_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU32);
	uint32_t *addr = (uint32_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIU64);
	uint64_t *addr = (uint64_t *)i.p;
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDIF);
	float *addr = (float *)i.p;
	fpr[i.r0].f = *addr;
	DISPATCH();

	DO(LDID);
	double *addr = (double *)i.p;
	fpr[i.r0].d = *addr;
	DISPATCH();

	/* loads from register base + immediate offset */
	DO(LDXI8);
	int8_t *addr = (int8_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXI16);
	int16_t *addr = (int16_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXI32);
	int32_t *addr = (int32_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXI64);
	int64_t *addr = (int64_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU8);
	uint8_t *addr = (uint8_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU16);
	uint16_t *addr = (uint16_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU32);
	uint32_t *addr = (uint32_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIU64);
	uint64_t *addr = (uint64_t *)(gpr[i.r1] + i.o);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXIF);
	float *addr = (float *)(gpr[i.r1] + i.o);
	fpr[i.r0].f = *addr;
	DISPATCH();

	DO(LDXID);
	double *addr = (double *)(gpr[i.r1] + i.o);
	fpr[i.r0].d = *addr;
	DISPATCH();

	/* loads from register base + register offset */
	DO(LDXR8);
	int8_t *addr = (int8_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXR16);
	int16_t *addr = (int16_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXR32);
	int32_t *addr = (int32_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXR64);
	int64_t *addr = (int64_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU8);
	uint8_t *addr = (uint8_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU16);
	uint16_t *addr = (uint16_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU32);
	uint32_t *addr = (uint32_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRU64);
	uint64_t *addr = (uint64_t *)(gpr[i.r1] + gpr[i.r2]);
	gpr[i.r0] = *addr;
	DISPATCH();

	DO(LDXRF);
	float *addr = (float *)(gpr[i.r1] + gpr[i.r2]);
	fpr[i.r0].f = *addr;
	DISPATCH();

	DO(LDXRD);
	double *addr = (double *)(gpr[i.r1] + gpr[i.r2]);
	fpr[i.r0].d = *addr;
	DISPATCH();

	/* floating point -> integer truncation */
	DO(TRUNCR_D_32);
	gpr[i.r0] = (int32_t)fpr[i.r1].d;
	DISPATCH();

	DO(TRUNCR_D_64);
	gpr[i.r0] = (int64_t)fpr[i.r1].d;
	DISPATCH();

	DO(TRUNCR_F_32);
	gpr[i.r0] = (int32_t)fpr[i.r1].f;
	DISPATCH();

	DO(TRUNCR_F_64);
	gpr[i.r0] = (int64_t)fpr[i.r1].f;
	DISPATCH();

	/* conditional branches: i.r0 is the index of the target instruction,
	 * not a register */
	DO(BNER);
	if (gpr[i.r1] != gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BNEI);
	if (gpr[i.r1] != i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BNER_F);
	if (fpr[i.r1].f != fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BNER_D);
	if (fpr[i.r1].d != fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BEQR);
	if (gpr[i.r1] == gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BEQI);
	if (gpr[i.r1] == i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BEQR_F);
	if (fpr[i.r1].f == fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BEQR_D);
	if (fpr[i.r1].d == fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR);
	if (gpr[i.r1] > gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR_U);
	if ((uint64_t)gpr[i.r1] > (uint64_t)gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGTI);
	if (gpr[i.r1] > i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTI_U);
	if ((uint64_t)gpr[i.r1] > (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR_F);
	if (fpr[i.r1].f > fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BGTR_D);
	if (fpr[i.r1].d > fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BLTI);
	if (gpr[i.r1] < i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BLTI_U);
	if ((uint64_t)gpr[i.r1] < (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGER);
	if (gpr[i.r1] >= gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGER_U);
	if ((uint64_t)gpr[i.r1] >= (uint64_t)gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BGEI);
	if (gpr[i.r1] >= i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGEI_U);
	if ((uint64_t)gpr[i.r1] >= (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BGER_F);
	if (fpr[i.r1].f >= fpr[i.r2].f)
		JUMP(i.r0);

	DISPATCH();

	DO(BGER_D);
	if (fpr[i.r1].d >= fpr[i.r2].d)
		JUMP(i.r0);

	DISPATCH();

	DO(BLEI);
	if (gpr[i.r1] <= i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BLEI_U);
	if ((uint64_t)gpr[i.r1] <= (uint64_t)i.o)
		JUMP(i.r0);

	DISPATCH();

	/* unconditional jumps, JMPR takes the target index from a register */
	DO(JMP);
	JUMP(i.r0);
	DISPATCH();

	DO(JMPR);
	JUMP(gpr[i.r1]);
	DISPATCH();

	/* branch if any masked bit is set (BMS*) / if all are clear (BMC*) */
	DO(BMSR);
	if (gpr[i.r1] & gpr[i.r2])
		JUMP(i.r0);

	DISPATCH();

	DO(BMSI);
	if (gpr[i.r1] & i.o)
		JUMP(i.r0);

	DISPATCH();

	DO(BMCR);
	if (!(gpr[i.r1] & gpr[i.r2]))
		JUMP(i.r0);

	DISPATCH();

	DO(BMCI);
	if (!(gpr[i.r1] & i.o))
		JUMP(i.r0);

	DISPATCH();

	/* fetch the result of the most recent call/escape */
	DO(RETVAL);
	gpr[i.r0] = retval;
	DISPATCH();

	DO(RETVAL_F);
	fpr[i.r0].f = retval_f;
	DISPATCH();

	DO(RETVAL_D);
	fpr[i.r0].d = retval_f;
	DISPATCH();

	/* copy incoming argument number i.r0 into register i.r2 */
	DO(PARAM);
	gpr[i.r2] = args[i.r0].u64;
	DISPATCH();

	DO(PARAM_F);
	if (i.r1 == EJIT_FLOAT)
		fpr[i.r2].f = args[i.r0].f;
	else
		fpr[i.r2].d = args[i.r0].d;

	DISPATCH();

	/* queue an outgoing argument of type i.r1 for the next call/escape */
	DO(ARG);
	struct ejit_arg a = ejit_build_arg(i.r1, gpr[i.r2]);
	vec_append(&state->args, &a);
	DISPATCH();

	DO(ARG_I);
	struct ejit_arg a = ejit_build_arg(i.r1, i.o);
	vec_append(&state->args, &a);
	DISPATCH();

	DO(ARG_F);
	struct ejit_arg a;
	if (i.r1 == EJIT_DOUBLE)
		a = ejit_build_arg_f(i.r1, fpr[i.r2].d);
	else
		a = ejit_build_arg_f(i.r1, fpr[i.r2].f);

	vec_append(&state->args, &a);
	DISPATCH();

	DO(ARG_FI);
	struct ejit_arg a;
	if (i.r1 == EJIT_DOUBLE)
		a = ejit_build_arg_f(i.r1, i.d);
	else
		a = ejit_build_arg_f(i.r1, i.f);

	vec_append(&state->args, &a);
	DISPATCH();

	/* Calls into other interpreted functions. Everything queued past
	 * prev_argc is handed to the callee as its argument list. */
	DO(CALLI);
	struct ejit_func *f = i.p;
	size_t argc = vec_len(&state->args) - prev_argc;
	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
	                        prev_argc;

	retval = ejit_run_interp(f, argc, args, state);

	/* the callee may have grown (and thereby moved) the register stacks,
	 * so recompute our register pointers before touching them again */
	gpr = ((long *)state->gprs.buf) + prev_gprs;
	fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
	vec_shrink(&state->args, prev_argc);
	DISPATCH();

	DO(CALLI_F);
	struct ejit_func *f = i.p;
	size_t argc = vec_len(&state->args) - prev_argc;
	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
	                        prev_argc;

	/* Floating point counterpart of CALLI: recurse directly and pick the
	 * floating point member of the result. With run == true labels_wb is
	 * never dereferenced, so NULL is fine. */
	retval_f = ejit_interp(f, argc, args, state, true, NULL).d;

	/* same pointer refresh and argument cleanup as CALLI */
	gpr = ((long *)state->gprs.buf) + prev_gprs;
	fpr = ((union fpr *)state->fprs.buf) + prev_fprs;
	vec_shrink(&state->args, prev_argc);
	DISPATCH();

	/* calls out to native functions, which only get the argument list */
	DO(ESCAPEI);
	ejit_escape_t f = i.p;
	size_t argc = vec_len(&state->args) - prev_argc;
	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
	                        prev_argc;

	retval = f(argc, args);

	vec_shrink(&state->args, prev_argc);
	DISPATCH();

	DO(ESCAPEI_F);
	ejit_escape_f_t f = i.p;
	size_t argc = vec_len(&state->args) - prev_argc;
	struct ejit_arg *args = ((struct ejit_arg *)state->args.buf) +
	                        prev_argc;

	retval_f = f(argc, args);

	vec_shrink(&state->args, prev_argc);
	DISPATCH();

	/* dispatch is technically unnecessary for returns, but keep it for
	 * symmetry */
	DO(RETR);
	retval = gpr[i.r0];
	goto out_int;
	DISPATCH();

	DO(RETI);
	retval = i.o;
	goto out_int;
	DISPATCH();

	DO(RETR_F);
	retval_f = fpr[i.r0].f;
	goto out_float;
	DISPATCH();

	DO(RETR_D);
	retval_f = fpr[i.r0].d;
	goto out_float;
	DISPATCH();

	DO(RETI_F);
	retval_f = i.f;
	goto out_float;
	DISPATCH();

	DO(RETI_D);
	retval_f = i.d;
	goto out_float;
	DISPATCH();

#undef DISPATCH
#undef JUMP
#undef DO

	/* both exits hand the register stacks back at the length the caller
	 * left them at */
out_int:
	vec_shrink(&state->gprs, prev_gprs);
	vec_shrink(&state->fprs, prev_fprs);
	return (union interp_ret){.r = retval};

out_float:
	vec_shrink(&state->gprs, prev_gprs);
	vec_shrink(&state->fprs, prev_fprs);
	return (union interp_ret){.d = retval_f};
}