From 67a13eb82534996fbd6ba2fc0c36d3e1284bcd8e Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Wed, 26 Mar 2025 13:17:58 +0200
Subject: handle immediates a bit better

+ Passing floats as immediate values is not supported in lightening, but
  I might have a go at adding it at some point, since it seems like a
  useful feature
---
 tests/escapei_immediate_10.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 tests/escapei_immediate_10.c

(limited to 'tests')

diff --git a/tests/escapei_immediate_10.c b/tests/escapei_immediate_10.c
new file mode 100644
index 0000000..381c79f
--- /dev/null
+++ b/tests/escapei_immediate_10.c
@@ -0,0 +1,73 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+static int32_t func(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e,
+                    int32_t f, int32_t g, int32_t h, int32_t i, int32_t j)
+{ /* native callee: checks that the ten arguments arrive as 0..9, in order */
+	assert(a == 0);
+	assert(b == 1);
+	assert(c == 2);
+	assert(d == 3);
+	assert(e == 4);
+	assert(f == 5);
+	assert(g == 6);
+	assert(h == 7);
+	assert(i == 8);
+	assert(j == 9);
+	return 42; /* sentinel value that main() asserts on */
+}
+
+static long escape_func(size_t argc, const struct ejit_arg args[argc])
+{
+	assert(argc == 10);
+	assert(args[0].type == EJIT_INT32);
+	assert(args[1].type == EJIT_INT32);
+	assert(args[3].type == EJIT_INT32);
+	assert(args[4].type == EJIT_INT32);
+	assert(args[5].type == EJIT_INT32);
+	assert(args[6].type == EJIT_INT32);
+	assert(args[7].type == EJIT_INT32);
+	assert(args[8].type == EJIT_INT32);
+	assert(args[9].type == EJIT_INT32);
+
+	int32_t a = args[0].i32;
+	int32_t b = args[1].i32;
+	int32_t c = args[2].i32;
+	int32_t d = args[3].i32;
+	int32_t e = args[4].i32;
+	int32_t f = args[5].i32;
+	int32_t g = args[6].i32;
+	int32_t h = args[7].i32;
+	int32_t i = args[8].i32;
+	int32_t j = args[9].i32;
+	return func(a, b, c, d, e, f, g, h, i, j);
+}
+
+int main(int argc, char *argv[])
+{ /* test: escape call to escape_func with ten immediate int32 operands */
+	(void)argv;
+	bool do_jit = argc > 1; /* true when any command-line argument is given */
+	struct ejit_func *f = ejit_create_func(EJIT_INT32, 0, NULL);
+	struct ejit_operand args[10] = {
+		EJIT_OPERAND_IMM(0, EJIT_INT32),
+		EJIT_OPERAND_IMM(1, EJIT_INT32),
+		EJIT_OPERAND_IMM(2, EJIT_INT32),
+		EJIT_OPERAND_IMM(3, EJIT_INT32),
+		EJIT_OPERAND_IMM(4, EJIT_INT32),
+		EJIT_OPERAND_IMM(5, EJIT_INT32),
+		EJIT_OPERAND_IMM(6, EJIT_INT32),
+		EJIT_OPERAND_IMM(7, EJIT_INT32),
+		EJIT_OPERAND_IMM(8, EJIT_INT32),
+		EJIT_OPERAND_IMM(9, EJIT_INT32),
+	};
+	ejit_escapei_i(f, escape_func, 10, args);
+	ejit_retval(f, EJIT_GPR(0)); /* presumably moves the escape call's result into GPR 0 */
+	ejit_retr(f, EJIT_GPR(0));
+
+	ejit_select_compile_func(f, 11, 0, false, do_jit, true);
+
+	assert(ejit_run_func_i(f, 0, NULL) == 42); /* 42 is the value func() returns */
+
+	ejit_destroy_func(f);
+}
-- 
cgit v1.2.3


From 478c92b425eca53a0d884fb8f5dea8d769016858 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Tue, 1 Apr 2025 22:16:25 +0300
Subject: expose sqrt

+ Requires linking with libm in some cases, which is fine I suppose, but
  kind of annoying
---
 include/ejit/ejit.h   |  3 +++
 scripts/gen-tests     |  2 +-
 src/common.h          |  3 +++
 src/compile/compile.c | 20 ++++++++++++++++++++
 src/ejit.c            | 10 ++++++++++
 src/interp.c          | 11 +++++++++++
 tests/sqrtr_d.c       | 23 +++++++++++++++++++++++
 tests/sqrtr_f.c       | 23 +++++++++++++++++++++++
 8 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 tests/sqrtr_d.c
 create mode 100644 tests/sqrtr_f.c

(limited to 'tests')

diff --git a/include/ejit/ejit.h b/include/ejit/ejit.h
index d4bb725..920fdc5 100644
--- a/include/ejit/ejit.h
+++ b/include/ejit/ejit.h
@@ -805,6 +805,9 @@ void ejit_truncr_d_32(struct ejit_func *s, struct ejit_gpr r0,
 void ejit_truncr_d_64(struct ejit_func *s, struct ejit_gpr r0,
                       struct ejit_fpr r1);
 
+void ejit_sqrtr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1);
+void ejit_sqrtr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1);
+
 struct ejit_reloc ejit_bltr(struct ejit_func *s, struct ejit_gpr r0,
                             struct ejit_gpr r1);
 struct ejit_reloc ejit_bner(struct ejit_func *s, struct ejit_gpr r0,
diff --git a/scripts/gen-tests b/scripts/gen-tests
index 5521006..deac247 100755
--- a/scripts/gen-tests
+++ b/scripts/gen-tests
@@ -11,5 +11,5 @@ do
 	echo "${dep}:"						>> tests.mk
 	echo "-include ${dep}"					>> tests.mk
 	echo "${exe}: ${s} libejit.a"				>> tests.mk
-	echo "	\$(COMPILE_TEST) ${s} libejit.a -o ${exe}"	>> tests.mk
+	echo "	\$(COMPILE_TEST) ${s} libejit.a -o ${exe} -lm"	>> tests.mk
 done
diff --git a/src/common.h b/src/common.h
index 6a3c754..c690f8f 100644
--- a/src/common.h
+++ b/src/common.h
@@ -142,6 +142,9 @@ enum ejit_opcode {
 	EJIT_OP_TRUNCR_F_32,
 	EJIT_OP_TRUNCR_F_64,
 
+	EJIT_OP_SQRTR_F,
+	EJIT_OP_SQRTR_D,
+
 	EJIT_OP_EQR,
 	EJIT_OP_NER,
 	EJIT_OP_GTR,
diff --git a/src/compile/compile.c b/src/compile/compile.c
index b90ee54..580b7fa 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -1326,6 +1326,23 @@ static void compile_truncr_f_32(struct ejit_func *f, jit_state_t *j,
 #endif
 }
 
+static void compile_sqrtr_f(struct ejit_func *f, jit_state_t *j,
+		struct ejit_insn i)
+{ /* lower EJIT_OP_SQRTR_F: single-precision square root of i.r1 into i.r0 */
+	jit_fpr_t r0 = getfpr(f, i.r0, 0);
+	jit_fpr_t r1 = getloc_f(f, j, i.r1, 1);
+	jit_sqrtr_f(j, r0, r1);
+	putloc_f(f, j, i.r0, r0); /* float result: use the _f store to match getloc_f/jit_sqrtr_f */
+}
+
+static void compile_sqrtr_d(struct ejit_func *f, jit_state_t *j,
+		struct ejit_insn i)
+{ /* lower EJIT_OP_SQRTR_D: double-precision square root of i.r1 into i.r0 */
+	const jit_fpr_t dst = getfpr(f, i.r0, 0);
+	const jit_fpr_t src = getloc_d(f, j, i.r1, 1);
+	jit_sqrtr_d(j, dst, src);
+	putloc_d(f, j, i.r0, dst);
+}
 
 static void compile_reg_cmp(struct ejit_func *f, jit_state_t *j,
                             struct ejit_insn i,
@@ -2084,6 +2101,9 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 		case EJIT_OP_TRUNCR_F_32: compile_truncr_f_32(f, j, i); break;
 		case EJIT_OP_TRUNCR_F_64: compile_truncr_f_64(f, j, i); break;
 
+		case EJIT_OP_SQRTR_F: compile_sqrtr_f(f, j, i); break;
+		case EJIT_OP_SQRTR_D: compile_sqrtr_d(f, j, i); break;
+
 		case EJIT_OP_EQR: compile_eqr(f, j, i); break;
 		case EJIT_OP_EQR_F: compile_eqr_f(f, j, i); break;
 		case EJIT_OP_EQR_D: compile_eqr_d(f, j, i); break;
diff --git a/src/ejit.c b/src/ejit.c
index e7e2ff2..2224198 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -1371,6 +1371,16 @@ void ejit_truncr_f_64(struct ejit_func *s, struct ejit_gpr r0,
 	emit_insn_orf(s, EJIT_OP_TRUNCR_F_64, r0, f1);
 }
 
+void ejit_sqrtr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1)
+{ /* append an EJIT_OP_SQRTR_F instruction to s */
+	emit_insn_off(s, EJIT_OP_SQRTR_F, r0, r1); /* interpreter runs this as r0 = sqrt(r1), single precision */
+}
+
+void ejit_sqrtr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1)
+{ /* append an EJIT_OP_SQRTR_D instruction to s */
+	emit_insn_off(s, EJIT_OP_SQRTR_D, r0, r1); /* interpreter runs this as r0 = sqrt(r1), double precision */
+}
+
 struct ejit_reloc ejit_bner(struct ejit_func *s, struct ejit_gpr r0,
                             struct ejit_gpr r1)
 {
diff --git a/src/interp.c b/src/interp.c
index b858f26..2d9b7c7 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -147,6 +147,9 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 		[EJIT_OP_TRUNCR_F_32] = &&TRUNCR_F_32,
 		[EJIT_OP_TRUNCR_F_64] = &&TRUNCR_F_64,
 
+		[EJIT_OP_SQRTR_F] = &&SQRTR_F,
+		[EJIT_OP_SQRTR_D] = &&SQRTR_D,
+
 		[EJIT_OP_BNER] = &&BNER,
 		[EJIT_OP_BNEI] = &&BNEI,
 		[EJIT_OP_BNER_F] = &&BNER_F,
@@ -784,6 +787,14 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 	gpr[i.r0] = (int64_t)fpr[i.r1].f;
 	DISPATCH();
 
+	DO(SQRTR_F);
+	fpr[i.r0].f = sqrt(fpr[i.r1].f);
+	DISPATCH();
+
+	DO(SQRTR_D);
+	fpr[i.r0].d = sqrt(fpr[i.r1].d);
+	DISPATCH();
+
 	DO(BNER);
 	if (gpr[i.r1] != gpr[i.r2])
 		JUMP(i.r0);
diff --git a/tests/sqrtr_d.c b/tests/sqrtr_d.c
new file mode 100644
index 0000000..06e7894
--- /dev/null
+++ b/tests/sqrtr_d.c
@@ -0,0 +1,23 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{ /* test: double-precision square root, computed in place on FPR 0 */
+        (void)argv;
+        bool do_jit = argc > 1; /* true when any command-line argument is given */
+        struct ejit_operand operands[1] = { EJIT_OPERAND_FPR(0, EJIT_TYPE(double)) };
+        struct ejit_func *f = ejit_create_func(EJIT_TYPE(double), 1, operands);
+
+        ejit_sqrtr_d(f, EJIT_FPR(0), EJIT_FPR(0));
+        ejit_retr_d(f, EJIT_FPR(0));
+
+        ejit_select_compile_func(f, 0, 1, EJIT_USE64(double), do_jit, true);
+
+        assert(erfd1(f, EJIT_ARG( 0.0, double)) == 0.0);
+        assert(erfd1(f, EJIT_ARG( 4.0, double)) == 2.0);
+        assert(erfd1(f, EJIT_ARG(-4.0, double))
+                        != erfd1(f, EJIT_ARG(-4.0, double))); // nan
+
+        ejit_destroy_func(f); /* release the function instead of leaking it at exit */
+}
diff --git a/tests/sqrtr_f.c b/tests/sqrtr_f.c
new file mode 100644
index 0000000..3baa00d
--- /dev/null
+++ b/tests/sqrtr_f.c
@@ -0,0 +1,23 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{ /* test: single-precision square root, computed in place on FPR 0 */
+        (void)argv;
+        bool do_jit = argc > 1; /* true when any command-line argument is given */
+        struct ejit_operand operands[1] = { EJIT_OPERAND_FPR(0, EJIT_TYPE(float)) };
+        struct ejit_func *f = ejit_create_func(EJIT_TYPE(float), 1, operands);
+
+        ejit_sqrtr_f(f, EJIT_FPR(0), EJIT_FPR(0));
+        ejit_retr_f(f, EJIT_FPR(0));
+
+        ejit_select_compile_func(f, 0, 1, EJIT_USE64(float), do_jit, true);
+
+        assert(erff1(f, EJIT_ARG( 0.0, float)) == 0.0);
+        assert(erff1(f, EJIT_ARG( 4.0, float)) == 2.0);
+        assert(erff1(f, EJIT_ARG(-4.0, float))
+                        != erff1(f, EJIT_ARG(-4.0, float))); // nan
+
+        ejit_destroy_func(f); /* release the function instead of leaking it at exit */
+}
-- 
cgit v1.2.3


From d8f9699debd435da5e1aef22c94c47154be4e2be Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Wed, 2 Apr 2025 21:13:03 +0300
Subject: fix big endian compilation

+ Code used some assumptions about type aliasing that might not hold for
  all systems
---
 include/ejit/ejit.h          | 124 ++++++++++++++++++++++++++++++++-----------
 src/ejit.c                   |   6 ++-
 src/interp.c                 |  13 ++++-
 tests/escapei_10.c           |  30 ++++-------
 tests/escapei_double.c       |   6 +--
 tests/escapei_float.c        |   6 +--
 tests/escapei_immediate_10.c |  30 ++++-------
 7 files changed, 134 insertions(+), 81 deletions(-)

(limited to 'tests')

diff --git a/include/ejit/ejit.h b/include/ejit/ejit.h
index 920fdc5..2afdc61 100644
--- a/include/ejit/ejit.h
+++ b/include/ejit/ejit.h
@@ -72,29 +72,20 @@ static inline enum ejit_type ejit_signed_type(size_t w)
 
 struct ejit_arg {
 	union {
-		int8_t i8;
-		uint8_t u8;
+		int8_t  i8;
 		int16_t i16;
-		uint16_t u16;
 		int32_t i32;
-		uint32_t u32;
 		int64_t i64;
-		uint64_t u64;
 
-		signed char c;
-		unsigned char uc;
-		signed short s;
-		unsigned short us;
-		signed int i;
-		unsigned int ui;
-		signed long l;
-		unsigned long ul;
-		signed long long ll;
-		unsigned long long ull;
+		uint8_t  u8;
+		uint16_t u16;
+		uint32_t u32;
+		uint64_t u64;
 
 		float f;
 		double d;
 		void *p;
+		long l;
 	};
 	enum ejit_type type;
 };
@@ -136,15 +127,15 @@ static inline struct ejit_arg ejit_build_arg(enum ejit_type type, uint64_t x)
 	a.type = type;
 
 	switch (type) {
-	case EJIT_INT8: a.u64 = (int8_t)x; break;
-	case EJIT_INT16: a.u64 = (int16_t)x; break;
-	case EJIT_INT32: a.u64 = (int32_t)x; break;
-	case EJIT_INT64: a.u64 = (int64_t)x; break;
-	case EJIT_UINT8: a.u64 = (uint8_t)x; break;
-	case EJIT_UINT16: a.u64 = (uint16_t)x; break;
-	case EJIT_UINT32: a.u64 = (uint32_t)x; break;
-	case EJIT_UINT64: a.u64 = (uint64_t)x; break;
-	case EJIT_POINTER: a.p = (void *)(uintptr_t)x; break;
+	case EJIT_INT8:    a.i8  = (int8_t)x; break;
+	case EJIT_INT16:   a.i16 = (int16_t)x; break;
+	case EJIT_INT32:   a.i32 = (int32_t)x; break;
+	case EJIT_INT64:   a.i64 = (int64_t)x; break;
+	case EJIT_UINT8:   a.u8  = (uint8_t)x; break;
+	case EJIT_UINT16:  a.u16 = (uint16_t)x; break;
+	case EJIT_UINT32:  a.u32 = (uint32_t)x; break;
+	case EJIT_UINT64:  a.u64 = (uint64_t)x; break;
+	case EJIT_POINTER: a.p   = (void *)(uintptr_t)x; break;
 	default: abort();
 	}
 
@@ -251,17 +242,17 @@ void ejit_destroy_func(struct ejit_func *s);
 /* maybe slight hack, but increase width to interpeter register width */
 static inline struct ejit_arg ejit_i8(int8_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_INT8};
+	return (struct ejit_arg){.i8 = a, .type = EJIT_INT8};
 }
 
 static inline struct ejit_arg ejit_i16(int16_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_INT16};
+	return (struct ejit_arg){.i16 = a, .type = EJIT_INT16};
 }
 
 static inline struct ejit_arg ejit_i32(int32_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_INT32};
+	return (struct ejit_arg){.i32 = a, .type = EJIT_INT32};
 }
 
 static inline struct ejit_arg ejit_i64(int64_t a)
@@ -271,22 +262,22 @@ static inline struct ejit_arg ejit_i64(int64_t a)
 
 static inline struct ejit_arg ejit_u8(uint8_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_UINT8};
+	return (struct ejit_arg){.u8 = a, .type = EJIT_UINT8};
 }
 
 static inline struct ejit_arg ejit_u16(uint16_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_UINT16};
+	return (struct ejit_arg){.u16 = a, .type = EJIT_UINT16};
 }
 
 static inline struct ejit_arg ejit_u32(uint32_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_UINT32};
+	return (struct ejit_arg){.u32 = a, .type = EJIT_UINT32};
 }
 
 static inline struct ejit_arg ejit_u64(uint64_t a)
 {
-	return (struct ejit_arg){.i64 = a, .type = EJIT_UINT64};
+	return (struct ejit_arg){.u64 = a, .type = EJIT_UINT64};
 }
 
 static inline struct ejit_arg ejit_pointer(void *p)
@@ -372,6 +363,77 @@ static inline struct ejit_arg ejit_pointer_arg(void *p, size_t w)
 #define EJIT_AUTO(x) \
 	EJIT_ARG(x, typeof(x))
 
+static inline int64_t ejit_signed_param(size_t argc, const struct ejit_arg args[argc],
+		size_t idx, enum ejit_type type)
+{ /* return signed integer argument idx, read through the union member matching its width */
+	assert(idx < argc);
+	assert(args[idx].type == type); /* stored tag must equal the requested type */
+	switch (type) {
+	case EJIT_INT64: return args[idx].i64;
+	case EJIT_INT32: return args[idx].i32;
+	case EJIT_INT16: return args[idx].i16;
+	case EJIT_INT8:  return args[idx].i8;
+	default: abort(); /* requested type is not a signed integer type */
+	}
+
+	return 0; /* not reached: every switch arm returns or aborts */
+}
+
+static inline uint64_t ejit_unsigned_param(size_t argc, const struct ejit_arg args[argc],
+		size_t idx, enum ejit_type type)
+{ /* return unsigned integer argument idx, read through the union member matching its width */
+	assert(idx < argc);
+	assert(args[idx].type == type); /* stored tag must equal the requested type */
+	switch (type) {
+	case EJIT_UINT64: return args[idx].u64;
+	case EJIT_UINT32: return args[idx].u32;
+	case EJIT_UINT16: return args[idx].u16;
+	case EJIT_UINT8:  return args[idx].u8;
+	default: abort(); /* requested type is not an unsigned integer type */
+	}
+}
+
+static inline float ejit_float_param(size_t argc, const struct ejit_arg args[argc],
+		size_t idx, enum ejit_type type)
+{ /* return argument idx as a float */
+	assert(idx < argc);
+	assert(args[idx].type == type && type == EJIT_FLOAT); /* tag and request must both be EJIT_FLOAT */
+	return args[idx].f;
+}
+
+static inline double ejit_double_param(size_t argc, const struct ejit_arg args[argc],
+		size_t idx, enum ejit_type type)
+{ /* return argument idx as a double */
+	assert(idx < argc);
+	assert(args[idx].type == type && type == EJIT_DOUBLE); /* tag and request must both be EJIT_DOUBLE */
+	return args[idx].d;
+}
+
+static inline void *ejit_pointer_param(size_t argc, const struct ejit_arg args[argc],
+		size_t idx, enum ejit_type type)
+{ /* return argument idx as a pointer */
+	assert(idx < argc);
+	assert(args[idx].type == type && type == EJIT_POINTER); /* tag and request must both be EJIT_POINTER */
+	return args[idx].p;
+}
+
+#define EJIT_PARAM(argc, args, idx, t)                 \
+	_Generic((t)(0),               \
+		 signed char       : ejit_signed_param,      \
+		 signed short      : ejit_signed_param,    \
+		 signed int        : ejit_signed_param,    \
+		 signed long       : ejit_signed_param,    \
+		 signed long long  : ejit_signed_param, \
+		 unsigned char     : ejit_unsigned_param,      \
+		 unsigned short    : ejit_unsigned_param,    \
+		 unsigned int      : ejit_unsigned_param,    \
+		 unsigned long     : ejit_unsigned_param,    \
+		 unsigned long long: ejit_unsigned_param, \
+		 float             : ejit_float_param,    \
+		 double            : ejit_double_param,  \
+		 default           : ejit_pointer_param \
+		 )(argc, args, idx, EJIT_TYPE(t)) /* picks the accessor from C type t; unlisted types (pointers, but also plain char and bool) select ejit_pointer_param */
+
 static inline bool ejit_use64(struct ejit_arg a)
 {
 	if (a.type == EJIT_INT64 || a.type == EJIT_UINT64)
diff --git a/src/ejit.c b/src/ejit.c
index 2224198..0ee3986 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -1726,6 +1726,10 @@ struct ejit_arg ejit_run_func(struct ejit_func *f, size_t argc, struct ejit_arg
 		};
 
 	case EJIT_UINT64:
+		return (struct ejit_arg){
+			.u64 = ejit_run_func_l(f, argc, args),
+			.type = f->rtype
+		};
 	case EJIT_INT64:
 		return (struct ejit_arg){
 			.i64 = ejit_run_func_l(f, argc, args),
@@ -1734,7 +1738,7 @@ struct ejit_arg ejit_run_func(struct ejit_func *f, size_t argc, struct ejit_arg
 
 	default:
 		return (struct ejit_arg){
-			.i64 = ejit_run_func_i(f, argc, args),
+			.l = ejit_run_func_i(f, argc, args),
 			.type = f->rtype
 		};
 	}
diff --git a/src/interp.c b/src/interp.c
index 2d9b7c7..049498a 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -984,7 +984,18 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 	DISPATCH();
 
 	DO(PARAM);
-	gpr[i.r2] = params[i.r0].u64;
+	switch (i.r1) {
+	case EJIT_INT8:    gpr[i.r2] = params[i.r0].i8; break;
+	case EJIT_INT16:   gpr[i.r2] = params[i.r0].i16; break;
+	case EJIT_INT32:   gpr[i.r2] = params[i.r0].i32; break;
+	case EJIT_INT64:   gpr[i.r2] = params[i.r0].i64; break;
+	case EJIT_UINT8:   gpr[i.r2] = params[i.r0].u8; break;
+	case EJIT_UINT16:  gpr[i.r2] = params[i.r0].u16; break;
+	case EJIT_UINT32:  gpr[i.r2] = params[i.r0].u32; break;
+	case EJIT_UINT64:  gpr[i.r2] = params[i.r0].u64; break;
+	case EJIT_POINTER: gpr[i.r2] = (int64_t)params[i.r0].p; break;
+	default: abort();
+	}
 	DISPATCH();
 
 	DO(PARAM_F);
diff --git a/tests/escapei_10.c b/tests/escapei_10.c
index 4ae00b8..ec48df0 100644
--- a/tests/escapei_10.c
+++ b/tests/escapei_10.c
@@ -21,26 +21,16 @@ static int32_t func(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e,
 static long escape_func(size_t argc, const struct ejit_arg args[argc])
 {
 	assert(argc == 10);
-	assert(args[0].type == EJIT_INT32);
-	assert(args[1].type == EJIT_INT32);
-	assert(args[3].type == EJIT_INT32);
-	assert(args[4].type == EJIT_INT32);
-	assert(args[5].type == EJIT_INT32);
-	assert(args[6].type == EJIT_INT32);
-	assert(args[7].type == EJIT_INT32);
-	assert(args[8].type == EJIT_INT32);
-	assert(args[9].type == EJIT_INT32);
-
-	int32_t a = args[0].i32;
-	int32_t b = args[1].i32;
-	int32_t c = args[2].i32;
-	int32_t d = args[3].i32;
-	int32_t e = args[4].i32;
-	int32_t f = args[5].i32;
-	int32_t g = args[6].i32;
-	int32_t h = args[7].i32;
-	int32_t i = args[8].i32;
-	int32_t j = args[9].i32;
+	int32_t a = EJIT_PARAM(argc, args, 0, int32_t);
+	int32_t b = EJIT_PARAM(argc, args, 1, int32_t);
+	int32_t c = EJIT_PARAM(argc, args, 2, int32_t);
+	int32_t d = EJIT_PARAM(argc, args, 3, int32_t);
+	int32_t e = EJIT_PARAM(argc, args, 4, int32_t);
+	int32_t f = EJIT_PARAM(argc, args, 5, int32_t);
+	int32_t g = EJIT_PARAM(argc, args, 6, int32_t);
+	int32_t h = EJIT_PARAM(argc, args, 7, int32_t);
+	int32_t i = EJIT_PARAM(argc, args, 8, int32_t);
+	int32_t j = EJIT_PARAM(argc, args, 9, int32_t);
 	return func(a, b, c, d, e, f, g, h, i, j);
 }
 
diff --git a/tests/escapei_double.c b/tests/escapei_double.c
index 6ea9f90..736e978 100644
--- a/tests/escapei_double.c
+++ b/tests/escapei_double.c
@@ -9,10 +9,8 @@ static double func(int32_t a, double b) {
 static double escape_func(size_t argc, const struct ejit_arg args[argc])
 {
 	assert(argc == 2);
-	assert(args[0].type == EJIT_INT32);
-	assert(args[1].type == EJIT_DOUBLE);
-	int32_t a = args[0].i32;
-	double b = args[1].d;
+	int32_t a = EJIT_PARAM(argc, args, 0, int32_t);
+	double b = EJIT_PARAM(argc, args, 1, double);
 	return func(a, b);
 }
 
diff --git a/tests/escapei_float.c b/tests/escapei_float.c
index 7a1b923..7cdc30d 100644
--- a/tests/escapei_float.c
+++ b/tests/escapei_float.c
@@ -9,10 +9,8 @@ static float func(int32_t a, float b) {
 static float escape_func(size_t argc, const struct ejit_arg args[argc])
 {
 	assert(argc == 2);
-	assert(args[0].type == EJIT_INT32);
-	assert(args[1].type == EJIT_FLOAT);
-	int32_t a = args[0].i32;
-	float b = args[1].f;
+	int32_t a = EJIT_PARAM(argc, args, 0, int32_t);
+	float b = EJIT_PARAM(argc, args, 1, float);
 	return func(a, b);
 }
 
diff --git a/tests/escapei_immediate_10.c b/tests/escapei_immediate_10.c
index 381c79f..5517c35 100644
--- a/tests/escapei_immediate_10.c
+++ b/tests/escapei_immediate_10.c
@@ -21,26 +21,16 @@ static int32_t func(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e,
 static long escape_func(size_t argc, const struct ejit_arg args[argc])
 {
 	assert(argc == 10);
-	assert(args[0].type == EJIT_INT32);
-	assert(args[1].type == EJIT_INT32);
-	assert(args[3].type == EJIT_INT32);
-	assert(args[4].type == EJIT_INT32);
-	assert(args[5].type == EJIT_INT32);
-	assert(args[6].type == EJIT_INT32);
-	assert(args[7].type == EJIT_INT32);
-	assert(args[8].type == EJIT_INT32);
-	assert(args[9].type == EJIT_INT32);
-
-	int32_t a = args[0].i32;
-	int32_t b = args[1].i32;
-	int32_t c = args[2].i32;
-	int32_t d = args[3].i32;
-	int32_t e = args[4].i32;
-	int32_t f = args[5].i32;
-	int32_t g = args[6].i32;
-	int32_t h = args[7].i32;
-	int32_t i = args[8].i32;
-	int32_t j = args[9].i32;
+	int32_t a = EJIT_PARAM(argc, args, 0, int32_t);
+	int32_t b = EJIT_PARAM(argc, args, 1, int32_t);
+	int32_t c = EJIT_PARAM(argc, args, 2, int32_t);
+	int32_t d = EJIT_PARAM(argc, args, 3, int32_t);
+	int32_t e = EJIT_PARAM(argc, args, 4, int32_t);
+	int32_t f = EJIT_PARAM(argc, args, 5, int32_t);
+	int32_t g = EJIT_PARAM(argc, args, 6, int32_t);
+	int32_t h = EJIT_PARAM(argc, args, 7, int32_t);
+	int32_t i = EJIT_PARAM(argc, args, 8, int32_t);
+	int32_t j = EJIT_PARAM(argc, args, 9, int32_t);
 	return func(a, b, c, d, e, f, g, h, i, j);
 }
 
-- 
cgit v1.2.3


From 6c2e51a3f8695cb95d6a4a6859d3f934e28c8f9f Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Wed, 2 Apr 2025 21:14:23 +0300
Subject: improve build system a bit

---
 scripts/makefile | 13 +++++++++----
 tests/makefile   | 34 +++++++++++++++++++++++++++-------
 2 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'tests')

diff --git a/scripts/makefile b/scripts/makefile
index 3a4f200..bd02c98 100644
--- a/scripts/makefile
+++ b/scripts/makefile
@@ -29,16 +29,21 @@ OBJCOPY		!= [ "$(LLVM)" != "0" ] \
 			&& echo llvm-objcopy \
 			|| echo $(CROSS_COMPILE)objcopy
 
-COMPILER	!= [ "$(LLVM)" != "0" ] \
-			&& echo clang --target="$(CROSS_COMPILE)" \
-			|| echo $(CROSS_COMPILE)gcc
+COMPILER	!= [ -n "$(CROSS_COMPILE)" ]						\
+			&& {								\
+				[ "$(LLVM)" != "0" ]					\
+					&& echo clang --target="$(CROSS_COMPILE)"	\
+					|| echo $(CROSS_COMPILE)gcc			\
+					;						\
+			}								\
+			|| echo $(CC)
 
 
 OBFLAGS		:= -g
 WARNFLAGS	:= -Wall -Wextra
 
 COMPILE_FLAGS	:= $(CFLAGS) $(WARNFLAGS) $(OPTFLAGS) $(LTOFLAGS) \
-		   $(OBFLAGS) $(ASSERTFLAGS) $(DEBUGFLAGS)
+		   $(OBFLAGS) $(DEBUGFLAGS)
 
 INCLUDE_FLAGS	:= -I include
 
diff --git a/tests/makefile b/tests/makefile
index 5aeef98..081170f 100644
--- a/tests/makefile
+++ b/tests/makefile
@@ -1,14 +1,34 @@
 include ./tests.mk
 
-LLVM			?= 0
-CROSS_COMPILE		:=
-COMPILER		!= [ "$(LLVM)" != "0" ] \
-				&& echo clang --target="$(CROSS_COMPILE)" \
-				|| echo $(CROSS_COMPILE)gcc
+LLVM		?= 0
+COMPILER	!= [ -n "$(CROSS_COMPILE)" ]						\
+			&& {								\
+				[ "$(LLVM)" != "0" ]					\
+					&& echo clang --target="$(CROSS_COMPILE)"	\
+					|| echo $(CROSS_COMPILE)gcc			\
+					;						\
+			}								\
+			|| echo $(CC)
 
-CFLAGS			:= -Wall -Wextra -O0 -g
+RELEASE		?= 0
+OPTFLAGS	!= [ "$(RELEASE)" != "0" ] \
+			&& echo "-O2" \
+			|| echo "-O0"
+
+LTO		?= 0
+LTOFLAGS	!= [ "$(LTO)" != "0" ] \
+			&& echo "-flto=auto"
+
+DEBUG		?= 1
+DEBUGFLAGS	!= [ "$(DEBUG)" != "0" ] \
+			&& echo "-DDEBUG=1" \
+			|| echo "-DNDEBUG=1"
+
+OBFLAGS			:= -g
+WARNFLAGS		:= -Wall -Wextra
 INCLUDE_FLAGS		:= -I include
-COMPILE_TEST		:= $(COMPILER) $(CFLAGS) $(INCLUDE_FLAGS)
+COMPILE_TEST		:= $(COMPILER) $(WARNFLAGS) $(OPTFLAGS) $(LTOFLAGS) \
+			   $(OBFLAGS) $(CFLAGS) $(DEBUGFLAGS) $(INCLUDE_FLAGS)
 
 .PHONY: check
 check: $(TESTS)
-- 
cgit v1.2.3


From 42f89542550033a3f22700e6b0fd71627a252f96 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Sat, 5 Apr 2025 13:46:09 +0300
Subject: implement minr_f/minr_d and maxr_f/maxr_d

---
 deps/lightening       |  2 +-
 include/ejit/ejit.h   |  6 ++++++
 src/common.h          |  6 ++++++
 src/compile/compile.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 src/ejit.c            | 26 ++++++++++++++++++++++++++
 src/interp.c          | 22 ++++++++++++++++++++++
 tests/maxr_d.c        | 28 ++++++++++++++++++++++++++++
 tests/maxr_f.c        | 28 ++++++++++++++++++++++++++++
 tests/minr_d.c        | 28 ++++++++++++++++++++++++++++
 tests/minr_f.c        | 28 ++++++++++++++++++++++++++++
 10 files changed, 219 insertions(+), 1 deletion(-)
 create mode 100644 tests/maxr_d.c
 create mode 100644 tests/maxr_f.c
 create mode 100644 tests/minr_d.c
 create mode 100644 tests/minr_f.c

(limited to 'tests')

diff --git a/deps/lightening b/deps/lightening
index 6421af4..5a72993 160000
--- a/deps/lightening
+++ b/deps/lightening
@@ -1 +1 @@
-Subproject commit 6421af4db1570a6a5fc3a15f3bcb2601d854ed0e
+Subproject commit 5a72993827bc2b1735a7611c0036640cdb01b93b
diff --git a/include/ejit/ejit.h b/include/ejit/ejit.h
index b4b20c1..965103c 100644
--- a/include/ejit/ejit.h
+++ b/include/ejit/ejit.h
@@ -872,6 +872,12 @@ void ejit_truncr_d_64(struct ejit_func *s, struct ejit_gpr r0,
 void ejit_sqrtr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1);
 void ejit_sqrtr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1);
 
+void ejit_minr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2);
+void ejit_minr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2);
+
+void ejit_maxr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2);
+void ejit_maxr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2);
+
 struct ejit_reloc ejit_bltr(struct ejit_func *s, struct ejit_gpr r0,
                             struct ejit_gpr r1);
 struct ejit_reloc ejit_bner(struct ejit_func *s, struct ejit_gpr r0,
diff --git a/src/common.h b/src/common.h
index 661220b..69f1441 100644
--- a/src/common.h
+++ b/src/common.h
@@ -145,6 +145,12 @@ enum ejit_opcode {
 	EJIT_OP_SQRTR_F,
 	EJIT_OP_SQRTR_D,
 
+	EJIT_OP_MINR_F,
+	EJIT_OP_MINR_D,
+
+	EJIT_OP_MAXR_F,
+	EJIT_OP_MAXR_D,
+
 	EJIT_OP_EQR,
 	EJIT_OP_NER,
 	EJIT_OP_GTR,
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 38c368f..dcf662b 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -1977,6 +1977,46 @@ static void resolve_relocs(jit_state_t *j, struct relocs *relocs, struct addrs *
 	}
 }
 
+static void compile_maxr_f(struct ejit_func *f, jit_state_t *j,
+		struct ejit_insn i)
+{ /* lower EJIT_OP_MAXR_F: single-precision max of i.r1 and i.r2 into i.r0 */
+	const jit_fpr_t dst = getfpr(f, i.r0, 0);
+	const jit_fpr_t lhs = getloc_f(f, j, i.r1, 1);
+	const jit_fpr_t rhs = getloc_f(f, j, i.r2, 2);
+	jit_maxr_f(j, dst, lhs, rhs);
+	putloc_f(f, j, i.r0, dst);
+}
+
+static void compile_maxr_d(struct ejit_func *f, jit_state_t *j,
+		struct ejit_insn i)
+{ /* lower EJIT_OP_MAXR_D: double-precision max of i.r1 and i.r2 into i.r0 */
+	const jit_fpr_t dst = getfpr(f, i.r0, 0);
+	const jit_fpr_t lhs = getloc_d(f, j, i.r1, 1);
+	const jit_fpr_t rhs = getloc_d(f, j, i.r2, 2);
+	jit_maxr_d(j, dst, lhs, rhs);
+	putloc_d(f, j, i.r0, dst);
+}
+
+static void compile_minr_f(struct ejit_func *f, jit_state_t *j,
+		struct ejit_insn i)
+{ /* lower EJIT_OP_MINR_F: single-precision min of i.r1 and i.r2 into i.r0 */
+	const jit_fpr_t dst = getfpr(f, i.r0, 0);
+	const jit_fpr_t lhs = getloc_f(f, j, i.r1, 1);
+	const jit_fpr_t rhs = getloc_f(f, j, i.r2, 2);
+	jit_minr_f(j, dst, lhs, rhs);
+	putloc_f(f, j, i.r0, dst);
+}
+
+static void compile_minr_d(struct ejit_func *f, jit_state_t *j,
+		struct ejit_insn i)
+{ /* lower EJIT_OP_MINR_D: double-precision min of i.r1 and i.r2 into i.r0 */
+	const jit_fpr_t dst = getfpr(f, i.r0, 0);
+	const jit_fpr_t lhs = getloc_d(f, j, i.r1, 1);
+	const jit_fpr_t rhs = getloc_d(f, j, i.r2, 2);
+	jit_minr_d(j, dst, lhs, rhs);
+	putloc_d(f, j, i.r0, dst);
+}
+
 static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
                               size_t size)
 {
@@ -2143,6 +2183,12 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 		case EJIT_OP_SQRTR_F: compile_sqrtr_f(f, j, i); break;
 		case EJIT_OP_SQRTR_D: compile_sqrtr_d(f, j, i); break;
 
+		case EJIT_OP_MINR_F: compile_minr_f(f, j, i); break;
+		case EJIT_OP_MINR_D: compile_minr_d(f, j, i); break;
+
+		case EJIT_OP_MAXR_F: compile_maxr_f(f, j, i); break;
+		case EJIT_OP_MAXR_D: compile_maxr_d(f, j, i); break;
+
 		case EJIT_OP_EQR: compile_eqr(f, j, i); break;
 		case EJIT_OP_EQR_F: compile_eqr_f(f, j, i); break;
 		case EJIT_OP_EQR_D: compile_eqr_d(f, j, i); break;
diff --git a/src/ejit.c b/src/ejit.c
index 75f6a6a..c997a01 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -901,31 +901,37 @@ void ejit_ret(struct ejit_func *s)
 
 void ejit_retr(struct ejit_func *s, struct ejit_gpr r0)
 {
+	assert(s->rtype != EJIT_FLOAT && s->rtype != EJIT_DOUBLE);
 	emit_insn_oxr(s, EJIT_OP_RETR, r0);
 }
 
 void ejit_retr_f(struct ejit_func *s, struct ejit_fpr f0)
 {
+	assert(s->rtype == EJIT_FLOAT);
 	emit_insn_oxf(s, EJIT_OP_RETR_F, f0);
 }
 
 void ejit_retr_d(struct ejit_func *s, struct ejit_fpr f0)
 {
+	assert(s->rtype == EJIT_DOUBLE);
 	emit_insn_oxf(s, EJIT_OP_RETR_D, f0);
 }
 
 void ejit_reti(struct ejit_func *s, int64_t i)
 {
+	assert(s->rtype != EJIT_FLOAT && s->rtype != EJIT_DOUBLE);
 	emit_insn_oi(s, EJIT_OP_RETI, i);
 }
 
 void ejit_reti_f(struct ejit_func *s, float f)
 {
+	assert(s->rtype == EJIT_FLOAT);
 	emit_insn_oF(s, EJIT_OP_RETI_F, f);
 }
 
 void ejit_reti_d(struct ejit_func *s, double f)
 {
+	assert(s->rtype == EJIT_DOUBLE);
 	emit_insn_oD(s, EJIT_OP_RETI_D, f);
 }
 
@@ -1382,6 +1388,26 @@ void ejit_sqrtr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1)
 	emit_insn_off(s, EJIT_OP_SQRTR_D, r0, r1);
 }
 
+void ejit_minr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2)
+{ /* append an EJIT_OP_MINR_F instruction to s */
+	emit_insn_offf(s, EJIT_OP_MINR_F, r0, r1, r2); /* interpreter runs this as r0 = fminf(r1, r2) */
+}
+
+void ejit_minr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2)
+{ /* append an EJIT_OP_MINR_D instruction to s */
+	emit_insn_offf(s, EJIT_OP_MINR_D, r0, r1, r2); /* interpreter runs this as r0 = fmin(r1, r2) */
+}
+
+void ejit_maxr_f(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2)
+{ /* append an EJIT_OP_MAXR_F instruction to s */
+	emit_insn_offf(s, EJIT_OP_MAXR_F, r0, r1, r2); /* interpreter runs this as r0 = fmaxf(r1, r2) */
+}
+
+void ejit_maxr_d(struct ejit_func *s, struct ejit_fpr r0, struct ejit_fpr r1, struct ejit_fpr r2)
+{ /* append an EJIT_OP_MAXR_D instruction to s */
+	emit_insn_offf(s, EJIT_OP_MAXR_D, r0, r1, r2); /* interpreter runs this as r0 = fmax(r1, r2) */
+}
+
 struct ejit_reloc ejit_bner(struct ejit_func *s, struct ejit_gpr r0,
                             struct ejit_gpr r1)
 {
diff --git a/src/interp.c b/src/interp.c
index e7be77b..57dbfbe 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -150,6 +150,12 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 		[EJIT_OP_SQRTR_F] = &&SQRTR_F,
 		[EJIT_OP_SQRTR_D] = &&SQRTR_D,
 
+		[EJIT_OP_MINR_F] = &&MINR_F,
+		[EJIT_OP_MINR_D] = &&MINR_D,
+
+		[EJIT_OP_MAXR_F] = &&MAXR_F,
+		[EJIT_OP_MAXR_D] = &&MAXR_D,
+
 		[EJIT_OP_BNER] = &&BNER,
 		[EJIT_OP_BNEI] = &&BNEI,
 		[EJIT_OP_BNER_F] = &&BNER_F,
@@ -791,6 +797,22 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 	fpr[i.r0].f = sqrt(fpr[i.r1].f);
 	DISPATCH();
 
+	DO(MINR_F);
+	fpr[i.r0].f = fminf(fpr[i.r1].f, fpr[i.r2].f);
+	DISPATCH();
+
+	DO(MINR_D);
+	fpr[i.r0].d = fmin(fpr[i.r1].d, fpr[i.r2].d);
+	DISPATCH();
+
+	DO(MAXR_F);
+	fpr[i.r0].f = fmaxf(fpr[i.r1].f, fpr[i.r2].f);
+	DISPATCH();
+
+	DO(MAXR_D);
+	fpr[i.r0].d = fmax(fpr[i.r1].d, fpr[i.r2].d);
+	DISPATCH();
+
 	DO(SQRTR_D);
 	fpr[i.r0].d = sqrt(fpr[i.r1].d);
 	DISPATCH();
diff --git a/tests/maxr_d.c b/tests/maxr_d.c
new file mode 100644
index 0000000..3e35665
--- /dev/null
+++ b/tests/maxr_d.c
@@ -0,0 +1,28 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_FPR(0, EJIT_TYPE(double)),
+		EJIT_OPERAND_FPR(1, EJIT_TYPE(double))
+	};
+	/* function under test takes two double operands in fpr 0 and fpr 1 */
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(double), 2, operands);
+	/* body: fpr2 = max(fpr0, fpr1), then return fpr2 */
+	ejit_maxr_d(f, EJIT_FPR(2), EJIT_FPR(0), EJIT_FPR(1));
+	ejit_retr_d(f, EJIT_FPR(2));
+	/* NOTE(review): 0 and 3 look like the gpr/fpr counts used by f - confirm */
+	ejit_select_compile_func(f, 0, 3, EJIT_USE64(double), do_jit, true);
+	/* both positive: the larger operand, 69, must come back */
+	assert(erfd2(f, EJIT_ARG(42., double), EJIT_ARG(69., double)
+	             ) == 69.);
+	/* both negative: -42 is the larger operand */
+	assert(erfd2(f, EJIT_ARG(-42., double), EJIT_ARG(-69., double)
+	             ) == -42.);
+
+	ejit_destroy_func(f);
+}
diff --git a/tests/maxr_f.c b/tests/maxr_f.c
new file mode 100644
index 0000000..581f867
--- /dev/null
+++ b/tests/maxr_f.c
@@ -0,0 +1,28 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_FPR(0, EJIT_TYPE(float)),
+		EJIT_OPERAND_FPR(1, EJIT_TYPE(float))
+	};
+	/* function under test takes two float operands in fpr 0 and fpr 1 */
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(float), 2, operands);
+	/* body: fpr2 = max(fpr0, fpr1), then return fpr2 */
+	ejit_maxr_f(f, EJIT_FPR(2), EJIT_FPR(0), EJIT_FPR(1));
+	ejit_retr_f(f, EJIT_FPR(2));
+	/* NOTE(review): 0 and 3 look like the gpr/fpr counts used by f - confirm */
+	ejit_select_compile_func(f, 0, 3, EJIT_USE64(float), do_jit, true);
+	/* both positive: the larger operand, 69, must come back */
+	assert(erff2(f, EJIT_ARG(42., float), EJIT_ARG(69., float)
+	             ) == 69.);
+	/* both negative: -42 is the larger operand */
+	assert(erff2(f, EJIT_ARG(-42., float), EJIT_ARG(-69., float)
+	             ) == -42.);
+
+	ejit_destroy_func(f);
+}
diff --git a/tests/minr_d.c b/tests/minr_d.c
new file mode 100644
index 0000000..d0fb7c8
--- /dev/null
+++ b/tests/minr_d.c
@@ -0,0 +1,28 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_FPR(0, EJIT_TYPE(double)),
+		EJIT_OPERAND_FPR(1, EJIT_TYPE(double))
+	};
+	/* function under test takes two double operands in fpr 0 and fpr 1 */
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(double), 2, operands);
+	/* body: fpr2 = min(fpr0, fpr1), then return fpr2 */
+	ejit_minr_d(f, EJIT_FPR(2), EJIT_FPR(0), EJIT_FPR(1));
+	ejit_retr_d(f, EJIT_FPR(2));
+	/* NOTE(review): 0 and 3 look like the gpr/fpr counts used by f - confirm */
+	ejit_select_compile_func(f, 0, 3, EJIT_USE64(double), do_jit, true);
+	/* both positive: the smaller operand, 42, must come back */
+	assert(erfd2(f, EJIT_ARG(42., double), EJIT_ARG(69., double)
+	             ) == 42.);
+	/* both negative: -69 is the smaller operand */
+	assert(erfd2(f, EJIT_ARG(-42., double), EJIT_ARG(-69., double)
+	             ) == -69.);
+
+	ejit_destroy_func(f);
+}
diff --git a/tests/minr_f.c b/tests/minr_f.c
new file mode 100644
index 0000000..b02ec06
--- /dev/null
+++ b/tests/minr_f.c
@@ -0,0 +1,28 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_FPR(0, EJIT_TYPE(float)),
+		EJIT_OPERAND_FPR(1, EJIT_TYPE(float))
+	};
+	/* function under test takes two float operands in fpr 0 and fpr 1 */
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(float), 2, operands);
+	/* body: fpr2 = min(fpr0, fpr1), then return fpr2 */
+	ejit_minr_f(f, EJIT_FPR(2), EJIT_FPR(0), EJIT_FPR(1));
+	ejit_retr_f(f, EJIT_FPR(2));
+	/* NOTE(review): 0 and 3 look like the gpr/fpr counts used by f - confirm */
+	ejit_select_compile_func(f, 0, 3, EJIT_USE64(float), do_jit, true);
+	/* both positive: the smaller operand, 42, must come back */
+	assert(erff2(f, EJIT_ARG(42., float), EJIT_ARG(69., float)
+	             ) == 42.);
+	/* both negative: -69 is the smaller operand */
+	assert(erff2(f, EJIT_ARG(-42., float), EJIT_ARG(-69., float)
+	             ) == -69.);
+
+	ejit_destroy_func(f);
+}
-- 
cgit v1.2.3


From 057131cb20fb1c46e90adecfb4a16eb62f100580 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Wed, 9 Apr 2025 20:20:48 +0300
Subject: add taili

---
 include/ejit/ejit.h   |  3 +++
 src/common.h          |  2 ++
 src/compile/compile.c | 35 +++++++++++++++++++++++++++++++++++
 src/ejit.c            | 44 +++++++++++++++++++++++++++++++++++++++++++-
 src/interp.c          | 14 ++++++++++++++
 tests/taili.c         | 39 +++++++++++++++++++++++++++++++++++++++
 tests/tailr.c         | 41 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 tests/taili.c
 create mode 100644 tests/tailr.c

(limited to 'tests')

diff --git a/include/ejit/ejit.h b/include/ejit/ejit.h
index aa42eca..5baaab6 100644
--- a/include/ejit/ejit.h
+++ b/include/ejit/ejit.h
@@ -457,6 +457,9 @@ struct ejit_label ejit_label(struct ejit_func *s);
 void ejit_tailr(struct ejit_func *s, struct ejit_gpr target,
 		size_t argc, const struct ejit_operand args[argc]);
 
+void ejit_taili(struct ejit_func *s, struct ejit_func *f,
+		size_t argc, const struct ejit_operand args[argc]);
+
 void ejit_calli(struct ejit_func *s, struct ejit_func *f, size_t argc,
                 const struct ejit_operand args[argc]);
 
diff --git a/src/common.h b/src/common.h
index 3512717..333c794 100644
--- a/src/common.h
+++ b/src/common.h
@@ -219,7 +219,9 @@ enum ejit_opcode {
 	EJIT_OP_ESCAPEI_D,
 
 	EJIT_OP_CALLI,
+
 	EJIT_OP_TAILR,
+	EJIT_OP_TAILI,
 
 	EJIT_OP_RETR,
 	EJIT_OP_RETI,
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 54d79f2..60059d5 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -2505,6 +2505,40 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			break;
 		}
 
+		case EJIT_OP_TAILI: {
+			/* a bit of copy-paste between this and the next func,
+			 * hmm */
+			assert(operands_len(&direct) <= 2);
+			struct ejit_func *callee = (struct ejit_func *)(uintptr_t)i.o; /* named so it does not shadow f */
+			assert(callee->direct_call);
+			/* move the (at most two) args into R1/R2 before the frame is torn down */
+			jit_operand_t regs[2] = {
+				jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1),
+				jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2)
+			};
+			jit_move_operands(j, regs, direct.buf, operands_len(&direct));
+			/* frame_size is saved here and restored once the jump has been emitted */
+			int frame_size = j->frame_size;
+			jit_shrink_stack(j, stack);
+			jit_leave_jit_abi(j, gprs, fprs, frame);
+
+			/* now move args into place */
+			jit_operand_t args[2] = {};
+			foreach_vec(oi, direct) {
+				args[oi] = *operands_at(&direct, oi);
+			}
+
+			jit_locate_args(j, operands_len(&direct), args);
+			jit_move_operands(j, args, regs, operands_len(&direct));
+			jit_jmpi(j, callee->direct_call);
+			j->frame_size = frame_size;
+
+			operands_reset(&src);
+			operands_reset(&dst);
+			operands_reset(&direct);
+			break;
+		}
+
 		case EJIT_OP_TAILR: {
 			/* this is admittedly a slightly roundabout way of
 			 * implementing tail calls and is arguably not the most
@@ -2518,6 +2552,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			jit_gpr_t r = getloc(f, j, i.r1, 0);
 			jit_ldxi(j, JIT_R0, r, offsetof(struct ejit_func, direct_call));
 #if defined(DEBUG)
+			/** @todo other checks? */
 			jit_reloc_t assert_reloc = jit_bnei(j, JIT_R0, 0); /* null */
 			jit_calli_1(j, assert_helper,
 					jit_operand_imm(JIT_OPERAND_ABI_POINTER,
diff --git a/src/ejit.c b/src/ejit.c
index 059d5d4..0701b90 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -456,12 +456,54 @@ void ejit_patch(struct ejit_func *f, struct ejit_reloc r, struct ejit_label l)
 	*insns_at(&f->insns, r.insn) = i;
 }
 
+void ejit_taili(struct ejit_func *s, struct ejit_func *f,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	assert(s->rtype == f->rtype);
+	/* emit a tail call from s to f: one ARG* insn per operand, then TAILI */
+	s->max_args = argc > s->max_args ? argc : s->max_args;
+	check_operands(f, argc, args); /* operands are checked against the callee f */
+	/* count operands per register class so the limits can be asserted below */
+	size_t gpr_args = 0, fpr_args = 0;
+	for (size_t i = 0; i < argc; ++i) {
+		switch (args[i].kind) {
+		case EJIT_OPERAND_GPR:
+			gpr_args++;
+			emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_FPR:
+			fpr_args++;
+			emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_IMM:
+			gpr_args++; /* integer immediates count against the gpr limit */
+			emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r);
+			break;
+
+		case EJIT_OPERAND_FLT:
+			fpr_args++; /* float immediates count against the fpr limit */
+			emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d);
+			break;
+
+		default: abort();
+		}
+	}
+	/* checked after emission; any fpr/float operand trips this assert */
+	assert(gpr_args <= 2 && fpr_args == 0
+			&& "only 2 gpr args and 0 fpr args supported in tail calls for now");
+	emit_insn_op(s, EJIT_OP_TAILI, f);
+}
+
 void ejit_tailr(struct ejit_func *s, struct ejit_gpr target, size_t argc,
                 const struct ejit_operand args[argc])
 {
 	s->max_args = argc > s->max_args ? argc : s->max_args;
 
-	/** @todo check that gpr_args <= 2 and fpr_args <= 3 (?) */
+	/* operands must match */
+	check_operands(s, argc, args);
+
 	size_t gpr_args = 0, fpr_args = 0;
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
diff --git a/src/interp.c b/src/interp.c
index 132ba4a..268bfb3 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -214,7 +214,9 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 		[EJIT_OP_PARAM_F] = &&PARAM_F,
 
 		[EJIT_OP_CALLI] = &&CALLI,
+
 		[EJIT_OP_TAILR] = &&TAILR,
+		[EJIT_OP_TAILI] = &&TAILI,
 
 		[EJIT_OP_ESCAPEI_I] = &&ESCAPEI_I,
 		[EJIT_OP_ESCAPEI_F] = &&ESCAPEI_F,
@@ -1056,6 +1058,18 @@ top:
 	args[argc++] = a;
 	DISPATCH();
 
+	DO(TAILI);
+	f = (struct ejit_func *)(uintptr_t)i.o;
+
+	assert(!f->direct_call && "trying to interpret compiled fun");
+
+	paramc = argc;
+	for (size_t i = 0; i < argc; ++i)
+		params[i] = args[i];
+
+	goto top;
+	DISPATCH();
+
 	DO(TAILR);
 	f = (struct ejit_func *)gpr[i.r1];
 
diff --git a/tests/taili.c b/tests/taili.c
new file mode 100644
index 0000000..cc09f59
--- /dev/null
+++ b/tests/taili.c
@@ -0,0 +1,39 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_INT32), /* s */
+		EJIT_OPERAND_GPR(1, EJIT_INT32)  /* n */
+	};
+	/* f(s, n): adds n, n - 1, ..., 1 onto s by tail-calling itself until n == 0 */
+	struct ejit_func *f = ejit_create_func(EJIT_INT32, 2, operands);
+
+	/* n == 0, return s */
+	struct ejit_reloc r = ejit_bnei(f, EJIT_GPR(1), 0);
+	ejit_retr(f, EJIT_GPR(0));
+	ejit_patch(f, r, ejit_label(f));
+
+	/* s += n */
+	ejit_addr(f, EJIT_GPR(0), EJIT_GPR(0), EJIT_GPR(1));
+
+	/* n -= 1 */
+	ejit_subi(f, EJIT_GPR(1), EJIT_GPR(1), 1);
+	/* tail call f(s, n) with the updated registers; the callee is given directly */
+	struct ejit_operand args[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_INT32), /* s */
+		EJIT_OPERAND_GPR(1, EJIT_INT32)  /* n */
+	};
+	ejit_taili(f, f, 2, args);
+
+	ejit_select_compile_func(f, 2, 0, EJIT_USE64(uintptr_t), do_jit, true);
+	/* expected: 1 + 2 + ... + 1000000 = 500000500000, i.e. 1784293664 modulo 2^32 */
+	/* arbitrary number but large enough to most likely cause a stack fault
+	 * if the tail call leaks memory or something */
+	assert((int32_t)erfi2(f, EJIT_ARG(0, int32_t), EJIT_ARG(1000000, int32_t)) == 1784293664);
+	ejit_destroy_func(f);
+}
diff --git a/tests/tailr.c b/tests/tailr.c
new file mode 100644
index 0000000..69ad44b
--- /dev/null
+++ b/tests/tailr.c
@@ -0,0 +1,41 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_INT32), /* s */
+		EJIT_OPERAND_GPR(1, EJIT_INT32)  /* n */
+	};
+	/* f(s, n): adds n, n - 1, ..., 1 onto s by tail-calling itself until n == 0 */
+	struct ejit_func *f = ejit_create_func(EJIT_INT32, 2, operands);
+
+	/* n == 0, return s */
+	struct ejit_reloc r = ejit_bnei(f, EJIT_GPR(1), 0);
+	ejit_retr(f, EJIT_GPR(0));
+	ejit_patch(f, r, ejit_label(f));
+
+	/* s += n */
+	ejit_addr(f, EJIT_GPR(0), EJIT_GPR(0), EJIT_GPR(1));
+
+	/* n -= 1 */
+	ejit_subi(f, EJIT_GPR(1), EJIT_GPR(1), 1);
+
+	struct ejit_operand args[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_INT32), /* s */
+		EJIT_OPERAND_GPR(1, EJIT_INT32)  /* n */
+	};
+	/* the callee is passed through a register: gpr2 holds the address of f itself */
+	ejit_movi(f, EJIT_GPR(2), (uintptr_t)f);
+	ejit_tailr(f, EJIT_GPR(2), 2, args);
+	/* three gprs are requested here because gpr2 carries the call target */
+	ejit_select_compile_func(f, 3, 0, EJIT_USE64(uintptr_t), do_jit, true);
+	/* expected: 1 + 2 + ... + 1000000 = 500000500000, i.e. 1784293664 modulo 2^32 */
+	/* arbitrary number but large enough to most likely cause a stack fault
+	 * if the tail call leaks memory or something */
+	assert((int32_t)erfi2(f, EJIT_ARG(0, int32_t), EJIT_ARG(1000000, int32_t)) == 1784293664);
+	ejit_destroy_func(f);
+}
-- 
cgit v1.2.3


From be5c83ba8e57bc67beee41bc2c7227e6b8ebd9d5 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Wed, 9 Apr 2025 22:20:01 +0300
Subject: add callr_i/l/f/d

---
 include/ejit/ejit.h   |  33 ++++++++----
 src/common.h          |   6 ++-
 src/compile/compile.c |  73 +++++++++++++++++++++-----
 src/ejit.c            | 142 ++++++++++++++++++++++++++++++++++----------------
 src/interp.c          |  41 +++++++++++----
 tests/calli.c         |  41 +++++++++++++++
 tests/callr_i.c       |  42 +++++++++++++++
 7 files changed, 298 insertions(+), 80 deletions(-)
 create mode 100644 tests/calli.c
 create mode 100644 tests/callr_i.c

(limited to 'tests')

diff --git a/include/ejit/ejit.h b/include/ejit/ejit.h
index 5baaab6..ab06d8f 100644
--- a/include/ejit/ejit.h
+++ b/include/ejit/ejit.h
@@ -460,20 +460,33 @@ void ejit_tailr(struct ejit_func *s, struct ejit_gpr target,
 void ejit_taili(struct ejit_func *s, struct ejit_func *f,
 		size_t argc, const struct ejit_operand args[argc]);
 
-void ejit_calli(struct ejit_func *s, struct ejit_func *f, size_t argc,
-                const struct ejit_operand args[argc]);
+/* return type can be deduced */
+void ejit_calli(struct ejit_func *s, struct ejit_func *f,
+		size_t argc, const struct ejit_operand args[argc]);
 
-void ejit_escapei_i(struct ejit_func *s, ejit_escape_i_t f, size_t argc,
-                  const struct ejit_operand args[argc]);
+void ejit_callr_i(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc]);
 
-void ejit_escapei_l(struct ejit_func *s, ejit_escape_l_t f, size_t argc,
-                  const struct ejit_operand args[argc]);
+void ejit_callr_l(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc]);
 
-void ejit_escapei_f(struct ejit_func *s, ejit_escape_f_t f, size_t argc,
-                    const struct ejit_operand args[argc]);
+void ejit_callr_f(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc]);
 
-void ejit_escapei_d(struct ejit_func *s, ejit_escape_d_t f, size_t argc,
-                    const struct ejit_operand args[argc]);
+void ejit_callr_d(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc]);
+
+void ejit_escapei_i(struct ejit_func *s, ejit_escape_i_t f,
+		size_t argc, const struct ejit_operand args[argc]);
+
+void ejit_escapei_l(struct ejit_func *s, ejit_escape_l_t f,
+		size_t argc, const struct ejit_operand args[argc]);
+
+void ejit_escapei_f(struct ejit_func *s, ejit_escape_f_t f,
+		size_t argc, const struct ejit_operand args[argc]);
+
+void ejit_escapei_d(struct ejit_func *s, ejit_escape_d_t f,
+		size_t argc, const struct ejit_operand args[argc]);
 
 void ejit_ret(struct ejit_func *s);
 void ejit_retr(struct ejit_func *s, struct ejit_gpr r0);
diff --git a/src/common.h b/src/common.h
index 333c794..dc970f0 100644
--- a/src/common.h
+++ b/src/common.h
@@ -218,8 +218,12 @@ enum ejit_opcode {
 	EJIT_OP_ESCAPEI_F,
 	EJIT_OP_ESCAPEI_D,
 
-	EJIT_OP_CALLI,
+	EJIT_OP_CALLR_I,
+	EJIT_OP_CALLR_L,
+	EJIT_OP_CALLR_F,
+	EJIT_OP_CALLR_D,
 
+	EJIT_OP_CALLI,
 	EJIT_OP_TAILR,
 	EJIT_OP_TAILI,
 
diff --git a/src/compile/compile.c b/src/compile/compile.c
index 60059d5..bfcb12d 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -2476,17 +2476,10 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			break;
 		}
 
+		case EJIT_OP_ESCAPEI_I:
 		case EJIT_OP_ESCAPEI_L:
-#if __WORDSIZE == 64
-			  /* fallthrough */
-#else
-			  assert(0 && "trying to compile escapei_l on 32bit arch");
-			  break;
-#endif
-
-		case EJIT_OP_ESCAPEI_D:
 		case EJIT_OP_ESCAPEI_F:
-		case EJIT_OP_ESCAPEI_I: {
+		case EJIT_OP_ESCAPEI_D: {
 			save_caller_save_regs(f, j);
 
 			jit_operand_t args[2] = {
@@ -2550,15 +2543,25 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 
 			assert(operands_len(&direct) <= 2);
 			jit_gpr_t r = getloc(f, j, i.r1, 0);
-			jit_ldxi(j, JIT_R0, r, offsetof(struct ejit_func, direct_call));
+
 #if defined(DEBUG)
-			/** @todo other checks? */
-			jit_reloc_t assert_reloc = jit_bnei(j, JIT_R0, 0); /* null */
+			jit_ldxi(j, JIT_R1, r, offsetof(struct ejit_func, rtype));
+			jit_reloc_t rtype_reloc = jit_beqi(j, JIT_R1, f->rtype);
+			jit_calli_1(j, assert_helper,
+					jit_operand_imm(JIT_OPERAND_ABI_POINTER,
+						(jit_imm_t)"trying to tail call different rtype"));
+
+			jit_patch_here(j, rtype_reloc);
+
+			jit_ldxi(j, JIT_R1, r, offsetof(struct ejit_func, direct_call));
+			jit_reloc_t direct_reloc = jit_bnei(j, JIT_R1, 0); /* null */
 			jit_calli_1(j, assert_helper,
 					jit_operand_imm(JIT_OPERAND_ABI_POINTER,
 						(jit_imm_t)"trying to tail call interpreted function"));
-			jit_patch_here(j, assert_reloc);
+			jit_patch_here(j, direct_reloc);
 #endif
+
+			jit_ldxi(j, JIT_R0, r, offsetof(struct ejit_func, direct_call));
 			jit_operand_t regs[2] = {
 				jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1),
 				jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2)
@@ -2591,6 +2594,50 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			break;
 		}
 
+		case EJIT_OP_CALLR_I:
+		case EJIT_OP_CALLR_L:
+		case EJIT_OP_CALLR_F:
+		case EJIT_OP_CALLR_D: {
+			save_caller_save_regs(f, j);
+			/* target holds the struct ejit_func pointer of the callee */
+			jit_gpr_t target = getgpr(f, i.r1, 0);
+
+			/* check if there's a direct call available */
+			jit_ldxi(j, JIT_R1, target, offsetof(struct ejit_func, direct_call));
+			jit_reloc_t direct_reloc = jit_beqi(j, JIT_R1, 0); /* test the register that was just loaded */
+			/* we can do a jit -> jit call */
+			jit_callr(j, JIT_R1, operands_len(&direct), direct.buf);
+			jit_reloc_t out_reloc = jit_jmp(j);
+			/* direct_call was NULL: fall through to the interpreter path */
+			jit_patch_here(j, direct_reloc);
+			/* NOTE(review): assumes compile_imm_call leaves target intact - confirm */
+			/* we must do a jit -> bytecode call */
+			jit_operand_t args[3] = {
+				jit_operand_gpr(JIT_OPERAND_ABI_POINTER, target), /* JIT_R1 is NULL on this path */
+				jit_operand_imm(JIT_OPERAND_ABI_WORD, operands_len(&src) / 2),
+				/* compile_imm_call populates JIT_R0 with the
+				 * argument stack address */
+				jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0)
+			};
+			void *call = NULL;
+			switch (i.op) {
+			case EJIT_OP_CALLR_I: call = ejit_run_func_i; break;
+			case EJIT_OP_CALLR_L: call = ejit_run_func_l; break;
+			case EJIT_OP_CALLR_F: call = ejit_run_func_f; break;
+			case EJIT_OP_CALLR_D: call = ejit_run_func_d; break;
+			default: abort();
+			}
+
+			compile_imm_call(j, &src, &dst, call, 3, args);
+			jit_patch_here(j, out_reloc);
+			restore_caller_save_regs(f, j);
+
+			operands_reset(&src);
+			operands_reset(&dst);
+			operands_reset(&direct);
+			break;
+		}
+
 		case EJIT_OP_CALLI: {
 			save_caller_save_regs(f, j);
 
diff --git a/src/ejit.c b/src/ejit.c
index 0701b90..e8ff99e 100644
--- a/src/ejit.c
+++ b/src/ejit.c
@@ -545,10 +545,22 @@ void ejit_calli(struct ejit_func *s, struct ejit_func *f, size_t argc,
 
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r)); break;
-		case EJIT_OPERAND_FPR: emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r)); break;
-		case EJIT_OPERAND_IMM: emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d); break;
+		case EJIT_OPERAND_GPR:
+			emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_FPR:
+			emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_IMM:
+			emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r);
+			break;
+
+		case EJIT_OPERAND_FLT:
+			emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d);
+			break;
+
 		default: abort();
 		}
 	}
@@ -556,73 +568,113 @@ void ejit_calli(struct ejit_func *s, struct ejit_func *f, size_t argc,
 	emit_insn_op(s, EJIT_OP_CALLI, f);
 }
 
-void ejit_escapei_i(struct ejit_func *s, ejit_escape_i_t f, size_t argc,
-                  const struct ejit_operand args[argc])
+static void ejit_callr(struct ejit_func *s, enum ejit_opcode op, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc])
 {
+	s->use_64 |= (op == EJIT_OP_CALLR_L);
 	s->max_args = argc > s->max_args ? argc : s->max_args;
+	/* use_64 is sticky: a non-_l call must not clear a flag set by an earlier insn */
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r)); break;
-		case EJIT_OPERAND_FPR: emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r)); break;
-		case EJIT_OPERAND_IMM: emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d); break;
+		case EJIT_OPERAND_GPR:
+			emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_FPR:
+			emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_IMM:
+			emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r);
+			break;
+
+		case EJIT_OPERAND_FLT:
+			emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d);
+			break;
+
 		default: abort();
 		}
 	}
 
-	emit_insn_op(s, EJIT_OP_ESCAPEI_I, f);
+	emit_insn_oxr(s, op, target); /* the call insn itself; callee pointer is read from target */
 }
 
-void ejit_escapei_l(struct ejit_func *s, ejit_escape_l_t f, size_t argc,
-                  const struct ejit_operand args[argc])
+void ejit_callr_i(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc])
 {
-	s->use_64 = true;
-	s->max_args = argc > s->max_args ? argc : s->max_args;
-	for (size_t i = 0; i < argc; ++i) {
-		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r)); break;
-		case EJIT_OPERAND_FPR: emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r)); break;
-		case EJIT_OPERAND_IMM: emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d); break;
-		default: abort();
-		}
-	}
+	ejit_callr(s, EJIT_OP_CALLR_I, target, argc, args);
+}
+
+void ejit_callr_l(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	ejit_callr(s, EJIT_OP_CALLR_L, target, argc, args);
+}
 
-	emit_insn_op(s, EJIT_OP_ESCAPEI_L, f);
+void ejit_callr_f(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	ejit_callr(s, EJIT_OP_CALLR_F, target, argc, args);
 }
 
-void ejit_escapei_f(struct ejit_func *s, ejit_escape_f_t f, size_t argc,
-                    const struct ejit_operand args[argc])
+void ejit_callr_d(struct ejit_func *s, struct ejit_gpr target,
+		size_t argc, const struct ejit_operand args[argc])
 {
+	ejit_callr(s, EJIT_OP_CALLR_D, target, argc, args);
+}
+
+static void ejit_escapei(struct ejit_func *s, enum ejit_opcode op, void *f,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	s->use_64 |= (op == EJIT_OP_ESCAPEI_L); /* sticky: only escapei_l raises it, nothing here clears it */
 	s->max_args = argc > s->max_args ? argc : s->max_args;
 	for (size_t i = 0; i < argc; ++i) {
 		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r)); break;
-		case EJIT_OPERAND_FPR: emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r)); break;
-		case EJIT_OPERAND_IMM: emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d); break;
+		case EJIT_OPERAND_GPR:
+			emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_FPR:
+			emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r));
+			break;
+
+		case EJIT_OPERAND_IMM:
+			emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r);
+			break;
+
+		case EJIT_OPERAND_FLT:
+			emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d);
+			break;
+
 		default: abort();
 		}
 	}
 
-	emit_insn_op(s, EJIT_OP_ESCAPEI_F, f);
+	emit_insn_op(s, op, f); /* op selects the i/l/f/d escape variant; f is the native callee */
 }
 
-void ejit_escapei_d(struct ejit_func *s, ejit_escape_d_t f, size_t argc,
-                    const struct ejit_operand args[argc])
+void ejit_escapei_i(struct ejit_func *s, ejit_escape_i_t f,
+		size_t argc, const struct ejit_operand args[argc])
 {
-	s->max_args = argc > s->max_args ? argc : s->max_args;
-	for (size_t i = 0; i < argc; ++i) {
-		switch (args[i].kind) {
-		case EJIT_OPERAND_GPR: emit_insn_ar(s, EJIT_OP_ARG, i, args[i].type, EJIT_GPR(args[i].r)); break;
-		case EJIT_OPERAND_FPR: emit_insn_af(s, EJIT_OP_ARG_F, i, args[i].type, EJIT_FPR(args[i].r)); break;
-		case EJIT_OPERAND_IMM: emit_insn_ai(s, EJIT_OP_ARG_I, i, args[i].type, args[i].r); break;
-		case EJIT_OPERAND_FLT: emit_insn_ad(s, EJIT_OP_ARG_FI, i, args[i].type, args[i].d); break;
-		default: abort();
-		}
-	}
+	ejit_escapei(s, EJIT_OP_ESCAPEI_I, f, argc, args);
+}
+
+void ejit_escapei_l(struct ejit_func *s, ejit_escape_l_t f,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	ejit_escapei(s, EJIT_OP_ESCAPEI_L, f, argc, args);
+}
 
-	emit_insn_op(s, EJIT_OP_ESCAPEI_D, f);
+void ejit_escapei_f(struct ejit_func *s, ejit_escape_f_t f,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	ejit_escapei(s, EJIT_OP_ESCAPEI_F, f, argc, args);
+}
+
+void ejit_escapei_d(struct ejit_func *s, ejit_escape_d_t f,
+		size_t argc, const struct ejit_operand args[argc])
+{
+	ejit_escapei(s, EJIT_OP_ESCAPEI_D, f, argc, args);
 }
 
 void ejit_retval(struct ejit_func *s, struct ejit_gpr r0)
diff --git a/src/interp.c b/src/interp.c
index 268bfb3..6f94f98 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -213,14 +213,18 @@ union interp_ret ejit_run(struct ejit_func *f, size_t paramc, struct ejit_arg pa
 		[EJIT_OP_PARAM] = &&PARAM,
 		[EJIT_OP_PARAM_F] = &&PARAM_F,
 
+		[EJIT_OP_CALLR_I] = &&CALLR_I,
+		[EJIT_OP_CALLR_L] = &&CALLR_L,
+		[EJIT_OP_CALLR_F] = &&CALLR_F,
+		[EJIT_OP_CALLR_D] = &&CALLR_D,
 		[EJIT_OP_CALLI] = &&CALLI,
 
 		[EJIT_OP_TAILR] = &&TAILR,
 		[EJIT_OP_TAILI] = &&TAILI,
 
 		[EJIT_OP_ESCAPEI_I] = &&ESCAPEI_I,
-		[EJIT_OP_ESCAPEI_F] = &&ESCAPEI_F,
 		[EJIT_OP_ESCAPEI_L] = &&ESCAPEI_L,
+		[EJIT_OP_ESCAPEI_F] = &&ESCAPEI_F,
 		[EJIT_OP_ESCAPEI_D] = &&ESCAPEI_D,
 
 		[EJIT_OP_START] = &&START,
@@ -1084,33 +1088,48 @@ top:
 	goto top;
 	DISPATCH();
 
+	DO(CALLR_I);
+	retval = ejit_run((struct ejit_func *)gpr[i.r1], argc, args, NULL);
+	argc = 0;
+	DISPATCH();
+
+	DO(CALLR_L);
+	retval = ejit_run((struct ejit_func *)gpr[i.r1], argc, args, NULL);
+	argc = 0;
+	DISPATCH();
+
+	DO(CALLR_F);
+	retval = ejit_run((struct ejit_func *)gpr[i.r1], argc, args, NULL);
+	argc = 0;
+	DISPATCH();
+
+	DO(CALLR_D);
+	retval = ejit_run((struct ejit_func *)gpr[i.r1], argc, args, NULL);
+	argc = 0;
+	DISPATCH();
+
 	DO(CALLI);
-	struct ejit_func *f = i.p;
-	retval = ejit_run(f, argc, args, NULL);
+	retval = ejit_run((struct ejit_func *)i.p, argc, args, NULL);
 	argc = 0;
 	DISPATCH();
 
 	DO(ESCAPEI_I);
-	ejit_escape_i_t f = i.p;
-	retval.i = f(argc, args);
+	retval.i = ((ejit_escape_i_t)i.p)(argc, args);
 	argc = 0;
 	DISPATCH();
 
 	DO(ESCAPEI_L);
-	ejit_escape_l_t f = i.p;
-	retval.i = f(argc, args);
+	retval.i = ((ejit_escape_l_t)i.p)(argc, args);
 	argc = 0;
 	DISPATCH();
 
 	DO(ESCAPEI_F);
-	ejit_escape_f_t f = i.p;
-	retval.f = f(argc, args);
+	retval.f = ((ejit_escape_f_t)i.p)(argc, args);
 	argc = 0;
 	DISPATCH();
 
 	DO(ESCAPEI_D);
-	ejit_escape_d_t f = i.p;
-	retval.f = f(argc, args);
+	retval.f = ((ejit_escape_d_t)i.p)(argc, args);
 	argc = 0;
 	DISPATCH();
 
diff --git a/tests/calli.c b/tests/calli.c
new file mode 100644
index 0000000..991e97d
--- /dev/null
+++ b/tests/calli.c
@@ -0,0 +1,41 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+static struct ejit_func *compile(bool do_jit)
+{
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_TYPE(long)),
+		EJIT_OPERAND_GPR(1, EJIT_TYPE(long))
+	};
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(long), 2, operands);
+	ejit_addr(f, EJIT_GPR(0), EJIT_GPR(0), EJIT_GPR(1)); /* callee body: gpr0 += gpr1 */
+	ejit_retr(f, EJIT_GPR(0)); /* return the sum */
+	ejit_select_compile_func(f, 2, 0, EJIT_USE64(long), do_jit, true);
+	return f; /* the caller destroys it with ejit_destroy_func */
+}
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_func *target = compile(do_jit); /* callee: returns the sum of its two operands */
+
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_TYPE(long)),
+		EJIT_OPERAND_GPR(1, EJIT_TYPE(long))
+	};
+	/* caller: forwards its own two operands to target and returns the result */
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(long), 2, operands);
+	ejit_calli(f, target, 2, operands);
+	ejit_retval(f, EJIT_GPR(0)); /* fetch the call's return value into gpr0 */
+	ejit_retr(f, EJIT_GPR(0));
+	ejit_select_compile_func(f, 2, 0, EJIT_USE64(long), do_jit, true);
+	/* 42 + 69 == 111 must come back through the call */
+	assert(erfi2(f,
+	                       EJIT_ARG(42, long),
+	                       EJIT_ARG(69, long)) == 111);
+
+	ejit_destroy_func(target);
+	ejit_destroy_func(f);
+}
diff --git a/tests/callr_i.c b/tests/callr_i.c
new file mode 100644
index 0000000..00b5374
--- /dev/null
+++ b/tests/callr_i.c
@@ -0,0 +1,42 @@
+#include <ejit/ejit.h>
+#include <assert.h>
+#include "do_jit.h"
+
+static struct ejit_func *compile(bool do_jit)
+{
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_TYPE(long)),
+		EJIT_OPERAND_GPR(1, EJIT_TYPE(long))
+	};
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(long), 2, operands);
+	ejit_addr(f, EJIT_GPR(0), EJIT_GPR(0), EJIT_GPR(1)); /* callee body: gpr0 += gpr1 */
+	ejit_retr(f, EJIT_GPR(0)); /* return the sum */
+	ejit_select_compile_func(f, 2, 0, EJIT_USE64(long), do_jit, true);
+	return f; /* the caller destroys it with ejit_destroy_func */
+}
+
+int main(int argc, char *argv[])
+{
+	(void)argv;
+	bool do_jit = argc > 1; /* true when at least one command-line argument is given */
+	struct ejit_func *target = compile(do_jit); /* callee: returns the sum of its two operands */
+
+	struct ejit_operand operands[2] = {
+		EJIT_OPERAND_GPR(0, EJIT_TYPE(long)),
+		EJIT_OPERAND_GPR(1, EJIT_TYPE(long))
+	};
+	/* caller: loads target's address into gpr2 and calls through that register */
+	struct ejit_func *f = ejit_create_func(EJIT_TYPE(long), 2, operands);
+	ejit_movi(f, EJIT_GPR(2), (uintptr_t)target);
+	ejit_callr_i(f, EJIT_GPR(2), 2, operands);
+	ejit_retval(f, EJIT_GPR(0)); /* fetch the call's return value into gpr0 */
+	ejit_retr(f, EJIT_GPR(0));
+	ejit_select_compile_func(f, 3, 0, EJIT_USE64(long), do_jit, true);
+	/* 42 + 69 == 111 must come back through the indirect call */
+	assert(erfi2(f,
+	                       EJIT_ARG(42, long),
+	                       EJIT_ARG(69, long)) == 111);
+
+	ejit_destroy_func(target);
+	ejit_destroy_func(f);
+}
-- 
cgit v1.2.3


From 531d307d310881e69efc8ae8c8119f5f5799e0f9 Mon Sep 17 00:00:00 2001
From: Kimplul <kimi.h.kuparinen@gmail.com>
Date: Thu, 10 Apr 2025 22:22:33 +0300
Subject: improve tests a bit

+ Runs tests on all arches that I have easy access to
---
 Makefile              | 125 ++++++++++++++++++++++++++++++++++++++++++++++----
 scripts/gen-tests     |   2 +
 src/compile/compile.c |  53 +++++++++++++++------
 src/interp.c          |   2 +-
 tests/makefile        |  10 ----
 5 files changed, 157 insertions(+), 35 deletions(-)

(limited to 'tests')

diff --git a/Makefile b/Makefile
index 7a482b7..548e5b8 100644
--- a/Makefile
+++ b/Makefile
@@ -12,16 +12,121 @@ check: all
 	@./scripts/gen-tests $$(echo tests/*.c)
 	$(MAKE) -f tests/makefile check
 
-# this kicks all unrecognised targets to the client script.
-# note that trying to compile individual files, e.g.
-#
-#	make kernel.elf
-#
-# will not work, you would need
-#
-#	make -f scripts/makefile kernel.elf
-#
-# instead
+# architectures supported by the jit
+.PHONY: check_linux_amd64
+check_linux_amd64:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/x86_64-linux-gnu \
+	$(MAKE) ARCH=amd64 CROSS_COMPILE=x86_64-linux-gnu- check
+
+.PHONY: check_linux_x86
+check_linux_x86:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/i686-linux-gnu \
+	$(MAKE) ARCH=x86 CROSS_COMPILE=i686-linux-gnu- check
+
+.PHONY: check_linux_aarch64
+check_linux_aarch64:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \
+	$(MAKE) ARCH=aarch64 CROSS_COMPILE=aarch64-linux-gnu- check
+
+.PHONY: check_linux_armhf
+check_linux_armhf:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \
+	$(MAKE) ARCH=armhf CROSS_COMPILE=arm-linux-gnueabihf- check
+
+.PHONY: check_linux_powerpc64le
+check_linux_powerpc64le:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \
+	$(MAKE) ARCH=powerpc64le CROSS_COMPILE=powerpc64le-linux-gnu- check
+
+# note: older than revision 6 (presumably the MIPS ISA release; TODO confirm)
+.PHONY: check_linux_mips64el
+check_linux_mips64el:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \
+	$(MAKE) ARCH=mips64el CROSS_COMPILE=mips64el-linux-gnuabi64- check
+
+.PHONY: check_linux_mipsel
+check_linux_mipsel:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \
+	$(MAKE) ARCH=mipsel CROSS_COMPILE=mipsel-linux-gnu- check
+
+# architectures not supported by the jit at the moment
+.PHONY: check_linux_powerpc64
+check_linux_powerpc64:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \
+	$(MAKE) ARCH=powerpc64 CROSS_COMPILE=powerpc64-linux-gnu- check
+
+.PHONY: check_linux_powerpc
+check_linux_powerpc:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \
+	$(MAKE) ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu- check
+
+.PHONY: check_linux_sparc64
+check_linux_sparc64:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/sparc64-linux-gnu \
+	$(MAKE) ARCH=sparc64 CROSS_COMPILE=sparc64-linux-gnu- check
+
+.PHONY: check_linux_riscv64
+check_linux_riscv64:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \
+	$(MAKE) ARCH=riscv64 CROSS_COMPILE=riscv64-linux-gnu- check
+
+.PHONY: check_linux_s390x
+check_linux_s390x:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/s390x-linux-gnu \
+	$(MAKE) ARCH=s390x CROSS_COMPILE=s390x-linux-gnu- check
+
+.PHONY: check_linux_alpha
+check_linux_alpha:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/alpha-linux-gnu \
+	$(MAKE) ARCH=alpha CROSS_COMPILE=alpha-linux-gnu- check
+
+.PHONY: check_linux_hppa
+check_linux_hppa:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/hppa-linux-gnu \
+	$(MAKE) ARCH=hppa CROSS_COMPILE=hppa-linux-gnu- check
+
+.PHONY: check_linux_m68k
+check_linux_m68k:
+	$(MAKE) clean
+	QEMU_LD_PREFIX=/usr/m68k-linux-gnu \
+	$(MAKE) ARCH=m68k CROSS_COMPILE=m68k-linux-gnu- check
+
+.PHONY: check_linux
+check_linux:
+	$(MAKE) check_linux_amd64
+	$(MAKE) check_linux_x86
+	$(MAKE) check_linux_aarch64
+	$(MAKE) check_linux_armhf
+	$(MAKE) check_linux_powerpc64le
+	$(MAKE) check_linux_mips64el
+	$(MAKE) check_linux_mipsel
+	$(MAKE) check_linux_powerpc64
+	$(MAKE) check_linux_powerpc
+	$(MAKE) check_linux_sparc64
+	$(MAKE) check_linux_riscv64
+	$(MAKE) check_linux_s390x
+	$(MAKE) check_linux_alpha
+	$(MAKE) check_linux_hppa
+	$(MAKE) check_linux_m68k
+	# compiler+emulator not available or broken
+	#$(MAKE) check_linux_hppa64
+	#$(MAKE) check_linux_arc
+	#$(MAKE) check_linux_sh4
+
 .DEFAULT: setup
 	$(MAKE) -f scripts/makefile $<
 
diff --git a/scripts/gen-tests b/scripts/gen-tests
index deac247..47ff9c9 100755
--- a/scripts/gen-tests
+++ b/scripts/gen-tests
@@ -12,4 +12,6 @@ do
 	echo "-include ${dep}"					>> tests.mk
 	echo "${exe}: ${s} libejit.a"				>> tests.mk
 	echo "	\$(COMPILE_TEST) ${s} libejit.a -o ${exe} -lm"	>> tests.mk
+	echo "	./${exe}  	# bytecode"				>> tests.mk # literal tab: echo '\t' is not portable
+	echo "	./${exe} 1	# jit"				>> tests.mk
 done
diff --git a/src/compile/compile.c b/src/compile/compile.c
index bfcb12d..5432bc1 100644
--- a/src/compile/compile.c
+++ b/src/compile/compile.c
@@ -52,6 +52,19 @@ static void assert_helper(const char *msg)
 	assert(false && msg);
 }
 
+/* Report whether register r is unused by every GPR operand in args[0..argc);
+ * operands of any other kind are ignored. */
+static bool gpr_free(size_t argc, jit_operand_t args[argc], jit_gpr_t r)
+{
+	for (const jit_operand_t *op = args; op != args + argc; ++op) {
+		bool is_gpr = op->kind == JIT_OPERAND_KIND_GPR;
+		if (is_gpr && jit_gpr_regno(op->loc.gpr.gpr) == jit_gpr_regno(r))
+			return false;
+	}
+
+	return true;
+}
+
 static void free_arena(void *arena, size_t size)
 {
 	munmap(arena, size);
@@ -2489,7 +2502,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 				 * argument stack address */
 				jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0)
 			};
-			compile_imm_call(j, &src, &dst, (void *)(uintptr_t)i.o, 2, args);
+			compile_imm_call(j, &src, &dst, (void *)i.p, 2, args);
 			restore_caller_save_regs(f, j);
 
 			operands_reset(&src);
@@ -2502,7 +2515,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 			/* a bit of copy-paste between this and the next func,
 			 * hmm */
 			assert(operands_len(&direct) <= 2);
-			struct ejit_func *f = (struct ejit_func *)(uintptr_t)i.o;
+			struct ejit_func *f = (struct ejit_func *)i.p;
 			assert(f->direct_call);
 
 			jit_operand_t regs[2] = {
@@ -2560,32 +2573,44 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 						(jit_imm_t)"trying to tail call interpreted function"));
 			jit_patch_here(j, direct_reloc);
 #endif
+			size_t argc = operands_len(&direct);
 
+			/* r0 = target, r1 = arg1, r2 = arg2 */
 			jit_ldxi(j, JIT_R0, r, offsetof(struct ejit_func, direct_call));
-			jit_operand_t regs[2] = {
+			jit_operand_t regs[3] = {
 				jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R1),
 				jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_R2)
 			};
-			jit_move_operands(j, regs, direct.buf, operands_len(&direct));
+			jit_move_operands(j, regs, direct.buf, argc);
 
 			/* with args safely in registers, reset stack/state
 			 * while avoiding overwriting the call target */
-			jit_gpr_t tmp = get_callr_temp(j);
-			jit_movr(j, tmp, JIT_R0);
-
 			int frame_size = j->frame_size;
 			jit_shrink_stack(j, stack);
 			jit_leave_jit_abi(j, gprs, fprs, frame);
 
-			/* now move args into place */
-			jit_operand_t args[2] = {};
-			foreach_vec(oi, direct) {
+			/* now move args into place, making sure we avoid our
+			 * target register  */
+			jit_operand_t args[3] = {};
+			for (size_t oi = 0; oi < argc; ++oi) {
 				args[oi] = *operands_at(&direct, oi);
 			}
 
-			jit_locate_args(j, operands_len(&direct), args);
-			jit_move_operands(j, args, regs, operands_len(&direct));
-			jit_jmpr(j, tmp);
+			jit_locate_args(j, argc, args);
+
+			/* we know that at least one gpr must be free */
+			jit_gpr_t target = gpr_free(argc, args, JIT_R0) ? JIT_R0
+				         : gpr_free(argc, args, JIT_R1) ? JIT_R1
+					 : gpr_free(argc, args, JIT_R2) ? JIT_R2
+					 : (abort(), JIT_R0);
+
+			/* move our target in JIT_R0 to whatever the free
+			 * register is to avoid it being clobbered when we move
+			 * the actual arguments */
+			args[argc] = jit_operand_gpr(JIT_OPERAND_ABI_POINTER, target);
+			regs[argc] = jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0);
+			jit_move_operands(j, args, regs, argc + 1);
+			jit_jmpr(j, target);
 			j->frame_size = frame_size;
 
 			operands_reset(&src);
@@ -2641,7 +2666,7 @@ static size_t compile_fn_body(struct ejit_func *f, jit_state_t *j, void *arena,
 		case EJIT_OP_CALLI: {
 			save_caller_save_regs(f, j);
 
-			struct ejit_func *f = (struct ejit_func *)(uintptr_t)i.o;
+			struct ejit_func *f = (struct ejit_func *)i.p;
 #if __WORDSIZE != 64
 			assert(f->rtype != EJIT_INT64 && f->rtype != EJIT_UINT64);
 #endif
diff --git a/src/interp.c b/src/interp.c
index 6f94f98..894be30 100644
--- a/src/interp.c
+++ b/src/interp.c
@@ -1063,7 +1063,7 @@ top:
 	DISPATCH();
 
 	DO(TAILI);
-	f = (struct ejit_func *)(uintptr_t)i.o;
+	f = (struct ejit_func *)i.p;
 
 	assert(!f->direct_call && "trying to interpret compiled fun");
 
diff --git a/tests/makefile b/tests/makefile
index 081170f..53115de 100644
--- a/tests/makefile
+++ b/tests/makefile
@@ -32,14 +32,4 @@ COMPILE_TEST		:= $(COMPILER) $(WARNFLAGS) $(OPTFLAGS) $(LTOFLAGS) \
 
 .PHONY: check
 check: $(TESTS)
-	@echo "Running bytecode tests..."
-	@set -e; for test in $(TESTS); do \
-		echo "Testing: $$test"; \
-		./$$test; \
-	done
-	@echo "Running jit tests..."
-	@set -e; for test in $(TESTS); do \
-		echo "Testing: $$test"; \
-		./$$test 1; \
-	done
 	@echo "Success!"
-- 
cgit v1.2.3