Diffstat (limited to 'deps/lightening/lightening/arm-vfp.c')
-rw-r--r--  deps/lightening/lightening/arm-vfp.c  1168
1 file changed, 1168 insertions, 0 deletions
diff --git a/deps/lightening/lightening/arm-vfp.c b/deps/lightening/lightening/arm-vfp.c
new file mode 100644
index 0000000..208edc3
--- /dev/null
+++ b/deps/lightening/lightening/arm-vfp.c
@@ -0,0 +1,1168 @@
+/*
+ * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *      Paulo Cesar Pereira de Andrade
+ */
+
+#define ARM_V_F64        0x00000100
+#define ARM_VADD_F       0x0e300a00
+#define ARM_VSUB_F       0x0e300a40
+#define ARM_VMUL_F       0x0e200a00
+#define ARM_VDIV_F       0x0e800a00
+#define ARM_VABS_F       0x0eb00ac0
+#define ARM_VNEG_F       0x0eb10a40
+#define ARM_VSQRT_F      0x0eb10ac0
+#define ARM_VMOV_F       0x0eb00a40
+#define ARM_VMOV_A_S     0x0e100a10  /* vmov rn, sn */
+#define ARM_VMOV_S_A     0x0e000a10  /* vmov sn, rn */
+#define ARM_VMOV_D_AA    0x0c400b10  /* vmov dn, rn,rn */
+#define ARM_VCMP         0x0eb40a40
+#define ARM_VMRS         0x0ef10a10
+#define ARM_VCVT_2I      0x00040000  /* to integer */
+#define ARM_VCVT_2S      0x00010000  /* to signed */
+#define ARM_VCVT_RS      0x00000080  /* round to zero or signed */
+#define ARM_VCVT         0x0eb80a40
+#define ARM_VCVT_S32_F32 ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS
+#define ARM_VCVT_S32_F64 ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64
+#define ARM_VCVT_F32_S32 ARM_VCVT|ARM_VCVT_RS
+#define ARM_VCVT_F64_S32 ARM_VCVT|ARM_VCVT_RS|ARM_V_F64
+#define ARM_VCVT_F       0x0eb70ac0
+#define ARM_VCVT_F32_F64 ARM_VCVT_F
+#define ARM_VCVT_F64_F32 ARM_VCVT_F|ARM_V_F64
+#define ARM_P            0x00800000  /* positive offset */
+#define ARM_V_D          0x00400000
+#define ARM_V_N          0x00000080
+#define ARM_V_M          0x00000020
+#define ARM_V_I32        0x00200000
+#define ARM_VMOVI        0x02800010
+#define ARM_VMVNI        0x02800030
+#define ARM_VLDR         0x0d100a00
+#define ARM_VSTR         0x0d000a00
+#define ARM_VM           0x0c000a00
+#define ARM_VMOV_A_D     0x0e100b10
+#define ARM_VMOV_D_A     0x0e000b10
+
+#define vfp_regno(rn)    ((rn) >> 1)
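
A note on the register-numbering convention, inferred from the encoders that follow rather than stated anywhere in the file:

  /* Register numbers count s-registers: s<n> is regno n, and d<k> is the
   * even regno 2k (the emitters ASSERT this for double operands).  A
   * T32/A32 s-register index is 5 bits wide: the high 4 bits land in the
   * Vd/Vn/Vm field (hence the ">> 1" everywhere, and vfp_regno above)
   * and the low bit becomes the D/N/M extension bit (hence ARM_V_D,
   * ARM_V_N and ARM_V_M being ORed in when a regno is odd). */
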
+static void
+vodi(jit_state_t *_jit, int oi, int r0)
+{
+  ASSERT(!(oi & 0x0000f000));
+  ASSERT(!(r0 & 1));
+  r0 >>= 1;
+  emit_wide_thumb(_jit, oi|(_u4(r0)<<12));
+}
+
+static void
+vo_ss(jit_state_t *_jit, int o, int r0, int r1)
+{
+  ASSERT(!(o & 0xf000f00f));
+  if (r0 & 1) o |= ARM_V_D;
+  if (r1 & 1) o |= ARM_V_M;
+  r0 >>= 1; r1 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1));
+}
+
+static void
+vo_dd(jit_state_t *_jit, int o, int r0, int r1)
+{
+  ASSERT(!(o & 0xf000f00f));
+  ASSERT(!(r0 & 1) && !(r1 & 1));
+  r0 >>= 1; r1 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1));
+}
+
+static void
+vors_(jit_state_t *_jit, int o, int r0, int r1)
+{
+  ASSERT(!(o & 0xf000f00f));
+  if (r1 & 1) o |= ARM_V_N;
+  r1 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12));
+}
+
+static void
+vori_(jit_state_t *_jit, int o, int r0, int r1)
+{
+  ASSERT(!(o & 0xf000f00f));
+  /* use same bit pattern, to set opc1... */
+  if (r1 & 1) o |= ARM_V_I32;
+  r1 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12));
+}
+
+static void
+vorrd(jit_state_t *_jit, int o, int r0, int r1, int r2)
+{
+  ASSERT(!(o & 0xf00ff00f));
+  ASSERT(!(r2 & 1));
+  r2 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
+}
+
+static void
+vosss(jit_state_t *_jit, int o, int r0, int r1, int r2)
+{
+  ASSERT(!(o & 0xf00ff00f));
+  if (r0 & 1) o |= ARM_V_D;
+  if (r1 & 1) o |= ARM_V_N;
+  if (r2 & 1) o |= ARM_V_M;
+  r0 >>= 1; r1 >>= 1; r2 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
+}
+
+static void
+voddd(jit_state_t *_jit, int o, int r0, int r1, int r2)
+{
+  ASSERT(!(o & 0xf00ff00f));
+  ASSERT(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
+  r0 >>= 1; r1 >>= 1; r2 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2));
+}
+
+static void
+vldst(jit_state_t *_jit, int o, int r0, int r1, int i0)
+{
+  /* i0 << 2 is byte offset */
+  ASSERT(!(o & 0xf00ff0ff));
+  if (r0 & 1) {
+    ASSERT(!(o & ARM_V_F64));
+    o |= ARM_V_D;
+  }
+  r0 >>= 1;
+  emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0));
+}
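
To make the field packing concrete, here is a worked example of what vosss() emits for vadd.f32 s1, s2, s3. The arithmetic is mine, derived from the masks above, and it assumes ARM_CC_AL is 0xe0000000 as in the core ARM backend:

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
    uint32_t o = 0x0e300a00;          /* ARM_VADD_F */
    o |= 0x00400000;                  /* r0 = s1 is odd -> ARM_V_D */
    o |= 0x00000020;                  /* r2 = s3 is odd -> ARM_V_M */
    uint32_t w = 0xe0000000u          /* ARM_CC_AL (assumed value) */
               | o
               | ((2u >> 1) << 16)    /* Vn field from r1 = s2 */
               | ((1u >> 1) << 12)    /* Vd field from r0 = s1 */
               | (3u >> 1);           /* Vm field from r2 = s3 */
    printf("%#010x\n", w);            /* 0xee710a21 = vadd.f32 s1, s2, s3 */
    return 0;
  }
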
+static void
+VADD_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  vosss(_jit,ARM_VADD_F,r0,r1,r2);
+}
+
+static void
+VADD_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  voddd(_jit,ARM_VADD_F|ARM_V_F64,r0,r1,r2);
+}
+
+static void
+VSUB_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  vosss(_jit,ARM_VSUB_F,r0,r1,r2);
+}
+
+static void
+VSUB_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  voddd(_jit,ARM_VSUB_F|ARM_V_F64,r0,r1,r2);
+}
+
+static void
+VMUL_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  vosss(_jit,ARM_VMUL_F,r0,r1,r2);
+}
+
+static void
+VMUL_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  voddd(_jit,ARM_VMUL_F|ARM_V_F64,r0,r1,r2);
+}
+
+static void
+VDIV_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  vosss(_jit,ARM_VDIV_F,r0,r1,r2);
+}
+
+static void
+VDIV_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  voddd(_jit,ARM_VDIV_F|ARM_V_F64,r0,r1,r2);
+}
+
+static void
+VABS_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VABS_F,r0,r1);
+}
+
+static void
+VABS_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_dd(_jit,ARM_VABS_F|ARM_V_F64,r0,r1);
+}
+
+static void
+VNEG_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VNEG_F,r0,r1);
+}
+
+static void
+VNEG_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_dd(_jit,ARM_VNEG_F|ARM_V_F64,r0,r1);
+}
+
+static void
+VSQRT_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VSQRT_F,r0,r1);
+}
+
+static void
+VSQRT_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_dd(_jit,ARM_VSQRT_F|ARM_V_F64,r0,r1);
+}
+
+static void
+VMOV_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VMOV_F,r0,r1);
+}
+
+static void
+VMOV_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_dd(_jit,ARM_VMOV_F|ARM_V_F64,r0,r1);
+}
+
+static void
+VMOV_D_AA(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  vorrd(_jit,ARM_VMOV_D_AA,r1,r2,r0);
+}
+
+static void
+VMOV_S_A(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vors_(_jit,ARM_VMOV_S_A,r1,r0);
+}
+
+static void
+VCMP_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCMP,r0,r1);
+}
+
+static void
+VCMP_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_dd(_jit,ARM_VCMP|ARM_V_F64,r0,r1);
+}
+
+static void
+VMRS(jit_state_t *_jit)
+{
+  emit_wide_thumb(_jit, ARM_CC_AL|ARM_VMRS|(0xf<<12));
+}
+
+static void
+VCVT_S32_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCVT_S32_F32,r0,r1);
+}
+
+static void
+VCVT_S32_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCVT_S32_F64,r0,r1);
+}
+
+static void
+VCVT_F32_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCVT_F32_S32,r0,r1);
+}
+
+static void
+VCVT_F64_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCVT_F64_S32,r0,r1);
+}
+
+static void
+VCVT_F32_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCVT_F32_F64,r0,r1);
+}
+
+static void
+VCVT_F64_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vo_ss(_jit,ARM_VCVT_F64_F32,r0,r1);
+}
+
+static void
+VMOV_A_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vori_(_jit,ARM_VMOV_A_D,r0,r1);
+}
+
+static void
+VMOV_V_I32(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  vori_(_jit,ARM_VMOV_D_A,r1,r0);
+}
+
+/* "oi" should be the result of encode_vfp_double */
+static void
+VIMM(jit_state_t *_jit, int32_t oi, int32_t r0)
+{
+  vodi(_jit, oi,r0);
+}
+
+/* index is multiplied by four */
+static void
+VLDRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VLDR,r0,r1,i0);
+}
+
+static void
+VLDR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VLDR|ARM_P,r0,r1,i0);
+}
+
+static void
+VLDRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VLDR|ARM_V_F64,r0,r1,i0);
+}
+
+static void
+VLDR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0);
+}
+
+static void
+VSTRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VSTR,r0,r1,i0);
+}
+
+static void
+VSTR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VSTR|ARM_P,r0,r1,i0);
+}
+
+static void
+VSTRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VSTR|ARM_V_F64,r0,r1,i0);
+}
+
+static void
+VSTR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  vldst(_jit,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0);
+}
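
One semantic detail worth noting about the conversion opcodes above (my reading of the ARM_VCVT_RS bit, per its comment in the defines): the float-to-integer forms select round-toward-zero, which is what gives the truncr_* helpers further down C-cast truncation semantics.

  /* vcvt.s32.f64 with round-to-zero: -2.7 -> -2 and 2.7 -> 2, like (int)x */
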
+static void
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VABS_F32(_jit, r0,r1);
+}
+
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VABS_F64(_jit, r0,r1);
+}
+
+static void
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VNEG_F32(_jit, r0,r1);
+}
+
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VNEG_F64(_jit, r0,r1);
+}
+
+static void
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VSQRT_F32(_jit, r0,r1);
+}
+
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VSQRT_F64(_jit, r0,r1);
+}
+
+static void
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VADD_F32(_jit, r0,r1,r2);
+}
+
+static void
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VADD_F64(_jit, r0,r1,r2);
+}
+
+static void
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VSUB_F32(_jit, r0,r1,r2);
+}
+
+static void
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VSUB_F64(_jit, r0,r1,r2);
+}
+
+static void
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VMUL_F32(_jit, r0,r1,r2);
+}
+
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VMUL_F64(_jit, r0,r1,r2);
+}
+
+static void
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VDIV_F32(_jit, r0,r1,r2);
+}
+
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  VDIV_F64(_jit, r0,r1,r2);
+}
+
+static void
+cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VCMP_F32(_jit, r0, r1);
+}
+
+static void
+cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VCMP_F64(_jit, r0, r1);
+}
+
+static jit_reloc_t
+vbcmp_x(jit_state_t *_jit, int cc)
+{
+  VMRS(_jit);
+  return T2_CC_B(_jit, cc);
+}
+
+static jit_reloc_t
+vbcmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return vbcmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+vbcmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return vbcmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+vbncmp_x(jit_state_t *_jit, int cc)
+{
+  VMRS(_jit);
+  jit_reloc_t cont = T2_CC_B(_jit, cc);
+  jit_reloc_t ret = T2_B(_jit);
+  jit_patch_here(_jit, cont);
+  return ret;
+}
+
+static jit_reloc_t
+vbncmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return vbncmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+vbncmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return vbncmp_x(_jit, cc);
+}
+
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_MI, r0, r1);
+}
+
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_MI, r0, r1);
+}
+
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_LS, r0, r1);
+}
+
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_LS, r0, r1);
+}
+
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_GE, r0, r1);
+}
+
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_GE, r0, r1);
+}
+
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_GT, r0, r1);
+}
+
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_GT, r0, r1);
+}
+
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_NE, r0, r1);
+}
+
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_NE, r0, r1);
+}
+
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbncmp_f(_jit, ARM_CC_GE, r0, r1);
+}
+
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbncmp_d(_jit, ARM_CC_GE, r0, r1);
+}
+
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbncmp_f(_jit, ARM_CC_GT, r0, r1);
+}
+
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbncmp_d(_jit, ARM_CC_GT, r0, r1);
+}
+
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_HI, r0, r1);
+}
+
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_HI, r0, r1);
+}
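
For reference when reading the predicates above and below, these are the NZCV values that vcmp followed by vmrs produces, summarized from the ARM Architecture Reference Manual (the table is mine, not the source's):

  /* eq:        N=0 Z=1 C=1 V=0
   * lt:        N=1 Z=0 C=0 V=0
   * gt:        N=0 Z=0 C=1 V=0
   * unordered: N=0 Z=0 C=1 V=1
   *
   * So MI <=> lt, LS <=> lt|eq, GE <=> eq|gt, GT <=> gt, NE <=> !eq
   * (which includes unordered, as IEEE != requires), HI <=> gt|unordered,
   * VS <=> unordered, VC <=> ordered.  Predicates with no single condition
   * code, e.g. bunltr = lt|unordered = !ge, go through vbncmp_x, which
   * emits a conditional skip over an unconditional branch. */
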
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_VC, r0, r1);
+}
+
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_VC, r0, r1);
+}
+
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_f(_jit, ARM_CC_VS, r0, r1);
+}
+
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return vbcmp_d(_jit, ARM_CC_VS, r0, r1);
+}
+
+static jit_reloc_t
+buneqr_x(jit_state_t *_jit)
+{
+  VMRS(_jit);
+  jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS);
+  jit_reloc_t b = T2_CC_B(_jit, ARM_CC_NE);
+  jit_patch_here(_jit, a);
+  jit_reloc_t ret = T2_B(_jit);
+  jit_patch_here(_jit, b);
+  return ret;
+}
+
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return buneqr_x(_jit);
+}
+
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return buneqr_x(_jit);
+}
+
+static jit_reloc_t
+bunger_x(jit_state_t *_jit)
+{
+  VMRS(_jit);
+  jit_reloc_t a = T2_CC_B(_jit, ARM_CC_MI);
+  jit_reloc_t ret = T2_CC_B(_jit, ARM_CC_HS);
+  jit_patch_here(_jit, a);
+  return ret;
+}
+
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return bunger_x(_jit);
+}
+
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return bunger_x(_jit);
+}
+
+static jit_reloc_t
+bltgtr_x(jit_state_t *_jit)
+{
+  VMRS(_jit);
+  jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS);
+  jit_reloc_t b = T2_CC_B(_jit, ARM_CC_EQ);
+  jit_reloc_t ret = T2_B(_jit);
+  jit_patch_here(_jit, a);
+  jit_patch_here(_jit, b);
+  return ret;
+}
+
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_f(_jit, r0, r1);
+  return bltgtr_x(_jit);
+}
+
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  cmp_d(_jit, r0, r1);
+  return bltgtr_x(_jit);
+}
+
+static void
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VLDR_F32(_jit, r0,r1,0);
+}
+
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VLDR_F64(_jit, r0,r1,0);
+}
+
+static void
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VSTR_F32(_jit, r1,r0,0);
+}
+
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VSTR_F64(_jit, r1,r0,0);
+}
+
+static void
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 != r1)
+    VMOV_F32(_jit, r0, r1);
+}
+
+static void
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 != r1)
+    VMOV_F64(_jit, r0, r1);
+}
+static int
+encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
+{
+  int code, mode, imm, mask;
+
+  if (hi != lo) {
+    if (mov && !inv) {
+      /* (I64)
+       *  aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
+       */
+      for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+        imm = lo & mask;
+        if (imm != mask && imm != 0)
+          goto fail;
+        imm = hi & mask;
+        if (imm != mask && imm != 0)
+          goto fail;
+      }
+      mode = 0xe20;
+      imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
+             ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >>  3) |
+             ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
+             ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >>  7));
+      goto success;
+    }
+    goto fail;
+  }
+  /* (I32)
+   *  00000000 00000000 00000000 abcdefgh
+   *  00000000 00000000 abcdefgh 00000000
+   *  00000000 abcdefgh 00000000 00000000
+   *  abcdefgh 00000000 00000000 00000000 */
+  for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+    if ((lo & mask) == lo) {
+      imm = lo >> (mode << 3);
+      mode <<= 9;
+      goto success;
+    }
+  }
+  /* (I16)
+   *  00000000 abcdefgh 00000000 abcdefgh
+   *  abcdefgh 00000000 abcdefgh 00000000 */
+  for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
+    if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
+      imm = lo >> (mode << 3);
+      mode = 0x800 | (mode << 9);
+      goto success;
+    }
+  }
+  if (mov) {
+    /* (I32)
+     *  00000000 00000000 abcdefgh 11111111
+     *  00000000 abcdefgh 11111111 11111111 */
+    for (mode = 0, mask = 0xff; mode < 2;
+         mask = (mask << 8) | 0xff, mode++) {
+      if ((lo & mask) == mask &&
+          !((lo & ~mask) >> 8) &&
+          (imm = lo >> (8 + (mode << 8)))) {
+        mode = 0xc00 | (mode << 8);
+        goto success;
+      }
+    }
+    if (!inv) {
+      /* (F32)
+       *  aBbbbbbc defgh000 00000000 00000000
+       *  from the ARM Architecture Reference Manual:
+       *  In this entry, B = NOT(b). The bit pattern represents the
+       *  floating-point number (-1)^s * 2^exp * mantissa, where
+       *  S = UInt(a),
+       *  exp = UInt(NOT(b):c:d)-3 and
+       *  mantissa = (16+UInt(e:f:g:h))/16. */
+      if ((lo & 0x7ffff) == 0 &&
+          (((lo & 0x7e000000) == 0x3e000000) ||
+           ((lo & 0x7e000000) == 0x40000000))) {
+        mode = 0xf00;
+        imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
+        goto success;
+      }
+    }
+  }
+
+fail:
+  /* need another approach (load from memory, move from arm register, etc) */
+  return -1;
+
+success:
+  code = inv ? ARM_VMVNI : ARM_VMOVI;
+  switch ((mode & 0xf00) >> 8) {
+    case 0x0: case 0x2: case 0x4: case 0x6:
+    case 0x8: case 0xa:
+      if (inv) mode |= 0x20;
+      if (!mov) mode |= 0x100;
+      break;
+    case 0x1: case 0x3: case 0x5: case 0x7:
+      /* should actually not reach here */
+      ASSERT(!inv);
+    case 0x9: case 0xb:
+      ASSERT(!mov);
+      break;
+    case 0xc: case 0xd:
+      /* should actually not reach here */
+      ASSERT(inv);
+    case 0xe:
+      ASSERT(mode & 0x20);
+      ASSERT(mov && !inv);
+      break;
+    default:
+      ASSERT(!(mode & 0x20));
+      break;
+  }
+  imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
+  code |= mode | imm;
+
+  if (code & 0x1000000)
+    code |= 0xff000000;
+  else
+    code |= 0xef000000;
+
+  return code;
+}
+
+static void
+movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+{
+  union { int32_t i; jit_float32_t f; } u = { .f = i0 };
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), u.i);
+  VMOV_S_A(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
+
+static void
+movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+{
+  union { int32_t i[2]; jit_float64_t d; } u = { .d = i0 };
+  int32_t code;
+  if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
+      (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
+    VIMM(_jit, code, r0);
+  else {
+    jit_gpr_t rg0 = get_temp_gpr(_jit);
+    jit_gpr_t rg1 = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(rg0), u.i[0]);
+    movi(_jit, jit_gpr_regno(rg1), u.i[1]);
+    VMOV_D_AA(_jit, r0, jit_gpr_regno(rg0), jit_gpr_regno(rg1));
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+  }
+}
+
+static void
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VCVT_F64_F32(_jit, r0, r1);
+}
+
+static void
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VCVT_F32_F64(_jit, r0, r1);
+}
+
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VMOV_V_I32(_jit, r0, r1);
+  VCVT_F32_S32(_jit, r0, r0);
+}
+
+static void
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  VMOV_V_I32(_jit, r0, r1);
+  VCVT_F64_S32(_jit, r0, r0);
+}
+
+static void
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_fpr_t reg = get_temp_fpr(_jit);
+  VCVT_S32_F32(_jit, jit_fpr_regno(reg), r1);
+  VMOV_A_S32(_jit, r0, jit_fpr_regno(reg));
+  unget_temp_fpr(_jit);
+}
+
+static void
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_fpr_t reg = get_temp_fpr(_jit);
+  VCVT_S32_F64(_jit, jit_fpr_regno(reg), r1);
+  VMOV_A_S32(_jit, r0, jit_fpr_regno(reg));
+  unget_temp_fpr(_jit);
+}
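
A worked instance of the (F32) case in encode_vfp_double, re-deriving imm8 for a few constants. This is a standalone sketch that mirrors the checks above rather than calling the (static) function itself:

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  /* Same test and imm8 extraction as the (F32) branch above. */
  static int f32_imm8(float f)
  {
    uint32_t lo;
    memcpy(&lo, &f, sizeof lo);
    if ((lo & 0x7ffff) == 0 &&                  /* low 19 mantissa bits zero */
        (((lo & 0x7e000000) == 0x3e000000) ||   /* exponent in range */
         ((lo & 0x7e000000) == 0x40000000)))
      return ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
    return -1;                                  /* not a VFP immediate */
  }

  int main(void)
  {
    printf("1.0f -> %#x\n", f32_imm8(1.0f));    /* 0x70 */
    printf("2.0f -> %#x\n", f32_imm8(2.0f));    /* 0 */
    printf("0.1f -> %d\n",  f32_imm8(0.1f));    /* -1 */
    return 0;
  }

Values that fail every pattern, 0.1f included, take the fallback path in movi_d: the bits are materialized in core registers with movi() and transferred with VMOV_D_AA.
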
+static void
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t gpr = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(gpr), i0);
+  VLDR_F32(_jit, r0, jit_gpr_regno(gpr), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r1, r2);
+  VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r1, r2);
+  VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDR_F32(_jit, r0, r1, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+  else {
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDRN_F32(_jit, r0, r1, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+}
+
+static void
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDR_F64(_jit, r0, r1, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+  else {
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VLDRN_F64(_jit, r0, r1, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r1, i0);
+      VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+}
+
+static void
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  VSTR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  VSTR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r0, r1);
+  VSTR_F32(_jit, r2, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  addr(_jit, jit_gpr_regno(reg), r0, r1);
+  VSTR_F64(_jit, r2, jit_gpr_regno(reg), 0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTR_F32(_jit, r1, r0, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+  else {
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTRN_F32(_jit, r1, r0, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+}
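
The 1024 bound in the ldxi/stxi helpers comes from VLDR/VSTR's 8-bit offset field, which counts words, not bytes; a quick check of the arithmetic (mine, not the source's):

  /* byte offset 1020 -> imm8 = 1020 >> 2 = 255, the largest encodable;
   * byte offset 1024 -> would need imm8 = 256, so the helpers fall back
   * to materializing base+offset in a scratch GPR and using offset 0.
   * Negative offsets use the VLDRN/VSTRN forms (ARM_P clear). */
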
+static void
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 >= 0) {
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTR_F64(_jit, r1, r0, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      addi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+  else {
+    i0 = -i0;
+    ASSERT(!(i0 & 3));
+    if (i0 < 1024)
+      VSTRN_F64(_jit, r1, r0, i0 >> 2);
+    else {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      subi(_jit, jit_gpr_regno(reg), r0, i0);
+      VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0);
+      unget_temp_gpr(_jit);
+    }
+  }
+}
+
+static void
+retr_d(jit_state_t *_jit, int32_t r)
+{
+  movr_d(_jit, jit_fpr_regno(_D0), r);
+  ret(_jit);
+}
+
+static void
+retr_f(jit_state_t *_jit, int32_t r)
+{
+  movr_f(_jit, jit_fpr_regno(_S0), r);
+  ret(_jit);
+}
+
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{
+  movr_f(_jit, r0, jit_fpr_regno(_S0));
+}
+
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{
+  movr_d(_jit, r0, jit_fpr_regno(_D0));
+}
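
Finally, a hypothetical sketch of how a caller might lower "double add2(double a, double b)" with these emitters. It assumes the hard-float VFP ABI (arguments arriving in d0 and d1) and that _D0 is regno 0, neither of which this file itself establishes:

  static void
  emit_add2(jit_state_t *_jit)
  {
    /* d1 is regno 2 in this file's s-register numbering */
    addr_d(_jit, 0, 0, 2);   /* vadd.f64 d0, d0, d1 */
    retr_d(_jit, 0);         /* movr_d to d0 is a no-op here, then ret() */
  }
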